1 {-# LANGUAGE MultiParamTypeClasses, FlexibleContexts #-} 2 3 module Text.RegExp.Matching where 4 5 import Data.Semiring 6 import Text.RegExp.Data 7 8 import Text.RegExp.Matching.Leftmost.Type 9 import Text.RegExp.Matching.Longest.Type 10 import Text.RegExp.Matching.LeftLong.Type 11 12 -- | 13 -- Checks whether a regular expression matches the given word. For 14 -- example, @acceptFull (fromString \"b|abc\") \"b\"@ yields @True@ 15 -- because the first alternative of @b|abc@ matches the string 16 -- @\"b\"@. 17 -- 18 acceptFull :: RegExp c -> [c] -> Bool 19 acceptFull r = fullMatch r 20 21 -- | 22 -- Checks whether a regular expression matches a subword of the given 23 -- word. For example, @acceptPartial (fromString \"b\") \"abc\"@ 24 -- yields @True@ because @\"abc\"@ contains the substring @\"b\"@. 25 -- 26 acceptPartial :: RegExp c -> [c] -> Bool 27 acceptPartial r = partialMatch r 28 29 -- | 30 -- Computes in how many ways a word can be matched against a regular 31 -- expression. 32 -- 33 matchingCount :: Num a => RegExp c -> [c] -> a 34 matchingCount r = getNumeric . fullMatch r 35 36 {-# SPECIALIZE matchingCount :: RegExp c -> [c] -> Int #-} 37 38 -- | 39 -- Matches a regular expression against a word computing a weight in 40 -- an arbitrary semiring. 41 -- 42 -- The symbols can have associated weights associated by the 43 -- 'symWeight' function of the 'Weight' class. This function also 44 -- allows to adjust the type of the used alphabet such that, for 45 -- example, positional information can be taken into account by 46 -- 'zip'ping the word with positions. 47 -- 48 fullMatch :: Weight a b w => RegExp a -> [b] -> w 49 fullMatch (RegExp r) = matchW (weighted r) 50 51 {-# SPECIALIZE fullMatch :: RegExp c -> [c] -> Bool #-} 52 {-# SPECIALIZE fullMatch :: RegExp c -> [c] -> Numeric Int #-} 53 {-# SPECIALIZE fullMatch :: Num a => RegExp c -> [c] -> Numeric a #-} 54 {-# SPECIALIZE fullMatch :: RegExp c -> [(Int,c)] -> Leftmost #-} 55 {-# SPECIALIZE fullMatch :: RegExp c -> [c] -> Longest #-} 56 {-# SPECIALIZE fullMatch :: RegExp c -> [(Int,c)] -> LeftLong #-} 57 58 -- | 59 -- Matches a regular expression against substrings of a word computing 60 -- a weight in an arbitrary semiring. Similar to 'fullMatch' the 61 -- 'Weight' class is used to associate weights to the symbols of the 62 -- regular expression. 63 -- 64 partialMatch :: Weight a b w => RegExp a -> [b] -> w 65 partialMatch (RegExp r) = matchW (arb `seqW` weighted r `seqW` arb) 66 where RegExp arb = rep anySym 67 68 {-# SPECIALIZE partialMatch :: RegExp c -> [c] -> Bool #-} 69 {-# SPECIALIZE partialMatch :: RegExp c -> [c] -> Numeric Int #-} 70 {-# SPECIALIZE partialMatch :: Num a => RegExp c -> [c] -> Numeric a #-} 71 {-# SPECIALIZE partialMatch :: RegExp c -> [(Int,c)] -> Leftmost #-} 72 {-# SPECIALIZE partialMatch :: RegExp c -> [c] -> Longest #-} 73 {-# SPECIALIZE partialMatch :: RegExp c -> [(Int,c)] -> LeftLong #-} 74 75 matchW :: Semiring w => RegW w c -> [c] -> w 76 matchW r [] = empty r 77 matchW r (c:cs) = final (foldl (shiftW zero) (shiftW one r c) cs) 78 79 {-# SPECIALIZE matchW :: RegW Bool c -> [c] -> Bool #-} 80 {-# SPECIALIZE matchW :: RegW (Numeric Int) c -> [c] -> Numeric Int #-} 81 {-# SPECIALIZE matchW :: Num a => RegW (Numeric a) c -> [c] -> Numeric a #-} 82 {-# SPECIALIZE matchW :: RegW Leftmost (Int,c) -> [(Int,c)] -> Leftmost #-} 83 {-# SPECIALIZE matchW :: RegW Longest c -> [c] -> Longest #-} 84 {-# SPECIALIZE matchW :: RegW LeftLong (Int,c) -> [(Int,c)] -> LeftLong #-} 85 86 shiftW :: Semiring w => w -> RegW w c -> c -> RegW w c 87 shiftW w r c | active r || w /= zero = shift w (reg r) c 88 | otherwise = r 89 90 shift :: Semiring w => w -> Reg w c -> c -> RegW w c 91 shift _ Eps _ = epsW 92 shift w (Sym s f) c = let w' = w .*. f c 93 in (symW s f) { active = w' /= zero, final_ = w' } 94 shift w (Alt p q) c = altW (shiftW w p c) (shiftW w q c) 95 shift w (Seq p q) c = seqW (shiftW w p c) 96 (shiftW (w .*. empty p .+. final p) q c) 97 shift w (Rep r) c = repW (shiftW (w .+. final r) r c)