1 {-# LANGUAGE MultiParamTypeClasses, FlexibleContexts #-}
    2 
    3 module Text.RegExp.Matching where
    4 
    5 import Data.Semiring
    6 import Text.RegExp.Data
    7 
    8 import Text.RegExp.Matching.Leftmost.Type
    9 import Text.RegExp.Matching.Longest.Type
   10 import Text.RegExp.Matching.LeftLong.Type
   11 
   12 -- |
   13 -- Checks whether a regular expression matches the given word. For
   14 -- example, @acceptFull (fromString \"b|abc\") \"b\"@ yields @True@
   15 -- because the first alternative of @b|abc@ matches the string
   16 -- @\"b\"@.
   17 -- 
   18 acceptFull :: RegExp c -> [c] -> Bool
   19 acceptFull r = fullMatch r
   20 
   21 -- |
   22 -- Checks whether a regular expression matches a subword of the given
   23 -- word. For example, @acceptPartial (fromString \"b\") \"abc\"@
   24 -- yields @True@ because @\"abc\"@ contains the substring @\"b\"@.
   25 -- 
   26 acceptPartial :: RegExp c -> [c] -> Bool
   27 acceptPartial r = partialMatch r
   28 
   29 -- |
   30 -- Computes in how many ways a word can be matched against a regular
   31 -- expression.
   32 -- 
   33 matchingCount :: Num a => RegExp c -> [c] -> a
   34 matchingCount r = getNumeric . fullMatch r
   35 
   36 {-# SPECIALIZE matchingCount :: RegExp c -> [c] -> Int #-}
   37 
   38 -- |
   39 -- Matches a regular expression against a word computing a weight in
   40 -- an arbitrary semiring.
   41 -- 
   42 -- The symbols can have associated weights associated by the
   43 -- 'symWeight' function of the 'Weight' class. This function also
   44 -- allows to adjust the type of the used alphabet such that, for
   45 -- example, positional information can be taken into account by
   46 -- 'zip'ping the word with positions.
   47 -- 
   48 fullMatch :: Weight a b w => RegExp a -> [b] -> w
   49 fullMatch (RegExp r) = matchW (weighted r)
   50 
   51 {-# SPECIALIZE fullMatch :: RegExp c -> [c] -> Bool #-}
   52 {-# SPECIALIZE fullMatch :: RegExp c -> [c] -> Numeric Int #-}
   53 {-# SPECIALIZE fullMatch :: Num a => RegExp c -> [c] -> Numeric a #-}
   54 {-# SPECIALIZE fullMatch :: RegExp c -> [(Int,c)] -> Leftmost #-}
   55 {-# SPECIALIZE fullMatch :: RegExp c -> [c] -> Longest #-}
   56 {-# SPECIALIZE fullMatch :: RegExp c -> [(Int,c)] -> LeftLong #-}
   57 
   58 -- |
   59 -- Matches a regular expression against substrings of a word computing
   60 -- a weight in an arbitrary semiring. Similar to 'fullMatch' the
   61 -- 'Weight' class is used to associate weights to the symbols of the
   62 -- regular expression.
   63 -- 
   64 partialMatch :: Weight a b w => RegExp a -> [b] -> w
   65 partialMatch (RegExp r) = matchW (arb `seqW` weighted r `seqW` arb)
   66  where RegExp arb = rep anySym
   67 
   68 {-# SPECIALIZE partialMatch :: RegExp c -> [c] -> Bool #-}
   69 {-# SPECIALIZE partialMatch :: RegExp c -> [c] -> Numeric Int #-}
   70 {-# SPECIALIZE partialMatch :: Num a => RegExp c -> [c] -> Numeric a #-}
   71 {-# SPECIALIZE partialMatch :: RegExp c -> [(Int,c)] -> Leftmost #-}
   72 {-# SPECIALIZE partialMatch :: RegExp c -> [c] -> Longest #-}
   73 {-# SPECIALIZE partialMatch :: RegExp c -> [(Int,c)] -> LeftLong #-}
   74 
   75 matchW :: Semiring w => RegW w c -> [c] -> w
   76 matchW r []     = empty r
   77 matchW r (c:cs) = final (foldl (shiftW zero) (shiftW one r c) cs)
   78 
   79 {-# SPECIALIZE matchW :: RegW Bool c -> [c] -> Bool #-}
   80 {-# SPECIALIZE matchW :: RegW (Numeric Int) c -> [c] -> Numeric Int #-}
   81 {-# SPECIALIZE matchW :: Num a => RegW (Numeric a) c -> [c] -> Numeric a #-}
   82 {-# SPECIALIZE matchW :: RegW Leftmost (Int,c) -> [(Int,c)] -> Leftmost #-}
   83 {-# SPECIALIZE matchW :: RegW Longest c -> [c] -> Longest #-}
   84 {-# SPECIALIZE matchW :: RegW LeftLong (Int,c) -> [(Int,c)] -> LeftLong #-}
   85 
   86 shiftW :: Semiring w => w -> RegW w c -> c -> RegW w c
   87 shiftW w r c | active r || w /= zero = shift w (reg r) c
   88              | otherwise             = r
   89 
   90 shift :: Semiring w => w -> Reg w c -> c -> RegW w c
   91 shift _ Eps       _ = epsW
   92 shift w (Sym s f) c = let w' = w .*. f c
   93                        in (symW s f) { active = w' /= zero, final_ = w' }
   94 shift w (Alt p q) c = altW (shiftW w p c) (shiftW w q c)
   95 shift w (Seq p q) c = seqW (shiftW w p c)
   96                            (shiftW (w .*. empty p .+. final p) q c)
   97 shift w (Rep r)   c = repW (shiftW (w .+. final r) r c)