せっかくパースしたのでマッチも。
とりあえずNFAに変換します。NFAの各ステートは、入力文字列から次のステートと残りの入力への関数と考えられます。
data NFAState a = NFAState { runNFA :: [a] -> [(NFAState a, [a])] } | NFAEndState compile :: Regex -> NFAState Char compile (Regex branch) = compileBranch NFAEndState branch compileBranch :: NFAState Char -> Branch -> NFAState Char compileBranch next branch = NFAState $ newState where newState s = zip (compileBranch' branch) (cycle [s]) compileBranch' [] = [next] compileBranch' seqs = map (compileSeq next) seqs compileSeq ::NFAState Char -> Seq -> NFAState Char compileSeq next (piece:seq) = compilePiece (compileSeq next seq) piece compileSeq next [] = next compilePiece :: NFAState Char -> Piece -> NFAState Char compilePiece next (a, None) = compileAtom next a compilePiece next (a, Optional) = NFAState $ newState where newState s = [(compileAtom next a, s), (next, s)] compilePiece next (a, Repeat) = NFAState $ newState where newState s = [(compileAtom (NFAState newState) a, s), (next, s)] compileAtom :: NFAState Char -> Atom -> NFAState Char compileAtom next (CharAtom char) = NFAState $ newState where newState (c:cs) | c == char = [(next, cs)] newState _ = [] compileAtom next (Group branch) = compileBranch next branch
そのまま非決定性計算を使ってマッチさせています。
match :: String -> String -> Maybe Bool match regex s = parse regex >>= Just . flip matchRegex s matchRegex :: Regex -> String -> Bool matchRegex = matchNFA . compile matchNFA :: NFAState Char -> String -> Bool matchNFA nfa = or . matchNFA' nfa where matchNFA' :: NFAState Char -> String -> [Bool] matchNFA' NFAEndState [] = [True] matchNFA' NFAEndState _ = [False] matchNFA' nfa s = let next = runNFA nfa s in concatMap (uncurry matchNFA') next searchNFA :: NFAState Char -> String -> Bool searchNFA nfa = or . concatMap (searchNFA' nfa) . takeWhile (not . null) . iterate tail where searchNFA' :: NFAState Char -> String -> [Bool] searchNFA' NFAEndState _ = [True] searchNFA' nfa s = let next = runNFA nfa s in concatMap (uncurry searchNFA') next
これを使って作ったgrepも。
grep :: String -> [FilePath] -> IO () grep regex paths = case parse regex of Just regex -> mapM_ (flip grepNFA $ compile regex) paths Nothing -> putStrLn $ "Invalid pattern: `" ++ regex ++ "'" grepNFA :: FilePath -> NFAState Char -> IO () grepNFA path nfa = catch (readFile path >>= mapM_ (uncurry printLine) . matchedLines . lines) (\e -> putStrLn $ "File not found: " ++ path ++ " (" ++ show e ++ ")") where matchedLines :: (Enum a, Num a) => [String] -> [(String, a)] matchedLines l = [ (s, n) | (s, n) <- zip l [1..], searchNFA nfa s ] printLine :: Show a => String -> a -> IO () printLine s n = putStrLn (path ++ ":" ++ show n ++ ":" ++ s)