だいぶ昔にパーサーモナドの勉強のために書いた正規表現のパーサーをApplicativeを使って書き直してみました。
import Control.Applicative
import Control.Arrow (first)
import Prelude hiding (seq)
newtype Parser input output = Parser { runParser :: input -> Maybe (output, input) }
instance Functor (Parser input) where
fmap f parser = Parser $ \i -> first f <$> runParser parser i
instance Applicative (Parser input) where
pure v = Parser $ \i -> pure (v, i)
f <*> v = Parser $ \i -> do (g, r) <- runParser f i
(o, r') <- runParser v r
return (g o, r')
instance Alternative (Parser input) where
empty = Parser $ const empty
l <|> r = Parser $ \i -> runParser l i <|> runParser r i
token :: Eq a => a -> Parser [a] a
token c = tokenOf (== c)
tokenOf :: (a -> Bool) -> Parser [a] a
tokenOf p = Parser tokenOf'
where
tokenOf' (c:cs) | p c = pure (c, cs)
tokenOf' _ = empty
sepBy1 :: Eq a => Parser [a] b -> a -> Parser [a] [b]
sepBy1 parser sep = some
where
some = many <|> pure <$> parser
many = (:) <$> (parser <* token sep) <*> some
end :: Parser [a] ()
end = Parser end'
where
end' [] = pure ((), [])
end' _ = empty
newtype Regex = Regex Branch
deriving Show
type Branch = [Seq]
type Seq = [Piece]
type Piece = (Atom, Quantifier)
data Atom = CharAtom Char
| Group Branch
deriving Show
data Quantifier = None
| Optional
| Repeat
deriving Show
parse :: String -> Maybe Regex
parse s = (runParser parser) s >>= return . Regex . fst
where
parser = branch <* end
branch :: Parser String Branch
branch = sepBy1 seq '|'
seq :: Parser String Seq
seq = many piece
piece :: Parser String Piece
piece = (,) <$> atom <*> quantifier
atom :: Parser String Atom
atom = charAtom <|> group
charAtom :: Parser String Atom
charAtom = CharAtom <$> charOf isAtomChar
group :: Parser String Atom
group = Group <$> (char '(' *> branch <* char ')')
quantifier :: Parser String Quantifier
quantifier = Repeat <$ char '*' <|> Optional <$ char '?' <|> pure None
char :: Char -> Parser String Char
char = token
charOf :: (Char -> Bool) -> Parser String Char
charOf = tokenOf
isAtomChar :: Char -> Bool
isAtomChar = flip notElem "|()*?"
たとえば、モナドを使うと、
piece = do a <- atom
q <- quantifier
return (a, q)
group = do char '('
b <- branch
char ')'
return $ Group b
のような書き方になるでしょうか*1。
慣れの問題かもしれませんが、モナドを使って中間の値に名前を付けるスタイルの方がぱっと見のわかりやすさでは有利かもしれません。
*1 もちろん、モナドを使った場合でも、apとかliftM系を使って書くことはできますけど