├── .gitignore ├── ChangeLog.md ├── LICENSE ├── README.md ├── Setup.hs ├── app └── Main.hs ├── package.yaml ├── publish.sh ├── src ├── DFA.hs ├── NFA.hs ├── Regex.hs └── RegexEquality.hs ├── stack.yaml └── test └── Spec.hs /.gitignore: -------------------------------------------------------------------------------- 1 | .stack-work/ 2 | regex-equality.cabal 3 | *~ -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | # Changelog for regex-equality 2 | 3 | ## Unreleased changes 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Joonatan Saarhelo (c) 2018 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Author name here nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Regex Equivalence Checker 2 | 3 | [Try it online!](https://regex-equality.herokuapp.com) 4 | 5 | Understands `+`, `*`, `|` and parens. The aforementioned characters can be escaped with a backslash. 6 | 7 | Build with `stack build`. -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /app/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE QuasiQuotes #-} 3 | 4 | import Network.Wai 5 | import Network.Wai.Handler.Warp 6 | import Network.HTTP.Types (status200) 7 | import Network.HTTP.Types.URI (queryToQueryText) 8 | import Control.Monad (join) 9 | import System.Environment (getEnv) 10 | import Text.Hamlet 11 | import Text.Blaze.Html.Renderer.Utf8 (renderHtmlBuilder) 12 | import Prelude hiding (null) 13 | import Data.Text 14 | 15 | import qualified RegexEquality as R 16 | 17 | main = do 18 | port <- getEnv "PORT" 19 | run (read port) app 20 | 21 | app :: Network.Wai.Request 22 | -> (Network.Wai.Response -> IO ResponseReceived) 23 | -> IO ResponseReceived 24 | app req respond = respond $ responseBuilder status200 headers content where 25 | headers = 26 | [("Content-Type", "text/html")] 27 | content = 28 | renderHtmlBuilder $ template result render 29 | result = 30 | handle $ queryToQueryText $ queryString req 31 | 32 | data Route = Frontpage 33 | 34 | render :: Route -> [(Text, Text)] -> Text 35 | render Frontpage _ = "/" 36 | 37 | template :: Maybe (Either Text Comparison) -> HtmlUrl Route 38 | template result = [hamlet| 39 | $doctype 5 40 | 41 | 42 | 43 | Regex Equivalence 44 | <meta name="viewport" content="width=device-width, initial-scale=1.0"> 45 | <meta charset="utf-8"> 46 | 47 | <body> 48 | <h1>Regex Equivalence 49 | <p>Find out if two regular expressions match the same language 50 | 51 | <form action=@{Frontpage} method=GET> 52 | <input type=text name=a> 53 | <input type=text name=b> 54 | <input type=submit value=compare> 55 | 56 | $maybe res <- result 57 | $case res 58 | $of Left err 59 | <p>#{err} 60 | $of Right Nothing 61 | <p>They are equivalent! 62 | $of Right (Just (example, (matching, not_matching))) 63 | <p> 64 | '#{matching}' matches the 65 | $if null example 66 | empty string 67 | $else 68 | string '#{example}' 69 | unlike '#{not_matching}' 70 | $nothing 71 | 72 | <p>A subset of Perl-style regex is supported: 73 | <ul> 74 | <li> character -> character 75 | <li> x* -> zero or more x 76 | <li> x+ -> one or more x 77 | <li> x? -> one or zero x 78 | <li> foo|bar -> either foo or bar 79 | <li> parens to indicate precedence 80 | <li> backslash to escape aforementioned characters 81 | |] 82 | 83 | type Comparison = Maybe (Text, (Text, Text)) 84 | 85 | handle :: [(Text, Maybe Text)] -> Maybe (Either Text Comparison) 86 | handle q = do 87 | a <- join $ lookup "a" q 88 | b <- join $ lookup "b" q 89 | return $ 90 | case R.counterexample a b of 91 | Right (Just (example, firstMatches)) -> 92 | Right (Just (pack example, (if firstMatches then (a, b) else (b, a)))) 93 | 94 | Right Nothing -> Right Nothing 95 | Left err -> Left $ pack err 96 | -------------------------------------------------------------------------------- /package.yaml: -------------------------------------------------------------------------------- 1 | name: regex-equality 2 | version: 0.1.0.0 3 | github: "githubuser/regex-equality" 4 | license: BSD3 5 | author: "Author name here" 6 | maintainer: "example@example.com" 7 | copyright: "2018 Author name here" 8 | 9 | extra-source-files: 10 | - README.md 11 | - ChangeLog.md 12 | 13 | # Metadata used when publishing your package 14 | # synopsis: Short description of your package 15 | # category: Web 16 | 17 | # To avoid duplicated efforts in documentation and dealing with the 18 | # complications of embedding Haddock markup inside cabal files, it is 19 | # common to point users to the README.md file. 20 | description: Please see the README on Github at <https://github.com/githubuser/regex-equality#readme> 21 | 22 | dependencies: 23 | - base >= 4.7 && < 5 24 | - containers >= 0.5.11 25 | - mtl 26 | - multimap 27 | - hspec 28 | - parsec 29 | - warp 30 | - wai 31 | - wai-extra 32 | - http-types 33 | - text 34 | - shakespeare 35 | - blaze-html 36 | 37 | library: 38 | source-dirs: src 39 | 40 | executables: 41 | regex-equality-exe: 42 | main: Main.hs 43 | source-dirs: app 44 | ghc-options: 45 | - -threaded 46 | - -rtsopts 47 | - -with-rtsopts=-N 48 | dependencies: 49 | - regex-equality 50 | 51 | tests: 52 | regex-equality-test: 53 | main: Spec.hs 54 | source-dirs: test 55 | ghc-options: 56 | - -threaded 57 | - -rtsopts 58 | - -with-rtsopts=-N 59 | dependencies: 60 | - regex-equality 61 | -------------------------------------------------------------------------------- /publish.sh: -------------------------------------------------------------------------------- 1 | stack install --local-bin-path heroku/regex-equality/bin/ 2 | cd heroku/regex-equality 3 | echo "web: bin/regex-equality-exe" > Procfile 4 | git add . 5 | git commit -m "asd" 6 | git push heroku master -------------------------------------------------------------------------------- /src/DFA.hs: -------------------------------------------------------------------------------- 1 | module DFA where 2 | 3 | import qualified Data.Map as Map 4 | import Data.Map.Merge.Strict 5 | import qualified Data.Set as Set 6 | import Data.Set (Set) 7 | import Data.Maybe (fromMaybe) 8 | 9 | 10 | data DFA node symbol = 11 | DFA{transitions :: Map.Map node (Map.Map symbol node) 12 | , accepting :: Set.Set node 13 | , start :: node 14 | } 15 | 16 | recognizedByOne :: (Ord n, Ord s) => DFA n s -> DFA n s -> Maybe ([s], Bool) 17 | recognizedByOne a b = result where 18 | 19 | result = 20 | case differing (Just $ start a, Just $ start b) [] Set.empty of 21 | Right _ -> Nothing 22 | Left path -> Just path 23 | 24 | differing pos path visited = 25 | if Set.member pos visited then 26 | Right visited 27 | else if difference pos then 28 | Left (reverse path, accepts (fst pos) a) 29 | else 30 | foldr 31 | step 32 | (Right visited) 33 | (neighbors pos) 34 | where 35 | step (c, n) (Right visited) = 36 | differing n (c:path) $ Set.insert pos visited 37 | step _ (Left path) = 38 | Left path 39 | 40 | neighbors (x, y) = Map.toList $ merge' (oneSide x a) (oneSide y b) where 41 | oneSide mState dfa = 42 | fromMaybe Map.empty 43 | (mState >>= (\s -> Map.lookup s $ transitions dfa)) 44 | 45 | merge' = 46 | merge 47 | (mapMissing $ \k x -> (Just x, Nothing)) 48 | (mapMissing $ \k x -> (Nothing, Just x)) 49 | (zipWithMatched $ \k a b -> (Just a, Just b)) 50 | 51 | difference (x, y) = 52 | accepts x a /= accepts y b 53 | 54 | accepts (Just x) a = 55 | Set.member x (accepting a) 56 | accepts Nothing _ = 57 | False 58 | 59 | dfs :: Ord a => (a -> [a]) -> a -> Set a 60 | dfs neighbors start = inner start $ Set.singleton start where 61 | inner pos visited = 62 | foldr (\n v -> 63 | if Set.member n v then 64 | v 65 | else 66 | inner n (Set.insert n v) 67 | ) 68 | visited 69 | (neighbors pos) 70 | -------------------------------------------------------------------------------- /src/NFA.hs: -------------------------------------------------------------------------------- 1 | module NFA where 2 | 3 | import DFA 4 | import Regex 5 | import Data.Map (Map) 6 | import qualified Data.Map as Map 7 | import Data.Set (Set) 8 | import qualified Data.Set as Set 9 | import Control.Monad.State 10 | import qualified Data.MultiMap as MultiMap 11 | 12 | 13 | data NFA node symbol = 14 | NFA (Map node (Map symbol (Set node))) (Set node) node 15 | 16 | toDFA :: (Ord node, Ord sym) => NFA node sym -> DFA (Set node) sym 17 | toDFA (NFA arrows accepting start) = DFA arrows' accepting' start' where 18 | accepting' = 19 | Set.filter (not . Set.null . Set.intersection accepting) states' 20 | 21 | arrows' = Map.fromList $ 22 | fmap (\x -> (x, conns x)) $ Set.toList states' 23 | 24 | states' = 25 | dfs neighbors $ start' 26 | 27 | neighbors = 28 | Map.elems . conns 29 | 30 | conns = 31 | Map.unionsWith Set.union . fmap (\x -> Map.findWithDefault Map.empty x arrows) . 32 | Set.toList 33 | 34 | start' = Set.singleton start 35 | 36 | fromRegex :: Regex -> NFA Int Char 37 | fromRegex regex = epsilonToNFA res where 38 | res = evalState (buildNFA regex) 0 39 | 40 | data NFAPart = NFAPart 41 | { arrows :: Map Int (Map Char Int) 42 | , epsilons :: [(Int, Int)] 43 | , start :: Int 44 | , end :: Int 45 | } 46 | 47 | epsilonToNFA :: NFAPart -> NFA Int Char 48 | epsilonToNFA (NFAPart arrows epsilons start end) = NFA arrows' accepting' start where 49 | arrows' = 50 | Map.fromList $ 51 | fmap (\x -> (x, indirectArrows x)) states 52 | 53 | states = 54 | Set.toList $ 55 | Set.fromList (Map.keys arrows) `Set.union` Set.fromList (MultiMap.keys epsilons') 56 | 57 | indirectArrows :: Int -> Map Char (Set Int) 58 | indirectArrows = 59 | Map.unionsWith Set.union . Set.toList . 60 | Set.map (\x -> Map.map Set.singleton $ Map.findWithDefault Map.empty x arrows) . 61 | connectedViaEpsilon 62 | 63 | accepting' = 64 | Set.union (Set.singleton end) $ Set.fromList $ filter connectedToEnd states 65 | 66 | connectedToEnd = 67 | Set.member end . connectedViaEpsilon 68 | 69 | connectedViaEpsilon = 70 | dfs (\x -> MultiMap.lookup x epsilons') 71 | 72 | epsilons' = 73 | MultiMap.fromList epsilons 74 | 75 | buildNFA :: Regex -> State Int NFAPart 76 | buildNFA (OneOf regexes) = buildHelper f where 77 | f entry exit = do 78 | nfas <- sequence $ map buildNFA regexes 79 | 80 | let fromEntry = map ( (,) entry . NFA.start ) nfas 81 | let toExit = map (\x -> (end x, exit) ) nfas 82 | 83 | let arrows' = foldr1 combine $ map arrows nfas 84 | let epsilons' = foldr (++) [] (map epsilons nfas) ++ fromEntry ++ toExit 85 | 86 | return (arrows', epsilons') 87 | 88 | 89 | buildNFA (Consecutive regexes) = 90 | fmap (foldr1 putInFront) $ 91 | sequence $ map buildNFA regexes 92 | 93 | buildNFA (Character c) = buildHelper 94 | (\entry exit -> return 95 | ( (Map.singleton entry (Map.singleton c exit)) 96 | , [] 97 | ) 98 | ) 99 | 100 | buildNFA (Maybe regex) = extraConnections 101 | (\entry exit -> [(entry, exit)]) 102 | regex 103 | 104 | buildNFA (OneOrMore regex) = extraConnections 105 | (\entry exit -> [(exit, entry)]) 106 | regex 107 | 108 | buildNFA (NTimes regex) = extraConnections 109 | (\entry exit -> [(entry, exit), (exit, entry)]) 110 | regex 111 | 112 | extraConnections :: (Int -> Int -> [(Int, Int)]) -> Regex -> State Int NFAPart 113 | extraConnections g regex = buildHelper f where 114 | f entry exit = do 115 | NFAPart arrows epsilons childEntry childExit <- buildNFA regex 116 | return 117 | ( arrows 118 | , g childEntry childExit 119 | ++ [(entry, childEntry), (childExit, exit)] 120 | ++ epsilons 121 | ) 122 | 123 | buildHelper f = do 124 | entry <- nextId 125 | exit <- nextId 126 | (arrows, epsilons) <- f entry exit 127 | return $ 128 | NFAPart arrows epsilons entry exit 129 | 130 | combine = 131 | Map.unionWith Map.union 132 | 133 | putInFront (NFAPart arr1 eps1 s m1) (NFAPart arr2 eps2 m2 e) = 134 | NFAPart (combine arr1 arr2) ((m1, m2) : (eps1++eps2)) s e 135 | 136 | nextId = do 137 | current <- get 138 | put (current+1) 139 | return current -------------------------------------------------------------------------------- /src/Regex.hs: -------------------------------------------------------------------------------- 1 | module Regex where 2 | 3 | import Text.ParserCombinators.Parsec as Parsec 4 | import Text.ParserCombinators.Parsec.Error 5 | import Text.Parsec.Text 6 | import Data.Text 7 | import Control.Arrow 8 | 9 | data Regex 10 | = Character Char 11 | | NTimes Regex 12 | | Maybe Regex 13 | | OneOrMore Regex 14 | | OneOf [Regex] 15 | | Consecutive [Regex] 16 | deriving (Show, Eq) 17 | 18 | parse :: Text -> Either String Regex 19 | parse = 20 | left show . 21 | Parsec.parse (regex <* eof) "not a regex" 22 | 23 | ifMany _ [x] = x 24 | ifMany f list = f list 25 | 26 | regex = 27 | fmap (ifMany OneOf) $ sepBy1 consecutive (char '|') 28 | 29 | consecutive = 30 | fmap (ifMany Consecutive) $ many1 $ term 31 | 32 | term = do 33 | base <- literalCharacter <|> parens 34 | 35 | modifier <- optionMaybe $ oneOf modifiers 36 | return $ 37 | case modifier of 38 | Just '+' -> OneOrMore base 39 | Just '*' -> NTimes base 40 | Just '?' -> Maybe base 41 | Nothing -> base 42 | 43 | modifiers = "+*?" 44 | specialCharacters = modifiers ++ "|()\\" 45 | 46 | literalCharacter = 47 | fmap Character $ 48 | (noneOf specialCharacters <|> escapedCharacter) 49 | 50 | escapedCharacter = do 51 | char '\\' 52 | oneOf specialCharacters 53 | 54 | parens = do 55 | char '(' 56 | r <- regex 57 | char ')' 58 | 59 | return r -------------------------------------------------------------------------------- /src/RegexEquality.hs: -------------------------------------------------------------------------------- 1 | module RegexEquality where 2 | 3 | import Data.Text 4 | import DFA 5 | import qualified NFA 6 | import qualified Regex 7 | 8 | counterexample :: Text -> Text -> Either String (Maybe (String, Bool)) 9 | counterexample a b = do 10 | a' <- Regex.parse a 11 | b' <- Regex.parse b 12 | return $ 13 | recognizedByOne (toDfa a') (toDfa b') 14 | 15 | toDfa = 16 | NFA.toDFA . NFA.fromRegex -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | flags: {} 2 | packages: 3 | - . 4 | extra-deps: 5 | - containers-0.5.11.0 6 | resolver: lts-10.3 7 | -------------------------------------------------------------------------------- /test/Spec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | import Test.Hspec 4 | 5 | import qualified RegexEquality 6 | import Regex 7 | 8 | equal a b = res == Nothing where 9 | Right res = RegexEquality.counterexample a b 10 | 11 | expectLeft (Left _) = True 12 | expectLeft (Right _) = False 13 | 14 | main :: IO () 15 | main = hspec $ do 16 | 17 | describe "RegexEquality.counterexample" $ do 18 | it "works on identical regex" $ 19 | equal "a" "a" 20 | 21 | it "finds difference" $ 22 | not $ equal "a" "b" 23 | 24 | it "works on equivalent regex" $ 25 | equal "a|a" "a" 26 | 27 | it "complicated pair" $ 28 | equal "((a+)*)+" "a*" 29 | 30 | it "regression" $ 31 | not $ equal "a|b" "a+|b" 32 | 33 | it "the latter didn't match the empty string (regression)" $ 34 | equal "0*a*" "(0*|a)(a*)*" 35 | 36 | describe "Parsing" $ do 37 | let a = Character 'a' 38 | let b = Character 'b' 39 | let c = Character 'c' 40 | 41 | it "single letter" $ 42 | parse "a" `shouldBe` Right a 43 | 44 | it "multiple letters" $ 45 | parse "bac" `shouldBe` Right (Consecutive [b, a, c]) 46 | 47 | it "simple or" $ 48 | parse "a|b" `shouldBe` Right (OneOf [a, b]) 49 | 50 | it "or with multiple letters on the sides" $ 51 | parse "ab|bc" `shouldBe` Right (OneOf [Consecutive [a, b], Consecutive [b, c]]) 52 | 53 | it "star" $ 54 | parse "a*" `shouldBe` Right (NTimes a) 55 | 56 | it "plus" $ 57 | parse "a+" `shouldBe` Right (OneOrMore a) 58 | 59 | it "single parens" $ 60 | parse "(ab)*" `shouldBe` Right (NTimes $ Consecutive [a, b]) 61 | 62 | it "escaped special character" $ 63 | parse "\\*" `shouldBe` Right (Character '*') 64 | 65 | it "fails on unescaped special character" $ 66 | expectLeft $ parse "+" 67 | 68 | it "parses ? in parens" $ 69 | parse "(a+b?)" `shouldBe` Right (Consecutive [OneOrMore a, Maybe b]) --------------------------------------------------------------------------------