├── .gitignore ├── LICENSE ├── Main.hs ├── Makefile ├── README ├── Setup.lhs ├── binaries └── osx │ └── genex ├── dist └── doc │ └── html │ └── regex-genex │ ├── Regex-Genex.html │ ├── doc-index.html │ ├── frames.html │ ├── haddock-util.js │ ├── hslogo-16.png │ ├── index-frames.html │ ├── index.html │ ├── mini_Regex-Genex.html │ ├── minus.gif │ ├── ocean.css │ ├── plus.gif │ ├── regex-genex.haddock │ └── synopsis.png ├── regex-genex.cabal └── src └── Regex ├── Genex.hs └── Genex ├── Normalize.hs └── Pure.hs /.gitignore: -------------------------------------------------------------------------------- 1 | binaries/osx/yices 2 | dist 3 | .*~ 4 | tags 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | The "Artistic License" 6 | 7 | Preamble 8 | 9 | The intent of this document is to state the conditions under which a 10 | Package may be copied, such that the Copyright Holder maintains some 11 | semblance of artistic control over the development of the package, 12 | while giving the users of the package the right to use and distribute 13 | the Package in a more-or-less customary fashion, plus the right to make 14 | reasonable modifications. 15 | 16 | Definitions: 17 | 18 | "Package" refers to the collection of files distributed by the 19 | Copyright Holder, and derivatives of that collection of files 20 | created through textual modification. 21 | 22 | "Standard Version" refers to such a Package if it has not been 23 | modified, or has been modified in accordance with the wishes 24 | of the Copyright Holder as specified below. 25 | 26 | "Copyright Holder" is whoever is named in the copyright or 27 | copyrights for the package. 28 | 29 | "You" is you, if you're thinking about copying or distributing 30 | this Package. 31 | 32 | "Reasonable copying fee" is whatever you can justify on the 33 | basis of media cost, duplication charges, time of people involved, 34 | and so on. (You will not be required to justify it to the 35 | Copyright Holder, but only to the computing community at large 36 | as a market that must bear the fee.) 37 | 38 | "Freely Available" means that no fee is charged for the item 39 | itself, though there may be fees involved in handling the item. 40 | It also means that recipients of the item may redistribute it 41 | under the same conditions they received it. 42 | 43 | 1. You may make and give away verbatim copies of the source form of the 44 | Standard Version of this Package without restriction, provided that you 45 | duplicate all of the original copyright notices and associated disclaimers. 46 | 47 | 2. You may apply bug fixes, portability fixes and other modifications 48 | derived from the Public Domain or from the Copyright Holder. A Package 49 | modified in such a way shall still be considered the Standard Version. 50 | 51 | 3. 
You may otherwise modify your copy of this Package in any way, provided 52 | that you insert a prominent notice in each changed file stating how and 53 | when you changed that file, and provided that you do at least ONE of the 54 | following: 55 | 56 | a) place your modifications in the Public Domain or otherwise make them 57 | Freely Available, such as by posting said modifications to Usenet or 58 | an equivalent medium, or placing the modifications on a major archive 59 | site such as uunet.uu.net, or by allowing the Copyright Holder to include 60 | your modifications in the Standard Version of the Package. 61 | 62 | b) use the modified Package only within your corporation or organization. 63 | 64 | c) rename any non-standard executables so the names do not conflict 65 | with standard executables, which must also be provided, and provide 66 | a separate manual page for each non-standard executable that clearly 67 | documents how it differs from the Standard Version. 68 | 69 | d) make other distribution arrangements with the Copyright Holder. 70 | 71 | 4. You may distribute the programs of this Package in object code or 72 | executable form, provided that you do at least ONE of the following: 73 | 74 | a) distribute a Standard Version of the executables and library files, 75 | together with instructions (in the manual page or equivalent) on where 76 | to get the Standard Version. 77 | 78 | b) accompany the distribution with the machine-readable source of 79 | the Package with your modifications. 80 | 81 | c) give non-standard executables non-standard names, and clearly 82 | document the differences in manual pages (or equivalent), together 83 | with instructions on where to get the Standard Version. 84 | 85 | d) make other distribution arrangements with the Copyright Holder. 86 | 87 | 5. You may charge a reasonable copying fee for any distribution of this 88 | Package. You may charge any fee you choose for support of this 89 | Package. You may not charge a fee for this Package itself. However, 90 | you may distribute this Package in aggregate with other (possibly 91 | commercial) programs as part of a larger (possibly commercial) software 92 | distribution provided that you do not advertise this Package as a 93 | product of your own. You may embed this Package's interpreter within 94 | an executable of yours (by linking); this shall be construed as a mere 95 | form of aggregation, provided that the complete Standard Version of the 96 | interpreter is so embedded. 97 | 98 | 6. The scripts and library files supplied as input to or produced as 99 | output from the programs of this Package do not automatically fall 100 | under the copyright of this Package, but belong to whoever generated 101 | them, and may be sold commercially, and may be aggregated with this 102 | Package. If such scripts or library files are aggregated with this 103 | Package via the so-called "undump" or "unexec" methods of producing a 104 | binary executable image, then distribution of such an image shall 105 | neither be construed as a distribution of this Package nor shall it 106 | fall under the restrictions of Paragraphs 3 and 4, provided that you do 107 | not represent such an executable image as a Standard Version of this 108 | Package. 109 | 110 | 7. 
C subroutines (or comparably compiled subroutines in other 111 | languages) supplied by you and linked into this Package in order to 112 | emulate subroutines and variables of the language defined by this 113 | Package shall not be considered part of this Package, but are the 114 | equivalent of input as in Paragraph 6, provided these subroutines do 115 | not change the language in any way that would cause it to fail the 116 | regression tests for the language. 117 | 118 | 8. Aggregation of this Package with a commercial distribution is always 119 | permitted provided that the use of this Package is embedded; that is, 120 | when no overt attempt is made to make this Package's interfaces visible 121 | to the end user of the commercial distribution. Such use shall not be 122 | construed as a distribution of this Package. 123 | 124 | 9. The name of the Copyright Holder may not be used to endorse or promote 125 | products derived from this software without specific prior written permission. 126 | 127 | 10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR 128 | IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 129 | WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 130 | 131 | The End 132 | -------------------------------------------------------------------------------- /Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | import Regex.Genex 3 | import System.IO 4 | import System.Environment 5 | import Data.Char (isDigit) 6 | 7 | defaultRegex :: String 8 | defaultRegex = "a(b|c)d{2,3}e*" 9 | 10 | main :: IO () 11 | main = do 12 | hSetBuffering stdout NoBuffering 13 | args <- getArgs 14 | case args of 15 | [] -> do 16 | prog <- getProgName 17 | if prog == "" then run defaultRegex else do 18 | fail $ "Usage: " ++ prog ++ " regex [regex...]" 19 | rx | all isPure rx -> mapM_ ((putStr "0 " >>) . 
print) (genexPure rx) 20 | | otherwise -> genexPrint rx 21 | where 22 | isPure [] = True 23 | isPure ('\\':'\\':cs) = isPure cs 24 | isPure ('\\':'b':_) = False 25 | isPure ('\\':c:cs) 26 | | isDigit c = False 27 | | otherwise = isPure cs 28 | isPure ('^':_) = False 29 | isPure ('$':_) = False 30 | isPure (_:cs) = isPure cs 31 | 32 | run :: String -> IO () 33 | run regex = genexPrint [regex] 34 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all :: install 2 | 3 | test :: binaries/osx/z3 binaries/osx/genex 4 | env PATH=./binaries/osx:$$PATH genex "a(b|c)d{2,3}e*" 5 | env PATH=./binaries/osx:$$PATH genex "a(b|c)d{2,3}e*\1" 6 | 7 | binaries/osx/z3 : 8 | curl https://research.microsoft.com/en-us/um/redmond/projects/z3/z3-osx-4.1-x64.tar.gz | tar zxf - 9 | cp z3/bin/z3 binaries/osx/ 10 | rm -rf z3 11 | 12 | binaries/osx/yices : 13 | curl 'http://yices.csl.sri.com/cgi-bin/yices-newdownload.cgi?file=yices2smt09-x86_64-apple-darwin9.8.0-static-gmp.tgz&accept=I+accept' | tar zxf - 14 | cp yices2smt09/bin/yices binaries/osx/ 15 | rm -rf yices2smt09 16 | 17 | binaries/osx/genex : 18 | cabal configure 19 | cabal build 20 | cp dist/build/genex/genex binaries/osx/ 21 | strip binaries/osx/genex 22 | 23 | install :: 24 | cabal install 25 | cp dist/build/genex/genex binaries/osx/ 26 | strip binaries/osx/genex 27 | 28 | ghci :: 29 | ghci -isrc Main.hs 30 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Available on Hackage as: http://hackage.haskell.org/package/regex-genex 2 | 3 | The "genex" program finds all permutations of strings that matches every 4 | regular expressions specified in the command line, with full support 5 | for back references (\1 .. \9) and word boundaries (\b). 6 | 7 | The output is unsorted, but the order is deterministic across multiple runs: 8 | 9 | $ genex '\d' '[123abc]' # Must match both 10 | 1.00000000 "2" 11 | 1.00000000 "3" 12 | 1.00000000 "1" 13 | 14 | To enforce a fixed ordering for alternations, pipe the output to "sort -n": 15 | 16 | $ genex '(__|<>){1,3}' | sort -n 17 | 2.00000000 "<>" 18 | 2.00000001 "__" 19 | 4.00000002 "<><>" 20 | 4.00000003 "__<>" 21 | 4.00000006 "<>__" 22 | 4.00000007 "____" 23 | 6.00000010 "<><><>" 24 | 6.00000011 "__<><>" 25 | 6.00000014 "<>__<>" 26 | 6.00000015 "____<>" 27 | 6.00000026 "<><>__" 28 | 6.00000027 "__<>__" 29 | 6.00000030 "<>____" 30 | 6.00000031 "______" 31 | 32 | Output size and maximum string length are both capped at 65535 currently, 33 | but both can be raised if needed. 34 | 35 | Because genex generates matches lazily, we can use "head -n" to display 36 | only part of its output: 37 | 38 | genex '[abc]+[123]+.+' | head -n 10 39 | 40 | Some caveats: 41 | 42 | - We translate * and + quantifiers into {0,3} and {1,4}, to make output 43 | appear more unique. 44 | 45 | - The set of . \D \W \S characters are limited to printable characters, 46 | again to make the output more pretty. 47 | 48 | - The ^ and $ anchors are taken to mean begin-of-line and end-of-line 49 | (implicit /m), since we already implicitly anchor on both ends. 50 | 51 | - No support yet for \l \u \L \U \Q \E (case and quotemeta modifiers) 52 | 53 | - No named Unicode properties or POSIX [[:upper:]] classes yet. 
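The same generation is available from Haskell code via the library API; a minimal,
illustrative sketch follows (the regex is just an example, and like the executable it
needs the solver binary on PATH):

  import Regex.Genex (genexPrint)

  main :: IO ()
  main = genexPrint ["a(b|c)d{2,3}e*"]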
54 | 55 | Required Hackage libraries: sbv regex-tdfa stream-monad text 56 | 57 | Required binary in PATH: 58 | 59 | yices # Download it from http://yices.csl.sri.com/download-yices2.shtml 60 | 61 | You can directly run the Main.hs in the checkout directory as well: 62 | 63 | runghc Main.hs 'your regex here' 64 | 65 | Pre-built MacOSX binaries are in binaries/osx/; try "make test" for a sample run. 66 | 67 | Share and enjoy! 68 | Audrey 69 | -------------------------------------------------------------------------------- /Setup.lhs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env runghc 2 | > import Distribution.Simple 3 | > import System.Cmd (rawSystem) 4 | > 5 | > main :: IO () 6 | > main = defaultMainWithHooks simpleUserHooks 7 | -------------------------------------------------------------------------------- /binaries/osx/genex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audreyt/regex-genex/eacf18333725e32dd6baaee8cc5b9bc709d861ca/binaries/osx/genex -------------------------------------------------------------------------------- /dist/doc/html/regex-genex/Regex-Genex.html: -------------------------------------------------------------------------------- 1 | Regex.Genex

regex-genex-0.6.0: From a regex, generate all possible strings it can match

Safe Haskell: Safe-Inferred

Regex.Genex

Description

This module and the accompanying genex program find all permutations 5 | of strings that match every input regular expression, ordered from 6 | shortest to longest, with full support for back references ('\1' .. '\9') 7 | and word boundaries ('\b'). 8 |

It requires the yices binary in PATH; please download it from: 9 | http://yices.csl.sri.com/download-yices2.shtml 10 |

Synopsis

Documentation

data Model

A match consists of a string (a list of code points) and a rank representing the alternation order. 11 |

Constructors

Model 

Fields

modelChars :: [Word8]
 
modelRank :: Word64
 

Instances

genex :: [String] -> IO [String]

Given a list of regular expressions, returns all possible strings that match every one of them. 12 | Shorter strings are guaranteed to be returned before longer ones. 13 |
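For illustration only (not part of the generated documentation), a minimal sketch of calling genex from a program; it assumes the package is installed, the required SMT solver binary (yices) is on PATH, and the regexes are arbitrary examples:

    import Regex.Genex (genex)

    main :: IO ()
    main = do
        -- Strings that satisfy both patterns at once, shortest first;
        -- the result list is produced lazily, so take a finite prefix.
        results <- genex ["\\d", "[123abc]"]
        mapM_ putStrLn (take 3 results)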

genexPure :: [String] -> [String]

A pure and much faster variant of genex, but without support for 14 | back references, anchors or word boundaries. 15 | Makes no ordering guarantee with respect to string length. 16 | Does not depend on the external yices SMT solver. 17 |
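As an illustrative sketch (not from the package documentation), genexPure can be called as an ordinary pure function; the regex is an arbitrary example:

    import Regex.Genex (genexPure)

    main :: IO ()
    main = do
        -- No solver involved; just print a prefix of the enumerated matches.
        mapM_ putStrLn (take 10 (genexPure ["a[bc]{1,2}"]))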

genexPrint :: [String] -> IO ()

Same as genexModels, but prints the models to standard output instead of returning them. 18 |

genexModels :: [String] -> IO [Model]

Same as genex, but returns the full models (characters and ranks) instead of just the strings. 19 |
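A minimal sketch (for illustration, not from the package documentation) of inspecting the returned models; the regex is an arbitrary example and, like genex, this needs the SMT solver on PATH:

    import Regex.Genex (genexModels, Model(..))
    import Data.Char (chr)

    main :: IO ()
    main = do
        models <- genexModels ["(foo|bar)\\d"]
        mapM_ showModel (take 5 models)
      where
        -- modelChars holds Word8 code points; modelRank encodes alternation order.
        showModel m = putStrLn (map (chr . fromIntegral) (modelChars m)
                                ++ "  rank " ++ show (modelRank m))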

genexWith :: (?maxRepeat :: Int, Monoid a) => ([SatResult] -> Hits -> (Hits -> IO a) -> IO a) -> [[Char]] -> IO a

regexMatch :: (?maxRepeat :: Int) => [[Char]] -> Str -> Symbolic SBool
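regexMatch exposes the matcher as an SBV predicate. The following sketch is illustrative only; it uses the same SBV 5.x helpers (such as mkExistVars) that this package itself relies on, and the regexes and string length are arbitrary examples with compatible lengths:

    {-# LANGUAGE ImplicitParams #-}
    import Data.SBV
    import Regex.Genex (regexMatch)

    main :: IO ()
    main = do
        res <- sat $ do
            str <- mkExistVars 3       -- three symbolic Word8 code points
            let ?maxRepeat = 3         -- bound for unbounded quantifiers like +
            regexMatch ["[a-f]+", "\\w{3}"] str
        print res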

-------------------------------------------------------------------------------- /dist/doc/html/regex-genex/doc-index.html: -------------------------------------------------------------------------------- 1 | regex-genex-0.6.0: From a regex, generate all possible strings it can match (Index)

regex-genex-0.6.0: From a regex, generate all possible strings it can match

Index

genex    Regex.Genex
genexModels    Regex.Genex
genexPrint    Regex.Genex
genexPure    Regex.Genex
genexWith    Regex.Genex
Model
  1 (Type/Class)    Regex.Genex
  2 (Data Constructor)    Regex.Genex
modelChars    Regex.Genex
modelRank    Regex.Genex
normalize    Regex.Genex.Normalize
regexMatch    Regex.Genex
-------------------------------------------------------------------------------- /dist/doc/html/regex-genex/frames.html: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /dist/doc/html/regex-genex/haddock-util.js: -------------------------------------------------------------------------------- 1 | // Haddock JavaScript utilities 2 | 3 | var rspace = /\s\s+/g, 4 | rtrim = /^\s+|\s+$/g; 5 | 6 | function spaced(s) { return (" " + s + " ").replace(rspace, " "); } 7 | function trim(s) { return s.replace(rtrim, ""); } 8 | 9 | function hasClass(elem, value) { 10 | var className = spaced(elem.className || ""); 11 | return className.indexOf( " " + value + " " ) >= 0; 12 | } 13 | 14 | function addClass(elem, value) { 15 | var className = spaced(elem.className || ""); 16 | if ( className.indexOf( " " + value + " " ) < 0 ) { 17 | elem.className = trim(className + " " + value); 18 | } 19 | } 20 | 21 | function removeClass(elem, value) { 22 | var className = spaced(elem.className || ""); 23 | className = className.replace(" " + value + " ", " "); 24 | elem.className = trim(className); 25 | } 26 | 27 | function toggleClass(elem, valueOn, valueOff, bool) { 28 | if (bool == null) { bool = ! hasClass(elem, valueOn); } 29 | if (bool) { 30 | removeClass(elem, valueOff); 31 | addClass(elem, valueOn); 32 | } 33 | else { 34 | removeClass(elem, valueOn); 35 | addClass(elem, valueOff); 36 | } 37 | return bool; 38 | } 39 | 40 | 41 | function makeClassToggle(valueOn, valueOff) 42 | { 43 | return function(elem, bool) { 44 | return toggleClass(elem, valueOn, valueOff, bool); 45 | } 46 | } 47 | 48 | toggleShow = makeClassToggle("show", "hide"); 49 | toggleCollapser = makeClassToggle("collapser", "expander"); 50 | 51 | function toggleSection(id) 52 | { 53 | var b = toggleShow(document.getElementById("section." + id)); 54 | toggleCollapser(document.getElementById("control." + id), b); 55 | rememberCollapsed(id, b); 56 | return b; 57 | } 58 | 59 | var collapsed = {}; 60 | function rememberCollapsed(id, b) 61 | { 62 | if(b) 63 | delete collapsed[id] 64 | else 65 | collapsed[id] = null; 66 | 67 | var sections = []; 68 | for(var i in collapsed) 69 | { 70 | if(collapsed.hasOwnProperty(i)) 71 | sections.push(i); 72 | } 73 | // cookie specific to this page; don't use setCookie which sets path=/ 74 | document.cookie = "collapsed=" + escape(sections.join('+')); 75 | } 76 | 77 | function restoreCollapsed() 78 | { 79 | var cookie = getCookie("collapsed"); 80 | if(!cookie) 81 | return; 82 | 83 | var ids = cookie.split('+'); 84 | for(var i in ids) 85 | { 86 | if(document.getElementById("section." 
+ ids[i])) 87 | toggleSection(ids[i]); 88 | } 89 | } 90 | 91 | function setCookie(name, value) { 92 | document.cookie = name + "=" + escape(value) + ";path=/;"; 93 | } 94 | 95 | function clearCookie(name) { 96 | document.cookie = name + "=;path=/;expires=Thu, 01-Jan-1970 00:00:01 GMT;"; 97 | } 98 | 99 | function getCookie(name) { 100 | var nameEQ = name + "="; 101 | var ca = document.cookie.split(';'); 102 | for(var i=0;i < ca.length;i++) { 103 | var c = ca[i]; 104 | while (c.charAt(0)==' ') c = c.substring(1,c.length); 105 | if (c.indexOf(nameEQ) == 0) { 106 | return unescape(c.substring(nameEQ.length,c.length)); 107 | } 108 | } 109 | return null; 110 | } 111 | 112 | 113 | 114 | var max_results = 75; // 50 is not enough to search for map in the base libraries 115 | var shown_range = null; 116 | var last_search = null; 117 | 118 | function quick_search() 119 | { 120 | perform_search(false); 121 | } 122 | 123 | function full_search() 124 | { 125 | perform_search(true); 126 | } 127 | 128 | 129 | function perform_search(full) 130 | { 131 | var text = document.getElementById("searchbox").value.toLowerCase(); 132 | if (text == last_search && !full) return; 133 | last_search = text; 134 | 135 | var table = document.getElementById("indexlist"); 136 | var status = document.getElementById("searchmsg"); 137 | var children = table.firstChild.childNodes; 138 | 139 | // first figure out the first node with the prefix 140 | var first = bisect(-1); 141 | var last = (first == -1 ? -1 : bisect(1)); 142 | 143 | if (first == -1) 144 | { 145 | table.className = ""; 146 | status.innerHTML = "No results found, displaying all"; 147 | } 148 | else if (first == 0 && last == children.length - 1) 149 | { 150 | table.className = ""; 151 | status.innerHTML = ""; 152 | } 153 | else if (last - first >= max_results && !full) 154 | { 155 | table.className = ""; 156 | status.innerHTML = "More than " + max_results + ", press Search to display"; 157 | } 158 | else 159 | { 160 | // decide what you need to clear/show 161 | if (shown_range) 162 | setclass(shown_range[0], shown_range[1], "indexrow"); 163 | setclass(first, last, "indexshow"); 164 | shown_range = [first, last]; 165 | table.className = "indexsearch"; 166 | status.innerHTML = ""; 167 | } 168 | 169 | 170 | function setclass(first, last, status) 171 | { 172 | for (var i = first; i <= last; i++) 173 | { 174 | children[i].className = status; 175 | } 176 | } 177 | 178 | 179 | // do a binary search, treating 0 as ... 180 | // return either -1 (no 0's found) or location of most far match 181 | function bisect(dir) 182 | { 183 | var first = 0, finish = children.length - 1; 184 | var mid, success = false; 185 | 186 | while (finish - first > 3) 187 | { 188 | mid = Math.floor((finish + first) / 2); 189 | 190 | var i = checkitem(mid); 191 | if (i == 0) i = dir; 192 | if (i == -1) 193 | finish = mid; 194 | else 195 | first = mid; 196 | } 197 | var a = (dir == 1 ? first : finish); 198 | var b = (dir == 1 ? finish : first); 199 | for (var i = b; i != a - dir; i -= dir) 200 | { 201 | if (checkitem(i) == 0) return i; 202 | } 203 | return -1; 204 | } 205 | 206 | 207 | // from an index, decide what the result is 208 | // 0 = match, -1 is lower, 1 is higher 209 | function checkitem(i) 210 | { 211 | var s = getitem(i).toLowerCase().substr(0, text.length); 212 | if (s == text) return 0; 213 | else return (s > text ? 
-1 : 1); 214 | } 215 | 216 | 217 | // from an index, get its string 218 | // this abstracts over alternates 219 | function getitem(i) 220 | { 221 | for ( ; i >= 0; i--) 222 | { 223 | var s = children[i].firstChild.firstChild.data; 224 | if (s.indexOf(' ') == -1) 225 | return s; 226 | } 227 | return ""; // should never be reached 228 | } 229 | } 230 | 231 | function setSynopsis(filename) { 232 | if (parent.window.synopsis) { 233 | if (parent.window.synopsis.location.replace) { 234 | // In Firefox this avoids adding the change to the history. 235 | parent.window.synopsis.location.replace(filename); 236 | } else { 237 | parent.window.synopsis.location = filename; 238 | } 239 | } 240 | } 241 | 242 | function addMenuItem(html) { 243 | var menu = document.getElementById("page-menu"); 244 | if (menu) { 245 | var btn = menu.firstChild.cloneNode(false); 246 | btn.innerHTML = html; 247 | menu.appendChild(btn); 248 | } 249 | } 250 | 251 | function adjustForFrames() { 252 | var bodyCls; 253 | 254 | if (parent.location.href == window.location.href) { 255 | // not in frames, so add Frames button 256 | addMenuItem("Frames"); 257 | bodyCls = "no-frame"; 258 | } 259 | else { 260 | bodyCls = "in-frame"; 261 | } 262 | addClass(document.body, bodyCls); 263 | } 264 | 265 | function reframe() { 266 | setCookie("haddock-reframe", document.URL); 267 | window.location = "frames.html"; 268 | } 269 | 270 | function postReframe() { 271 | var s = getCookie("haddock-reframe"); 272 | if (s) { 273 | parent.window.main.location = s; 274 | clearCookie("haddock-reframe"); 275 | } 276 | } 277 | 278 | function styles() { 279 | var i, a, es = document.getElementsByTagName("link"), rs = []; 280 | for (i = 0; a = es[i]; i++) { 281 | if(a.rel.indexOf("style") != -1 && a.title) { 282 | rs.push(a); 283 | } 284 | } 285 | return rs; 286 | } 287 | 288 | function addStyleMenu() { 289 | var as = styles(); 290 | var i, a, btns = ""; 291 | for(i=0; a = as[i]; i++) { 292 | btns += "
  • " 294 | + a.title + "
  • " 295 | } 296 | if (as.length > 1) { 297 | var h = "
    " 298 | + "Style ▾" 299 | + "" 300 | + "
    "; 301 | addMenuItem(h); 302 | } 303 | } 304 | 305 | function setActiveStyleSheet(title) { 306 | var as = styles(); 307 | var i, a, found; 308 | for(i=0; a = as[i]; i++) { 309 | a.disabled = true; 310 | // need to do this always, some browsers are edge triggered 311 | if(a.title == title) { 312 | found = a; 313 | } 314 | } 315 | if (found) { 316 | found.disabled = false; 317 | setCookie("haddock-style", title); 318 | } 319 | else { 320 | as[0].disabled = false; 321 | clearCookie("haddock-style"); 322 | } 323 | styleMenu(false); 324 | } 325 | 326 | function resetStyle() { 327 | var s = getCookie("haddock-style"); 328 | if (s) setActiveStyleSheet(s); 329 | } 330 | 331 | 332 | function styleMenu(show) { 333 | var m = document.getElementById('style-menu'); 334 | if (m) toggleShow(m, show); 335 | } 336 | 337 | 338 | function pageLoad() { 339 | addStyleMenu(); 340 | adjustForFrames(); 341 | resetStyle(); 342 | restoreCollapsed(); 343 | } 344 | 345 | -------------------------------------------------------------------------------- /dist/doc/html/regex-genex/hslogo-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audreyt/regex-genex/eacf18333725e32dd6baaee8cc5b9bc709d861ca/dist/doc/html/regex-genex/hslogo-16.png -------------------------------------------------------------------------------- /dist/doc/html/regex-genex/index-frames.html: -------------------------------------------------------------------------------- 1 | regex-genex-0.6.0: From a regex, generate all possible strings it can match

    Modules

    -------------------------------------------------------------------------------- /dist/doc/html/regex-genex/index.html: -------------------------------------------------------------------------------- 1 | regex-genex-0.6.0: From a regex, generate all possible strings it can match

    regex-genex-0.6.0: From a regex, generate all possible strings it can match

    regex-genex-0.6.0: From a regex, generate all possible strings it can match

    From a regex, generate all possible strings it can match 5 |

    -------------------------------------------------------------------------------- /dist/doc/html/regex-genex/mini_Regex-Genex.html: -------------------------------------------------------------------------------- 1 | Regex.Genex

    Regex.Genex

    data Model

    -------------------------------------------------------------------------------- /dist/doc/html/regex-genex/minus.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audreyt/regex-genex/eacf18333725e32dd6baaee8cc5b9bc709d861ca/dist/doc/html/regex-genex/minus.gif -------------------------------------------------------------------------------- /dist/doc/html/regex-genex/ocean.css: -------------------------------------------------------------------------------- 1 | /* @group Fundamentals */ 2 | 3 | * { margin: 0; padding: 0 } 4 | 5 | /* Is this portable? */ 6 | html { 7 | background-color: white; 8 | width: 100%; 9 | height: 100%; 10 | } 11 | 12 | body { 13 | background: white; 14 | color: black; 15 | text-align: left; 16 | min-height: 100%; 17 | position: relative; 18 | } 19 | 20 | p { 21 | margin: 0.8em 0; 22 | } 23 | 24 | ul, ol { 25 | margin: 0.8em 0 0.8em 2em; 26 | } 27 | 28 | dl { 29 | margin: 0.8em 0; 30 | } 31 | 32 | dt { 33 | font-weight: bold; 34 | } 35 | dd { 36 | margin-left: 2em; 37 | } 38 | 39 | a { text-decoration: none; } 40 | a[href]:link { color: rgb(196,69,29); } 41 | a[href]:visited { color: rgb(171,105,84); } 42 | a[href]:hover { text-decoration:underline; } 43 | 44 | /* @end */ 45 | 46 | /* @group Fonts & Sizes */ 47 | 48 | /* Basic technique & IE workarounds from YUI 3 49 | For reasons, see: 50 | http://yui.yahooapis.com/3.1.1/build/cssfonts/fonts.css 51 | */ 52 | 53 | body { 54 | font:13px/1.4 sans-serif; 55 | *font-size:small; /* for IE */ 56 | *font:x-small; /* for IE in quirks mode */ 57 | } 58 | 59 | h1 { font-size: 146.5%; /* 19pt */ } 60 | h2 { font-size: 131%; /* 17pt */ } 61 | h3 { font-size: 116%; /* 15pt */ } 62 | h4 { font-size: 100%; /* 13pt */ } 63 | h5 { font-size: 100%; /* 13pt */ } 64 | 65 | select, input, button, textarea { 66 | font:99% sans-serif; 67 | } 68 | 69 | table { 70 | font-size:inherit; 71 | font:100%; 72 | } 73 | 74 | pre, code, kbd, samp, tt, .src { 75 | font-family:monospace; 76 | *font-size:108%; 77 | line-height: 124%; 78 | } 79 | 80 | .links, .link { 81 | font-size: 85%; /* 11pt */ 82 | } 83 | 84 | #module-header .caption { 85 | font-size: 182%; /* 24pt */ 86 | } 87 | 88 | .info { 89 | font-size: 85%; /* 11pt */ 90 | } 91 | 92 | #table-of-contents, #synopsis { 93 | /* font-size: 85%; /* 11pt */ 94 | } 95 | 96 | 97 | /* @end */ 98 | 99 | /* @group Common */ 100 | 101 | .caption, h1, h2, h3, h4, h5, h6 { 102 | font-weight: bold; 103 | color: rgb(78,98,114); 104 | margin: 0.8em 0 0.4em; 105 | } 106 | 107 | * + h1, * + h2, * + h3, * + h4, * + h5, * + h6 { 108 | margin-top: 2em; 109 | } 110 | 111 | h1 + h2, h2 + h3, h3 + h4, h4 + h5, h5 + h6 { 112 | margin-top: inherit; 113 | } 114 | 115 | ul.links { 116 | list-style: none; 117 | text-align: left; 118 | float: right; 119 | display: inline-table; 120 | margin: 0 0 0 1em; 121 | } 122 | 123 | ul.links li { 124 | display: inline; 125 | border-left: 1px solid #d5d5d5; 126 | white-space: nowrap; 127 | padding: 0; 128 | } 129 | 130 | ul.links li a { 131 | padding: 0.2em 0.5em; 132 | } 133 | 134 | .hide { display: none; } 135 | .show { display: inherit; } 136 | .clear { clear: both; } 137 | 138 | .collapser { 139 | background-image: url(minus.gif); 140 | background-repeat: no-repeat; 141 | } 142 | .expander { 143 | background-image: url(plus.gif); 144 | background-repeat: no-repeat; 145 | } 146 | p.caption.collapser, 147 | p.caption.expander { 148 | background-position: 0 0.4em; 149 | } 150 | .collapser, .expander { 151 | 
padding-left: 14px; 152 | margin-left: -14px; 153 | cursor: pointer; 154 | } 155 | 156 | pre { 157 | padding: 0.25em; 158 | margin: 0.8em 0; 159 | background: rgb(229,237,244); 160 | overflow: auto; 161 | border-bottom: 0.25em solid white; 162 | /* white border adds some space below the box to compensate 163 | for visual extra space that paragraphs have between baseline 164 | and the bounding box */ 165 | } 166 | 167 | .src { 168 | background: #f0f0f0; 169 | padding: 0.2em 0.5em; 170 | } 171 | 172 | .keyword { font-weight: normal; } 173 | .def { font-weight: bold; } 174 | 175 | 176 | /* @end */ 177 | 178 | /* @group Page Structure */ 179 | 180 | #content { 181 | margin: 0 auto; 182 | padding: 0 2em 6em; 183 | } 184 | 185 | #package-header { 186 | background: rgb(41,56,69); 187 | border-top: 5px solid rgb(78,98,114); 188 | color: #ddd; 189 | padding: 0.2em; 190 | position: relative; 191 | text-align: left; 192 | } 193 | 194 | #package-header .caption { 195 | background: url(hslogo-16.png) no-repeat 0em; 196 | color: white; 197 | margin: 0 2em; 198 | font-weight: normal; 199 | font-style: normal; 200 | padding-left: 2em; 201 | } 202 | 203 | #package-header a:link, #package-header a:visited { color: white; } 204 | #package-header a:hover { background: rgb(78,98,114); } 205 | 206 | #module-header .caption { 207 | color: rgb(78,98,114); 208 | font-weight: bold; 209 | border-bottom: 1px solid #ddd; 210 | } 211 | 212 | table.info { 213 | float: right; 214 | padding: 0.5em 1em; 215 | border: 1px solid #ddd; 216 | color: rgb(78,98,114); 217 | background-color: #fff; 218 | max-width: 40%; 219 | border-spacing: 0; 220 | position: relative; 221 | top: -0.5em; 222 | margin: 0 0 0 2em; 223 | } 224 | 225 | .info th { 226 | padding: 0 1em 0 0; 227 | } 228 | 229 | div#style-menu-holder { 230 | position: relative; 231 | z-index: 2; 232 | display: inline; 233 | } 234 | 235 | #style-menu { 236 | position: absolute; 237 | z-index: 1; 238 | overflow: visible; 239 | background: #374c5e; 240 | margin: 0; 241 | text-align: center; 242 | right: 0; 243 | padding: 0; 244 | top: 1.25em; 245 | } 246 | 247 | #style-menu li { 248 | display: list-item; 249 | border-style: none; 250 | margin: 0; 251 | padding: 0; 252 | color: #000; 253 | list-style-type: none; 254 | } 255 | 256 | #style-menu li + li { 257 | border-top: 1px solid #919191; 258 | } 259 | 260 | #style-menu a { 261 | width: 6em; 262 | padding: 3px; 263 | display: block; 264 | } 265 | 266 | #footer { 267 | background: #ddd; 268 | border-top: 1px solid #aaa; 269 | padding: 0.5em 0; 270 | color: #666; 271 | text-align: center; 272 | position: absolute; 273 | bottom: 0; 274 | width: 100%; 275 | height: 3em; 276 | } 277 | 278 | /* @end */ 279 | 280 | /* @group Front Matter */ 281 | 282 | #table-of-contents { 283 | float: right; 284 | clear: right; 285 | background: #faf9dc; 286 | border: 1px solid #d8d7ad; 287 | padding: 0.5em 1em; 288 | max-width: 20em; 289 | margin: 0.5em 0 1em 1em; 290 | } 291 | 292 | #table-of-contents .caption { 293 | text-align: center; 294 | margin: 0; 295 | } 296 | 297 | #table-of-contents ul { 298 | list-style: none; 299 | margin: 0; 300 | } 301 | 302 | #table-of-contents ul ul { 303 | margin-left: 2em; 304 | } 305 | 306 | #description .caption { 307 | display: none; 308 | } 309 | 310 | #synopsis { 311 | display: none; 312 | } 313 | 314 | .no-frame #synopsis { 315 | display: block; 316 | position: fixed; 317 | right: 0; 318 | height: 80%; 319 | top: 10%; 320 | padding: 0; 321 | } 322 | 323 | #synopsis .caption { 324 | float: left; 325 | 
width: 29px; 326 | color: rgba(255,255,255,0); 327 | height: 110px; 328 | margin: 0; 329 | font-size: 1px; 330 | padding: 0; 331 | } 332 | 333 | #synopsis p.caption.collapser { 334 | background: url(synopsis.png) no-repeat -64px -8px; 335 | } 336 | 337 | #synopsis p.caption.expander { 338 | background: url(synopsis.png) no-repeat 0px -8px; 339 | } 340 | 341 | #synopsis ul { 342 | height: 100%; 343 | overflow: auto; 344 | padding: 0.5em; 345 | margin: 0; 346 | } 347 | 348 | #synopsis ul ul { 349 | overflow: hidden; 350 | } 351 | 352 | #synopsis ul, 353 | #synopsis ul li.src { 354 | background-color: #faf9dc; 355 | white-space: nowrap; 356 | list-style: none; 357 | margin-left: 0; 358 | } 359 | 360 | /* @end */ 361 | 362 | /* @group Main Content */ 363 | 364 | #interface div.top { margin: 2em 0; } 365 | #interface h1 + div.top, 366 | #interface h2 + div.top, 367 | #interface h3 + div.top, 368 | #interface h4 + div.top, 369 | #interface h5 + div.top { 370 | margin-top: 1em; 371 | } 372 | #interface p.src .link { 373 | float: right; 374 | color: #919191; 375 | border-left: 1px solid #919191; 376 | background: #f0f0f0; 377 | padding: 0 0.5em 0.2em; 378 | margin: 0 -0.5em 0 0.5em; 379 | } 380 | 381 | #interface table { border-spacing: 2px; } 382 | #interface td { 383 | vertical-align: top; 384 | padding-left: 0.5em; 385 | } 386 | #interface td.src { 387 | white-space: nowrap; 388 | } 389 | #interface td.doc p { 390 | margin: 0; 391 | } 392 | #interface td.doc p + p { 393 | margin-top: 0.8em; 394 | } 395 | 396 | .subs dl { 397 | margin: 0; 398 | } 399 | 400 | .subs dt { 401 | float: left; 402 | clear: left; 403 | display: block; 404 | margin: 1px 0; 405 | } 406 | 407 | .subs dd { 408 | float: right; 409 | width: 90%; 410 | display: block; 411 | padding-left: 0.5em; 412 | margin-bottom: 0.5em; 413 | } 414 | 415 | .subs dd.empty { 416 | display: none; 417 | } 418 | 419 | .subs dd p { 420 | margin: 0; 421 | } 422 | 423 | .top p.src { 424 | border-top: 1px solid #ccc; 425 | } 426 | 427 | .subs, .doc { 428 | /* use this selector for one level of indent */ 429 | padding-left: 2em; 430 | } 431 | 432 | .arguments { 433 | margin-top: -0.4em; 434 | } 435 | .arguments .caption { 436 | display: none; 437 | } 438 | 439 | .fields { padding-left: 1em; } 440 | 441 | .fields .caption { display: none; } 442 | 443 | .fields p { margin: 0 0; } 444 | 445 | /* this seems bulky to me 446 | .methods, .constructors { 447 | background: #f8f8f8; 448 | border: 1px solid #eee; 449 | } 450 | */ 451 | 452 | /* @end */ 453 | 454 | /* @group Auxillary Pages */ 455 | 456 | #mini { 457 | margin: 0 auto; 458 | padding: 0 1em 1em; 459 | } 460 | 461 | #mini > * { 462 | font-size: 93%; /* 12pt */ 463 | } 464 | 465 | #mini #module-list .caption, 466 | #mini #module-header .caption { 467 | font-size: 125%; /* 15pt */ 468 | } 469 | 470 | #mini #interface h1, 471 | #mini #interface h2, 472 | #mini #interface h3, 473 | #mini #interface h4 { 474 | font-size: 109%; /* 13pt */ 475 | margin: 1em 0 0; 476 | } 477 | 478 | #mini #interface .top, 479 | #mini #interface .src { 480 | margin: 0; 481 | } 482 | 483 | #mini #module-list ul { 484 | list-style: none; 485 | margin: 0; 486 | } 487 | 488 | #alphabet ul { 489 | list-style: none; 490 | padding: 0; 491 | margin: 0.5em 0 0; 492 | text-align: center; 493 | } 494 | 495 | #alphabet li { 496 | display: inline; 497 | margin: 0 0.25em; 498 | } 499 | 500 | #alphabet a { 501 | font-weight: bold; 502 | } 503 | 504 | #index .caption, 505 | #module-list .caption { font-size: 131%; /* 17pt */ } 506 | 507 | 
#index table { 508 | margin-left: 2em; 509 | } 510 | 511 | #index .src { 512 | font-weight: bold; 513 | } 514 | #index .alt { 515 | font-size: 77%; /* 10pt */ 516 | font-style: italic; 517 | padding-left: 2em; 518 | } 519 | 520 | #index td + td { 521 | padding-left: 1em; 522 | } 523 | 524 | #module-list ul { 525 | list-style: none; 526 | margin: 0 0 0 2em; 527 | } 528 | 529 | #module-list li { 530 | clear: right; 531 | } 532 | 533 | #module-list span.collapser, 534 | #module-list span.expander { 535 | background-position: 0 0.3em; 536 | } 537 | 538 | #module-list .package { 539 | float: right; 540 | } 541 | 542 | /* @end */ 543 | -------------------------------------------------------------------------------- /dist/doc/html/regex-genex/plus.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audreyt/regex-genex/eacf18333725e32dd6baaee8cc5b9bc709d861ca/dist/doc/html/regex-genex/plus.gif -------------------------------------------------------------------------------- /dist/doc/html/regex-genex/regex-genex.haddock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audreyt/regex-genex/eacf18333725e32dd6baaee8cc5b9bc709d861ca/dist/doc/html/regex-genex/regex-genex.haddock -------------------------------------------------------------------------------- /dist/doc/html/regex-genex/synopsis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audreyt/regex-genex/eacf18333725e32dd6baaee8cc5b9bc709d861ca/dist/doc/html/regex-genex/synopsis.png -------------------------------------------------------------------------------- /regex-genex.cabal: -------------------------------------------------------------------------------- 1 | Name : regex-genex 2 | Version : 0.7.0 3 | license : OtherLicense 4 | license-file : LICENSE 5 | cabal-version : >= 1.6 6 | copyright : 2011-2015 Audrey Tang 7 | maintainer : Audrey Tang 8 | category : Text, Regex 9 | stability : experimental 10 | build-type : Simple 11 | homepage : https://github.com/audreyt/regex-genex 12 | synopsis : From a regex, generate all possible strings it can match 13 | description : From a regex, generate all possible strings it can match 14 | author : Audrey Tang 15 | Tested-With: GHC==7.10.1 16 | 17 | library 18 | hs-source-dirs: . src 19 | exposed-modules: Regex.Genex Regex.Genex.Normalize 20 | other-modules: Regex.Genex.Pure 21 | extensions : ImplicitParams, NamedFieldPuns, ParallelListComp, PatternGuards, RecordWildCards 22 | build-depends: 23 | base >= 3 && < 5, mtl, containers, sbv >= 5 && < 6, regex-tdfa, stream-monad, text, logict 24 | 25 | executable genex 26 | main-is: Main.hs 27 | hs-source-dirs: . 
src 28 | extensions : ImplicitParams, NamedFieldPuns, ParallelListComp, PatternGuards, RecordWildCards 29 | build-depends: 30 | base >= 3 && < 5, mtl, containers, sbv >= 5 && < 6, regex-tdfa 31 | 32 | source-repository head 33 | type: git 34 | location: http://github.com/audreyt/regex-genex 35 | -------------------------------------------------------------------------------- /src/Regex/Genex.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ImplicitParams, NamedFieldPuns, ParallelListComp, PatternGuards #-} 2 | {-| 3 | 4 | This module and the accompanying 'genex' program finds all permutations 5 | of strings that matches every input regular expressions, ordered from 6 | shortest to longest, with full support for back references ('\1' .. '\9') 7 | and word boundaries ('\b'). 8 | 9 | It requires the @z3@ or @yices@ binary in PATH. The latter may be downloaded from: 10 | 11 | 12 | -} 13 | module Regex.Genex (Model(..), genex, genexPure, genexPrint, genexModels, genexWith, regexMatch) where 14 | import Data.SBV 15 | import Data.SBV.Internals (SBV) 16 | import Data.Set (toList) 17 | import Data.Monoid 18 | import Control.Monad.State 19 | import qualified Data.Char 20 | import qualified Regex.Genex.Pure as Pure 21 | import Text.Regex.TDFA.Pattern 22 | import Regex.Genex.Normalize (normalize) 23 | import Text.Regex.TDFA.ReadRegex (parseRegex) 24 | import Data.IntSet (IntSet) 25 | import qualified Data.IntSet as IntSet 26 | import Data.IntMap (IntMap) 27 | import qualified Data.IntMap as IntMap 28 | import System.IO.Unsafe (unsafeInterleaveIO) 29 | 30 | -- | Given a list of regular repressions, returns all possible strings that matches every one of them. 31 | -- Guarantees to return shorter strings before longer ones. 32 | genex :: [String] -> IO [String] 33 | genex = let ?maxRepeat = maxRepeatDefault 34 | in genexWith getString 35 | 36 | -- | A match consists of a string (list of codepoints), and a rank representing alternation order. 37 | data Model = Model 38 | { modelChars :: [Word8] 39 | , modelRank :: Word64 40 | } 41 | deriving (Show, Eq, Ord) 42 | 43 | -- | Same as 'genex', but with the entire model returned instead. 44 | genexModels :: [String] -> IO [Model] 45 | genexModels = let ?maxRepeat = maxRepeatDefault 46 | in genexWith (getStringWith id) 47 | 48 | -- | Same as 'genexModels', but print the models to standard output instead. 49 | genexPrint :: [String] -> IO () 50 | genexPrint = let ?maxRepeat = maxRepeatDefault 51 | in genexWith displayString 52 | 53 | -- | A pure and much faster variant of 'genex', but without support for 54 | -- back-references, anchors or word boundaries. 55 | -- Does not guarantee orders about length of strings. 56 | -- Does not depend on the external @yices@ SMT solver. 57 | genexPure :: [String] -> [String] 58 | genexPure = Pure.genexPure 59 | 60 | type Len = Word16 61 | type SChar = SWord8 62 | type Str = [SChar] 63 | type Offset = SBV Len 64 | type Flips = [SWord64] 65 | type Captures = SFunArray Word8 Len 66 | type Hits = Word16 67 | 68 | maxHits :: Hits 69 | maxHits = maxBound -- 65535 70 | 71 | -- controlled by an implicit parameter, but this is the default 72 | -- when instantiated from functions that do not expose the implicit 73 | -- parameter to the user 74 | maxRepeatDefault :: Int 75 | maxRepeatDefault = 3 -- 7 and 15 are also good 76 | 77 | maxLength :: Len 78 | maxLength = maxBound -- 65535 79 | 80 | -- lengths p = let ?grp = mempty in IntSet.toList . 
fst $ runState (possibleLengths $ parse p) mempty 81 | 82 | minLen :: (?maxRepeat :: Int, ?grp :: GroupLens) => Pattern -> Int 83 | minLen p = case p of 84 | PEscape {getPatternChar = ch} 85 | | Data.Char.isDigit ch -> let num = charToDigit ch in 86 | IntSet.findMin (IntMap.findWithDefault (IntSet.singleton 0) num ?grp) 87 | _ -> IntSet.findMin . fst $ runState (possibleLengths p) mempty 88 | 89 | parse :: String -> Pattern 90 | parse r = case parseRegex r of 91 | Right (pattern, _) -> pattern 92 | Left x -> error $ show x 93 | 94 | type GroupLens = IntMap IntSet 95 | type BackReferences = IntSet 96 | 97 | possibleLengths :: (?maxRepeat :: Int, ?grp :: GroupLens) => Pattern -> State (GroupLens, BackReferences) IntSet 98 | possibleLengths pat = case pat of 99 | _ | isOne pat -> one 100 | PGroup (Just idx) p -> do 101 | lenP <- possibleLengths p 102 | modify $ \(g, b) -> (IntMap.insert idx lenP g, b) 103 | return lenP 104 | PGroup _ p -> possibleLengths p 105 | PCarat{} -> zero 106 | PDollar{} -> zero 107 | PQuest p -> maybeGroup p (`mappend` zeroSet) 108 | POr ps -> fmap mconcat $ mapM possibleLengths ps 109 | PConcat [] -> zero 110 | PConcat ps -> fmap (foldl1 sumSets) (mapM possibleLengths ps) 111 | PEscape {getPatternChar = ch} 112 | | ch `elem` "ntrfaedwsWSD" -> one 113 | | ch `elem` "b" -> zero 114 | | Data.Char.isDigit ch -> do 115 | let num = charToDigit ch 116 | modify $ \(g, b) -> (g, IntSet.insert num b) 117 | gets $ (IntMap.findWithDefault (IntMap.findWithDefault (error $ "No such capture: " ++ [ch]) num ?grp) num) . fst 118 | | Data.Char.isAlpha ch -> error $ "Unsupported escape: " ++ [ch] 119 | | otherwise -> one 120 | PBound low (Just high) p -> manyTimes p low high 121 | PBound low _ p -> manyTimes p low (low + ?maxRepeat) 122 | PPlus p -> manyTimes p 1 (?maxRepeat+1) 123 | PStar _ p -> manyTimes p 0 ?maxRepeat 124 | PEmpty -> zero 125 | _ -> error $ show pat 126 | where 127 | one = return $ IntSet.singleton 1 128 | zero = return $ IntSet.singleton 0 129 | zeroSet = IntSet.singleton 0 130 | sumSets s1 s2 = IntSet.unions [ IntSet.map (+elm) s2 | elm <- IntSet.elems s1 ] 131 | manyTimes p low high = maybeGroup p $ \lenP -> IntSet.unions 132 | [ foldl sumSets (IntSet.singleton 0) (replicate i lenP) 133 | | i <- [low..high] 134 | ] 135 | maybeGroup p@(PGroup (Just idx) _) f = do 136 | lenP <- possibleLengths p 137 | let lenP' = f lenP 138 | modify $ \(g, b) -> (IntMap.insert idx lenP' g, b) 139 | return lenP' 140 | maybeGroup p f = fmap f (possibleLengths p) 141 | 142 | charToDigit :: Char -> Int 143 | charToDigit ch = Data.Char.ord ch - Data.Char.ord '0' 144 | 145 | exactMatch :: (?maxRepeat :: Int, ?pats :: [(Pattern, GroupLens)]) => Len -> Symbolic SBool 146 | exactMatch len = do 147 | str <- mkExistVars $ fromEnum len 148 | initialFlips <- mkExistVars 1 149 | captureAt <- newArray_ (Just minBound) 150 | captureLen <- newArray_ (Just minBound) 151 | let ?str = str 152 | let initialStatus = Status 153 | { ok = true 154 | , pos = strLen 155 | , flips = initialFlips 156 | , captureAt = captureAt 157 | , captureLen = captureLen 158 | } 159 | strLen = literal len 160 | runPat s (pat, groupLens) = let ?pat = pat in let ?grp = groupLens in 161 | ite (ok s &&& pos s .== strLen) 162 | (match s{ pos = 0, captureAt, captureLen }) 163 | s{ ok = false, pos = maxBound, flips = [maxBound] } 164 | let Status{ ok, pos, flips } = foldl runPat initialStatus ?pats 165 | return (bAll (.== 0) flips &&& pos .== strLen &&& ok) 166 | 167 | data Status = Status 168 | { ok :: SBool 169 | , pos :: Offset 
170 | , flips :: Flips 171 | , captureAt :: Captures 172 | , captureLen :: Captures 173 | } 174 | 175 | instance Mergeable Status where 176 | symbolicMerge f t s1 s2 = Status 177 | { ok = symbolicMerge f t (ok s1) (ok s2) 178 | , pos = symbolicMerge f t (pos s1) (pos s2) 179 | , flips = symbolicMerge f t (flips s1) (flips s2) 180 | , captureAt = symbolicMerge f t (captureAt s1) (captureAt s2) 181 | , captureLen = symbolicMerge f t (captureLen s1) (captureLen s2) 182 | } 183 | 184 | choice :: (?str :: Str, ?pat :: Pattern) => Flips -> [Flips -> Status] -> Status 185 | choice _ [] = error "X" 186 | choice flips [a] = a flips 187 | choice flips [a, b] = ite (lsb flip) (b flips') (a flips') 188 | where 189 | flip = head flips 190 | flips' = [flip `shiftR` 1] 191 | choice flips xs = select (map ($ flips') xs) (head xs [thisFlip]){ ok = false } thisFlip 192 | where 193 | bits = log2 $ length xs 194 | flips' = [head flips `shiftR` bits] 195 | thisFlip = head flips `shiftL` (64 - bits) `shiftR` (64 - bits) 196 | 197 | log2 :: Int -> Int 198 | log2 1 = 0 199 | log2 n = 1 + log2 ((n + 1) `div` 2) 200 | 201 | writeCapture :: Captures -> Int -> Offset -> Captures 202 | writeCapture cap idx val = writeArray cap (toEnum idx) val 203 | 204 | readCapture :: Captures -> Int -> Offset 205 | readCapture a = readArray a . toEnum 206 | 207 | isOne :: Pattern -> Bool 208 | isOne PChar{} = True 209 | isOne PDot{} = True 210 | isOne PAny {} = True 211 | isOne PAnyNot {} = True 212 | isOne (PGroup Nothing p) = isOne p 213 | isOne PEscape {getPatternChar = ch} 214 | | ch `elem` "ntrfaedwsWSD" = True 215 | | ch `elem` "b" = False 216 | | Data.Char.isDigit ch = False 217 | | Data.Char.isAlpha ch = error $ "Unsupported escape: " ++ [ch] 218 | | otherwise = True 219 | isOne _ = False 220 | 221 | matchOne :: (?pat :: Pattern) => SChar -> SBool 222 | matchOne cur = case ?pat of 223 | PChar {getPatternChar = ch} -> isChar ch 224 | PDot{} -> isDot 225 | PGroup Nothing p -> let ?pat = p in matchOne cur 226 | PAny {getPatternSet = pset} -> case pset of 227 | PatternSet (Just cset) _ _ _ -> oneOf $ toList cset 228 | _ -> error "TODO" 229 | PAnyNot {getPatternSet = pset} -> case pset of 230 | PatternSet (Just cset) _ _ _ -> noneOf $ toList cset 231 | _ -> error "TODO" 232 | PEscape {getPatternChar = ch} -> case ch of 233 | 'n' -> isChar '\n' 234 | 't' -> isChar '\t' 235 | 'r' -> isChar '\r' 236 | 'f' -> isChar '\f' 237 | 'a' -> isChar '\a' 238 | 'e' -> isChar '\ESC' 239 | 'd' -> isDigit 240 | 'w' -> isWordChar 241 | 's' -> isWhiteSpace 242 | 'W' -> (isDot &&& bnot isWordChar) 243 | 'S' -> (isDot &&& bnot isWhiteSpace) 244 | 'D' -> (isDot &&& bnot isDigit) 245 | _ -> isChar ch 246 | _ -> false 247 | where 248 | ord = toEnum . 
Data.Char.ord 249 | isChar ch = cur .== ord ch 250 | isDot = (cur .>= ord ' ' &&& cur .<= ord '~') 251 | oneOf cs = bOr [ ord ch .== cur | ch <- cs ] 252 | noneOf cs = bAnd ((cur .>= ord ' ') : (cur .<= ord '~') : [ ord ch ./= cur | ch <- cs ]) 253 | isDigit = (ord '0' .<= cur &&& ord '9' .>= cur) 254 | isWordChar = (cur .>= ord 'A' &&& cur .<= ord 'Z') 255 | ||| (cur .>= ord 'a' &&& cur .<= ord 'z') 256 | ||| (cur .== ord '_') 257 | isWhiteSpace = cur .== 32 ||| (9 .<= cur &&& 13 .>= cur &&& 11 ./= cur) 258 | 259 | 260 | match :: (?maxRepeat :: Int, ?str :: Str, ?pat :: Pattern, ?grp :: GroupLens) => Status -> Status 261 | match s@Status{ pos, flips, captureAt, captureLen } 262 | | isOne ?pat = ite (pos .>= strLen) __FAIL__ one 263 | | otherwise = ite (pos + (toEnum $ minLen ?pat) .> strLen) __FAIL__ $ case ?pat of 264 | PGroup (Just idx) p -> let s'@Status{ pos = pos', ok = ok' } = next p in 265 | ite ok' (s' 266 | { captureAt = writeCapture captureAt idx pos 267 | , captureLen = writeCapture captureLen idx (pos' - pos) 268 | }) __FAIL__ 269 | PGroup _ p -> next p 270 | PCarat{} -> ite (isBegin ||| (charAt (pos-1) .== ord '\n')) s __FAIL__ 271 | PDollar{} -> ite (isEnd ||| (charAt (pos+1) .== ord '\n')) s __FAIL__ 272 | PQuest p -> choice flips [\b -> let ?pat = p in match s{ flips = b }, \b -> s{ flips = b }] 273 | POr [p] -> next p 274 | POr ps -> choice flips $ map (\p -> \b -> let ?pat = p in match s{ flips = b }) ps 275 | PConcat [] -> s 276 | PConcat [p] -> next p 277 | PConcat ps 278 | | all isOne ps -> ite ( 279 | (bAnd [ let ?pat = p in matchOne (charAt (pos+i)) 280 | | p <- ps 281 | | i <- [0..] 282 | ]) 283 | ) s{ pos = pos + toEnum (length ps) } __FAIL__ 284 | | (ones@(_:_:_), rest) <- span isOne ps -> step [PConcat ones, PConcat rest] s 285 | | (nones@(_:_), rest@(_:_:_)) <- span (not . 
isOne) ps -> step (nones ++ [PConcat rest]) s 286 | | otherwise -> step ps s 287 | where 288 | step [] s' = s' 289 | step (p':ps') s' = 290 | let s''@Status{ ok } = (let ?pat = p' in match s') 291 | res = step ps' s'' 292 | in ite ok res __FAIL__ 293 | PEscape {getPatternChar = ch} -> case ch of 294 | 'b' -> ite isWordBoundary s __FAIL__ 295 | _ | Data.Char.isDigit ch -> 296 | let from = readCapture captureAt num 297 | Just defaultLen = IntMap.lookup num ?grp 298 | possibleLens = IntSet.toList defaultLen 299 | len = case possibleLens of 300 | [] -> 0 301 | [l] -> toEnum l 302 | _ -> readCapture captureLen num 303 | num = charToDigit ch 304 | in ite (matchCapture (from :: Offset) len 0) s{ pos = pos+len } __FAIL__ 305 | | Data.Char.isAlpha ch -> error $ "Unsupported escape: " ++ [ch] 306 | | otherwise -> cond (ord ch .== cur) 307 | PBound low (Just high) p -> let s'@Status{ ok = ok' } = (let ?pat = PConcat (replicate low p) in match s) in 308 | if low == high then s' else ite ok' (let ?pat = p in (manyTimes s' $ high - low)) s' 309 | PBound low _ p -> let ?pat = (PBound low (Just $ low + ?maxRepeat) p) in match s 310 | PPlus p -> 311 | let s'@Status{ok} = next p 312 | res = let ?pat = PStar True p in match s' 313 | in ite ok res s' 314 | PStar _ p -> next $ PBound 0 Nothing p 315 | PEmpty -> s 316 | _ -> error $ show ?pat 317 | where 318 | one = cond $ matchOne cur 319 | next p = let ?pat = p in match s 320 | strLen = toEnum (length ?str) 321 | manyTimes :: (?pat :: Pattern) => Status -> Int -> Status 322 | manyTimes s'@Status{ flips = flips' } n 323 | | n <= 0 = s' 324 | | otherwise = choice flips' [\b -> s'{ flips = b }, nextTime] 325 | where 326 | nextTime b = let s''@Status{ ok = ok'', pos = pos'' } = match s'{ flips = b } in 327 | ite (pos'' .<= strLen &&& ok'') (manyTimes s'' (n-1)) s'' 328 | 329 | cur = charAt pos 330 | charAt = select ?str 0 331 | cond b = ite b s{ pos = pos+1 } __FAIL__ 332 | ord = toEnum . Data.Char.ord 333 | matchCapture :: Offset -> Offset -> Int -> SBool 334 | matchCapture from len n 335 | | n >= (length ?str) = true 336 | | otherwise = (len .<= off) ||| (charAt (pos+off) .== charAt (from+off) &&& matchCapture from len (n+1)) 337 | where 338 | off = toEnum n 339 | __FAIL__ = s{ ok = false, pos = maxBound, flips = [maxBound] } 340 | isEnd = (pos .== toEnum (length ?str)) 341 | isBegin = (pos .== 0) 342 | isWordCharAt at = let char = charAt at in 343 | (char .>= ord 'A' &&& char .<= ord 'Z') 344 | ||| 345 | (char .>= ord 'a' &&& char .<= ord 'z') 346 | ||| 347 | (char .== ord '_') 348 | isWordBoundary = case length ?str of 349 | 0 -> false 350 | _ -> (isEnd &&& isWordCharAt (pos-1)) ||| 351 | (isBegin &&& isWordCharAt pos) ||| 352 | (isWordCharAt (pos-1) <+> isWordCharAt pos) 353 | 354 | 355 | displayString :: [SatResult] -> Hits -> (Hits -> IO ()) -> IO () 356 | displayString [] a next = next a 357 | displayString (r:rs) a next = do 358 | let Right (_, (chars, rank)) = getModel r 359 | putStr $ show (length (chars :: [Word8])) ++ "." 360 | let n = show (rank :: Word64) 361 | putStr (replicate (8 - length n) '0') 362 | putStr n 363 | putStr "\t\t" 364 | print $ map chr chars 365 | if (a+1 >= maxHits) then return () else 366 | displayString rs (a+1) next 367 | where 368 | chr = Data.Char.chr . 
fromEnum 369 | 370 | genexWith :: (?maxRepeat :: Int, Monoid a) => ([SatResult] -> Hits -> (Hits -> IO a) -> IO a) -> [[Char]] -> IO a 371 | genexWith f regexes = do 372 | let ?grp = mempty 373 | let p'lens = [ ((p', groupLens), lens) 374 | | p <- [ if r == "" then PEmpty else parse r | r <- regexes ] 375 | , let (lens, (groupLens, backRefs)) = runState (possibleLengths p) mempty 376 | , let p' = normalize backRefs p 377 | ] 378 | let ?pats = map fst p'lens 379 | let lens = IntSet.toAscList $ foldl1 IntSet.intersection (map snd p'lens) 380 | tryWith f (filter (<= maxLength) $ map toEnum lens) 0 381 | 382 | tryWith :: (?maxRepeat :: Int, ?pats :: [(Pattern, GroupLens)]) => 383 | Monoid a => ResultHandler a -> [Len] -> Hits -> IO a 384 | tryWith _ [] _ = return mempty 385 | tryWith f (len:lens) acc = if len > maxLength then return mempty else do 386 | AllSatResult (_, allRes) <- allSat $ exactMatch len 387 | f (map SatResult allRes) acc $ tryWith f lens 388 | 389 | type ResultHandler a = [SatResult] -> Hits -> (Hits -> IO a) -> IO a 390 | 391 | getStringWith :: (Model -> a) -> [SatResult] -> Hits -> (Hits -> IO [a]) -> IO [a] 392 | getStringWith _ [] a next = next a 393 | getStringWith f (r:rs) a next = do 394 | let Right (_, (chars, rank)) = getModel r 395 | rest <- if (a+1 >= maxHits) then return [] else 396 | unsafeInterleaveIO $ getStringWith f rs (a+1) next 397 | return (f (Model chars rank):rest) 398 | 399 | getString :: [SatResult] -> Hits -> (Hits -> IO [String]) -> IO [String] 400 | getString = getStringWith $ \Model{ modelChars } -> map chr modelChars 401 | where 402 | chr = Data.Char.chr . fromEnum 403 | 404 | -- Given a regex and a symbolic string, returns true if regex matches the string 405 | regexMatch :: (?maxRepeat :: Int) => [[Char]] -> Str -> Symbolic SBool 406 | regexMatch regexes str = do 407 | let ?grp = mempty 408 | let p'lens = [ ((p', groupLens), lens) 409 | | p <- [ if r == "" then PEmpty else parse r | r <- regexes ] 410 | , let (lens, (groupLens, backRefs)) = runState (possibleLengths p) mempty 411 | , let p' = normalize backRefs p 412 | ] 413 | let ?pats = map fst p'lens 414 | let lens = IntSet.toAscList $ foldl1 IntSet.intersection (map snd p'lens) 415 | initialFlips <- mkExistVars 1 416 | captureAt <- newArray_ (Just minBound) 417 | captureLen <- newArray_ (Just minBound) 418 | let ?str = str 419 | let strLen = literal (fromIntegral (length str)) 420 | let initialStatus = Status 421 | { ok = true 422 | , pos = strLen 423 | , flips = initialFlips 424 | , captureAt = captureAt 425 | , captureLen = captureLen 426 | } 427 | runPat s (pat, groupLens) = let ?pat = pat in let ?grp = groupLens in 428 | ite (ok s &&& pos s .== strLen) 429 | (match s{ pos = 0, captureAt, captureLen }) 430 | s{ ok = false, pos = maxBound, flips = [maxBound] } 431 | let Status{ ok, pos, flips } = foldl runPat initialStatus ?pats 432 | return (bAll (.== 0) flips &&& pos .== strLen &&& ok) 433 | -------------------------------------------------------------------------------- /src/Regex/Genex/Normalize.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ImplicitParams, NamedFieldPuns, PatternGuards #-} 2 | module Regex.Genex.Normalize (normalize) where 3 | import Data.Set (toList, Set) 4 | import Text.Regex.TDFA.Pattern 5 | import Text.Regex.TDFA.ReadRegex (parseRegex) 6 | import Data.IntSet (IntSet) 7 | import qualified Data.IntSet as IntSet 8 | import qualified Data.Set as Set 9 | 10 | type BackReferences = IntSet 11 | 12 | -- | Normalize a 
regex into @strong star normal form@, as defined in the paper 13 | -- @Simplifying Regular Expressions: A Quantitative Perspective@. 14 | normalize :: BackReferences -> Pattern -> Pattern 15 | normalize refs p = black $ let ?refs = refs in simplify p 16 | 17 | nullable :: Pattern -> Bool 18 | nullable pat = case pat of 19 | PGroup _ p -> nullable p 20 | PQuest{} -> True 21 | POr ps -> any nullable ps 22 | PConcat ps -> all nullable ps 23 | PBound 0 _ _ -> True 24 | PBound _ _ _ -> False 25 | PStar{} -> True 26 | PEmpty -> True 27 | _ -> False 28 | 29 | white :: Pattern -> Pattern 30 | white pat = case pat of 31 | PQuest p -> white p 32 | PStar _ p -> white p 33 | PGroup x p -> PGroup x $ white p 34 | POr ps -> POr (map white ps) 35 | PConcat ps -> if nullable pat 36 | then POr (map white ps) 37 | else pat 38 | PPlus p -> if nullable pat 39 | then PConcat [p, white p] 40 | else pat 41 | _ -> pat 42 | 43 | black :: Pattern -> Pattern 44 | black pat = case pat of 45 | POr ps -> POr (map black ps) 46 | PConcat ps -> PConcat (map black ps) 47 | PGroup x p -> PGroup x $ black p 48 | PStar x p -> PStar x $ white (black p) 49 | PPlus p -> PConcat [p, PStar (nullable p) (white $ black p)] 50 | PBound 0 Nothing p -> PStar (nullable p) (white $ black p) 51 | PBound x Nothing p -> PConcat [PBound x (Just x) p, PStar (nullable p) (white $ black p)] 52 | PBound x y p -> PBound x y $ black p 53 | PQuest p -> if nullable p 54 | then black p 55 | else PQuest $ black p 56 | _ -> pat 57 | 58 | _parse :: String -> Pattern 59 | _parse r = case parseRegex r of 60 | Right (pattern, _) -> pattern 61 | Left x -> error $ show x 62 | 63 | foldChars :: (Set Char, [Pattern]) -> Pattern -> (Set Char, [Pattern]) 64 | foldChars (cset, rest) pat = case pat of 65 | PChar { getPatternChar = ch } -> (Set.insert ch cset, rest) 66 | PAny {getPatternSet = PatternSet (Just cset') _ _ _} -> (Set.union cset cset', rest) 67 | _ -> (cset, pat:rest) 68 | 69 | simplify :: (?refs :: BackReferences) => Pattern -> Pattern 70 | simplify pat = case pat of 71 | PGroup (Just idx) p -> if idx `IntSet.member` ?refs then PGroup (Just idx) (simplify p) else simplify p 72 | PGroup _ p -> simplify p 73 | PQuest p -> case simplify p of 74 | PEmpty -> PEmpty 75 | p' -> PQuest p' 76 | PAny {getPatternSet = pset, getDoPa} -> case pset of 77 | PatternSet (Just cset) _ _ _ -> case toList cset of 78 | [ch] -> PChar { getPatternChar = ch, getDoPa } 79 | _ -> pat 80 | _ -> pat 81 | POr [] -> PEmpty 82 | POr [p] -> simplify p 83 | POr ps -> let ps' = map simplify ps in 84 | case foldl foldChars (Set.empty, []) ps' of 85 | (cset, rest) 86 | | null rest -> anySet 87 | | Set.null cset -> POr rest 88 | | [r] <- rest -> POr [anySet, r] 89 | | otherwise -> POr [anySet, POr rest] 90 | where 91 | anySet = case Set.size cset of 92 | 1 -> PChar { getPatternChar = Set.findMin cset, getDoPa = toEnum 0 } 93 | _ -> PAny { getPatternSet = PatternSet (Just cset) Nothing Nothing Nothing, getDoPa = toEnum 0 } 94 | PConcat [] -> PEmpty 95 | PConcat [p] -> simplify p 96 | PConcat ps -> case concatMap (fromConcat . 
simplify) ps of 97 | [] -> PEmpty 98 | ps' -> PConcat ps' 99 | where 100 | fromConcat (PConcat ps') = ps' 101 | fromConcat PEmpty = [] 102 | fromConcat p = [p] 103 | PBound low (Just high) p 104 | | high == low -> simplify $ PConcat (replicate low (simplify p)) 105 | PBound low high p -> PBound low high (simplify p) 106 | PPlus p -> PPlus (simplify p) 107 | PStar x p -> PStar x (simplify p) 108 | _ -> pat 109 | 110 | -------------------------------------------------------------------------------- /src/Regex/Genex/Pure.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE RecordWildCards, NamedFieldPuns #-} 2 | module Regex.Genex.Pure (genexPure) where 3 | import qualified Data.Text as T 4 | import qualified Data.IntSet as IntSet 5 | import qualified Data.Set as Set 6 | import Data.List (intersect, (\\)) 7 | import Control.Monad 8 | import Control.Monad.Stream 9 | import qualified Control.Monad.Stream as Stream 10 | import Regex.Genex.Normalize (normalize) 11 | import Debug.Trace 12 | import Text.Regex.TDFA.Pattern 13 | import Text.Regex.TDFA.ReadRegex (parseRegex) 14 | import Control.Monad.State 15 | import Control.Applicative 16 | 17 | parse :: String -> Pattern 18 | parse r = case parseRegex r of 19 | Right (pattern, _) -> pattern 20 | Left x -> error $ show x 21 | 22 | genexPure :: [String] -> [String] 23 | genexPure = map T.unpack . foldl1 intersect . map (Stream.runStream . run . normalize IntSet.empty . parse) 24 | 25 | maxRepeat :: Int 26 | maxRepeat = 10 27 | 28 | each = foldl1 (<|>) . map return 29 | 30 | run :: Pattern -> Stream T.Text 31 | run p = case p of 32 | PEmpty -> pure T.empty 33 | PChar{..} -> isChar getPatternChar 34 | PAny {getPatternSet = PatternSet (Just cset) _ _ _} -> each $ map T.singleton $ Set.toList cset 35 | PAnyNot {getPatternSet = PatternSet (Just cset) _ _ _} -> chars $ notChars $ concatMap expandEscape $ Set.toList cset 36 | PQuest p -> pure T.empty <|> run p 37 | PPlus p -> run $ PBound 1 Nothing p 38 | PStar _ p -> run $ PBound 0 Nothing p 39 | PBound low high p -> do 40 | n <- each [low..maybe (low+maxRepeat) id high] 41 | fmap T.concat . sequence $ replicate n (run p) 42 | PConcat ps -> fmap T.concat . suspended . sequence $ map run ps 43 | POr xs -> foldl1 mplus $ map run xs 44 | PDot{} -> chars $ notChars [] 45 | PEscape {..} -> chars $ expandEscape getPatternChar 46 | _ -> error $ show p 47 | where 48 | isChar = return . T.singleton 49 | chars = each . map T.singleton 50 | notChars = ([' '..'~'] \\) 51 | expandEscape ch = case ch of 52 | 'n' -> "\n" 53 | 't' -> "\t" 54 | 'r' -> "\r" 55 | 'f' -> "\f" 56 | 'a' -> "\a" 57 | 'e' -> "\ESC" 58 | 'd' -> ['0'..'9'] 59 | 'w' -> ['0'..'9'] ++ '_' : ['a'..'z'] ++ ['A'..'Z'] 60 | 's' -> "\9\32" 61 | 'D' -> notChars $ ['0'..'9'] 62 | 'W' -> notChars $ ['0'..'9'] ++ '_' : ['a'..'z'] ++ ['A'..'Z'] 63 | 'S' -> notChars "\9\32" 64 | ch -> [ch] 65 | --------------------------------------------------------------------------------