├── .github
    └── workflows
    │   └── haskell.yml
├── .gitignore
├── .travis.yml
├── ChangeLog.md
├── LICENSE
├── README.md
├── Setup.hs
├── bench
    ├── Bench.hs
    └── data
    │   ├── bible.txt
    │   ├── big.txt
    │   └── small-bible.txt
├── examples
    └── LogParser.hs
├── lens-regex-pcre.cabal
├── mac-build.sh
├── package.yaml
├── src
    └── Control
    │   └── Lens
    │       └── Regex
    │           ├── ByteString.hs
    │           └── Text.hs
├── stack.yaml
├── stack.yaml.lock
└── test
    ├── ByteString.hs
    ├── Spec.hs
    └── Text.hs


/.github/workflows/haskell.yml:
--------------------------------------------------------------------------------
 1 | name: Haskell CI
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   build:
 7 | 
 8 |     runs-on: ubuntu-latest
 9 | 
10 |     steps:
11 |     - uses: actions/checkout@v1
12 |     - uses: actions/setup-haskell@v1
13 |       with:
14 |         ghc-version: '8.6.5'
15 |         cabal-version: '3.0'
16 |     - name: Install dependencies
17 |       run: |
18 |         cabal update
19 |         cabal new-build --only-dependencies
20 |     - name: Build
21 |       run: |
22 |         cabal new-configure --enable-tests
23 |         cabal new-build
24 |     - name: Run tests
25 |       run: cabal new-test
26 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .stack-work/
2 | *~
3 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: haskell
 2 | dist: trusty
 3 | 
 4 | cache:
 5 |   directories:
 6 |     - $HOME/.cabal/store
 7 | 
 8 | cabal: "2.4"
 9 | 
10 | matrix:
11 |   include:
12 |     - ghc: "8.6.5"
13 |     - ghc: "8.4.4"
14 | 
15 | install:
16 |  - cabal --version
17 |  - ghc --version
18 | 
19 | script:
20 |  - cabal v2-update
21 |  - cabal v2-build
22 |  - cabal v2-test --enable-test
23 |  - cabal new-haddock
24 |  - cabal check
25 |  - cabal sdist   # tests that a source-distribution can be generated
26 | 
27 | # Check that the resulting source distribution can be built & installed.
28 | # If there are no other `.tar.gz` files in `dist`, this can be even simpler:
29 | # `cabal install --force-reinstalls dist/*-*.tar.gz`
30 | # - SRC_TGZ=$(cabal info . | awk '{print $2;exit}').tar.gz &&
31 | #   (cd dist && cabal install --force-reinstalls "$SRC_TGZ")
32 | 


--------------------------------------------------------------------------------
/ChangeLog.md:
--------------------------------------------------------------------------------
 1 | # Changelog for lens-regex-pcre
 2 | 
 3 | # 1.0.1.0
 4 | 
 5 | ### *BREAKING CHANGES*
 6 | This release fixes a pretty major bugs surrounding the behaviour of optional groups. It's **unlikely** but still possible that the change to grouping behaviour has changed the behaviour of your application, but most likely it just fixed some bugs you didn't know you had yet...
 7 | 
 8 | - Handle optional or alternated groups like `pcre-heavy`. This may change group behaviour on regular expressions which had groups with optional groups. E.g.:
 9 |     - `A(x)?(B)`
10 |     - `(A)|(B)|(C)`
11 | - Switch `groups` from `IndexedTraversal'` to `IndexedLens'`. Since all lenses are valid traversals this shouldn't cause any breakages.
12 | - Add `namedGroups` and `namedGroup`
13 | 
14 | # 1.0.0.0
15 | - Add `regexing` and `makeRegexTraversalQQ`
16 | - Replace `regex` traversal maker with `regex` QuasiQuoter
17 | - Split Control.Lens.Regex into Control.Lens.Regex.Text and Control.Lens.Regex.ByteString
18 | - Move regexBS to `Control.Lens.Regex.ByteString.regex`
19 | - Change whole implementation to use ByteString Builders for a massive speedup
20 | - Monomorphise `Match text` -> `Match`
21 | - Add groups to index of `match` and match to index of `groups` & `group`
22 | - Add `group = groups . ix n` for accessing a single group.
23 | 
24 | # 0.3.1.0 
25 | - Match -> Match text
26 | - Added regexBS to run regex on ByteStrings directly
27 | 
28 | # 0.3.0.0 
29 | - Unify `iregex` into `regex` as a single indexed traversal
30 | 
31 | # 0.2.0.0 
32 | - Unify `grouped`, `groups`, and `igroups` into just `groups` with optional traversal
33 | 
34 | # 0.1.1.0 
35 | - Adds `grouped` and `matchAndGroups`
36 | 
37 | # 0.1.0.1 
38 | - Doc fixes
39 | 
40 | # 0.1.0.0 
41 | - Initial Release
42 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright Author name here (c) 2019
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 | 
11 |     * Redistributions in binary form must reproduce the above
12 |       copyright notice, this list of conditions and the following
13 |       disclaimer in the documentation and/or other materials provided
14 |       with the distribution.
15 | 
16 |     * Neither the name of Author name here nor the names of other
17 |       contributors may be used to endorse or promote products derived
18 |       from this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # lens-regex-pcre
  2 | 
  3 | [Hackage and Docs](http://hackage.haskell.org/package/lens-regex-pcre)
  4 | 
  5 | Based on `pcre-heavy`; so it should support any regexes or options which it supports.
  6 | 
  7 | Performance is [equal, sometimes **better**](https://github.com/ChrisPenner/lens-regex-pcre#performance) than that of `pcre-heavy` alone.
  8 | 
  9 | Which module should you use?
 10 | 
 11 | If you need unicode support, use `Control.Lens.Regex.Text`, if not then `Control.Lens.Regex.ByteString` is faster.
 12 | 
 13 | Working with Regexes in Haskell kinda sucks; it's tough to figure out which libs
 14 | to use, and even after you pick one it's tough to figure out how to use it; `lens-regex-pcre` hopes to replace most other solutions by being fast, easy to set up, more adaptable with a more consistent interface.
 15 | 
 16 | It helps that there are already HUNDREDS of combinators which interop with lenses :smile:.
 17 | 
 18 | As it turns out; regexes are a very lens-like tool; Traversals allow you to select
 19 | and alter zero or more matches; traversals can even carry indexes so you know which match or group you're working
 20 | on.
 21 | 
 22 | # Examples
 23 | 
 24 | ```haskell
 25 | import Control.Lens.Regex.Text
 26 | 
 27 | txt :: Text
 28 | txt = "raindrops on roses and whiskers on kittens"
 29 | 
 30 | -- Search
 31 | >>> has [regex|whisk|] txt
 32 | True
 33 | 
 34 | -- Get matches
 35 | >>> txt ^.. [regex|\br\w+|] . match
 36 | ["raindrops","roses"]
 37 | 
 38 | -- Edit matches
 39 | >>> txt & [regex|\br\w+|] . match %~ T.intersperse '-' . T.toUpper
 40 | "R-A-I-N-D-R-O-P-S on R-O-S-E-S and whiskers on kittens"
 41 | 
 42 | -- Get Groups
 43 | >>> txt ^.. [regex|(\w+) on (\w+)|] . groups
 44 | [["raindrops","roses"],["whiskers","kittens"]]
 45 | 
 46 | -- Edit Groups
 47 | >>> txt & [regex|(\w+) on (\w+)|] . groups %~ reverse
 48 | "roses on raindrops and kittens on whiskers"
 49 | 
 50 | -- Get the third match
 51 | >>> txt ^? [regex|\w+|] . index 2 . match
 52 | Just "roses"
 53 | 
 54 | -- Match integers, 'Read' them into ints, then sort them in-place
 55 | -- dumping them back into the source text afterwards.
 56 | >>> "Monday: 29, Tuesday: 99, Wednesday: 3" 
 57 |    & partsOf ([regex|\d+|] . match . unpacked . _Show @Int) %~ sort
 58 | "Monday: 3, Tuesday: 29, Wednesday: 99"
 59 | 
 60 | ```
 61 | 
 62 | Basically anything you want to do is possible somehow.
 63 | 
 64 | # Performance
 65 | 
 66 | See the [benchmarks](https://github.com/ChrisPenner/lens-regex-pcre/blob/master/bench/Bench.hs).
 67 | 
 68 | ## Summary
 69 | 
 70 | Caveat: I'm by no means a benchmarking expert; if you have tips on how to do this better I'm all ears!
 71 | 
 72 | * **Search** `lens-regex-pcre` is *marginally* slower than `pcre-heavy`, but well within acceptable margins (within 0.6%)
 73 | * **Replace** `lens-regex-pcre` beats `pcre-heavy` by ~10%
 74 | * **Modify** `pcre-heavy` doesn't support this operation at all, so I guess `lens-regex-pcre` wins here :)
 75 | 
 76 | How can it possibly be **faster** if it's based on `pcre-heavy`? `lens-regex-pcre` only uses `pcre-heavy` for **finding** the matches, not substitution/replacement. After that it splits the text into chunks and traverses over them with whichever operation you've chosen. The nature of this implementation makes it a lot easier to understand than imperative implementations of the same thing. This means it's pretty easy to make edits, and is also the reason we can support arbitrary traversals/actions. It was easy enough, so I went ahead and made the whole thing use ByteString Builders, which sped it up a lot. I suspect that `pcre-heavy` can benefit from the same optimization if anyone feels like back-porting it; it could be (almost) as nicely using simple `traverse` without any lenses. The whole thing is only about 25 LOC.
 77 | 
 78 | I'm neither a benchmarks nor stats person, so please open an issue if anything here seems fishy.
 79 | 
 80 | Without `pcre-light` and `pcre-heavy` this library wouldn't be possible, so huge thanks to all contributors!
 81 | 
 82 | Here are the benchmarks on my 2013 Macbook (2.6 Ghz i5)
 83 | 
 84 | ```haskell
 85 | benchmarking static pattern search/pcre-heavy ... took 20.78 s, total 56 iterations
 86 | benchmarked static pattern search/pcre-heavy
 87 | time                 375.3 ms   (372.0 ms .. 378.5 ms)
 88 |                      1.000 R²   (0.999 R² .. 1.000 R²)
 89 | mean                 378.1 ms   (376.4 ms .. 380.8 ms)
 90 | std dev              3.747 ms   (922.3 μs .. 5.609 ms)
 91 | 
 92 | benchmarking static pattern search/lens-regex-pcre ... took 20.79 s, total 56 iterations
 93 | benchmarked static pattern search/lens-regex-pcre
 94 | time                 379.5 ms   (376.2 ms .. 382.4 ms)
 95 |                      1.000 R²   (1.000 R² .. 1.000 R²)
 96 | mean                 377.3 ms   (376.5 ms .. 378.4 ms)
 97 | std dev              1.667 ms   (1.075 ms .. 2.461 ms)
 98 | 
 99 | benchmarking complex pattern search/pcre-heavy ... took 95.95 s, total 56 iterations
100 | benchmarked complex pattern search/pcre-heavy
101 | time                 1.741 s    (1.737 s .. 1.746 s)
102 |                      1.000 R²   (1.000 R² .. 1.000 R²)
103 | mean                 1.746 s    (1.744 s .. 1.749 s)
104 | std dev              4.499 ms   (3.186 ms .. 6.080 ms)
105 | 
106 | benchmarking complex pattern search/lens-regex-pcre ... took 97.26 s, total 56 iterations
107 | benchmarked complex pattern search/lens-regex-pcre
108 | time                 1.809 s    (1.736 s .. 1.908 s)
109 |                      0.996 R²   (0.991 R² .. 1.000 R²)
110 | mean                 1.757 s    (1.742 s .. 1.810 s)
111 | std dev              42.83 ms   (11.51 ms .. 70.69 ms)
112 | 
113 | benchmarking simple replacement/pcre-heavy ... took 23.32 s, total 56 iterations
114 | benchmarked simple replacement/pcre-heavy
115 | time                 423.8 ms   (422.4 ms .. 425.3 ms)
116 |                      1.000 R²   (1.000 R² .. 1.000 R²)
117 | mean                 424.0 ms   (422.9 ms .. 426.2 ms)
118 | std dev              2.684 ms   (1.239 ms .. 4.270 ms)
119 | 
120 | benchmarking simple replacement/lens-regex-pcre ... took 20.84 s, total 56 iterations
121 | benchmarked simple replacement/lens-regex-pcre
122 | time                 382.8 ms   (374.3 ms .. 391.5 ms)
123 |                      0.999 R²   (0.999 R² .. 1.000 R²)
124 | mean                 378.2 ms   (376.3 ms .. 381.0 ms)
125 | std dev              3.794 ms   (2.577 ms .. 5.418 ms)
126 | 
127 | benchmarking complex replacement/pcre-heavy ... took 24.77 s, total 56 iterations
128 | benchmarked complex replacement/pcre-heavy
129 | time                 448.1 ms   (444.7 ms .. 450.0 ms)
130 |                      1.000 R²   (1.000 R² .. 1.000 R²)
131 | mean                 450.8 ms   (449.5 ms .. 453.9 ms)
132 | std dev              3.129 ms   (947.0 μs .. 4.841 ms)
133 | 
134 | benchmarking complex replacement/lens-regex-pcre ... took 21.99 s, total 56 iterations
135 | benchmarked complex replacement/lens-regex-pcre
136 | time                 399.9 ms   (398.4 ms .. 402.2 ms)
137 |                      1.000 R²   (1.000 R² .. 1.000 R²)
138 | mean                 399.6 ms   (399.0 ms .. 400.4 ms)
139 | std dev              1.135 ms   (826.2 μs .. 1.604 ms)
140 | 
141 | Benchmark lens-regex-pcre-bench: FINISH
142 | ```
143 | 
144 | # Behaviour
145 | 
146 | Precise Expected behaviour (and examples) can be found in the test suites:
147 | 
148 | * [ByteString tests](https://github.com/ChrisPenner/lens-regex-pcre/blob/master/test/ByteString.hs)
149 | * [Text tests](https://github.com/ChrisPenner/lens-regex-pcre/blob/master/test/Text.hs)
150 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/bench/Bench.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE QuasiQuotes #-}
 2 | {-# LANGUAGE OverloadedStrings #-}
 3 | 
 4 | import Gauge.Benchmark
 5 | import Gauge.Main
 6 | import Data.ByteString as BS
 7 | import qualified Text.Regex.PCRE.Heavy as PCRE
 8 | import Control.Lens
 9 | import Control.Lens.Regex.ByteString
10 | 
11 | main :: IO ()
12 | main = do
13 |     srcFile <- BS.readFile "./bench/data/small-bible.txt"
14 |     defaultMain
15 |       [ bgroup "static pattern search"
16 |         [ bench "pcre-heavy" $ nf (heavySearch [PCRE.re|Moses|]) srcFile
17 |         , bench "lens-regex-pcre" $ nf (lensSearch [PCRE.re|Moses|]) srcFile
18 |         ]
19 |       , bgroup "complex pattern search"
20 |         [ bench "pcre-heavy" $ nf (heavySearch [PCRE.re|l\w+e|]) srcFile
21 |         , bench "lens-regex-pcre" $ nf (lensSearch [PCRE.re|l\w+e|]) srcFile
22 |         ]
23 |       , bgroup "simple replacement"
24 |         [ bench "pcre-heavy" $ nf (heavyReplace [PCRE.re|Moses|] "Jarvis") srcFile
25 |         , bench "lens-regex-pcre" $ nf (lensReplace [PCRE.re|Moses|] "Jarvis") srcFile
26 |         ]
27 |       , bgroup "complex replacement"
28 |         [ bench "pcre-heavy" $ nf (heavyReplace [PCRE.re|M\w*s\w*s|] "Jarvis") srcFile
29 |         , bench "lens-regex-pcre" $ nf (lensReplace [PCRE.re|M\w*s\w*s|] "Jarvis") srcFile
30 |         ]
31 |       ]
32 | 
33 | heavySearch :: Regex -> BS.ByteString -> [BS.ByteString]
34 | heavySearch pat src = fst <$> PCRE.scan pat src
35 | 
36 | lensSearch :: Regex -> BS.ByteString -> [BS.ByteString]
37 | lensSearch pat src = src ^.. regexing pat . match
38 | 
39 | heavyReplace :: Regex -> BS.ByteString -> BS.ByteString -> BS.ByteString
40 | heavyReplace pat replacement src = PCRE.gsub pat replacement src
41 | 
42 | lensReplace :: Regex -> BS.ByteString -> BS.ByteString -> BS.ByteString
43 | lensReplace pat replacement src = src & regexing pat . match .~ replacement
44 | 


--------------------------------------------------------------------------------
/examples/LogParser.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE OverloadedStrings #-}
 2 | {-# LANGUAGE QuasiQuotes #-}
 3 | module LogParser where
 4 | 
 5 | import Control.Lens
 6 | import Control.Lens.Regex
 7 | import Text.RawString.QQ
 8 | import Data.Text as T
 9 | import Text.Read
10 | 
11 | logs :: Text
12 | logs =[r|<Error> <App> Config-file not found
13 | <Error> <Kafka> No network access
14 | <Info> <App> Waldo logged in
15 | <Warn> <Kafka> 90% of disk space used
16 | <Info> <App> Carmen logged out
17 | <Error> <Kafka> Something really bad happened|]
18 | 
19 | -- define types to represent our log messages
20 | data LogLevel = Info | Warn | Error deriving (Show, Read, Eq)
21 | data LogService = Kafka | App deriving (Show, Read, Eq)
22 | data Log =
23 |     Log
24 |     { logLevel :: LogLevel
25 |     , logService :: LogService
26 |     , logText  :: Text
27 |     } deriving Show
28 | 
29 | -- Our parser is just a prism from a list of groups to our type!
30 | _Log :: Prism' [Text] Log
31 | _Log = prism' toGroups toLog
32 |   where
33 |     toGroups (Log level service msgText) = [pack (show level), pack (show service), msgText]
34 |     toLog [levelText, serviceText, msgText] = do
35 |         Log <$> readMaybe (unpack levelText)
36 |             <*> readMaybe (unpack serviceText)
37 |             <*> pure msgText
38 |     toLog _ = Nothing
39 | 
40 | logPattern :: Regex
41 | logPattern = [rx|<([\w]+)> <(\w+)> (.*)|]
42 | 
43 | -- Now you're ready to hack'n'slash!
44 | allInfos :: [Log]
45 | allInfos = logs ^.. regex logPattern . groups . _Log . filtered ((== Info) . logLevel)
46 | -- [ Log {logLevel = Info, logService = App, logText = "Waldo logged in"}
47 | -- , Log {logLevel = Info, logService = App, logText = "Carmen logged out"}]
48 | 
49 | allKafkaErrorMessages :: [Text]
50 | allKafkaErrorMessages =
51 |     logs
52 |     ^.. regex logPattern
53 |     . groups
54 |     . _Log
55 |     . filtered ((== Error) . logLevel)
56 |     . filtered ((== Kafka) . logService)
57 |     . to logText
58 | -- ["No network access","Something really bad happened"]
59 | 
60 | downGradeErrors :: Text
61 | downGradeErrors =
62 |     logs & regex logPattern . groups . _Log . filtered ((== Error) . logLevel) %~ toWarning
63 |   where
64 |     toWarning (Log _level service txt) = Log Warn service (T.toUpper txt)
65 | -- <Warn> <App> CONFIG-FILE NOT FOUND
66 | -- <Warn> <Kafka> NO NETWORK ACCESS
67 | -- <Info> <App> Waldo logged in
68 | -- <Warn> <Kafka> 90% of disk space used
69 | -- <Info> <App> Carmen logged out
70 | -- <Warn> <Kafka> SOMETHING REALLY BAD HAPPENED
71 | 


--------------------------------------------------------------------------------
/lens-regex-pcre.cabal:
--------------------------------------------------------------------------------
 1 | cabal-version: 1.12
 2 | 
 3 | -- This file has been generated from package.yaml by hpack version 0.37.0.
 4 | --
 5 | -- see: https://github.com/sol/hpack
 6 | --
 7 | -- hash: 6ab2b296761109d7cc7b2a86dd4ca15b111214086ceff217b38b72b7de9e5629
 8 | 
 9 | name:           lens-regex-pcre
10 | version:        1.1.1.0
11 | synopsis:       A lensy interface to regular expressions
12 | description:    Please see the README on GitHub at <https://github.com/ChrisPenner/lens-regex-pcre#readme>
13 | category:       Regex
14 | homepage:       https://github.com/ChrisPenner/lens-regex-pcre#readme
15 | bug-reports:    https://github.com/ChrisPenner/lens-regex-pcre/issues
16 | author:         Chris Penner
17 | maintainer:     christopher.penner@gmail.com
18 | copyright:      2019 Chris Penner
19 | license:        BSD3
20 | license-file:   LICENSE
21 | build-type:     Simple
22 | extra-source-files:
23 |     README.md
24 |     ChangeLog.md
25 | 
26 | source-repository head
27 |   type: git
28 |   location: https://github.com/ChrisPenner/lens-regex-pcre
29 | 
30 | library
31 |   exposed-modules:
32 |       Control.Lens.Regex.ByteString
33 |       Control.Lens.Regex.Text
34 |   other-modules:
35 |       Paths_lens_regex_pcre
36 |   hs-source-dirs:
37 |       src
38 |   ghc-options: -Wall
39 |   build-depends:
40 |       base >=4.7 && <5
41 |     , bytestring
42 |     , containers
43 |     , lens
44 |     , pcre-heavy
45 |     , pcre-light >=0.4.1.0
46 |     , template-haskell
47 |     , text
48 |   default-language: Haskell2010
49 | 
50 | test-suite lens-regex-pcre-test
51 |   type: exitcode-stdio-1.0
52 |   main-is: Spec.hs
53 |   other-modules:
54 |       ByteString
55 |       Text
56 |       Paths_lens_regex_pcre
57 |   hs-source-dirs:
58 |       test
59 |   ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
60 |   build-depends:
61 |       base >=4.7 && <5
62 |     , bytestring
63 |     , containers
64 |     , hspec
65 |     , lens
66 |     , lens-regex-pcre
67 |     , pcre-heavy
68 |     , pcre-light >=0.4.1.0
69 |     , template-haskell
70 |     , text
71 |   default-language: Haskell2010
72 | 


--------------------------------------------------------------------------------
/mac-build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Ensures we have all the correct C-libs in scope
4 | stack  build --fast --extra-include-dirs "$(brew --prefix)/include" --extra-lib-dirs "$(brew --prefix)/lib"
5 | 


--------------------------------------------------------------------------------
/package.yaml:
--------------------------------------------------------------------------------
 1 | name:                lens-regex-pcre
 2 | version:             1.1.2.0
 3 | github:              "ChrisPenner/lens-regex-pcre"
 4 | license:             BSD3
 5 | author:              "Chris Penner"
 6 | maintainer:          "christopher.penner@gmail.com"
 7 | copyright:           "2019 Chris Penner"
 8 | 
 9 | extra-source-files:
10 | - README.md
11 | - ChangeLog.md
12 | 
13 | # Metadata used when publishing your package
14 | synopsis:            A lensy interface to regular expressions
15 | category:            Regex
16 | 
17 | # To avoid duplicated efforts in documentation and dealing with the
18 | # complications of embedding Haddock markup inside cabal files, it is
19 | # common to point users to the README.md file.
20 | description:         Please see the README on GitHub at <https://github.com/ChrisPenner/lens-regex-pcre#readme>
21 | 
22 | dependencies:
23 | - base >= 4.7 && < 5
24 | - pcre-heavy
25 | - pcre-light >= 0.4.1.0
26 | - text
27 | - bytestring
28 | - lens
29 | - template-haskell
30 | - containers
31 | 
32 | ghc-options:
33 | - -Wall
34 | library:
35 |   source-dirs: src
36 | 
37 | tests:
38 |   lens-regex-pcre-test:
39 |     main:                Spec.hs
40 |     source-dirs:         test
41 |     ghc-options:
42 |     - -threaded
43 |     - -rtsopts
44 |     - -with-rtsopts=-N
45 |     dependencies:
46 |     - lens-regex-pcre
47 |     - hspec
48 | 
49 | # benchmarks:
50 | #   lens-regex-pcre-bench:
51 | #     main:                Bench.hs
52 | #     source-dirs:         bench
53 | #     ghc-options:
54 | #     - -threaded
55 | #     - -rtsopts
56 | #     - -with-rtsopts=-N
57 | #     dependencies:
58 | #     - lens-regex-pcre
59 | #     - gauge
60 | 


--------------------------------------------------------------------------------
/src/Control/Lens/Regex/ByteString.hs:
--------------------------------------------------------------------------------
  1 | {-|
  2 | Module      : Control.Lens.Regex.ByteString
  3 | Description : ByteString PCRE Regex library with a lensy interface.
  4 | Copyright   : (c) Chris Penner, 2019
  5 | License     : BSD3
  6 | -}
  7 | 
  8 | {-# LANGUAGE FlexibleContexts #-}
  9 | {-# LANGUAGE OverloadedStrings #-}
 10 | {-# LANGUAGE PartialTypeSignatures #-}
 11 | {-# LANGUAGE QuasiQuotes #-}
 12 | {-# LANGUAGE RankNTypes #-}
 13 | {-# LANGUAGE TemplateHaskell #-}
 14 | {-# LANGUAGE DataKinds #-}
 15 | {-# LANGUAGE UndecidableInstances #-}
 16 | {-# LANGUAGE TypeOperators #-}
 17 | {-# LANGUAGE TupleSections #-}
 18 | {-# LANGUAGE DerivingStrategies #-}
 19 | 
 20 | module Control.Lens.Regex.ByteString
 21 |     (
 22 |     -- * Basics
 23 |       regex
 24 |     , match
 25 |     , groups
 26 |     , group
 27 |     , namedGroups
 28 |     , namedGroup
 29 |     , matchAndGroups
 30 | 
 31 |     -- * Compiling regexes to Traversals
 32 |     , regexing
 33 |     , mkRegexTraversalQQ
 34 | 
 35 |     -- * Types
 36 |     , Match
 37 |     , PCRE.Regex
 38 |     ) where
 39 | 
 40 | import qualified Data.ByteString as BS
 41 | import qualified Data.ByteString.Lazy as BL
 42 | import qualified Data.ByteString.Builder as BS
 43 | import qualified Text.Regex.PCRE.Heavy as PCRE
 44 | import qualified Text.Regex.PCRE.Light as PCRE
 45 | import Control.Lens hiding (re)
 46 | import Data.Bifunctor
 47 | import qualified Language.Haskell.TH.Quote as TH
 48 | import qualified Language.Haskell.TH.Syntax as TH
 49 | import qualified Language.Haskell.TH as TH
 50 | import GHC.TypeLits
 51 | import qualified Data.Map as M
 52 | import Data.Tuple (swap)
 53 | 
 54 | -- $setup
 55 | -- >>> :set -XQuasiQuotes
 56 | -- >>> :set -XOverloadedStrings
 57 | -- >>> :set -XTypeApplications
 58 | -- >>> import qualified Data.ByteString.Char8 as Char8
 59 | -- >>> import Data.Char
 60 | -- >>> import Data.List hiding (group)
 61 | -- >>> import Data.ByteString.Lens
 62 | 
 63 | type MatchRange = (Int, Int)
 64 | type GroupRanges = [(Int, Int)]
 65 | 
 66 | unBuilder :: BS.Builder -> BS.ByteString
 67 | unBuilder = BL.toStrict . BS.toLazyByteString
 68 | 
 69 | building :: Iso' BS.Builder BS.ByteString
 70 | building = iso unBuilder BS.byteString
 71 | 
 72 | 
 73 | -- | Match represents an opaque regex match.
 74 | -- You can drill into it using 'match', 'groups', 'group', 'namedGroup', 'namedGroups' or 'matchAndGroups'
 75 | data Match =
 76 |     Match { _chunks    :: [Either BS.Builder BS.Builder]
 77 |           , _matchRegex :: PCRE.Regex
 78 |           }
 79 | 
 80 | instance Eq Match where
 81 |   a == b = (_matchRegex a == _matchRegex b)
 82 |     && (((bimap unBuilder unBuilder) <$> _chunks a) == ((bimap unBuilder unBuilder) <$> _chunks b))
 83 | 
 84 | instance Ord Match where
 85 |   compare a b = compare (_matchRegex a) (_matchRegex b)
 86 |     <> compare ((bimap unBuilder unBuilder) <$> _chunks a) ((bimap unBuilder unBuilder) <$> _chunks b)
 87 | 
 88 | 
 89 | 
 90 | makeLensesFor [("_chunks", "chunks")] ''Match
 91 | 
 92 | instance TypeError
 93 |   ('Text "You're trying to 'show' a raw 'Match' object."
 94 |    ':$$: 'Text "You likely missed adding a 'match' or 'groups' or 'group' call after your 'regex' call :)")
 95 |   => Show Match where
 96 |   show _ = "This is a raw Match object, did you miss a 'match' or 'groups' or 'group' call after your 'regex'?"
 97 | 
 98 | -- | Access all groups of a match as a list. Stashes the full match text as the index in case
 99 | -- you need it.
100 | --
101 | -- Changing the length of the list has behaviour similar to 'partsOf'.
102 | --
103 | -- Get all matched groups:
104 | --
105 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . groups
106 | -- [["raindrops","roses"],["whiskers","kittens"]]
107 | --
108 | -- You can access a specific group combining with 'ix', or just use 'group' instead
109 | --
110 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . groups .  ix 1
111 | -- ["roses","kittens"]
112 | --
113 | -- Editing groups:
114 | --
115 | -- >>> "raindrops on roses and whiskers on kittens" & [regex|(\w+) on (\w+)|] . groups .  ix 1 %~ Char8.map toUpper
116 | -- "raindrops on ROSES and whiskers on KITTENS"
117 | --
118 | -- Editing the list rearranges groups
119 | --
120 | -- >>> "raindrops on roses and whiskers on kittens" & [regex|(\w+) on (\w+)|] . groups %~ reverse
121 | -- "roses on raindrops and kittens on whiskers"
122 | --
123 | -- You can traverse the list to flatten out all groups
124 | --
125 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . groups . traversed
126 | -- ["raindrops","roses","whiskers","kittens"]
127 | --
128 | -- Use indexed helpers to access the full match when operating on a group.
129 | --
130 | -- This replaces each group with the full match text wrapped in parens:
131 | --
132 | -- >>> "one-two" & [regex|(\w+)-(\w+)|] . groups <. traversed %@~ \mtch grp -> grp <> ":(" <> mtch <> ")"
133 | -- "one:(one-two)-two:(one-two)"
134 | groups :: IndexedLens' BS.ByteString Match [BS.ByteString]
135 | groups = conjoined groupsT (reindexed (view match) selfIndex <. groupsT)
136 |     where
137 |       groupsT :: Lens' Match [BS.ByteString]
138 |       groupsT = chunks . partsOf (traversed . _Right . building)
139 | 
140 | -- | Access a specific group of a match. Numbering starts at 0.
141 | --
142 | -- Stashes the full match text as the index in case you need it.
143 | --
144 | -- See 'groups' for more info on grouping
145 | --
146 | -- >>> "key:value, a:b" ^.. [regex|(\w+):(\w+)|] . group 0
147 | -- ["key","a"]
148 | --
149 | -- >>> "key:value, a:b" ^.. [regex|(\w+):(\w+)|] . group 1
150 | -- ["value","b"]
151 | --
152 | -- >>> "key:value, a:b" & [regex|(\w+):(\w+)|] . group 1 %~ Char8.map toUpper
153 | -- "key:VALUE, a:B"
154 | --
155 | -- Replace the first capture group with the full match:
156 | --
157 | -- >>> "a, b" & [regex|(\w+), (\w+)|] . group 0 .@~ \i -> "(" <> i <> ")"
158 | -- "(a, b), b"
159 | group :: Int -> IndexedTraversal' BS.ByteString Match BS.ByteString
160 | group n = groups <. ix n
161 | 
162 | -- | Access all the named groups of a match as a 'M.Map'. Stashes the full match text as the index in case
163 | -- you need it.
164 | --
165 | -- Note that you can edit the groups through this lens, but the behaviour is undefined when editing inner elements of __nested__ groups.
166 | -- Behaviour is undefined if groups are removed from the map (so don't do that).
167 | --
168 | -- NOTE: There's currently some strange behaviour in pcre-heavy where trailing unmatched optional groups are omitted, I'm looking into getting that patched, but for now, note the difference in behaviour:
169 | --
170 | -- >>> "A" ^? [regex|(?<a>A)|(?<b>B)|] . namedGroups
171 | -- Just (fromList [("a","A")])
172 | --
173 | -- >>> "B" ^? [regex|(?<a>A)|(?<b>B)|] . namedGroups
174 | -- Just (fromList [("a",""),("b","B")])
175 | --
176 | -- Get all matched groups:
177 | --
178 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(?<first>\w+) on (?<second>\w+)|] . namedGroups
179 | -- [fromList [("first","raindrops"),("second","roses")],fromList [("first","whiskers"),("second","kittens")]]
180 | --
181 | -- You can access a specific group combining with 'ix', or just use 'namedGroup' instead
182 | --
183 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(?<first>\w+) on (?<second>\w+)|] . namedGroups .  ix "second"
184 | -- ["roses","kittens"]
185 | --
186 | -- Editing groups:
187 | --
188 | -- >>> "raindrops on roses and whiskers on kittens" & [regex|(?<first>\w+) on (?<second>\w+)|] . namedGroups . ix "second" %~ Char8.map toUpper
189 | -- "raindrops on ROSES and whiskers on KITTENS"
190 | --
191 | -- Use indexed helpers to access the full match when operating on a group.
192 | --
193 | -- This replaces the "first" group with the full match text wrapped in parens:
194 | --
195 | -- >>> "one-two" & [regex|(?<first>\w+)-(\w+)|] . namedGroups <. ix "first" %@~ \mtch grp -> grp <> ":(" <> mtch <> ")"
196 | -- "one:(one-two)-two"
197 | namedGroups :: IndexedLens' BS.ByteString Match (M.Map BS.ByteString BS.ByteString)
198 | namedGroups = conjoined stepOne (reindexed (view match) selfIndex <. stepOne)
199 |     where
200 |       -- stepOne :: Traversal' Match (M.Map BS.ByteString BS.ByteString)
201 |       stepOne :: Lens' Match (M.Map BS.ByteString BS.ByteString)
202 |       stepOne f m = m & (groups . zipT . converterT (_matchRegex m) . partsOf (traversed . _Right) . mapL) %%~ f
203 |       zipT :: Iso' [a]  [(Int, a)]
204 |       zipT = iso (zip [0..]) (fmap snd)
205 |       converterT :: PCRE.Regex -> Lens' [(Int, BS.ByteString)] [Either (Int, BS.ByteString) (BS.ByteString, BS.ByteString)]
206 |       converterT pattern f xs =
207 |           f (converter pattern xs) <&> itraversed %@~ \i l -> either id ((i,) . snd) l
208 |       converter :: PCRE.Regex -> [(Int, BS.ByteString)] -> [Either (Int, BS.ByteString) (BS.ByteString, BS.ByteString)]
209 |       converter pattern = fmap $ \(i, s) ->
210 |           case M.lookup i (names pattern) of
211 |               Nothing -> Left (i, s)
212 |               Just n -> Right (n, s)
213 |       mapL :: Lens' [(BS.ByteString, BS.ByteString)] (M.Map BS.ByteString BS.ByteString)
214 |       mapL = lens M.fromList setter
215 |         where
216 |           setter :: [(BS.ByteString, BS.ByteString)] -> M.Map BS.ByteString BS.ByteString -> [(BS.ByteString, BS.ByteString)]
217 |           setter xs m = xs <&> \(k, _) -> (k, M.findWithDefault "" k m)
218 |       names :: PCRE.Regex -> M.Map Int BS.ByteString
219 |       names pattern = M.fromList . fmap swap $ PCRE.captureNames pattern
220 | 
221 | -- | Access a specific named group of a match
222 | --
223 | -- See 'namedGroups' for caveats and more info.
224 | --
225 | -- Stashes the full match text as the index in case you need it.
226 | --
227 | -- >>> "key:value, a:b" ^.. [regex|(?<first>\w+):(?<second>\w+)|] . namedGroup "first"
228 | -- ["key","a"]
229 | --
230 | -- >>> "key:value, a:b" ^.. [regex|(?<first>\w+):(?<second>\w+)|] . namedGroup "second"
231 | -- ["value","b"]
232 | --
233 | -- >>> "key:value, a:b" & [regex|(?<first>\w+):(?<second>\w+)|] . namedGroup "second" %~ Char8.map toUpper
234 | -- "key:VALUE, a:B"
235 | --
236 | -- Replace the first capture group with the full match:
237 | --
238 | -- >>> "a, b" & [regex|(?<first>\w+), (?<second>\w+)|] . namedGroup "first" .@~ \i -> "(" <> i <> ")"
239 | -- "(a, b), b"
240 | namedGroup :: BS.ByteString -> IndexedTraversal' BS.ByteString Match BS.ByteString
241 | namedGroup name = namedGroups <. ix name
242 | 
243 | -- | Traverse each match
244 | --
245 | -- Stashes any matched groups into the index in case you need them.
246 | --
247 | --  Get a match if one exists:
248 | --
249 | -- >>> "find a needle in a haystack" ^? [regex|n..dle|] . match
250 | -- Just "needle"
251 | --
252 | --  Collect all matches
253 | --
254 | -- >>> "one _two_ three _four_" ^.. [regex|_\w+_|] . match
255 | -- ["_two_","_four_"]
256 | --
257 | -- You can edit the traversal to perform a regex replace/substitution
258 | --
259 | -- >>> "one _two_ three _four_" & [regex|_\w+_|] . match %~ Char8.map toUpper
260 | -- "one _TWO_ three _FOUR_"
261 | --
262 | -- Here we use the group matches stored in the index to form key-value pairs, replacing the entire match.
263 | --
264 | -- >>> "abc-def, ghi-jkl" & [regex|(\w+)-(\w+)|] . match %@~ \[k, v] _ -> "{" <> k <> ":" <> v <> "}"
265 | -- "{abc:def}, {ghi:jkl}"
266 | match :: IndexedTraversal' [BS.ByteString] Match BS.ByteString
267 | match = conjoined matchBS (reindexed (view groups) selfIndex <. matchBS)
268 |   where
269 |     matchBS :: Traversal' Match BS.ByteString
270 |     matchBS = chunks . matchT . building
271 |     matchT :: Traversal' [Either BS.Builder BS.Builder] BS.Builder
272 |     matchT f grps =
273 |         (:[]) . Right <$> f (grps ^. folded . chosen)
274 | 
275 | -- | Build a traversal from the provided 'PCRE.Regex', this is handy if you're QuasiQuoter
276 | -- averse, or if you already have a 'PCRE.Regex' object floating around.
277 | --
278 | -- Also see 'mkRegexTraversalQQ'
279 | regexing :: PCRE.Regex -> IndexedTraversal' Int BS.ByteString Match
280 | regexing pattern = conjoined (regexT pattern) (indexing (regexT pattern)) . asMatch
281 |   where
282 |     -- Unlawful iso, but since the Regex field of Match isn't exported it's fine.
283 |     asMatch :: Iso' [Either BS.Builder BS.Builder] Match
284 |     asMatch = iso to' from'
285 |     to' xs = Match xs pattern
286 |     from' (Match xs _) = xs
287 | 
288 | -- | Base regex traversal helper
289 | regexT :: PCRE.Regex -> Traversal' BS.ByteString [Either BS.Builder BS.Builder]
290 | regexT pattern f txt = unBuilder . collapseMatch <$> apply (splitAll txt matches)
291 |   where
292 |     matches :: [(MatchRange, GroupRanges)]
293 |     matches = PCRE.scanRanges pattern txt
294 |     collapseMatch :: [Either BS.Builder [Either BS.Builder BS.Builder]] -> BS.Builder
295 |     collapseMatch xs = xs ^. folded . beside id (traversed . chosen)
296 |     apply xs = xs & traversed . _Right %%~ f
297 | 
298 | -- | Collect both the match text AND all the matching groups
299 | --
300 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . matchAndGroups
301 | -- [("raindrops on roses",["raindrops","roses"]),("whiskers on kittens",["whiskers","kittens"])]
302 | matchAndGroups :: Getter Match (BS.ByteString, [BS.ByteString])
303 | matchAndGroups = to $ \m -> (m ^. match, m ^. groups)
304 | 
305 | -- | Builds a traversal over text using a Regex pattern
306 | --
307 | -- It's a 'TH.QuasiQuoter' which creates a Traversal out of the given regex string.
308 | -- It's equivalent to calling 'regexing' on a 'PCRE.Regex' created using the
309 | -- 'PCRE.re' QuasiQuoter.
310 | --
311 | -- The "real" type is:
312 | --
313 | -- > regex :: Regex -> IndexedTraversal' Int BS.ByteString Match
314 | --
315 | -- It's a traversal which selects 'Match'es; compose it with 'match' or 'groups'
316 | -- to get the relevant parts of your match.
317 | --
318 | -- >>> txt = "raindrops on roses and whiskers on kittens"
319 | --
320 | -- Search
321 | --
322 | -- >>> has ([regex|whisk|]) txt
323 | -- True
324 | --
325 | -- Get matches
326 | --
327 | -- >>> txt ^.. [regex|\br\w+|] . match
328 | -- ["raindrops","roses"]
329 | --
330 | -- Edit matches
331 | --
332 | -- >>> txt & [regex|\br\w+|] . match %~ Char8.intersperse '-' . Char8.map toUpper
333 | -- "R-A-I-N-D-R-O-P-S on R-O-S-E-S and whiskers on kittens"
334 | --
335 | -- Get Groups
336 | --
337 | -- >>> txt ^.. [regex|(\w+) on (\w+)|] . groups
338 | -- [["raindrops","roses"],["whiskers","kittens"]]
339 | --
340 | -- Edit Groups
341 | --
342 | -- >>> txt & [regex|(\w+) on (\w+)|] . groups %~ reverse
343 | -- "roses on raindrops and kittens on whiskers"
344 | --
345 | -- Get the third match
346 | --
347 | -- >>> txt ^? [regex|\w+|] . index 2 . match
348 | --Just "roses"
349 | --
350 | -- Edit matches
351 | --
352 | -- >>> txt & [regex|\br\w+|] . match %~ Char8.intersperse '-' . Char8.map toUpper
353 | -- "R-A-I-N-D-R-O-P-S on R-O-S-E-S and whiskers on kittens"
354 | --
355 | -- Get Groups
356 | --
357 | -- >>> txt ^.. [regex|(\w+) on (\w+)|] . groups
358 | -- [["raindrops","roses"],["whiskers","kittens"]]
359 | --
360 | -- Edit Groups
361 | --
362 | -- >>> txt & [regex|(\w+) on (\w+)|] . groups %~ reverse
363 | -- "roses on raindrops and kittens on whiskers"
364 | --
365 | -- Get the third match
366 | --
367 | -- >>> txt ^? [regex|\w+|] . index 2 . match
368 | -- Just "roses"
369 | --
370 | -- Match integers, 'Read' them into ints, then sort them in-place
371 | -- dumping them back into the source text afterwards.
372 | --
373 | -- >>> "Monday: 29, Tuesday: 99, Wednesday: 3" & partsOf ([regex|\d+|] . match . from packedChars . _Show @Int) %~ sort
374 | -- "Monday: 3, Tuesday: 29, Wednesday: 99"
375 | --
376 | -- To alter behaviour of the regex you may wish to pass 'PCRE.PCREOption's when compiling it.
377 | -- The default behaviour may seem strange in certain cases; e.g. it operates in 'single-line'
378 | -- mode. You can 'PCRE.compile' the 'PCRE.Regex' separately and add any options you like, then pass the resulting
379 | -- 'PCRE.Regex' into 'regex';
380 | -- Alternatively can make your own version of the QuasiQuoter with any options you want embedded
381 | -- by using 'PCRE.mkRegexQQ'.
382 | regex :: TH.QuasiQuoter
383 | regex = PCRE.re{TH.quoteExp=quoter}
384 |   where
385 |     quoter str = do
386 |         rgx <- TH.quoteExp PCRE.re str
387 |         regexExpr <- TH.varE 'regexing
388 |         return $ TH.AppE regexExpr rgx
389 | 
390 | -- | Build a QuasiQuoter just like 'regex' but with the provided 'PCRE.PCREOption' overrides.
391 | mkRegexTraversalQQ :: [PCRE.PCREOption] -> TH.QuasiQuoter
392 | mkRegexTraversalQQ opts = (PCRE.mkRegexQQ opts){TH.quoteExp=quoter}
393 |   where
394 |     quoter str = do
395 |         rgx <- TH.quoteExp (PCRE.mkRegexQQ opts) str
396 |         regexExpr <- TH.varE 'regexing
397 |         return $ TH.AppE regexExpr rgx
398 | 
399 | ---------------------------------------------------------------------------------------------
400 | 
401 | splitAll :: BS.ByteString -> [(MatchRange, GroupRanges)] -> [Either BS.Builder [Either BS.Builder BS.Builder]]
402 | splitAll txt matches = fmap (second (\(txt', (start,_), grps) -> groupSplit txt' start grps)) splitUp
403 |   where
404 |     splitUp = splits txt 0 matches
405 | 
406 | groupSplit :: BS.ByteString -> Int -> GroupRanges -> [Either BS.Builder BS.Builder]
407 | groupSplit txt _ [] = [Left $ BS.byteString txt]
408 | groupSplit txt offset ((-1, -1) : rest) = Right "" : groupSplit txt offset rest
409 | groupSplit txt offset ((grpStart, grpEnd) : rest) | offset == grpStart =
410 |     let (prefix, suffix) = BS.splitAt (grpEnd - offset) txt
411 |      in Right (BS.byteString prefix) : groupSplit suffix grpEnd rest
412 | groupSplit txt offset ((grpStart, grpEnd) : rest) =
413 |     let (prefix, suffix) = BS.splitAt (grpStart - offset) txt
414 |      in Left (BS.byteString prefix) : groupSplit suffix grpStart ((grpStart, grpEnd) : rest)
415 | 
416 | splits :: BS.ByteString -> Int -> [(MatchRange, GroupRanges)] -> [Either BS.Builder (BS.ByteString, MatchRange, GroupRanges)]
417 | -- No more matches left
418 | splits txt _ [] = [Left $ BS.byteString txt]
419 | -- We're positioned at a match
420 | splits txt offset (((start, end), grps) : rest) | offset == start =
421 |     let (prefix, suffix) = BS.splitAt (end - offset) txt
422 |      in (Right (prefix, (start, end), grps)) : splits suffix end rest
423 | -- jump to the next match
424 | splits txt offset matches@(((start, _), _) : _) =
425 |     let (prefix, suffix) = BS.splitAt (start - offset) txt
426 |      in (Left $ BS.byteString prefix) : splits suffix start matches
427 | 


--------------------------------------------------------------------------------
/src/Control/Lens/Regex/Text.hs:
--------------------------------------------------------------------------------
  1 | {-|
  2 | Module      : Control.Lens.Regex.Text
  3 | Description : Text PCRE Regex library with a lensy interface.
  4 | Copyright   : (c) Chris Penner, 2019
  5 | License     : BSD3
  6 | -}
  7 | 
  8 | {-# LANGUAGE FlexibleContexts #-}
  9 | {-# LANGUAGE OverloadedStrings #-}
 10 | {-# LANGUAGE PartialTypeSignatures #-}
 11 | {-# LANGUAGE QuasiQuotes #-}
 12 | {-# LANGUAGE RankNTypes #-}
 13 | {-# LANGUAGE TemplateHaskell #-}
 14 | 
 15 | module Control.Lens.Regex.Text
 16 |     (
 17 |     -- * Basics
 18 |       regex
 19 |     , match
 20 |     , groups
 21 |     , group
 22 |     , namedGroups
 23 |     , namedGroup
 24 |     , matchAndGroups
 25 | 
 26 |     -- * Compiling regexes to Traversals
 27 |     , regexing
 28 |     , mkRegexTraversalQQ
 29 | 
 30 |     -- * Types
 31 |     , RBS.Match
 32 |     , PCRE.Regex
 33 |     ) where
 34 | 
 35 | import qualified Data.Text as T
 36 | import qualified Data.Text.Encoding as T
 37 | import qualified Data.Text.Encoding.Error as T
 38 | import qualified Data.ByteString as BS
 39 | import qualified Text.Regex.PCRE.Heavy as PCRE
 40 | import qualified Data.Map as M
 41 | import Control.Lens hiding (re, matching)
 42 | import qualified Language.Haskell.TH as TH
 43 | import qualified Language.Haskell.TH.Quote as TH
 44 | 
 45 | import qualified Control.Lens.Regex.ByteString as RBS
 46 | 
 47 | -- $setup
 48 | -- >>> :set -XQuasiQuotes
 49 | -- >>> :set -XOverloadedStrings
 50 | -- >>> :set -XTypeApplications
 51 | -- >>> import Data.Text.Lens (unpacked)
 52 | -- >>> import qualified Data.Text as T
 53 | -- >>> import Data.List (sort)
 54 | 
 55 | utf8 :: Iso' T.Text BS.ByteString
 56 | utf8 = iso T.encodeUtf8 (T.decodeUtf8With T.lenientDecode)
 57 | 
 58 | -- | Builds a traversal over text using a Regex pattern
 59 | --
 60 | -- It's a 'TH.QuasiQuoter' which creates a Traversal out of the given regex string.
 61 | -- It's equivalent to calling 'regexing' on a 'PCRE.Regex' created using the
 62 | -- 'PCRE.re' QuasiQuoter.
 63 | --
 64 | -- The "real" type is:
 65 | --
 66 | -- > regex :: Regex -> IndexedTraversal' Int T.Text Match
 67 | --
 68 | -- It's a traversal which selects 'RBS.Match'es; compose it with 'match' or 'groups'
 69 | -- to get the relevant parts of your match.
 70 | --
 71 | -- >>> txt = "raindrops on roses and whiskers on kittens"
 72 | --
 73 | -- Search
 74 | --
 75 | -- >>> has ([regex|whisk|]) txt
 76 | -- True
 77 | --
 78 | -- Get matches
 79 | --
 80 | -- >>> txt ^.. [regex|\br\w+|] . match
 81 | -- ["raindrops","roses"]
 82 | --
 83 | -- Edit matches
 84 | --
 85 | -- >>> txt & [regex|\br\w+|] . match %~ T.intersperse '-' . T.toUpper
 86 | -- "R-A-I-N-D-R-O-P-S on R-O-S-E-S and whiskers on kittens"
 87 | --
 88 | -- Get Groups
 89 | --
 90 | -- >>> txt ^.. [regex|(\w+) on (\w+)|] . groups
 91 | -- [["raindrops","roses"],["whiskers","kittens"]]
 92 | --
 93 | -- Edit Groups
 94 | --
 95 | -- >>> txt & [regex|(\w+) on (\w+)|] . groups %~ reverse
 96 | -- "roses on raindrops and kittens on whiskers"
 97 | --
 98 | -- Get the third match
 99 | --
100 | -- >>> txt ^? [regex|\w+|] . index 2 . match
101 | --Just "roses"
102 | --
103 | -- Edit matches
104 | --
105 | -- >>> txt & [regex|\br\w+|] . match %~ T.intersperse '-' . T.toUpper
106 | -- "R-A-I-N-D-R-O-P-S on R-O-S-E-S and whiskers on kittens"
107 | --
108 | -- Get Groups
109 | --
110 | -- >>> txt ^.. [regex|(\w+) on (\w+)|] . groups
111 | -- [["raindrops","roses"],["whiskers","kittens"]]
112 | --
113 | -- Edit Groups
114 | --
115 | -- >>> txt & [regex|(\w+) on (\w+)|] . groups %~ reverse
116 | -- "roses on raindrops and kittens on whiskers"
117 | --
118 | -- Get the third match
119 | --
120 | -- >>> txt ^? [regex|\w+|] . index 2 . match
121 | -- Just "roses"
122 | --
123 | -- Match integers, 'Read' them into ints, then sort them in-place
124 | -- dumping them back into the source text afterwards.
125 | --
126 | -- >>> "Monday: 29, Tuesday: 99, Wednesday: 3" & partsOf ([regex|\d+|] . match . unpacked . _Show @Int) %~ sort
127 | -- "Monday: 3, Tuesday: 29, Wednesday: 99"
128 | --
129 | -- To alter behaviour of the regex you may wish to pass 'PCRE.PCREOption's when compiling it.
130 | -- The default behaviour may seem strange in certain cases; e.g. it operates in 'single-line'
131 | -- mode. You can 'PCRE.compile' the 'PCRE.Regex' separately and add any options you like, then pass the resulting
132 | -- 'PCRE.Regex' into 'regex';
133 | -- Alternatively can make your own version of the QuasiQuoter with any options you want embedded
134 | -- by using 'PCRE.mkRegexQQ'.
135 | -- regex :: Regex -> IndexedTraversal' Int T.Text RBS.Match
136 | regex :: TH.QuasiQuoter
137 | regex = PCRE.re{TH.quoteExp=quoter}
138 |   where
139 |     quoter str = do
140 |         rgx <- TH.quoteExp PCRE.re str
141 |         regexExpr <- TH.varE 'regexing
142 |         return $ TH.AppE regexExpr rgx
143 | 
144 | -- | Build a QuasiQuoter just like 'regex' but with the provided 'PCRE.PCREOption' overrides.
145 | mkRegexTraversalQQ :: [PCRE.PCREOption] -> TH.QuasiQuoter
146 | mkRegexTraversalQQ opts = (PCRE.mkRegexQQ opts){TH.quoteExp=quoter}
147 |   where
148 |     quoter str = do
149 |         rgx <- TH.quoteExp (PCRE.mkRegexQQ opts) str
150 |         regexExpr <- TH.varE 'regexing
151 |         return $ TH.AppE regexExpr rgx
152 | 
153 | -- | Build a traversal from the provided 'PCRE.Regex', this is handy if you're QuasiQuoter
154 | -- averse, or if you already have a 'PCRE.Regex' object floating around.
155 | --
156 | -- Also see 'mkRegexTraversalQQ'
157 | regexing :: PCRE.Regex -> IndexedTraversal' Int T.Text RBS.Match
158 | regexing pat = utf8 . RBS.regexing pat
159 | 
160 | -- | Access all groups of a match as a list. Also keeps full match text as the index in case
161 | -- you need it.
162 | --
163 | -- Note that you can edit the groups through this lens,
164 | -- Changing the length of the list has behaviour similar to 'partsOf'.
165 | --
166 | -- Get all matched groups:
167 | --
168 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . groups
169 | -- [["raindrops","roses"],["whiskers","kittens"]]
170 | --
171 | -- You can access a specific group combining with 'ix', or just use 'group' instead
172 | --
173 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . groups .  ix 1
174 | -- ["roses","kittens"]
175 | --
176 | -- Editing groups:
177 | --
178 | -- >>> "raindrops on roses and whiskers on kittens" & [regex|(\w+) on (\w+)|] . groups .  ix 1 %~ T.toUpper
179 | -- "raindrops on ROSES and whiskers on KITTENS"
180 | --
181 | -- Editing the list rearranges groups
182 | --
183 | -- >>> "raindrops on roses and whiskers on kittens" & [regex|(\w+) on (\w+)|] . groups %~ Prelude.reverse
184 | -- "roses on raindrops and kittens on whiskers"
185 | --
186 | -- You can traverse the list to flatten out all groups
187 | --
188 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . groups . traversed
189 | -- ["raindrops","roses","whiskers","kittens"]
190 | --
191 | -- This replaces each group with the full match text wrapped in parens:
192 | --
193 | -- >>> "one-two" & [regex|(\w+)-(\w+)|] . groups <. traversed %@~ \mtch grp -> grp <> ":(" <> mtch <> ")"
194 | -- "one:(one-two)-two:(one-two)"
195 | groups :: IndexedLens' T.Text RBS.Match [T.Text]
196 | groups = reindexed (view $ from utf8) RBS.groups <. mapping (from utf8)
197 | 
198 | -- | Access a specific group of a match. Numbering starts at 0.
199 | --
200 | -- Stashes the full match text as the index in case you need it.
201 | --
202 | -- See 'groups' for more info on grouping
203 | --
204 | -- >>> "key:value, a:b" ^.. [regex|(\w+):(\w+)|] . group 0
205 | -- ["key","a"]
206 | --
207 | -- >>> "key:value, a:b" ^.. [regex|(\w+):(\w+)|] . group 1
208 | -- ["value","b"]
209 | --
210 | -- >>> "key:value, a:b" & [regex|(\w+):(\w+)|] . group 1 %~ T.toUpper
211 | -- "key:VALUE, a:B"
212 | --
213 | -- >>> "key:value, a:b" & [regex|(\w+):(\w+)|] . group 1 %~ T.toUpper
214 | -- "key:VALUE, a:B"
215 | --
216 | -- Replace the first capture group with the full match:
217 | --
218 | -- >>> "a, b" & [regex|(\w+), (\w+)|] . group 0 .@~ \i -> "(" <> i <> ")"
219 | -- "(a, b), b"
220 | group :: Int -> IndexedTraversal' T.Text RBS.Match T.Text
221 | group n = groups <. ix n
222 | 
223 | -- | Access all the named groups of a match as a 'M.Map'. Stashes the full match text as the index in case
224 | -- you need it.
225 | --
226 | -- Note that you can edit the groups through this lens, but the behaviour is undefined when editing inner elements of __nested__ groups.
227 | -- Behaviour is undefined if groups are removed from the map (so don't do that).
228 | --
229 | -- NOTE: There's currently some strange behaviour in pcre-heavy where trailing unmatched optional groups are omitted, I'm looking into getting that patched, but for now, note the difference in behaviour:
230 | --
231 | -- >>> "A" ^? [regex|(?<a>A)|(?<b>B)|] . namedGroups
232 | -- Just (fromList [("a","A")])
233 | --
234 | -- >>> "B" ^? [regex|(?<a>A)|(?<b>B)|] . namedGroups
235 | -- Just (fromList [("a",""),("b","B")])
236 | --
237 | -- Get all matched groups:
238 | --
239 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(?<first>\w+) on (?<second>\w+)|] . namedGroups
240 | -- [fromList [("first","raindrops"),("second","roses")],fromList [("first","whiskers"),("second","kittens")]]
241 | --
242 | -- You can access a specific group combining with 'ix', or just use 'namedGroup' instead
243 | --
244 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(?<first>\w+) on (?<second>\w+)|] . namedGroups .  ix "second"
245 | -- ["roses","kittens"]
246 | --
247 | -- Editing groups:
248 | --
249 | -- >>> "raindrops on roses and whiskers on kittens" & [regex|(?<first>\w+) on (?<second>\w+)|] . namedGroups . ix "second" %~ T.toUpper
250 | -- "raindrops on ROSES and whiskers on KITTENS"
251 | --
252 | -- Use indexed helpers to access the full match when operating on a group.
253 | --
254 | -- This replaces the "first" group with the full match text wrapped in parens:
255 | --
256 | -- >>> "one-two" & [regex|(?<first>\w+)-(\w+)|] . namedGroups <. ix "first" %@~ \mtch grp -> grp <> ":(" <> mtch <> ")"
257 | -- "one:(one-two)-two"
258 | namedGroups :: IndexedLens' T.Text RBS.Match (M.Map T.Text T.Text)
259 | namedGroups = reindexed (view $ from utf8) RBS.namedGroups <. mapAsTxt
260 |   where
261 |     mapAsTxt :: Iso' (M.Map BS.ByteString BS.ByteString) (M.Map T.Text T.Text)
262 |     mapAsTxt = iso (M.mapKeys (review utf8)) (M.mapKeys (view utf8)) . mapping (from utf8)
263 | 
264 | -- | Access a specific named group of a match
265 | --
266 | -- See 'namedGroups' for caveats and more info.
267 | --
268 | -- Stashes the full match text as the index in case you need it.
269 | --
270 | -- >>> "key:value, a:b" ^.. [regex|(?<first>\w+):(?<second>\w+)|] . namedGroup "first"
271 | -- ["key","a"]
272 | --
273 | -- >>> "key:value, a:b" ^.. [regex|(?<first>\w+):(?<second>\w+)|] . namedGroup "second"
274 | -- ["value","b"]
275 | --
276 | -- >>> "key:value, a:b" & [regex|(?<first>\w+):(?<second>\w+)|] . namedGroup "second" %~ T.toUpper
277 | -- "key:VALUE, a:B"
278 | --
279 | -- Replace the first capture group with the full match:
280 | --
281 | -- >>> "a, b" & [regex|(?<first>\w+), (?<second>\w+)|] . namedGroup "first" .@~ \i -> "(" <> i <> ")"
282 | -- "(a, b), b"
283 | namedGroup :: T.Text -> IndexedTraversal' T.Text RBS.Match T.Text
284 | namedGroup name = namedGroups <. ix name
285 | 
286 | -- | Traverse each match
287 | --
288 | -- Stashes any matched groups into the index in case you need them.
289 | --
290 | --  Get a match if one exists:
291 | --
292 | -- >>> "find a needle in a haystack" ^? [regex|n..dle|] . match
293 | -- Just "needle"
294 | --
295 | --  Collect all matches
296 | --
297 | -- >>> "one _two_ three _four_" ^.. [regex|_\w+_|] . match
298 | -- ["_two_","_four_"]
299 | --
300 | -- You can edit the traversal to perform a regex replace/substitution
301 | --
302 | -- >>> "one _two_ three _four_" & [regex|_\w+_|] . match %~ T.toUpper
303 | -- "one _TWO_ three _FOUR_"
304 | --
305 | -- Here we use the group matches stored in the index to form key-value pairs, replacing the entire match.
306 | --
307 | -- >>> "abc-def, ghi-jkl" & [regex|(\w+)-(\w+)|] . match %@~ \[k, v] _ -> "{" <> k <> ":" <> v <> "}"
308 | -- "{abc:def}, {ghi:jkl}"
309 | match :: IndexedTraversal' [T.Text] RBS.Match T.Text
310 | match = reindexed (fmap (view $ from utf8)) RBS.match <. from utf8
311 | 
312 | -- | Collect both the match text AND all the matching groups
313 | --
314 | -- >>> "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . matchAndGroups
315 | -- [("raindrops on roses",["raindrops","roses"]),("whiskers on kittens",["whiskers","kittens"])]
316 | matchAndGroups :: Getter RBS.Match (T.Text, [T.Text])
317 | matchAndGroups = to $ \m -> (m ^. match, m ^. groups)
318 | 


--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
1 | resolver: lts-18.28
2 | 
3 | packages:
4 | - .
5 | 
6 | extra-deps: []
7 | 


--------------------------------------------------------------------------------
/stack.yaml.lock:
--------------------------------------------------------------------------------
 1 | # This file was autogenerated by Stack.
 2 | # You should not edit this file by hand.
 3 | # For more information, please see the documentation at:
 4 | #   https://docs.haskellstack.org/en/stable/lock_files
 5 | 
 6 | packages: []
 7 | snapshots:
 8 | - completed:
 9 |     sha256: 428ec8d5ce932190d3cbe266b9eb3c175cd81e984babf876b64019e2cbe4ea68
10 |     size: 590100
11 |     url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/28.yaml
12 |   original: lts-18.28
13 | 


--------------------------------------------------------------------------------
/test/ByteString.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE QuasiQuotes #-}
  2 | {-# LANGUAGE OverloadedStrings #-}
  3 | module ByteString where
  4 | 
  5 | import Control.Lens
  6 | import Control.Lens.Regex.ByteString
  7 | import qualified Data.ByteString.Char8 as C8 hiding (index)
  8 | import Data.Char
  9 | import qualified Data.Map as M
 10 | import Test.Hspec
 11 | 
 12 | spec :: Spec
 13 | spec = do
 14 |     describe "regex" $ do
 15 |         xdescribe "pcre-heavy-compat" $ do
 16 |             it "should handle crazy nested groups" $ do
 17 |                 "abcdefhijklm" ^? [regex|^(?:a(b(c)))(?:d(e(f)))(?:h(i(j)))(?:k(l(m)))$|] . matchAndGroups
 18 |                     `shouldBe` Just ("abcdefhijklm", ["bc", "c", "ef", "f", "ij", "j", "lm", "m"])
 19 |         describe "match" $ do
 20 |             describe "getting" $ do
 21 |                 it "should find one match" $ do
 22 |                     "abc" ^.. [regex|b|] . match
 23 |                     `shouldBe` ["b"]
 24 | 
 25 |                 it "should find many matches" $ do
 26 |                     "a b c" ^.. [regex|\w|] . match
 27 |                     `shouldBe` ["a", "b", "c"]
 28 | 
 29 |                 it "should fold" $ do
 30 |                     "a b c" ^. [regex|\w|] . match
 31 |                     `shouldBe` "abc"
 32 | 
 33 |                 it "should match with a group" $ do
 34 |                     "a b c" ^.. [regex|(\w)|] . match
 35 |                     `shouldBe` ["a", "b", "c"]
 36 | 
 37 |                 it "should match with many groups" $ do
 38 |                     "a b c" ^.. [regex|(\w) (\w)|] . match
 39 |                     `shouldBe` ["a b"]
 40 | 
 41 |                 it "should be greedy when overlapping" $ do
 42 |                     "abc" ^.. [regex|\w+|] . match
 43 |                     `shouldBe`["abc"]
 44 | 
 45 |                 it "should respect lazy modifiers" $ do
 46 |                     "abc" ^.. [regex|\w+?|] . match
 47 |                     `shouldBe`["a", "b", "c"]
 48 | 
 49 |             describe "setting" $ do
 50 |                 it "should allow setting" $ do
 51 |                     ("one two three" & [regex|two|] . match .~ "new")
 52 |                     `shouldBe` "one new three"
 53 | 
 54 |                 it "should allow setting many" $ do
 55 |                     ("one <two> three" & [regex|\w+|] . match .~ "new")
 56 |                     `shouldBe` "new <new> new"
 57 | 
 58 |                 it "should allow mutating" $ do
 59 |                     ("one two three" & [regex|two|] . match %~ (<> "!!"). C8.map toUpper)
 60 |                     `shouldBe` "one TWO!! three"
 61 | 
 62 |                 it "should allow mutating many" $ do
 63 |                     ("one two three" & [regex|two|] . match %~ C8.map toUpper)
 64 |                     `shouldBe` "one TWO three"
 65 | 
 66 |         describe "indexed" $ do
 67 |             it "should allow folding with index" $ do
 68 |                 ("one two three" ^.. ([regex|\w+|] <. match) . withIndex)
 69 |                 `shouldBe` [(0, "one"), (1, "two"), (2, "three")]
 70 | 
 71 |             it "should allow getting with index" $ do
 72 |                 ("one two three" ^.. [regex|\w+|] . index 1 . match)
 73 |                 `shouldBe` ["two"]
 74 | 
 75 |             it "should allow setting with index" $ do
 76 |                 ("one two three" & [regex|\w+|] <. match .@~ C8.pack . show)
 77 |                 `shouldBe` "0 1 2"
 78 | 
 79 |             it "should allow mutating with index" $ do
 80 |                 ("one two three" & [regex|\w+|] <. match %@~ \i s -> (C8.pack $ show i) <> ": " <> s)
 81 |                 `shouldBe` "0: one 1: two 2: three"
 82 | 
 83 |     describe "groups" $ do
 84 |         describe "getting" $ do
 85 |             it "should get groups" $ do
 86 |                 "a b c" ^.. [regex|(\w)|] . groups
 87 |                 `shouldBe` [["a"], ["b"], ["c"]]
 88 | 
 89 |             it "should get multiple groups" $ do
 90 |                 "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . groups
 91 |                 `shouldBe` [["raindrops","roses"],["whiskers","kittens"]]
 92 | 
 93 |             it "should allow getting a specific index" $ do
 94 |                 ("one two three four" ^.. [regex|(\w+) (\w+)|] . groups . ix 1)
 95 |                 `shouldBe` ["two", "four"]
 96 | 
 97 |             it "should handle weird group alternation" $ do
 98 |                 ("AB" ^.. [regex|A(x)?(B)|] . groups `shouldBe` [["", "B"]])
 99 |                 ("B" ^.. [regex|(A)|(B)|] . groups `shouldBe` [["", "B"]])
100 |                 -- This behaviour is consistent with pcre-heavy
101 |                 ("A" ^.. [regex|(A)|(B)|] . groups `shouldBe` [["A"]])
102 | 
103 |         describe "setting" $ do
104 |             it "should allow setting groups as a list" $ do
105 |                 ("one two three" & [regex|(\w+) (\w+)|] . groups .~ ["1", "2"])
106 |                 `shouldBe` "1 2 three"
107 | 
108 |             it "should allow editing when result list is the same length" $ do
109 |                 ("raindrops on roses and whiskers on kittens" & [regex|(\w+) on (\w+)|] . groups %~ reverse)
110 |                 `shouldBe` "roses on raindrops and kittens on whiskers"
111 | 
112 |     describe "group" $ do
113 |         it "should get a single group" $ do
114 |                 "a:b c:d" ^.. [regex|(\w):(\w)|] . group 1
115 |                 `shouldBe` ["b", "d"]
116 | 
117 |         it "should set a single group" $ do
118 |                 "a:b c:d" & [regex|(\w):(\w)|] . group 1 %~ C8.map toUpper
119 |                 `shouldBe` "a:B c:D"
120 | 
121 |         describe "traversed" $ do
122 |             it "should allow setting all group matches" $ do
123 |                 ("one two three" & [regex|(\w+) (\w+)|] . groups . traversed .~ "new")
124 |                 `shouldBe` "new new three"
125 | 
126 |             it "should allow mutating" $ do
127 |                 ("one two three four" & [regex|one (two) (three)|] . groups . traversed %~ (<> "!!") . C8.map toUpper)
128 |                 `shouldBe` "one TWO!! THREE!! four"
129 | 
130 |             it "should allow folding with index" $ do
131 |                 ("one two three four" ^.. [regex|(\w+) (\w+)|] . groups . traversed . withIndex)
132 |                 `shouldBe` [(0, "one"), (1, "two"), (0, "three"), (1, "four")]
133 | 
134 |             it "should allow setting with index" $ do
135 |                 ("one two three four" & [regex|(\w+) (\w+)|] . groups . traversed .@~ C8.pack . show)
136 |                 `shouldBe` "0 1 0 1"
137 | 
138 |             it "should allow mutating with index" $ do
139 |                 ("one two three four" & [regex|(\w+) (\w+)|] . groups . traversed %@~ \i s -> (C8.pack $ show i) <> ": " <> s)
140 |                 `shouldBe` "0: one 1: two 0: three 1: four"
141 | 
142 |             it "should compose indices with matches" $ do
143 |                 ("one two three four" ^.. ([regex|(\w+) (\w+)|] <.> groups . traversed) . withIndex)
144 |                 `shouldBe` [((0, 0), "one"), ((0, 1), "two"), ((1, 0), "three"), ((1, 1), "four")]
145 | 
146 |     describe "namedGroups" $ do
147 |         describe "getting" $ do
148 |             it "should get named groups" $ do
149 |                 "a b c" ^.. [regex|(?<mygroup>\w)|] . namedGroups
150 |                 `shouldBe` [M.fromList [("mygroup", "a")], M.fromList [("mygroup", "b")], M.fromList [("mygroup", "c")]]
151 | 
152 |             it "should get multiple named groups" $ do
153 |                 "raindrops on roses and whiskers on kittens" ^.. [regex|(?<one>\w+) on (?<two>\w+)|] . namedGroups
154 |                 `shouldBe` [M.fromList [("one", "raindrops"), ("two", "roses")], M.fromList [("one", "whiskers"), ("two", "kittens")]]
155 | 
156 |             it "should allow getting a specific named group" $ do
157 |                 ("raindrops on roses and whiskers on kittens" ^.. [regex|(?<one>\w+) on (?<two>\w+)|] . namedGroups . ix "two")
158 |                 `shouldBe` ["roses", "kittens"]
159 | 
160 |             it "should handle weird group alternation" $ do
161 |                 ("AB" ^.. [regex|A(?<opt>x)?(?<always>B)|] . namedGroups `shouldBe` [M.fromList [("opt", ""), ("always", "B")]])
162 |                 ("B" ^.. [regex|(?<a>A)|(?<b>B)|] . namedGroups `shouldBe` [M.fromList [("a", ""), ("b", "B")]])
163 |                 -- This is the behaviour of pcre-heavy, it's a bit unfortunate
164 |                 ("A" ^.. [regex|(?<a>A)|(?<b>B)|] . namedGroups `shouldBe` [M.fromList [("a", "A")]])
165 | 
166 |         describe "setting" $ do
167 |             it "should allow setting groups as a map" $ do
168 |                 ("one two three" & [regex|(?<a>\w+) (?<b>\w+)|] . namedGroups .~ M.fromList [("a", "1"), ("b", "2")])
169 |                 `shouldBe` "1 2 three"
170 | 
171 |     describe "namedGroup" $ do
172 |         it "should get a single named group" $ do
173 |                 "a:b c:d" ^.. [regex|(?<before>\w):(?<after>\w)|] . namedGroup "after"
174 |                 `shouldBe` ["b", "d"]
175 | 
176 |         it "should set a single group" $ do
177 |                 "a:b c:d" & [regex|(\w):(?<after>\w)|] . namedGroup "after" %~ C8.map toUpper
178 |                 `shouldBe` "a:B c:D"
179 | 
180 |     describe "matchAndGroups" $ do
181 |         it "should get match and groups" $ do
182 |             "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . matchAndGroups
183 |             `shouldBe` [("raindrops on roses",["raindrops","roses"]),("whiskers on kittens",["whiskers","kittens"])]
184 | 
185 | 


--------------------------------------------------------------------------------
/test/Spec.hs:
--------------------------------------------------------------------------------
1 | import Test.Hspec
2 | import Text
3 | import ByteString
4 | 
5 | main :: IO ()
6 | main = hspec $ do
7 |     describe "text" Text.spec
8 |     describe "bytestring" ByteString.spec
9 | 


--------------------------------------------------------------------------------
/test/Text.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE QuasiQuotes #-}
  2 | {-# LANGUAGE OverloadedStrings #-}
  3 | module Text where
  4 | 
  5 | import Control.Lens
  6 | import Control.Lens.Regex.Text
  7 | import qualified Data.Text as T
  8 | import qualified Data.Map as M
  9 | import Test.Hspec
 10 | 
 11 | spec :: Spec
 12 | spec = do
 13 |     describe "regex" $ do
 14 |         describe "match" $ do
 15 |             describe "getting" $ do
 16 |                 it "should find one match" $ do
 17 |                     "abc" ^.. [regex|b|] . match
 18 |                     `shouldBe` ["b"]
 19 | 
 20 |                 it "should find many matches" $ do
 21 |                     "a b c" ^.. [regex|\w|] . match
 22 |                     `shouldBe` ["a", "b", "c"]
 23 | 
 24 |                 it "should fold" $ do
 25 |                     "a b c" ^. [regex|\w|] . match
 26 |                     `shouldBe` "abc"
 27 | 
 28 |                 it "should match with a group" $ do
 29 |                     "a b c" ^.. [regex|(\w)|] . match
 30 |                     `shouldBe` ["a", "b", "c"]
 31 | 
 32 |                 it "should match with many groups" $ do
 33 |                     "a b c" ^.. [regex|(\w) (\w)|] . match
 34 |                     `shouldBe` ["a b"]
 35 | 
 36 |                 it "should be greedy when overlapping" $ do
 37 |                     "abc" ^.. [regex|\w+|] . match
 38 |                     `shouldBe`["abc"]
 39 | 
 40 |                 it "should respect lazy modifiers" $ do
 41 |                     "abc" ^.. [regex|\w+?|] . match
 42 |                     `shouldBe`["a", "b", "c"]
 43 | 
 44 |                 it "should handle unicode in source text properly" $ do
 45 |                     "🍕 test 🍔" ^. [regex|test|] . match
 46 |                         `shouldBe` "test"
 47 |                     ("🍕 test 🍔" & [regex|🍔|] . match .~ "👻🙈")
 48 |                         `shouldBe` "🍕 test 👻🙈"
 49 | 
 50 |                 it "should handle unicode in patterns properly" $ do
 51 |                     "*🍕 test 🍔*" ^. [regex|🍕 \w+ 🍔|] . match
 52 |                     `shouldBe` "🍕 test 🍔"
 53 | 
 54 |             describe "setting" $ do
 55 |                 it "should allow setting" $ do
 56 |                     ("one two three" & [regex|two|] . match .~ "new")
 57 |                     `shouldBe` "one new three"
 58 | 
 59 |                 it "should allow setting many" $ do
 60 |                     ("one <two> three" & [regex|\w+|] . match .~ "new")
 61 |                     `shouldBe` "new <new> new"
 62 | 
 63 |                 it "should allow mutating" $ do
 64 |                     ("one two three" & [regex|two|] . match %~ (<> "!!"). T.toUpper)
 65 |                     `shouldBe` "one TWO!! three"
 66 | 
 67 |                 it "should allow mutating many" $ do
 68 |                     ("one two three" & [regex|two|] . match %~ T.toUpper)
 69 |                     `shouldBe` "one TWO three"
 70 | 
 71 |         describe "indexed" $ do
 72 |             it "should allow folding with index" $ do
 73 |                 ("one two three" ^.. ([regex|\w+|] <. match) . withIndex)
 74 |                 `shouldBe` [(0, "one"), (1, "two"), (2, "three")]
 75 | 
 76 |             it "should allow getting with index" $ do
 77 |                 ("one two three" ^.. [regex|\w+|] . index 1 . match)
 78 |                 `shouldBe` ["two"]
 79 | 
 80 |             it "should allow setting with index" $ do
 81 |                 ("one two three" & [regex|\w+|] <. match .@~ T.pack . show)
 82 |                 `shouldBe` "0 1 2"
 83 | 
 84 |             it "should allow mutating with index" $ do
 85 |                 ("one two three" & [regex|\w+|] <. match %@~ \i s -> (T.pack $ show i) <> ": " <> s)
 86 |                 `shouldBe` "0: one 1: two 2: three"
 87 | 
 88 |     describe "groups" $ do
 89 |         describe "getting" $ do
 90 |             it "should get groups" $ do
 91 |                 "a b c" ^.. [regex|(\w)|] . groups
 92 |                 `shouldBe` [["a"], ["b"], ["c"]]
 93 | 
 94 |             it "should get multiple groups" $ do
 95 |                 "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . groups
 96 |                 `shouldBe` [["raindrops","roses"],["whiskers","kittens"]]
 97 | 
 98 |             it "should allow getting a specific index" $ do
 99 |                 ("one two three four" ^.. [regex|(\w+) (\w+)|] . groups . ix 1)
100 |                 `shouldBe` ["two", "four"]
101 | 
102 |             it "should handle weird group alternation" $ do
103 |                 ("AB" ^.. [regex|A(x)?(B)|] . groups `shouldBe` [["", "B"]])
104 |                 ("B" ^.. [regex|(A)|(B)|] . groups `shouldBe` [["", "B"]])
105 |                 -- This behaviour is consistent with pcre-heavy
106 |                 ("A" ^.. [regex|(A)|(B)|] . groups `shouldBe` [["A"]])
107 | 
108 |         describe "setting" $ do
109 |             it "should allow setting groups as a list" $ do
110 |                 ("one two three" & [regex|(\w+) (\w+)|] . groups .~ ["1", "2"])
111 |                 `shouldBe` "1 2 three"
112 | 
113 |             it "should allow editing when result list is the same length" $ do
114 |                 ("raindrops on roses and whiskers on kittens" & [regex|(\w+) on (\w+)|] . groups %~ reverse)
115 |                 `shouldBe` "roses on raindrops and kittens on whiskers"
116 | 
117 |     describe "group" $ do
118 |         it "should get a single group" $ do
119 |                 "a:b c:d" ^.. [regex|(\w):(\w)|] . group 1
120 |                 `shouldBe` ["b", "d"]
121 | 
122 |         it "should set a single group" $ do
123 |                 "a:b c:d" & [regex|(\w):(\w)|] . group 1 %~ T.toUpper
124 |                 `shouldBe` "a:B c:D"
125 | 
126 |         describe "traversed" $ do
127 |             it "should allow setting all group matches" $ do
128 |                 ("one two three" & [regex|(\w+) (\w+)|] . groups . traversed .~ "new")
129 |                 `shouldBe` "new new three"
130 | 
131 |             it "should allow mutating" $ do
132 |                 ("one two three four" & [regex|one (two) (three)|] . groups . traversed %~ (<> "!!") . T.toUpper)
133 |                 `shouldBe` "one TWO!! THREE!! four"
134 | 
135 |             it "should allow folding with index" $ do
136 |                 ("one two three four" ^.. [regex|(\w+) (\w+)|] . groups . traversed . withIndex)
137 |                 `shouldBe` [(0, "one"), (1, "two"), (0, "three"), (1, "four")]
138 | 
139 |             it "should allow setting with index" $ do
140 |                 ("one two three four" & [regex|(\w+) (\w+)|] . groups . traversed .@~ T.pack . show)
141 |                 `shouldBe` "0 1 0 1"
142 | 
143 |             it "should allow mutating with index" $ do
144 |                 ("one two three four" & [regex|(\w+) (\w+)|] . groups . traversed %@~ \i s -> (T.pack $ show i) <> ": " <> s)
145 |                 `shouldBe` "0: one 1: two 0: three 1: four"
146 | 
147 |             it "should compose indices with matches" $ do
148 |                 ("one two three four" ^.. ([regex|(\w+) (\w+)|] <.> groups . traversed) . withIndex)
149 |                 `shouldBe` [((0, 0), "one"), ((0, 1), "two"), ((1, 0), "three"), ((1, 1), "four")]
150 | 
151 |     describe "namedGroups" $ do
152 |         describe "getting" $ do
153 |             it "should get named groups" $ do
154 |                 "a b c" ^.. [regex|(?<mygroup>\w)|] . namedGroups
155 |                 `shouldBe` [M.fromList [("mygroup", "a")], M.fromList [("mygroup", "b")], M.fromList [("mygroup", "c")]]
156 | 
157 |             it "should get multiple named groups" $ do
158 |                 "raindrops on roses and whiskers on kittens" ^.. [regex|(?<one>\w+) on (?<two>\w+)|] . namedGroups
159 |                 `shouldBe` [M.fromList [("one", "raindrops"), ("two", "roses")], M.fromList [("one", "whiskers"), ("two", "kittens")]]
160 | 
161 |             it "should allow getting a specific named group" $ do
162 |                 ("raindrops on roses and whiskers on kittens" ^.. [regex|(?<one>\w+) on (?<two>\w+)|] . namedGroups . ix "two")
163 |                 `shouldBe` ["roses", "kittens"]
164 | 
165 |             it "should handle weird group alternation" $ do
166 |                 ("AB" ^.. [regex|A(?<opt>x)?(?<always>B)|] . namedGroups `shouldBe` [M.fromList [("opt", ""), ("always", "B")]])
167 |                 ("B" ^.. [regex|(?<a>A)|(?<b>B)|] . namedGroups `shouldBe` [M.fromList [("a", ""), ("b", "B")]])
168 |                 -- This is the behaviour of pcre-heavy, it's a bit unfortunate
169 |                 ("A" ^.. [regex|(?<a>A)|(?<b>B)|] . namedGroups `shouldBe` [M.fromList [("a", "A")]])
170 | 
171 |         describe "setting" $ do
172 |             it "should allow setting groups as a map" $ do
173 |                 ("one two three" & [regex|(?<a>\w+) (?<b>\w+)|] . namedGroups .~ M.fromList [("a", "1"), ("b", "2")])
174 |                 `shouldBe` "1 2 three"
175 | 
176 |     describe "namedGroup" $ do
177 |         it "should get a single named group" $ do
178 |                 "a:b c:d" ^.. [regex|(?<before>\w):(?<after>\w)|] . namedGroup "after"
179 |                 `shouldBe` ["b", "d"]
180 | 
181 |         it "should set a single group" $ do
182 |                 "a:b c:d" & [regex|(\w):(?<after>\w)|] . namedGroup "after" %~ T.toUpper
183 |                 `shouldBe` "a:B c:D"
184 | 
185 |     describe "matchAndGroups" $ do
186 |         it "should get match and groups" $ do
187 |             "raindrops on roses and whiskers on kittens" ^.. [regex|(\w+) on (\w+)|] . matchAndGroups
188 |             `shouldBe` [("raindrops on roses",["raindrops","roses"]),("whiskers on kittens",["whiskers","kittens"])]
189 | 
190 | 


--------------------------------------------------------------------------------