├── .gitignore
├── LICENCE
├── README.rst
├── circle.yml
├── nre.nimble
├── runtests.sh
├── src
    ├── .gitignore
    ├── nre.nim
    └── nre
    │   └── private
    │       └── util.nim
├── test
    ├── captures.nim
    ├── escape.nim
    ├── find.nim
    ├── init.nim
    ├── match.nim
    ├── misc.nim
    ├── optional_nonstrict.nim
    ├── replace.nim
    ├── split.nim
    └── testall.nim
└── web
    ├── logo.png
    └── logo.svg


/.gitignore:
--------------------------------------------------------------------------------
 1 | # all executables
 2 | *
 3 | !*/
 4 | !*.*
 5 | *.exe
 6 | 
 7 | # Wildcard patterns.
 8 | *.swp
 9 | nimcache
10 | 


--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015 Flaviu Tamas
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | What is NRE?
  2 | ============
  3 | 
  4 | A regular expression library for Nim using PCRE to do the hard work. The top
  5 | priorities are ergonomics & ease of use.
  6 | 
  7 | For documentation on how to write patterns, there exists `the official PCRE
  8 | pattern documentation
  9 | <https://www.pcre.org/original/doc/html/pcrepattern.html>`_. You can also
 10 | search the internet for a wide variety of third-party documentation and
 11 | tools.
 12 | 
 13 | Notes
 14 | -----
 15 | 
 16 | Issues with toSeq
 17 | ~~~~~~~~~~~~~~~~~
 18 | 
 19 | If you love ``sequtils.toSeq`` we have bad news for you. This library doesn't
 20 | work with it due to documented compiler limitations. As a workaround, use this:
 21 | 
 22 | .. code-block:: nim
 23 | 
 24 |    import nre except toSeq
 25 | 
 26 | Licencing
 27 | ~~~~~~~~~
 28 | 
 29 | PCRE has `some additional terms`_ that you must agree to in order to use
 30 | this module.
 31 | 
 32 | .. _`some additional terms`: http://pcre.sourceforge.net/license.txt
 33 | 
 34 | Empty string splitting
 35 | ~~~~~~~~~~~~~~~~~~~~~~
 36 | 
 37 | This library handles splitting with an empty string, i.e. if the splitting
 38 | regex is empty (``""``), the same way as `Perl <https://ideone.com/dDMjmz>`__,
 39 | `Javascript <http://jsfiddle.net/xtcbxurg/>`__, and `Java
 40 | <https://ideone.com/hYJuJ5>`__.
 41 | 
 42 | This means that ``"123".split(re"") == @["1", "2", "3"]``, as opposed to the
 43 | Nim stdlib's ``@["123"]``
 44 | 
 45 | Types
 46 | -----
 47 | 
 48 | ``type Regex* = ref object``
 49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 50 | Represents the pattern that things are matched against, constructed with
 51 | ``re(string)``. Examples: ``re"foo"``, ``re(r"(*ANYCRLF)(?x)foo #
 52 | comment")``.
 53 | 
 54 | ``pattern: string``
 55 |     the string that was used to create the pattern. For details on how
 56 |     to write a pattern, please see `the official PCRE pattern
 57 |     documentation.
 58 |     <https://www.pcre.org/original/doc/html/pcrepattern.html>`_
 59 | 
 60 | ``captureCount: int``
 61 |     the number of captures that the pattern has.
 62 | 
 63 | ``captureNameId: Table[string, int]``
 64 |     a table from the capture names to their numeric id.
 65 | 
 66 | 
 67 | Options
 68 | .......
 69 | 
 70 | The following options may appear anywhere in the pattern, and they affect
 71 | the rest of it.
 72 | 
 73 | -  ``(?i)`` - case insensitive
 74 | -  ``(?m)`` - multi-line: ``^`` and ``$`` match the beginning and end of
 75 |    lines, not of the subject string
 76 | -  ``(?s)`` - ``.`` also matches newline (*dotall*)
 77 | -  ``(?U)`` - expressions are not greedy by default. ``?`` can be added
 78 |    to a quantifier to make it greedy
 79 | -  ``(?x)`` - whitespace and comments (``#``) are ignored (*extended*)
 80 | -  ``(?X)`` - character escapes without special meaning (``\w`` vs.
 81 |    ``\a``) are errors (*extra*)
 82 | 
 83 | One or a combination of these options may appear only at the beginning
 84 | of the pattern:
 85 | 
 86 | -  ``(*UTF8)`` - treat both the pattern and subject as UTF-8
 87 | -  ``(*UCP)`` - Unicode character properties; ``\w`` matches ``я``
 88 | -  ``(*U)`` - a combination of the two options above
 89 | -  ``(*FIRSTLINE)`` - fails if there is not a match on the first line
 90 | -  ``(*NO_AUTO_CAPTURE)`` - turn off auto-capture for groups;
 91 |    ``(?<name>...)`` can be used to capture
 92 | -  ``(*CR)`` - newlines are separated by ``\r``
 93 | -  ``(*LF)`` - newlines are separated by ``\n`` (UNIX default)
 94 | -  ``(*CRLF)`` - newlines are separated by ``\r\n`` (Windows default)
 95 | -  ``(*ANYCRLF)`` - newlines are separated by any of the above
 96 | -  ``(*ANY)`` - newlines are separated by any of the above and Unicode
 97 |    newlines:
 98 | 
 99 |     single characters VT (vertical tab, U+000B), FF (form feed, U+000C),
100 |     NEL (next line, U+0085), LS (line separator, U+2028), and PS
101 |     (paragraph separator, U+2029). For the 8-bit library, the last two
102 |     are recognized only in UTF-8 mode.
103 |     —  man pcre
104 | 
105 | -  ``(*JAVASCRIPT_COMPAT)`` - JavaScript compatibility
106 | -  ``(*NO_STUDY)`` - turn off studying; study is enabled by default
107 | 
108 | For more details on the leading option groups, see the `Option
109 | Setting <http://man7.org/linux/man-pages/man3/pcresyntax.3.html#OPTION_SETTING>`_
110 | and the `Newline
111 | Convention <http://man7.org/linux/man-pages/man3/pcresyntax.3.html#NEWLINE_CONVENTION>`_
112 | sections of the `PCRE syntax
113 | manual <http://man7.org/linux/man-pages/man3/pcresyntax.3.html>`_.
114 | 
115 | Some of these options are not part of PCRE and are converted by nre
116 | into PCRE flags. These include ``NEVER_UTF``, ``ANCHORED``,
117 | ``DOLLAR_ENDONLY``, ``FIRSTLINE``, ``NO_AUTO_CAPTURE``,
118 | ``JAVASCRIPT_COMPAT``, ``U``, ``NO_STUDY``. In other PCRE wrappers, you
119 | will need to pass these as separate flags to PCRE.
120 | 
121 | ``type RegexMatch* = object``
122 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
123 | Usually seen as Option[RegexMatch], it represents the result of an
124 | execution. On failure, it is none, on success, it is some.
125 | 
126 | ``pattern: Regex``
127 |     the pattern that is being matched
128 | 
129 | ``str: string``
130 |     the string that was matched against
131 | 
132 | ``captures[]: string``
133 |     the string value of whatever was captured at that id. If the value
134 |     is invalid, then behavior is undefined. If the id is ``-1``, then
135 |     the whole match is returned. If the given capture was not matched,
136 |     an ``IndexError`` (``KeyError`` for a named capture) is raised.
137 | 
138 |     -  ``"abc".match(re"(\w)").get.captures[0] == "a"``
139 |     -  ``"abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"``
140 |     -  ``"abc".match(re"(\w)\w").get.captures[-1] == "ab"``
141 | 
142 | ``captureBounds[]: HSlice[int, int]``
143 |     gets the bounds of the given capture according to the same rules as
144 |     the above. If the capture is not filled, an ``IndexError`` (``KeyError``
145 |     for a named capture) is raised. The bounds are both inclusive.
146 | 
147 |     -  ``"abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0``
148 |     -  ``0 in "abc".match(re"(\w)").get.captureBounds == true``
149 |     -  ``"abc".match(re"").get.captureBounds[-1] == 0 .. -1``
150 |     -  ``"abc".match(re"abc").get.captureBounds[-1] == 0 .. 2``
151 | 
152 | ``match: string``
153 |     the full text of the match.
154 | 
155 | ``matchBounds: HSlice[int, int]``
156 |     the bounds of the match, as in ``captureBounds[]``
157 | 
158 | ``(captureBounds|captures).toTable``
159 |     returns a table with each named capture as a key.
160 | 
161 | ``(captureBounds|captures).toSeq``
162 |     returns all the captures by their number.
163 | 
164 | ``$: string``
165 |     same as ``match``
166 | 
167 | 
168 | ``type RegexInternalError* = ref object of RegexError``
169 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
170 | Internal error in the module, this probably means that there is a bug
171 | 
172 | 
173 | ``type InvalidUnicodeError* = ref object of RegexError``
174 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
175 | Thrown when matching fails due to invalid unicode in strings
176 | 
177 | 
178 | ``type SyntaxError* = ref object of RegexError``
179 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
180 | Thrown when there is a syntax error in the
181 | regular expression string passed in
182 | 
183 | 
184 | ``type StudyError* = ref object of RegexError``
185 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
186 | Thrown when studying the regular expression fails
187 | for whatever reason. The message contains the error
188 | code.
189 | 
190 | 
191 | Operations
192 | ----------
193 | 
194 | ``proc match*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch]``
195 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 | Like ```find(...)`` <#proc-find>`__, but anchored to the start of the
197 | string. This means that ``"foo".match(re"f").isSome == true``, but
198 | ``"foo".match(re"o").isSome == false``.
199 | 
200 | 
201 | ``iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): RegexMatch``
202 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
203 | Works the same as ```find(...)`` <#proc-find>`__, but finds every
204 | non-overlapping match. ``"2222".findIter(re"22")`` is ``"22", "22"``, not
205 | ``"22", "22", "22"``.
206 | 
207 | Arguments are the same as ```find(...)`` <#proc-find>`__
208 | 
209 | Variants:
210 | 
211 | -  ``proc findAll(...)`` returns a ``seq[string]``
212 | 
213 | 
214 | ``proc find*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch]``
215 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
216 | Finds the given pattern in the string between the start and end
217 | positions.
218 | 
219 | ``start``
220 |     The start point at which to start matching. ``|abc`` is ``0``;
221 |     ``a|bc`` is ``1``
222 | 
223 | ``endpos``
224 |     The maximum index for a match; ``int.high`` means the end of the
225 |     string, otherwise it’s an inclusive upper bound.
226 | 
227 | 
228 | ``proc split*(str: string, pattern: Regex, maxSplit = -1, start = 0): seq[string]``
229 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
230 | Splits the string with the given regex. This works according to the
231 | rules that Perl and Javascript use:
232 | 
233 | -  If the match is zero-width, then the string is still split:
234 |    ``"123".split(re"") == @["1", "2", "3"]``.
235 | 
236 | -  If the pattern has a capture in it, it is added after the string
237 |    split: ``"12".split(re"(\d)") == @["", "1", "", "2", ""]``.
238 | 
239 | -  If ``maxsplit != -1``, then the string will only be split
240 |    ``maxsplit - 1`` times. This means that there will be ``maxsplit``
241 |    strings in the output seq.
242 |    ``"1.2.3".split(re"\.", maxsplit = 2) == @["1", "2.3"]``
243 | 
244 | ``start`` behaves the same as in ```find(...)`` <#proc-find>`__.
245 | 
246 | 
247 | ``proc replace*(str: string, pattern: Regex, subproc: proc (match: RegexMatch): string): string``
248 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
249 | Replaces each match of Regex in the string with ``subproc``, which should
250 | never be or return ``nil``.
251 | 
252 | If ``subproc`` is a ``proc (RegexMatch): string``, then it is executed with
253 | each match and the return value is the replacement value.
254 | 
255 | If ``subproc`` is a ``proc (string): string``, then it is executed with the
256 | full text of the match and the return value is the replacement
257 | value.
258 | 
259 | If ``subproc`` is a string, the syntax is as follows:
260 | 
261 | -  ``$$`` - literal ``$``
262 | -  ``$123`` - capture number ``123``
263 | -  ``$foo`` - named capture ``foo``
264 | -  ``${foo}`` - same as above
265 | -  ``$1$#`` - first and second captures
266 | -  ``$#`` - first capture
267 | -  ``$0`` - full match
268 | 
269 | If a given capture is missing, ``IndexError`` is thrown for un-named captures
270 | and ``KeyError`` for named captures.
271 | 
272 | ``proc escapeRe*(str: string): string``
273 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
274 | Escapes the string so it doesn’t match any special characters.
275 | Incompatible with the Extra flag (``X``).
276 | 
277 | 
278 | 


--------------------------------------------------------------------------------
/circle.yml:
--------------------------------------------------------------------------------
 1 | dependencies:
 2 |   pre:
 3 |     - |
 4 |         if [ ! -x ~/nim/bin/nim ]; then  # no cached compiler: bootstrap Nim from source
 5 |           sudo apt-get install -y gcc
 6 |           git clone -b devel --depth 1 https://github.com/araq/nim ~/nim/
 7 |           git clone -b devel --depth 1 https://github.com/nim-lang/csources ~/nim/csources/
 8 |           cd ~/nim/csources; sh build.sh; cd ..
 9 |           rm -rf csources
10 |           bin/nim c koch
11 |           ./koch boot -d:release
12 |           ln -fs ~/nim/bin/nim ~/bin/nim
13 |         else  # cached checkout: rebuild only when the merge brought in changes
14 |           cd ~/nim
15 |           git fetch origin
16 |           if ! git merge FETCH_HEAD | grep -E "Already up[ -]to[ -]date"; then
17 |             bin/nim c koch
18 |             ./koch boot -d:release
19 |           fi
20 |         fi
21 |     - |
22 |         if [ ! -x ~/.nimble/bin/nimble ]; then  # no cached nimble: build and install it
23 |           git clone --depth 1 https://github.com/nim-lang/nimble ~/nimble/
24 |           cd ~/nimble/
25 |           nim c src/nimble.nim
26 |           ./src/nimble install
27 |           ln -fs ~/.nimble/bin/nimble ~/bin/nimble
28 |         fi
29 |     - nimble update
30 |     - nimble build
31 | 
32 |   cache_directories:
33 |     - "~/bin/"
34 |     - "~/nim/"
35 |     - "~/.nimble/"
36 | 
37 | test:
38 |   override:
39 |     - ./runtests.sh
40 | 


--------------------------------------------------------------------------------
/nre.nimble:
--------------------------------------------------------------------------------
 1 | [Package]
 2 | name        = "nre"
 3 | author      = "Flaviu Tamas"
 4 | version     = "2.0.2"
 5 | description = "Yet another PCRE library"
 6 | license     = "MIT"
 7 | srcDir      = "src"
 8 | 
 9 | [Deps]
10 | Requires: "nim >= 0.19.0"
11 | 


--------------------------------------------------------------------------------
/runtests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | nim c --path:src -r --verbosity:0 --hints:off --linedir:on --debuginfo \
3 |   --stacktrace:on --linetrace:on "$@" ./test/testall.nim  # "$@": extra compiler flags from the caller
4 | 


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
 1 | # all executables
 2 | *
 3 | !*/
 4 | !*.*
 5 | *.exe
 6 | 
 7 | # Wildcard patterns.
 8 | *.swp
 9 | nimcache
10 | 


--------------------------------------------------------------------------------
/src/nre.nim:
--------------------------------------------------------------------------------
  1 | #
  2 | #            Nim's Runtime Library
  3 | #        (c) Copyright 2015 Nim Contributors
  4 | #
  5 | #    See the file "copying.txt", included in this
  6 | #    distribution, for details about the copyright.
  7 | #
  8 | 
  9 | 
 10 | from pcre import nil
 11 | import ./nre/private/util
 12 | import tables
 13 | from strutils import `%`
 14 | from math import ceil
 15 | import options
 16 | from unicode import runeLenAt
 17 | 
 18 | export options
 19 | 
 20 | 
 21 | ## What is NRE?
 22 | ## ============
 23 | ##
 24 | ## A regular expression library for Nim using PCRE to do the hard work. The top
 25 | ## priorities are ergonomics & ease of use.
 26 | ##
 27 | ## For documentation on how to write patterns, there exists `the official PCRE
 28 | ## pattern documentation
 29 | ## <https://www.pcre.org/original/doc/html/pcrepattern.html>`_. You can also
 30 | ## search the internet for a wide variety of third-party documentation and
 31 | ## tools.
 32 | ##
 33 | ## Notes
 34 | ## -----
 35 | ##
 36 | ## Issues with toSeq
 37 | ## ~~~~~~~~~~~~~~~~~
 38 | ##
 39 | ## If you love ``sequtils.toSeq`` we have bad news for you. This library doesn't
 40 | ## work with it due to documented compiler limitations. As a workaround, use this:
 41 | ##
 42 | ## .. code-block:: nim
 43 | ##
 44 | ##    import nre except toSeq
 45 | ##
 46 | ## For more details, see `nim-lang/Nim#7322 <https://github.com/nim-lang/Nim/issues/7322>`_
 47 | ##
 48 | ## Licencing
 49 | ## ~~~~~~~~~
 50 | ##
 51 | ## PCRE has `some additional terms`_ that you must agree to in order to use
 52 | ## this module.
 53 | ##
 54 | ## .. _`some additional terms`: http://pcre.sourceforge.net/license.txt
 55 | ##
 56 | ## Empty string splitting
 57 | ## ~~~~~~~~~~~~~~~~~~~~~~
 58 | ##
 59 | ## This library handles splitting with an empty string, i.e. if the splitting
 60 | ## regex is empty (``""``), the same way as `Perl <https://ideone.com/dDMjmz>`__,
 61 | ## `Javascript <http://jsfiddle.net/xtcbxurg/>`__, and `Java
 62 | ## <https://ideone.com/hYJuJ5>`__.
 63 | ##
 64 | ## This means that ``"123".split(re"") == @["1", "2", "3"]``, as opposed to the
 65 | ## Nim stdlib's ``@["123"]``
 66 | runnableExamples:
 67 |   let vowels = re"[aeoui]"
 68 | 
 69 |   let expectedResults = [  # inclusive bounds of every vowel in "moigagoo"
 70 |     1 .. 1,
 71 |     2 .. 2,
 72 |     4 .. 4,
 73 |     6 .. 6,
 74 |     7 .. 7,
 75 |   ]
 76 |   var i = 0
 77 |   for match in "moigagoo".findIter(vowels):
 78 |     doAssert match.matchBounds == expectedResults[i]
 79 |     inc i
 80 | 
 81 |   let firstVowel = "foo".find(vowels)
 82 |   let hasVowel = firstVowel.isSome()
 83 |   if hasVowel:
 84 |     let matchBounds = firstVowel.get().captureBounds[-1]  # -1 selects the whole match
 85 |     doAssert matchBounds.a == 1
 86 | 
 87 | 
 88 | # Type definitions {{{
 89 | type
 90 |   Regex* = ref object
 91 |     ## Represents the pattern that things are matched against, constructed with
 92 |     ## ``re(string)``. Examples: ``re"foo"``, ``re(r"(*ANYCRLF)(?x)foo #
 93 |     ## comment")``.
 94 |     ##
 95 |     ## ``pattern: string``
 96 |     ##     the string that was used to create the pattern. For details on how
 97 |     ##     to write a pattern, please see `the official PCRE pattern
 98 |     ##     documentation.
 99 |     ##     <https://www.pcre.org/original/doc/html/pcrepattern.html>`_
100 |     ##
101 |     ## ``captureCount: int``
102 |     ##     the number of captures that the pattern has.
103 |     ##
104 |     ## ``captureNameId: Table[string, int]``
105 |     ##     a table from the capture names to their numeric id.
106 |     ##
107 |     ##
108 |     ## Options
109 |     ## .......
110 |     ##
111 |     ## The following options may appear anywhere in the pattern, and they affect
112 |     ## the rest of it.
113 |     ##
114 |     ## -  ``(?i)`` - case insensitive
115 |     ## -  ``(?m)`` - multi-line: ``^`` and ``$`` match the beginning and end of
116 |     ##    lines, not of the subject string
117 |     ## -  ``(?s)`` - ``.`` also matches newline (*dotall*)
118 |     ## -  ``(?U)`` - expressions are not greedy by default. ``?`` can be added
119 |     ##    to a quantifier to make it greedy
120 |     ## -  ``(?x)`` - whitespace and comments (``#``) are ignored (*extended*)
121 |     ## -  ``(?X)`` - character escapes without special meaning (``\w`` vs.
122 |     ##    ``\a``) are errors (*extra*)
123 |     ##
124 |     ## One or a combination of these options may appear only at the beginning
125 |     ## of the pattern:
126 |     ##
127 |     ## -  ``(*UTF8)`` - treat both the pattern and subject as UTF-8
128 |     ## -  ``(*UCP)`` - Unicode character properties; ``\w`` matches ``я``
129 |     ## -  ``(*U)`` - a combination of the two options above
130 |     ## -  ``(*FIRSTLINE)`` - fails if there is not a match on the first line
131 |     ## -  ``(*NO_AUTO_CAPTURE)`` - turn off auto-capture for groups;
132 |     ##    ``(?<name>...)`` can be used to capture
133 |     ## -  ``(*CR)`` - newlines are separated by ``\r``
134 |     ## -  ``(*LF)`` - newlines are separated by ``\n`` (UNIX default)
135 |     ## -  ``(*CRLF)`` - newlines are separated by ``\r\n`` (Windows default)
136 |     ## -  ``(*ANYCRLF)`` - newlines are separated by any of the above
137 |     ## -  ``(*ANY)`` - newlines are separated by any of the above and Unicode
138 |     ##    newlines:
139 |     ##
140 |     ##     single characters VT (vertical tab, U+000B), FF (form feed, U+000C),
141 |     ##     NEL (next line, U+0085), LS (line separator, U+2028), and PS
142 |     ##     (paragraph separator, U+2029). For the 8-bit library, the last two
143 |     ##     are recognized only in UTF-8 mode.
144 |     ##     —  man pcre
145 |     ##
146 |     ## -  ``(*JAVASCRIPT_COMPAT)`` - JavaScript compatibility
147 |     ## -  ``(*NO_STUDY)`` - turn off studying; study is enabled by default
148 |     ##
149 |     ## For more details on the leading option groups, see the `Option
150 |     ## Setting <http://man7.org/linux/man-pages/man3/pcresyntax.3.html#OPTION_SETTING>`_
151 |     ## and the `Newline
152 |     ## Convention <http://man7.org/linux/man-pages/man3/pcresyntax.3.html#NEWLINE_CONVENTION>`_
153 |     ## sections of the `PCRE syntax
154 |     ## manual <http://man7.org/linux/man-pages/man3/pcresyntax.3.html>`_.
155 |     ##
156 |     ## Some of these options are not part of PCRE and are converted by nre
157 |     ## into PCRE flags. These include ``NEVER_UTF``, ``ANCHORED``,
158 |     ## ``DOLLAR_ENDONLY``, ``FIRSTLINE``, ``NO_AUTO_CAPTURE``,
159 |     ## ``JAVASCRIPT_COMPAT``, ``U``, ``NO_STUDY``. In other PCRE wrappers, you
160 |     ## will need to pass these as separate flags to PCRE.
161 |     pattern*: string  ## not nil
162 |     pcreObj: ptr pcre.Pcre  ## not nil
163 |     pcreExtra: ptr pcre.ExtraData  ## nil
164 | 
165 |     captureNameToId: Table[string, int]
166 | 
167 |   RegexMatch* = object
168 |     ## Usually seen as Option[RegexMatch], it represents the result of an
169 |     ## execution. On failure, it is none, on success, it is some.
170 |     ##
171 |     ## ``pattern: Regex``
172 |     ##     the pattern that is being matched
173 |     ##
174 |     ## ``str: string``
175 |     ##     the string that was matched against
176 |     ##
177 |     ## ``captures[]: string``
178 |     ##     the string value of whatever was captured at that id. If the value
179 |     ##     is invalid, then behavior is undefined. If the id is ``-1``, then
180 |     ##     the whole match is returned. If the given capture was not matched,
181 |     ##     an ``IndexError`` (``KeyError`` for a named capture) is raised.
182 |     ##
183 |     ##     -  ``"abc".match(re"(\w)").get.captures[0] == "a"``
184 |     ##     -  ``"abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"``
185 |     ##     -  ``"abc".match(re"(\w)\w").get.captures[-1] == "ab"``
186 |     ##
187 |     ## ``captureBounds[]: HSlice[int, int]``
188 |     ##     gets the bounds of the given capture according to the same rules as
189 |     ##     the above. If the capture is not filled, an ``IndexError`` (``KeyError``
190 |     ##     for a named capture) is raised. The bounds are both inclusive.
191 |     ##
192 |     ##     -  ``"abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0``
193 |     ##     -  ``0 in "abc".match(re"(\w)").get.captureBounds == true``
194 |     ##     -  ``"abc".match(re"").get.captureBounds[-1] == 0 .. -1``
195 |     ##     -  ``"abc".match(re"abc").get.captureBounds[-1] == 0 .. 2``
196 |     ##
197 |     ## ``match: string``
198 |     ##     the full text of the match.
199 |     ##
200 |     ## ``matchBounds: HSlice[int, int]``
201 |     ##     the bounds of the match, as in ``captureBounds[]``
202 |     ##
203 |     ## ``(captureBounds|captures).toTable``
204 |     ##     returns a table with each named capture as a key.
205 |     ##
206 |     ## ``(captureBounds|captures).toSeq``
207 |     ##     returns all the captures by their number.
208 |     ##
209 |     ## ``$: string``
210 |     ##     same as ``match``
211 |     pattern*: Regex  ## The regex doing the matching.
212 |                      ## Not nil.
213 |     str*: string  ## The string that was matched against.
214 |                   ## Not nil.
215 |     pcreMatchBounds: seq[HSlice[cint, cint]] ## First item is the bounds of the match
216 |                                             ## Other items are the captures
217 |                                             ## `a` is inclusive start, `b` is exclusive end
218 | 
219 |   Captures* = distinct RegexMatch  ## view of a match whose `[]` yields the captured text
220 |   CaptureBounds* = distinct RegexMatch  ## view of a match whose `[]` yields inclusive bounds
221 | 
222 |   RegexError* = ref object of Exception
223 | 
224 |   RegexInternalError* = ref object of RegexError
225 |     ## Internal error in the module, this probably means that there is a bug
226 | 
227 |   InvalidUnicodeError* = ref object of RegexError
228 |     ## Thrown when matching fails due to invalid unicode in strings
229 |     pos*: int  ## the location of the invalid unicode in bytes
230 | 
231 |   SyntaxError* = ref object of RegexError
232 |     ## Thrown when there is a syntax error in the
233 |     ## regular expression string passed in
234 |     pos*: int  ## the location of the syntax error in bytes
235 |     pattern*: string  ## the pattern that caused the problem
236 | 
237 |   StudyError* = ref object of RegexError
238 |     ## Thrown when studying the regular expression fails
239 |     ## for whatever reason. The message contains the error
240 |     ## code.
241 | 
242 | runnableExamples:
243 |     # This MUST be kept in sync with the examples in the RegexMatch doc comment
244 |     doAssert "abc".match(re"(\w)").get.captures[0] == "a"
245 |     doAssert "abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"
246 |     doAssert "abc".match(re"(\w)\w").get.captures[-1] == "ab"
247 | 
248 |     doAssert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
249 |     doAssert 0 in "abc".match(re"(\w)").get.captureBounds == true
250 |     doAssert "abc".match(re"").get.captureBounds[-1] == 0 .. -1  # empty match: end is start - 1
251 |     doAssert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
252 | # }}}
253 | 
254 | proc getinfo[T](pattern: Regex, opt: cint): T =
255 |   ## Asks `pcre.fullinfo` for the info field `opt` of `pattern` and returns it
256 |   ## as a `T`. Raises `FieldError` when the call reports a negative status.
257 |   let status = pcre.fullinfo(pattern.pcreObj, pattern.pcreExtra, opt, addr result)
258 |   if status < 0:  # XXX Error message that doesn't expose implementation details
259 |     raise newException(FieldError, "Invalid getinfo for $1, errno $2" % [$opt, $status])
260 | 
261 | # Regex accessors {{{
262 | proc captureCount*(pattern: Regex): int =
263 |   result = getinfo[cint](pattern, pcre.INFO_CAPTURECOUNT)  # PCRE's capture count for the pattern
264 | 
265 | proc captureNameId*(pattern: Regex): Table[string, int] =
266 |   result = pattern.captureNameToId  # table from capture names to numeric ids
267 | 
268 | proc matchesCrLf(pattern: Regex): bool =
269 |   ## Reports whether "\r\n" counts as a newline for `pattern`. An explicit
270 |   ## CR-only or LF-only convention yields false: the NEWLINE_* values share
271 |   ## bits (CRLF == CR or LF), so the masked field is compared, not bit-tested.
272 |   const newlineMask = uint32(pcre.NEWLINE_CR or pcre.NEWLINE_LF or pcre.NEWLINE_CRLF or
273 |                              pcre.NEWLINE_ANY or pcre.NEWLINE_ANYCRLF)
274 |   let newlineFlags = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS)) and newlineMask
275 |   if newlineFlags != 0u32:
276 |     return newlineFlags == uint32(pcre.NEWLINE_CRLF) or
277 |            newlineFlags == uint32(pcre.NEWLINE_ANY) or
278 |            newlineFlags == uint32(pcre.NEWLINE_ANYCRLF)
279 | 
280 |   # no convention set on the pattern: fall back to the PCRE build configuration
281 |   var confFlags: cint
282 |   if pcre.config(pcre.CONFIG_NEWLINE, addr confFlags) != 0:
283 |     assert(false, "CONFIG_NEWLINE apparently got screwed up")
284 | 
285 |   case confFlags  # 13 = CR, 10 = LF; CRLF, ANYCRLF (-2) and ANY (-1) accept "\r\n"
286 |   of (13 shl 8) or 10, -2, -1: return true
287 |   else: return false
288 | # }}}
289 | 
290 | # Capture accessors {{{
291 | func captureBounds*(pattern: RegexMatch): CaptureBounds = CaptureBounds(pattern)  # bounds view of the match
292 | 
293 | func captures*(pattern: RegexMatch): Captures = Captures(pattern)  # text view of the match
294 | 
295 | func contains*(pattern: CaptureBounds, i: int): bool =
296 |   ## True when capture `i` has a start offset other than -1 (`-1` is the whole match).
297 |   RegexMatch(pattern).pcreMatchBounds[i + 1].a != -1
298 | 
299 | func contains*(pattern: Captures, i: int): bool =
300 |   contains(CaptureBounds(pattern), i)  # same test as for CaptureBounds
301 | 
302 | func `[]`*(pattern: CaptureBounds, i: int): HSlice[int, int] =
303 |   ## Inclusive bounds of capture `i`; raises `IndexError` if it was not captured.
304 |   if i notin pattern:
305 |     raise newException(IndexError, "Group '" & $i & "' was not captured")
306 | 
307 |   let raw = RegexMatch(pattern).pcreMatchBounds[i + 1]  # `b` is an exclusive end
308 |   int(raw.a) .. int(raw.b - 1)
309 | 
310 | func `[]`*(pattern: Captures, i: int): string =
311 |   ## Text of capture `i`, cut out of the subject string by its inclusive bounds.
312 |   let m = RegexMatch(pattern)
313 |   let span = m.captureBounds[i]
314 |   substr(m.str, span.a, span.b)
315 | 
316 | func match*(pattern: RegexMatch): string =
317 |   result = pattern.captures[-1]  # index -1 is the whole match
318 | 
319 | func matchBounds*(pattern: RegexMatch): HSlice[int, int] =
320 |   result = pattern.captureBounds[-1]  # index -1 is the whole match
321 | 
322 | func contains*(pattern: CaptureBounds, name: string): bool =
323 |   ## True when `name` is a capture name of the regex and that capture is set.
324 |   let ids = RegexMatch(pattern).pattern.captureNameToId
325 |   # `and` short-circuits, so an unknown name never reaches the table lookup
326 |   result = name in ids and
327 |            ids[name] in pattern
328 | 
329 | func contains*(pattern: Captures, name: string): bool =
330 |   contains(CaptureBounds(pattern), name)  # same test as for CaptureBounds
331 | 
332 | func checkNamedCaptured(pattern: RegexMatch, name: string): void =
333 |   if name notin pattern.captureBounds:  # unknown name or capture not set
334 |     raise newException(KeyError, "Group '" & name & "' was not captured")
335 | 
336 | func `[]`*(pattern: CaptureBounds, name: string): HSlice[int, int] =
337 |   let m = RegexMatch(pattern)
338 |   m.checkNamedCaptured(name)  # raises KeyError for an unknown or unset name
339 |   pattern[m.pattern.captureNameToId[name]]
340 | 
341 | func `[]`*(pattern: Captures, name: string): string =
342 |   let m = RegexMatch(pattern)
343 |   m.checkNamedCaptured(name)  # raises KeyError for an unknown or unset name
344 |   pattern[m.pattern.captureNameToId[name]]
345 | 
346 | template toTableImpl() {.dirty.} =  # dirty: uses the caller's `pattern` and `result`
347 |   for key in RegexMatch(pattern).pattern.captureNameId.keys:
348 |     if key in pattern:  # leave out named captures that are not set
349 |         result[key] = pattern[key]
350 | 
351 | func toTable*(pattern: Captures): Table[string, string] =
352 |   result = initTable[string, string]()  # capture name -> captured text
353 |   toTableImpl()  # fills `result` from `pattern`
354 | 
355 | func toTable*(pattern: CaptureBounds): Table[string, HSlice[int, int]] =
356 |   result = initTable[string, HSlice[int, int]]()  # capture name -> inclusive bounds
357 |   toTableImpl()  # fills `result` from `pattern`
358 | 
359 | template itemsImpl() {.dirty.} =  # dirty: uses the caller's `pattern` and `default`
360 |   for i in 0 ..< RegexMatch(pattern).pattern.captureCount:
361 |     # done in this roundabout way to avoid multiple yields (potential code
362 |     # bloat)
363 |     let nextYieldVal = if i in pattern:
364 |       some(pattern[i])  # capture is set: wrap its value
365 |     else:
366 |       default  # capture is not set: caller-supplied fallback
367 | 
368 |     yield nextYieldVal
369 | 
370 | iterator items*(pattern: CaptureBounds,
371 |                 default = none(HSlice[int, int])): Option[HSlice[int, int]] =
372 |   itemsImpl()
373 | 
374 | iterator items*(pattern: Captures,
375 |                 default: Option[string] = none(string)): Option[string] =
376 |   itemsImpl()
377 | 
378 | proc toSeq*(pattern: CaptureBounds, default = none(HSlice[int, int])): seq[Option[HSlice[int, int]]] =
379 |   result = @[]
380 |   for entry in items(pattern, default): result.add(entry)
381 | 
382 | proc toSeq*(pattern: Captures, default: Option[string] = none(string)): seq[Option[string]] =
383 |   result = @[]
384 |   for entry in items(pattern, default): result.add(entry)
385 | 
386 | proc `$`*(pattern: RegexMatch): string =
387 |   result = pattern.captures[-1]  # capture -1 is the text of the whole match
388 | 
389 | proc `==`*(a, b: Regex): bool =
390 |   ## Field-wise equality for two non-nil regexes, reference equality otherwise.
391 |   if a.isNil or b.isNil:
392 |     # at least one side is nil: fall back to comparing the references
393 |     return system.`==`(a, b)
394 |   result = a.pattern == b.pattern and a.pcreObj == b.pcreObj and
395 |            a.pcreExtra == b.pcreExtra
396 | 
397 | proc `==`*(a, b: RegexMatch): bool =
398 |   # compares only the regex and the subject string, not the match bounds
399 |   result = a.pattern == b.pattern and a.str == b.str
400 | # }}}
401 | 
402 | # Creation & Destruction {{{
403 | # PCRE Options {{{
404 | const PcreOptions = {
405 |   "NEVER_UTF": pcre.NEVER_UTF,
406 |   "ANCHORED": pcre.ANCHORED,
407 |   "DOLLAR_ENDONLY": pcre.DOLLAR_ENDONLY,
408 |   "FIRSTLINE": pcre.FIRSTLINE,
409 |   "NO_AUTO_CAPTURE": pcre.NO_AUTO_CAPTURE,
410 |   "JAVASCRIPT_COMPAT": pcre.JAVASCRIPT_COMPAT,
411 |   "U": pcre.UTF8 or pcre.UCP  # the one name that sets two flags at once
412 | }.toTable  # looked up by extractOptions: option name -> flag bits OR-ed into the result
413 | 
414 | # Option names (or `NAME=` prefixes) that extractOptions leaves inside the pattern text
415 | const SkipOptions = [
416 |   "LIMIT_MATCH=", "LIMIT_RECURSION=", "NO_AUTO_POSSESS", "NO_START_OPT",
417 |   "UTF8", "UTF16", "UTF32", "UTF", "UCP",
418 |   "CR", "LF", "CRLF", "ANYCRLF", "ANY", "BSR_ANYCRLF", "BSR_UNICODE"
419 | ]
420 | 
421 | proc extractOptions(pattern: string): tuple[pattern: string, flags: int, study: bool] =
422 |   result = ("", 0, true)
423 |   # Consumes leading `(*NAME)` groups: PcreOptions become flags, NO_STUDY clears `study`, SkipOptions stay in the text.
424 |   var optionStart = 0
425 |   var equals = false
426 |   for i, c in pattern:
427 |     if optionStart == i:  # first character of a candidate group
428 |       if c != '(':
429 |         break
430 |       optionStart = i  # no-op: optionStart already equals i in this branch
431 | 
432 |     elif optionStart == i-1:  # second character has to be '*'
433 |       if c != '*':
434 |         break
435 | 
436 |     elif c == ')':  # end of a group: decide what to do with its name
437 |       let name = pattern[optionStart+2 .. i-1]
438 |       if equals or name in SkipOptions:  # kept verbatim in the returned pattern
439 |         result.pattern.add pattern[optionStart .. i]
440 |       elif PcreOptions.hasKey name:
441 |         result.flags = result.flags or PcreOptions[name]
442 |       elif name == "NO_STUDY":
443 |         result.study = false
444 |       else:
445 |         break  # unknown name: stop, the remainder is copied as-is below
446 |       optionStart = i+1  # a following group would start right after ')'
447 |       equals = false
448 | 
449 |     elif not equals:  # inside a name; once '=' was seen the value characters are not validated
450 |       if c == '=':
451 |         equals = true
452 |         if pattern[optionStart+2 .. i] notin SkipOptions:  # slice includes the '='
453 |           break
454 |       elif c notin {'A'..'Z', '0'..'9', '_'}:
455 |         break
456 | 
457 |   result.pattern.add pattern[optionStart .. pattern.high]  # everything not consumed as an option
458 | 
459 | # }}}
460 | 
461 | proc destroyRegex(pattern: Regex) =
462 |   # finalizer body: release the compiled pattern, then the study data if any
463 |   pcre.free_substring(cast[cstring](pattern.pcreObj))
464 |   pattern.pcreObj = nil
465 |   if not pattern.pcreExtra.isNil: pcre.free_study(pattern.pcreExtra)
466 | 
467 | proc getNameToNumberTable(pattern: Regex): Table[string, int] =
468 |   ## Maps every named group to its zero-based capture index, read from PCRE's name table.
469 |   let entryCount = getinfo[cint](pattern, pcre.INFO_NAMECOUNT)
470 |   let entrySize = getinfo[cint](pattern, pcre.INFO_NAMEENTRYSIZE)
471 |   let table = cast[ptr UncheckedArray[uint8]](getinfo[int](pattern, pcre.INFO_NAMETABLE))
472 | 
473 |   result = initTable[string, int]()
474 | 
475 |   for i in 0 ..< entryCount:
476 |     let pos = i * entrySize
477 |     # The first two bytes hold the 1-based group number, high byte first. `-` binds
478 |     # tighter than `or`, so the bytes must be combined in parentheses before the -1.
479 |     let num = ((int(table[pos]) shl 8) or int(table[pos + 1])) - 1
480 |     var name = ""
481 |     var idx = 2  # the zero-terminated name follows the two number bytes
482 |     while table[pos + idx] != 0:
483 |       name.add(char(table[pos + idx]))
484 |       idx += 1
485 |     result[name] = num
486 | 
487 | proc initRegex(pattern: string, flags: int, study = true): Regex =
488 |   ## Compiles `pattern` with `flags`, optionally studies it, and stores the named-group ids.
489 |   new(result, destroyRegex)
490 |   result.pattern = pattern
491 | 
492 |   var errorMsg: cstring
493 |   var errOffset: cint
494 |   # `flags` is narrowed to cint for the C call (better hope int is at least 4 bytes..)
495 |   result.pcreObj = pcre.compile(cstring(pattern), cint(flags),
496 |                                 addr errorMsg, addr errOffset, nil)
497 |   if result.pcreObj.isNil:
498 |     # compilation failed: pass on PCRE's message and error offset
499 |     raise SyntaxError(msg: $errorMsg, pos: errOffset, pattern: pattern)
500 | 
501 |   if study:
502 |     var hasJit: cint
503 |     # JIT is requested only when the config query succeeds and reports 1
504 |     let jitAvailable = pcre.config(pcre.CONFIG_JIT, addr hasJit) == 0 and hasJit == 1'i32
505 |     var options: cint = 0
506 |     if jitAvailable:
507 |       options = pcre.STUDY_JIT_COMPILE
508 |     result.pcreExtra = pcre.study(result.pcreObj, options, addr errorMsg)
509 |     if not errorMsg.isNil:
510 |       raise StudyError(msg: $errorMsg)
511 | 
512 |   result.captureNameToId = getNameToNumberTable(result)
513 | 
514 | proc re*(pattern: string): Regex =
515 |   let opts = extractOptions(pattern)  # splits leading `(*NAME)` options off the text
516 |   result = initRegex(opts.pattern, opts.flags, opts.study)
517 | # }}}
518 | 
519 | # Operations {{{
520 | proc matchImpl(str: string, pattern: Regex, start, endpos: int, flags: int): Option[RegexMatch] =
521 |   var myResult = RegexMatch(pattern : pattern, str : str)  # returned as-is when exec reports a match
522 |   # Runs a single pcre.exec over `str`. See PCRE man pages.
523 |   # 2x capture count to make room for start-end pairs
524 |   # 1x capture count as slack space for PCRE
525 |   let vecsize = (pattern.captureCount() + 1) * 3
526 |   # div 2 because each element is 2 cints long
527 |   # plus 1 because we need the ceiling, not the floor
528 |   myResult.pcreMatchBounds = newSeq[HSlice[cint, cint]]((vecsize + 1) div 2)
529 |   myResult.pcreMatchBounds.setLen(vecsize div 3)  # captureCount + 1 pairs stay visible
530 |   # NOTE(review): exec below is still told `vecsize` cints are usable; presumably this relies on setLen keeping the larger allocation - confirm
531 |   let strlen = if endpos == int.high: str.len else: endpos+1  # endpos is inclusive; int.high means "to the end"
532 |   doAssert(strlen <= str.len)  # don't want buffer overflows
533 | 
534 |   let execRet = pcre.exec(pattern.pcreObj,
535 |                           pattern.pcreExtra,
536 |                           cstring(str),
537 |                           cint(strlen),
538 |                           cint(start),
539 |                           cint(flags),
540 |                           cast[ptr cint](addr myResult.pcreMatchBounds[0]),
541 |                           cint(vecsize))
542 |   if execRet >= 0:  # non-negative return code: treated as a match
543 |     return some(myResult)
544 | 
545 |   case execRet:  # negative return codes: no match, or an error mapped to an exception
546 |     of pcre.ERROR_NOMATCH:
547 |       return none(RegexMatch)
548 |     of pcre.ERROR_NULL:
549 |       raise newException(AccessViolationError, "Expected non-null parameters")
550 |     of pcre.ERROR_BADOPTION:
551 |       raise RegexInternalError(msg : "Unknown pattern flag. Either a bug or " &
552 |         "outdated PCRE.")
553 |     of pcre.ERROR_BADUTF8, pcre.ERROR_SHORTUTF8, pcre.ERROR_BADUTF8_OFFSET:
554 |       raise InvalidUnicodeError(msg : "Invalid unicode byte sequence",
555 |         pos : myResult.pcreMatchBounds[0].a)  # NOTE(review): presumably PCRE leaves the offending offset in the first vector slot - confirm
556 |     else:
557 |       raise RegexInternalError(msg : "Unknown internal error: " & $execRet)
558 | 
559 | proc match*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] =
560 |   ## Same as ` ``find(...)`` <#proc-find>`_, except that the search runs with
561 |   ## PCRE's ``ANCHORED`` flag: ``"foo".match(re"f")`` succeeds, whereas
562 |   ## ``"foo".match(re"o")`` does not.
563 |   runnableExamples:
564 |     doAssert "foo".match(re"f").isSome
565 |     doAssert "foo".match(re"o").isNone
566 | 
567 |   result = matchImpl(str, pattern, start, endpos, pcre.ANCHORED)
568 | 
569 | iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): RegexMatch =
570 |   ## Works the same as ` ``find(...)`` <#proc-find>`_, but finds every
571 |   ## non-overlapping match. ``"2222".findAll(re"22")`` is ``"22", "22"``, not
572 |   ## ``"22", "22", "22"``.
573 |   ##
574 |   ## Arguments are the same as ` ``find(...)`` <#proc-find>`_
575 |   ##
576 |   ## Variants:
577 |   ##
578 |   ## -  ``proc findAll(...)`` returns a ``seq[string]``
579 |   # see pcredemo for explanation
580 |   let matchesCrLf = pattern.matchesCrLf()
581 |   let unicode = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS) and
582 |     pcre.UTF8) > 0u32
583 |   let strlen = if endpos == int.high: str.len else: endpos+1
584 |   var offset = start
585 |   var match: Option[RegexMatch]
586 | 
587 |   while true:
588 |     # after an empty match the search restarts at the same offset, so another
589 |     # empty match there is forbidden, otherwise the loop would never advance
590 |     var flags = 0
591 |     if match.isSome and
592 |        match.get.matchBounds.a > match.get.matchBounds.b:
593 |       # 0-len match
594 |       flags = pcre.NOTEMPTY_ATSTART
595 |     match = str.matchImpl(pattern, offset, endpos, flags)
596 | 
597 |     if match.isNone:
598 |       # An unrestricted search (flags == 0) has already tried every position
599 |       # from `offset` on, so its failure means nothing is left to find;
600 |       # retrying one character further would only repeat the same scan.
601 |       if flags == 0 or offset >= strlen:
602 |         break
603 | 
604 |       if matchesCrLf and offset < (str.len - 1) and
605 |          str[offset] == '\r' and str[offset + 1] == '\L':
606 |         # if PCRE treats CrLf as newline, skip both at the same time
607 |         offset += 2
608 |       elif unicode:
609 |         # XXX what about invalid unicode?
610 |         offset += str.runeLenAt(offset)
611 |         assert(offset <= strlen)
612 |       else:
613 |         offset += 1
614 |     else:
615 |       offset = match.get.matchBounds.b + 1
616 | 
617 |       yield match.get
618 | 
619 | proc find*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] =
620 |   ## Searches ``str`` for a match of ``pattern`` within the range given by
621 |   ## ``start`` and ``endpos``.
622 |   ##
623 |   ## ``start``
624 |   ##     Offset at which the search begins: ``|abc`` is ``0`` and ``a|bc``
625 |   ##     is ``1``.
626 |   ##
627 |   ## ``endpos``
628 |   ##     Inclusive upper bound for the indices a match may use; the default
629 |   ##     ``int.high`` stands for the end of the string.
630 |   result = matchImpl(str, pattern, start, endpos, 0)
631 | 
632 | proc findAll*(str: string, pattern: Regex, start = 0, endpos = int.high): seq[string] =
633 |   result = newSeq[string]()  # text of each match, in the order findIter yields them
634 |   for found in findIter(str, pattern, start, endpos):
635 |     result.add found.match
636 | 
637 | proc contains*(str: string, pattern: Regex, start = 0, endpos = int.high): bool =
638 |   ## Reports whether ``pattern`` matches somewhere in ``str`` within the
639 |   ## range given by ``start`` and ``endpos``.
640 |   ## Shorthand for ``isSome(str.find(pattern, start, endpos))``.
641 |   ##
642 |   runnableExamples:
643 |     doAssert "abc".contains(re"bc") == true
644 |     doAssert "abc".contains(re"cd") == false
645 |     doAssert "abc".contains(re"a", start = 1) == false
646 | 
647 |   result = find(str, pattern, start, endpos).isSome
648 | 
649 | proc split*(str: string, pattern: Regex, maxSplit = -1, start = 0): seq[string] =
650 |   ## Splits the string with the given regex. This works according to the
651 |   ## rules that Perl and Javascript use.
652 |   ##
653 |   ## ``start`` behaves the same as in ` ``find(...)`` <#proc-find>`_.
654 |   ##
655 |   runnableExamples:
656 |     # -  If the match is zero-width, then the string is still split:
657 |     doAssert "123".split(re"") == @["1", "2", "3"]
658 | 
659 |     # -  If the pattern has a capture in it, it is added after the string
660 |     #    split:
661 |     doAssert "12".split(re"(\d)") == @["", "1", "", "2", ""]
662 | 
663 |     # -  If ``maxsplit != -1``, then the string will only be split
664 |     #    ``maxsplit - 1`` times. This means that there will be ``maxsplit``
665 |     #    strings in the output seq.
666 |     doAssert "1.2.3".split(re"\.", maxsplit = 2) == @["1", "2.3"]
667 | 
668 |   result = @[]
669 |   var lastIdx = start  # first index of `str` not yet copied into result
670 |   var splits = 0  # number of pieces added inside the loop
671 |   var bounds = 0 .. -1  # bounds of the latest match; an empty range until one is found
672 |   var never_ran = true
673 | 
674 |   for match in str.findIter(pattern, start = start):
675 |     never_ran = false
676 | 
677 |     # bounds are inclusive:
678 |     #
679 |     # 0123456
680 |     #  ^^^
681 |     # (1, 3)
682 |     bounds = match.matchBounds
683 | 
684 |     # "12".split("") would be @["", "1", "2"], but
685 |     # if we skip an empty first match, it's the correct
686 |     # @["1", "2"]
687 |     if bounds.a <= bounds.b or bounds.a > start:
688 |       result.add(str.substr(lastIdx, bounds.a - 1))
689 |       splits += 1
690 | 
691 |     lastIdx = bounds.b + 1
692 | 
693 |     for cap in match.captures:
694 |       # if there are captures, include them in the result
695 |       if cap.isSome:
696 |         result.add(cap.get)
697 | 
698 |     if splits == maxSplit - 1:  # NOTE(review): splits was already incremented above, so with maxSplit = 1 and a non-empty first match this looks unreachable - confirm intended
699 |       break
700 | 
701 |   # "12".split("\b") would be @["1", "2", ""], but
702 |   # if we skip an empty last match, it's the correct
703 |   # @["1", "2"]
704 |   # If matches were never found, then the input string is the result
705 |   if bounds.a <= bounds.b or bounds.b < str.high or never_ran:
706 |     # last match: Each match takes the previous substring,
707 |     # but "1 2".split(/ /) needs to return @["1", "2"].
708 |     # This handles "2"
709 |     result.add(str.substr(bounds.b + 1, str.high))
710 | 
711 | template replaceImpl(str: string, pattern: Regex,
712 |                      replacement: untyped) {.dirty.} =
713 |   # Shared body of the `replace` overloads; `replacement` is evaluated once per
714 |   # match with `match` in scope. XXX close to split, the duplication is a known TODO.
715 |   result = ""
716 |   var copiedUpTo = 0
717 |   for match {.inject.} in findIter(str, pattern):
718 |     let span = match.matchBounds
719 |     result.add(substr(str, copiedUpTo, span.a - 1))
720 |     let substitute = replacement
721 |     result.add(substitute)
722 | 
723 |     copiedUpTo = span.b + 1
724 | 
725 |   result.add(substr(str, copiedUpTo, str.high))
726 |   return result
727 | 
728 | proc replace*(str: string, pattern: Regex,
729 |               subproc: proc (match: RegexMatch): string): string =
730 |   ## Replaces each match of Regex in the string with ``subproc``, which should
731 |   ## never be or return ``nil``.
732 |   ##
733 |   ## If ``subproc`` is a ``proc (RegexMatch): string``, then it is executed with
734 |   ## each match and the return value is the replacement value.
735 |   ##
736 |   ## If ``subproc`` is a ``proc (string): string``, then it is executed with the
737 |   ## full text of the match and the return value is the replacement
738 |   ## value.
739 |   ##
740 |   ## If the third argument is a string, the syntax is as follows:
741 |   ##
742 |   ## -  ``$$`` - literal ``$``
743 |   ## -  ``$123`` - capture number ``123``
744 |   ## -  ``$foo`` - named capture ``foo``
745 |   ## -  ``${foo}`` - same as above
746 |   ## -  ``$1$#`` - first and second captures
747 |   ## -  ``$#`` - first capture
748 |   ## -  ``$0`` - full match
749 |   ##
750 |   ## If a given capture is missing, ``IndexError`` is thrown for un-named
751 |   ## captures and ``KeyError`` for named captures.
752 |   replaceImpl(str, pattern, subproc(match))
753 | 
754 | proc replace*(str: string, pattern: Regex,
755 |               subproc: proc (match: string): string): string =
756 |   replaceImpl(str, pattern, subproc(match.match))  # subproc receives only the matched text
757 | 
758 | proc replace*(str: string, pattern: Regex, sub: string): string =
759 |   # `$N` in `sub` is 1-based while capture indices are 0-based, hence `id - 1` (`$0` becomes -1, the whole match)
760 |   replaceImpl(str, pattern,
761 |     formatStr(sub, match.captures[name], match.captures[id - 1]))
762 | 
763 | # }}}
764 | 
765 | let SpecialCharMatcher = re"([\\+*?.[^\]$(){}=!<>|:-])"
766 | proc escapeRe*(str: string): string =
767 |   ## Puts a backslash in front of every character of ``str`` that is in the class of
768 |   ## ``SpecialCharMatcher``, ``.`` included. Incompatible with the Extra flag (``X``).
769 |   str.replace(SpecialCharMatcher, "\\$1")
770 | 


--------------------------------------------------------------------------------
/src/nre/private/util.nim:
--------------------------------------------------------------------------------
 1 | ## INTERNAL FILE FOR USE ONLY BY nre.nim.
 2 | import tables
 3 | 
 4 | const Ident = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'}
 5 | const StartIdent = Ident - {'0'..'9'}
 6 | 
 7 | template formatStr*(howExpr, namegetter, idgetter): untyped =
 8 |   ## Expands `$$`, `$N`, `$#`, `$name` and `${name}` in `howExpr`; `idgetter` sees `id`, `namegetter` sees `name`.
 9 |   let how = howExpr
10 |   var val = newStringOfCap(how.len)
11 |   var i = 0
12 |   var lastNum = 1
13 |   while i < how.len:
14 |     if how[i] != '$':
15 |       val.add(how[i])
16 |       i += 1
17 |     else:
18 |       # '\0' stands for "nothing follows", so a trailing `$` reaches the syntax error below
19 |       let nextCh = if i + 1 < how.len: how[i + 1] else: '\0'
20 |       if nextCh == '$':
21 |         val.add('$')
22 |         i += 2
23 |       elif nextCh == '#':
24 |         var id {.inject.} = lastNum
25 |         val.add(idgetter)
26 |         lastNum += 1
27 |         i += 2
28 |       elif nextCh in {'0'..'9'}:
29 |         i += 1
30 |         var id {.inject.} = 0
31 |         while i < how.len and how[i] in {'0'..'9'}:
32 |           id = (id * 10) + (ord(how[i]) - ord('0'))  # shift earlier digits, append this one
33 |           i += 1
34 |         val.add(idgetter)
35 |         lastNum = id + 1
36 |       elif nextCh in StartIdent:
37 |         i += 1
38 |         var name {.inject.} = ""
39 |         while i < how.len and how[i] in Ident:
40 |           name.add(how[i])
41 |           i += 1
42 |         val.add(namegetter)
43 |       elif nextCh == '{':
44 |         i += 2
45 |         var name {.inject.} = ""
46 |         while i < how.len and how[i] != '}':
47 |           name.add(how[i])
48 |           i += 1
49 |         i += 1
50 |         val.add(namegetter)
51 |       else:
52 |         raise newException(Exception, "Syntax error in format string at " & $i)
53 |   val
52 | 


--------------------------------------------------------------------------------
/test/captures.nim:
--------------------------------------------------------------------------------
 1 | import unittest, optional_nonstrict
 2 | include ../src/nre
 3 | 
 4 | suite "captures":
 5 |   test "map capture names to numbers":
 6 |     check(getNameToNumberTable(re("(?<v1>1(?<v2>2(?<v3>3))(?'v4'4))()")) ==
 7 |       { "v1" : 0, "v2" : 1, "v3" : 2, "v4" : 3 }.toTable())
 8 | 
 9 |   test "capture bounds are correct":
10 |     let ex1 = re("([0-9])")
11 |     check("1 23".find(ex1).matchBounds == 0 .. 0)
12 |     check("1 23".find(ex1).captureBounds[0] == 0 .. 0)
13 |     check("1 23".find(ex1, 1).matchBounds == 2 .. 2)
14 |     check("1 23".find(ex1, 3).matchBounds == 3 .. 3)
15 | 
16 |     let ex2 = re("()()()()()()()()()()([0-9])")
17 |     check("824".find(ex2).captureBounds[0] == 0 .. -1)
18 |     check("824".find(ex2).captureBounds[10] == 0 .. 0)
19 | 
20 |     let ex3 = re("([0-9]+)")
21 |     check("824".find(ex3).captureBounds[0] == 0 .. 2)
22 | 
23 |   test "named captures":
24 |     let ex1 = "foobar".find(re("(?<foo>foo)(?<bar>bar)"))
25 |     check(ex1.captures["foo"] == "foo")
26 |     check(ex1.captures["bar"] == "bar")
27 | 
28 |     let ex2 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
29 |     check("foo" in ex2.captureBounds)
30 |     check(ex2.captures["foo"] == "foo")
31 |     check(not ("bar" in ex2.captures))
32 |     expect KeyError:
33 |         discard ex2.captures["bar"]
34 | 
35 |   test "named capture bounds":
36 |     let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
37 |     check("foo" in ex1.captureBounds)
38 |     check(ex1.captureBounds["foo"] == 0..2)
39 |     check(not ("bar" in ex1.captureBounds))  # exercise the bounds view, not `captures` again
40 |     expect KeyError:
41 |         discard ex1.captureBounds["bar"]
42 | 
43 |   test "capture count":
44 |     let ex1 = re("(?<foo>foo)(?<bar>bar)?")
45 |     check(ex1.captureCount == 2)
46 |     check(ex1.captureNameId == {"foo" : 0, "bar" : 1}.toTable())
47 | 
48 |   test "named capture table":
49 |     let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
50 |     check(ex1.captures.toTable == {"foo" : "foo"}.toTable())
51 |     check(ex1.captureBounds.toTable == {"foo" : 0..2}.toTable())
52 | 
53 |     let ex2 = "foobar".find(re("(?<foo>foo)(?<bar>bar)?"))
54 |     check(ex2.captures.toTable == {"foo" : "foo", "bar" : "bar"}.toTable())
55 | 
56 |   test "capture sequence":
57 |     let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
58 |     check(ex1.captures.toSeq == @[some("foo"), none(string)])
59 |     check(ex1.captureBounds.toSeq == @[some(0..2), none(Slice[int])])
60 |     check(ex1.captures.toSeq(some("")) == @[some("foo"), some("")])
61 | 
62 |     let ex2 = "foobar".find(re("(?<foo>foo)(?<bar>bar)?"))
63 |     check(ex2.captures.toSeq == @[some("foo"), some("bar")])
64 | 
65 | 


--------------------------------------------------------------------------------
/test/escape.nim:
--------------------------------------------------------------------------------
1 | import ../src/nre, unittest
2 | 
3 | suite "escape strings":
4 |   test "escape strings":
5 |     check "123".escapeRe() == "123"  # digits are returned untouched
6 |     check "[]".escapeRe() == r"\[\]"
7 |     check "()".escapeRe() == r"\(\)"
8 | 


--------------------------------------------------------------------------------
/test/find.nim:
--------------------------------------------------------------------------------
 1 | import unittest, sequtils
 2 | import ../src/nre except toSeq
 3 | import optional_nonstrict
 4 | import times, strutils
 5 | 
 6 | suite "find":
 7 |   test "find text":
 8 |     check "3213a".find(re"[a-z]").match == "a"
 9 |     let texts = toSeq(findIter("1 2 3 4 5 6 7 8 ", re" ")).map(
10 |       proc (a: RegexMatch): string = a.match)
11 |     check texts == @[" ", " ", " ", " ", " ", " ", " ", " "]
12 | 
13 |   test "find bounds":
14 |     let spans = toSeq(findIter("1 2 3 4 5 ", re" ")).map(
15 |       proc (a: RegexMatch): Slice[int] = a.matchBounds)
16 |     check spans == @[1..1, 3..3, 5..5, 7..7, 9..9]
17 | 
18 |   test "overlapping find":
19 |     check "222".findAll(re"22") == @["22"]
20 |     check "2222".findAll(re"22") == @["22", "22"]
21 | 
22 |   test "len 0 find":
23 |     check "".findAll(re"\ ") == newSeq[string]()
24 |     check "".findAll(re"") == @[""]
25 |     check "abc".findAll(re"") == @["", "", "", ""]
26 |     check "word word".findAll(re"\b") == @["", "", "", ""]
27 |     check "word\r\lword".findAll(re"(*ANYCRLF)(?m)$") == @["", ""]
28 |     check "слово слово".findAll(re"(*U)\b") == @["", "", "", ""]
29 | 
30 |   test "bail early":
31 |     # Nothing is expected to match, so the search should give up early and the
32 |     # time spent on the small and the large input should stay within a
33 |     # tolerance margin of each other.
34 |     const small = 10
35 |     const large = 1000
36 |     let smallData = repeat("url.sequence = \"http://whatever.com/jwhrejrhrjrhrjhrrjhrjrhrjrh\" ", small)
37 |     let largeData = repeat("url.sequence = \"http://whatever.com/jwhrejrhrjrhrjhrrjhrjrhrjrh\" ", large)
38 |     let expression = re"^url.* = &#34;(.*?)&#34;"
39 | 
40 |     check smallData.findAll(expression) == newSeq[string]()
41 |     check largeData.findAll(expression) == newSeq[string]()
42 | 


--------------------------------------------------------------------------------
/test/init.nim:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | include ../src/nre
 3 | 
 4 | suite "Test NRE initialization":
 5 |   test "correct initialization":
 6 |     check re("[0-9]+") != nil
 7 |     check re("(?i)[0-9]+") != nil
 8 | 
 9 |   test "options":
10 |     check extractOptions("(*NEVER_UTF)") ==
11 |           ("", pcre.NEVER_UTF, true)
12 |     check extractOptions("(*UTF8)(*ANCHORED)(*UCP)z") ==
13 |           ("(*UTF8)(*UCP)z", pcre.ANCHORED, true)
14 |     check extractOptions("(*ANCHORED)(*UTF8)(*JAVASCRIPT_COMPAT)z") ==
15 |           ("(*UTF8)z", pcre.ANCHORED or pcre.JAVASCRIPT_COMPAT, true)
16 | 
17 |     check extractOptions("(*NO_STUDY)(") == ("(", 0, false)
18 | 
19 |     check extractOptions("(*LIMIT_MATCH=6)(*ANCHORED)z") ==
20 |           ("(*LIMIT_MATCH=6)z", pcre.ANCHORED, true)
21 | 
22 |   test "incorrect options":
23 |     # a malformed prefix must leave the whole text, trailing option included, untouched
24 |     for prefix in ["CR", "(CR", "(*CR", "(*abc)", "(*abc)CR", "(?i)",
25 |                    "(*LIMIT_MATCH=5", "(*NO_AUTO_POSSESS=5)"]:
26 |       let text = prefix & "(*NEVER_UTF)"
27 |       check extractOptions(text) == (text, 0, true)
28 | 
29 |   test "invalid regex":
30 |     expect SyntaxError: discard re("[0-9")
31 |     try:
32 |       discard re("[0-9")
33 |     except SyntaxError:
34 |       let err = SyntaxError(getCurrentException())
35 |       check err.pos == 4
36 |       check err.pattern == "[0-9"
37 | 


--------------------------------------------------------------------------------
/test/match.nim:
--------------------------------------------------------------------------------
 1 | import unittest, optional_nonstrict
 2 | include ../src/nre
 3 | 
 4 | suite "match":
 5 |   test "upper bound must be inclusive":
 6 |     check "abc".match(re"abc", endpos = -1) == none(RegexMatch)
 7 |     check "abc".match(re"abc", endpos = 1) == none(RegexMatch)
 8 |     check "abc".match(re"abc", endpos = 2) != none(RegexMatch)
 9 | 
10 |   test "match examples":
11 |     check "abc".match(re"(\w)").captures[0] == "a"
12 |     check "abc".match(re"(?<letter>\w)").captures["letter"] == "a"
13 |     check "abc".match(re"(\w)\w").captures[-1] == "ab"
14 |     check "abc".match(re"(\w)").captureBounds[0] == 0 .. 0
15 |     check "abc".match(re"").captureBounds[-1] == 0 .. -1
16 |     check "abc".match(re"abc").captureBounds[-1] == 0 .. 2
17 | 
18 |   test "match test cases":
19 |     check "123".match(re"").matchBounds == 0 .. -1  # empty match: a is b + 1
20 | 


--------------------------------------------------------------------------------
/test/misc.nim:
--------------------------------------------------------------------------------
 1 | import unittest, ../src/nre, strutils, optional_nonstrict
 2 | 
 3 | suite "Misc tests":
 4 |   test "unicode":
 5 |     check "".find(re"(*UTF8)").match == ""
 6 |     check "перевірка".replace(re"(*U)\w", "") == ""
 7 | 
 8 |   test "empty or non-empty match":
 9 |     check "abc".findall(re"|.").join(":") == ":a::b::c:"
10 |     check "abc".findall(re".|").join(":") == "a:b:c:"
11 | 
12 |     check "abc".replace(re"|.", "x") == "xxxxxxx"
13 |     check "abc".replace(re".|", "x") == "xxxx"
14 | 
15 |     check "abc".split(re"|.").join(":") == ":::::"
16 |     check "abc".split(re".|").join(":") == ":::"
17 | 


--------------------------------------------------------------------------------
/test/optional_nonstrict.nim:
--------------------------------------------------------------------------------
1 | import options
2 | converter option2val*[T](val: Option[T]): T =
3 |   result = get(val)  # implicit unwrap for the tests; `get` fails on an empty Option
4 | 


--------------------------------------------------------------------------------
/test/replace.nim:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | include ../src/nre
 3 | 
 4 | suite "replace":
 5 |   test "replace with 0-length strings":
 6 |     check "".replace(re"1", proc (v: RegexMatch): string = "1") == ""
 7 |     check " ".replace(re"", proc (v: RegexMatch): string = "1") == "1 1"
 8 |     check "".replace(re"", proc (v: RegexMatch): string = "1") == "1"
 9 | 
10 |   test "regular replace":
11 |     check "123".replace(re"\d", "foo") == "foofoofoo"
12 |     check "123".replace(re"(\d)", "$1$1") == "112233"
13 |     check "123".replace(re"(\d)(\d)", "$1$2") == "123"
14 |     check "123".replace(re"(\d)(\d)", "$#$#") == "123"
15 |     check "123".replace(re"(?<foo>\d)(\d)", "$foo$#$#") == "1123"
16 |     check "123".replace(re"(?<foo>\d)(\d)", "${foo}$#$#") == "1123"
17 | 
18 |   test "replacing missing captures should throw instead of segfaulting":
19 |     expect IndexError: discard "ab".replace(re"(a)|(b)", "$1$2")
20 |     expect IndexError: discard "b".replace(re"(a)?(b)", "$1$2")
21 |     expect KeyError: discard "b".replace(re"(a)?", "${foo}")
22 |     expect KeyError: discard "b".replace(re"(?<foo>a)?", "${foo}")
23 | 


--------------------------------------------------------------------------------
/test/split.nim:
--------------------------------------------------------------------------------
 1 | import unittest, strutils
 2 | include ../src/nre
 3 | 
 4 | suite "string splitting":
 5 |   test "splitting strings":
 6 |     check "1 2 3 4 5 6 ".split(re" ") == @["1", "2", "3", "4", "5", "6", ""]
 7 |     check "1  2  ".split(re(" ")) == @["1", "", "2", "", ""]
 8 |     check "1 2".split(re(" ")) == @["1", "2"]
 9 |     check "foo".split(re("foo")) == @["", ""]
10 |     check "".split(re"foo") == @[""]
11 |     check "9".split(re"\son\s") == @["9"]
12 | 
13 |   test "captured patterns":
14 |     check "12".split(re"(\d)") == @["", "1", "", "2", ""]
15 | 
16 |   test "maxsplit":
17 |     check "123".split(re"", maxsplit = 2) == @["1", "23"]
18 |     check "123".split(re"", maxsplit = 1) == @["123"]
19 |     check "123".split(re"", maxsplit = -1) == @["1", "2", "3"]
20 | 
21 |   test "split with 0-length match":
22 |     check "12345".split(re("")) == @["1", "2", "3", "4", "5"]
23 |     check "".split(re"") == newSeq[string]()
24 |     check "word word".split(re"\b") == @["word", " ", "word"]
25 |     check "word\r\lword".split(re"(*ANYCRLF)(?m)$") == @["word", "\r\lword"]
26 |     check "слово слово".split(re"(*U)(\b)") == @["", "слово", "", " ", "", "слово", ""]
27 | 
28 |   test "perl split tests":
29 |     check "forty-two".split(re"").join(",") == "f,o,r,t,y,-,t,w,o"
30 |     check "forty-two".split(re"", 3).join(",") == "f,o,rty-two"
31 |     check "split this string".split(re" ").join(",") == "split,this,string"
32 |     check "split this string".split(re" ", 2).join(",") == "split,this string"
33 |     check "try$this$string".split(re"\$").join(",") == "try,this,string"
34 |     check "try$this$string".split(re"\$", 2).join(",") == "try,this$string"
35 |     check "comma, separated, values".split(re", ").join("|") == "comma|separated|values"
36 |     check "comma, separated, values".split(re", ", 2).join("|") == "comma|separated, values"
37 |     check "Perl6::Camelia::Test".split(re"::").join(",") == "Perl6,Camelia,Test"
38 |     check "Perl6::Camelia::Test".split(re"::", 2).join(",") == "Perl6,Camelia::Test"
39 |     check "split,me,please".split(re",").join("|") == "split|me|please"
40 |     check "split,me,please".split(re",", 2).join("|") == "split|me,please"
41 |     check "Hello World    Goodbye   Mars".split(re"\s+").join(",") == "Hello,World,Goodbye,Mars"
42 |     check "Hello World    Goodbye   Mars".split(re"\s+", 3).join(",") == "Hello,World,Goodbye   Mars"
43 |     check "Hello test".split(re"(\s+)").join(",") == "Hello, ,test"
44 |     check "this will be split".split(re" ").join(",") == "this,will,be,split"
45 |     check "this will be split".split(re" ", 3).join(",") == "this,will,be split"
46 |     check "a.b".split(re"\.").join(",") == "a,b"
47 | 
48 |     check "".split(re"").len == 0
49 |     check ":".split(re"").len == 1
50 | 
51 |   test "start position":
52 |     check "abc".split(re"", start = 1) == @["b", "c"]
53 |     check "abc".split(re"", start = 2) == @["c"]
54 |     check "abc".split(re"", start = 3) == newSeq[string]()
55 | 


--------------------------------------------------------------------------------
/test/testall.nim:
--------------------------------------------------------------------------------
 1 | import ../src/nre
 2 | import escape
 3 | import find
 4 | import init
 5 | import match
 6 | import misc
 7 | import replace
 8 | import split
 9 | import captures
10 | 


--------------------------------------------------------------------------------
/web/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flaviut/nre/52f8117d3b8f2bfc6f94616b2b676c434b59c4f7/web/logo.png


--------------------------------------------------------------------------------
/web/logo.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" width="627" height="140" version="1.1"><style>.s0{-inkscape-font-specification:DejaVu Sans Mono;fill:#c17d11;font-family:DejaVu Sans Mono;font-size:35.7;}.s1{-inkscape-font-specification:DejaVu Sans Mono;fill:#c00;font-family:DejaVu Sans Mono;font-size:35.7;}.s2{-inkscape-font-specification:DejaVu Sans Mono;fill:#4e9a06;font-family:DejaVu Sans Mono;font-size:35.7;}</style><g transform="translate(0,-912.36217)"><g transform="matrix(3.3601391,0,0,3.3601391,-52.384026,-75.180678)" style="fill:#000;font-family:Sans;font-size:40;letter-spacing:0;line-height:125;word-spacing:0"><path d="m77.2 293.9 125 0 0 41.7-125 0z" style="fill:#e9b96e;opacity:0.5"/><path d="m91.3 296 0 9.7-3 0 0-9.7 3 0 -6.7 0 0 9.7-3 0 0-9.7 3 0" class="s0"/><path d="m93.5 298.6 7.4 0 0 2.5-4.2 0 0 26.9 4.2 0 0 2.5-7.4 0 0-31.8" class="s1"/><path d="m109.4 314.4c0-0.6 0.2-1.2 0.7-1.7 0.5-0.5 1-0.7 1.6-0.7 0.7 0 1.2 0.2 1.7 0.7 0.5 0.5 0.7 1 0.7 1.7 0 0.7-0.2 1.2-0.7 1.7-0.5 0.5-1 0.7-1.7 0.7-0.7 0-1.2-0.2-1.6-0.7-0.4-0.4-0.6-1-0.6-1.7m2.3-10.6c-1.6 0-2.9 0.9-3.7 2.7-0.8 1.8-1.2 4.5-1.2 8.1 0 3.6 0.4 6.3 1.2 8.1 0.8 1.8 2 2.7 3.7 2.7 1.7 0 2.9-0.9 3.7-2.7 0.8-1.8 1.2-4.5 1.2-8.1 0-3.6-0.4-6.3-1.2-8.1-0.8-1.8-2-2.7-3.7-2.7m0-2.8c2.8 0 4.9 1.1 6.3 3.4 1.4 2.3 2.1 5.6 2.1 10.1 0 4.4-0.7 7.8-2.1 10.1-1.4 2.3-3.5 3.4-6.3 3.4-2.8 0-4.9-1.1-6.3-3.4-1.4-2.3-2.1-5.6-2.1-10.1 0-4.5 0.7-7.8 2.1-10.1 1.4-2.3 3.5-3.4 6.3-3.4" class="s2"/><path d="m124.6 313.1 9.1 0 0 2.9-9.1 0 0-2.9" class="s1"/><path d="m146.4 316.1c1.5 0 2.7-0.5 3.5-1.6 0.9-1.1 1.3-2.6 1.3-4.5 0-1.9-0.4-3.4-1.3-4.5-0.8-1.1-2-1.6-3.5-1.6-1.6 0-2.7 0.5-3.5 1.6-0.8 1-1.2 2.5-1.2 4.6 0 2 0.4 3.5 1.2 4.6 0.8 1 2 1.5 3.5 1.5m-6.3 11 0-3.2c0.7 0.4 1.5 0.8 2.3 1 0.8 0.2 1.7 0.3 2.6 0.3 2.2 0 3.9-0.8 5.1-2.5 1.2-1.7 1.7-4.2 1.7-7.4-0.5 1.2-1.3 2.1-2.3 2.7-1 0.6-2.1 0.9-3.4 0.9-2.5 0-4.5-0.8-5.8-2.3-1.4-1.5-2.1-3.7-2.1-6.6 0-2.8 0.7-5 2.1-6.5 1.4-1.5 3.4-2.3 6-2.3 3 0 5.2 
1.1 6.6 3.3 1.4 2.2 2.1 5.6 2.1 10.3 0 4.4-0.8 7.7-2.5 10-1.7 2.3-4.1 3.5-7.4 3.5-0.8 0-1.7-0.1-2.6-0.3-0.9-0.2-1.7-0.4-2.5-0.8" class="s2"/><path d="m165.2 298.6 0 31.8-7.4 0 0-2.5 4.2 0 0-26.9-4.2 0 0-2.5 7.4 0M179.9 305.3l0 7.7 7.8 0 0 3-7.8 0 0 7.7-2.9 0 0-7.7-7.7 0 0-3 7.7 0 0-7.7 2.9 0" class="s1"/><path d="m197.7 296 0 9.7-3 0 0-9.7 3 0 -6.7 0 0 9.7-3 0 0-9.7 3 0" class="s0"/><g transform="translate(-2.0396636,0.18413477)" style="fill:#000;font-family:Sans;font-size:35.7"><path d="m38.6 313.9 0 11.8-3.2 0 0-11.7c0-1.8-0.4-3.2-1.1-4.2-0.7-0.9-1.8-1.4-3.2-1.4-1.7 0-3.1 0.6-4.1 1.7-1 1.1-1.5 2.6-1.5 4.5l0 11-3.2 0 0-19.5 3.2 0 0 3c0.8-1.2 1.7-2.1 2.7-2.6 1-0.6 2.2-0.9 3.6-0.9 2.2 0 3.9 0.7 5.1 2.1 1.2 1.4 1.7 3.4 1.7 6.1M56.4 309.1c-0.4-0.2-0.8-0.4-1.2-0.5-0.4-0.1-0.9-0.2-1.4-0.2-1.8 0-3.2 0.6-4.2 1.8-1 1.2-1.4 2.9-1.4 5.1l0 10.3-3.2 0 0-19.5 3.2 0 0 3c0.7-1.2 1.6-2.1 2.6-2.6 1.1-0.6 2.4-0.9 3.9-0.9 0.2 0 0.5 0 0.7 0.1 0.3 0 0.6 0.1 0.9 0.1l0 3.3M75.7 315.1l0 1.6-14.8 0c0.1 2.2 0.8 3.9 2 5.1 1.2 1.2 2.9 1.7 5 1.7 1.2 0 2.4-0.2 3.6-0.5 1.2-0.3 2.3-0.8 3.5-1.4l0 3c-1.2 0.5-2.3 0.9-3.5 1.1-1.2 0.3-2.4 0.4-3.7 0.4-3.1 0-5.6-0.9-7.4-2.7-1.8-1.8-2.7-4.3-2.7-7.4 0-3.2 0.9-5.7 2.6-7.6 1.7-1.9 4.1-2.8 7-2.8 2.6 0 4.7 0.8 6.2 2.5 1.5 1.7 2.3 4 2.3 6.9m-3.2-0.9c0-1.8-0.5-3.2-1.5-4.2-1-1-2.2-1.6-3.8-1.6-1.8 0-3.2 0.5-4.3 1.5-1.1 1-1.7 2.4-1.8 4.3l11.4 0"/></g></g></g></svg>
2 | 


--------------------------------------------------------------------------------