├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── Setup.hs
├── cgrep.cabal
├── cgreprc
├── script
    └── profile.sh
├── src
    ├── CGrep
    │   ├── Boundary.hs
    │   ├── Common.hs
    │   ├── ContextFilter.hs
    │   ├── Distance.hs
    │   ├── FileKind.hs
    │   ├── FileType.hs
    │   ├── FileTypeMap.hs
    │   ├── Output.hs
    │   ├── Parser
    │   │   ├── Atom.hs
    │   │   ├── Char.hs
    │   │   ├── Chunk.hs
    │   │   ├── Line.hs
    │   │   └── Token.hs
    │   ├── Search.hs
    │   ├── Strategy
    │   │   ├── BoyerMoore.hs
    │   │   ├── Levenshtein.hs
    │   │   ├── Regex.hs
    │   │   ├── Semantic.hs
    │   │   └── Tokenizer.hs
    │   └── Types.hs
    ├── CmdOptions.hs
    ├── Config.hs
    ├── Main.hs
    ├── Options.hs
    ├── Reader.hs
    ├── Search.hs
    ├── Util.hs
    └── Verbose.hs
├── stack.yaml
└── test
    ├── CMakeLists.txt
    ├── Makefile
    ├── test.c
    ├── test.chpl
    ├── test.coffee
    ├── test.cpp
    ├── test.dhall
    ├── test.erl
    ├── test.fs
    ├── test.go
    ├── test.h
    ├── test.hs
    ├── test.html
    ├── test.ini
    ├── test.js
    ├── test.lua
    ├── test.ml
    ├── test.php3
    ├── test.pl
    ├── test.py
    ├── test.rb
    ├── test.rs
    ├── test.sh
    ├── test.tex
    ├── test.toml
    ├── test.u
    ├── test.utf8
    └── test.zig


/.gitignore:
--------------------------------------------------------------------------------
1 | /.cabal-sandbox/
2 | /cabal.sandbox.config
3 | /dist/
4 | .hie
5 | .stack-work
6 | dist-newstyle
7 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | ## cgrep v8.0.0
 4 | 
 5 | - **Performance Enhancements:** This release significantly improves the performance of cgrep. Search speed is now closer to that of ripgrep, with searches running 3 to 13 times faster than in the previous release.
 6 | 
 7 | - **Expanded Language and Configuration Support:** This release adds support for searching within Zig, Unison, Dhall, Fish shell, TOML, and INI config files, so you can now search for patterns and text in files of these formats.
 8 | 
 9 | - **New Semantic Token Filter:** A new semantic token filter has been introduced in this release. This filter is designed to identify and filter out specific native types present in different file types. This enhances the search results by providing more precise and relevant matches based on the semantic meaning of the tokens.
10 | 
11 | - **Kind Filter Selector:** We have introduced a new feature called the Kind Filter Selector. With this filter selector, users can now specify the kind of files they want to search within. The available options for the filter include Text, Config, Language, Data, Markup, and Script. This allows for more focused and targeted searches based on the desired file type, especially useful in large codebases.
12 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |              GNU GENERAL PUBLIC LICENSE
  2 |                 Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                      Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |              GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                      NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |               END OF TERMS AND CONDITIONS
281 | 
282 |      How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   <signature of Ty Coon>, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | CGrep: a context-aware grep for source codes
 2 | ============================================
 3 | 
 4 | [![Hackage](https://img.shields.io/hackage/v/cgrep.svg?style=flat)](https://hackage.haskell.org/package/cgrep)
 5 | [![Join the chat at https://gitter.im/awgn/cgrep](https://badges.gitter.im/awgn/cgrep.svg)](https://gitter.im/awgn/cgrep?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 6 | 
 7 | Usage
 8 | -----
 9 | 
10 | ```
11 | Cgrep 8.1.3. Usage: cgrep [OPTION] [PATTERN] files...
12 | 
13 | cgrep [OPTIONS] [ITEM]
14 | 
15 | Pattern:
16 |   -f --file=FILE          Read PATTERNs from file (one per line)
17 |   -w --word               Force word matching
18 |   -p --prefix             Force prefix matching
19 |   -s --suffix             Force suffix matching
20 |   -e --edit               Use edit distance
21 |   -G --regex              Use regex matching (posix)
22 |   -P --pcre               Use regex matching (pcre)
23 |   -i --ignore-case        Ignore case distinctions
24 | 
25 | Context filters:
26 |   -c --code               Enable search in source code
27 |   -m --comment            Enable search in comments
28 |   -l --literal            Enable search in string literals
29 | 
30 | Token filters:
31 |      --name --identifier  Identifiers
32 |      --type --native      Native Types
33 |      --keyword            Keywords
34 |      --number             Literal numbers
35 |      --string             Literal strings
36 |      --op                 Operators
37 | 
38 | File filters:
39 |   -t --type-filter=ITEM   Specify file types. ie: Cpp, +Haskell, -Makefile
40 |   -k --kind-filter=ITEM   Specify file kinds. Text, Config, Language, Data,
41 |                           Markup or Script
42 |      --code-only          Parse code modules only (skip headers/interfaces)
43 |      --hdr-only           Parse headers/interfaces only (skip modules)
44 |   -T --skip-test          Skip files that have 'test' in the name
45 |      --prune-dir=ITEM     Do not descend into dir
46 |   -r --recursive          Enable recursive search (don't follow symlinks)
47 |   -L --follow             Follow symlinks
48 | 
49 | Semantic:
50 |   -S --semantic           "code" pattern: _, _1, _2... (identifiers), $, $1,
51 |                           $2... (optionals), ANY, KEY, STR, LIT, NUM, HEX, OCT,
52 |                           OR
53 | 
54 | Control:
55 |      --max-count=INT      Stop search in files after INT matches
56 |      --force-type=ITEM    Force the type of file
57 |      --type-list          List the supported file types
58 |   -v --invert-match       Select non-matching lines
59 |      --multiline=INT      Enable multi-line matching
60 |   -j --threads=INT        Number threads to run in parallel
61 | 
62 | Output format:
63 |      --show-match         Show list of matching tokens
64 |      --color              Use colors to highlight the match strings
65 |      --no-color           Do not use colors (override config file)
66 |   -h --no-filename        Suppress the file name prefix on output
67 |      --no-numbers         Suppress both line and column numbers on output
68 |      --no-column          Suppress the column number on output
69 |      --count              Print only a count of matching lines per file
70 |      --filename-only      Print only the name of files containing matches
71 |      --json               Format output as json object
72 |      --vim                Run vim editor passing the files that match
73 |      --editor             Run the editor specified by EDITOR var., passing
74 |                           the files that match
75 |      --fileline           When edit option is specified, pass the list of
76 |                           matching files in file:line format (e.g. vim
77 |                           'file-line' plugin)
78 | 
79 | Miscellaneous:
80 |      --verbose=INT        Verbose level: 1, 2 or 3
81 |      --no-shallow         Disable shallow-search
82 |      --palette            Show color palette
83 |   -? --help               Display help message
84 |   -V --version            Print version information
85 |      --numeric-version    Print just the version number
86 | ```
87 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/cgrep.cabal:
--------------------------------------------------------------------------------
  1 | Cabal-version:       2.2
  2 | Name:                cgrep
  3 | Description:         Cgrep: a context-aware grep for source codes
  4 | Version:             8.1.3
  5 | Synopsis:            Command line tool
  6 | Homepage:            http://awgn.github.io/cgrep/
  7 | License:             GPL-2.0-or-later
  8 | License-file:        LICENSE
  9 | Author:              Nicola Bonelli
 10 | Maintainer:          Nicola Bonelli <nicola@larthia.com>
 11 | Category:            Utils
 12 | Build-type:          Simple
 13 | Stability:           Experimental
 14 | Extra-source-files:  README.md
 15 | 
 16 | Common common-options
 17 |   build-depends:       base ^>= 4.15.0.0
 18 | 
 19 |   ghc-options:         -Wall
 20 |                        -Wcompat
 21 |                        -Widentities
 22 |                        -Wincomplete-uni-patterns
 23 |                        -Wincomplete-record-updates
 24 |   if impl(ghc >= 8.0)
 25 |     ghc-options:       -Wredundant-constraints
 26 |   if impl(ghc >= 8.2)
 27 |     ghc-options:       -fhide-source-paths
 28 |   if impl(ghc >= 8.4)
 29 |     ghc-options:       -Wmissing-export-lists
 30 |                        -Wpartial-fields
 31 |   if impl(ghc >= 8.8)
 32 |     ghc-options:       -Wmissing-deriving-strategies
 33 | 
 34 |   default-language:    Haskell2010
 35 | 
 36 | Executable cgrep
 37 |   Main-Is:             Main.hs
 38 |   Hs-Source-Dirs:      src
 39 |   Default-Extensions: FlexibleContexts
 40 |                       FlexibleInstances
 41 |                       GeneralisedNewtypeDeriving
 42 |                       DerivingStrategies
 43 |                       MultiWayIf
 44 |                       LambdaCase
 45 |                       OverloadedLists
 46 |                       OverloadedRecordDot
 47 |                       OverloadedStrings
 48 |                       PatternSynonyms
 49 |                       RecordWildCards
 50 |                       ScopedTypeVariables
 51 |                       TupleSections
 52 |                       TypeApplications
 53 |                       UnboxedSums
 54 |                       UnboxedTuples
 55 |                       ViewPatterns
 56 |                       BangPatterns
 57 |                       MagicHash
 58 | 
 59 |   Other-Modules:       Options
 60 |                        Verbose
 61 |                        CmdOptions
 62 |                        Util
 63 |                        Config
 64 |                        Reader
 65 |                        Search
 66 |                        CGrep.FileType
 67 |                        CGrep.FileKind
 68 |                        CGrep.FileTypeMap
 69 |                        CGrep.ContextFilter
 70 |                        CGrep.Types
 71 |                        CGrep.Output
 72 |                        CGrep.Distance
 73 |                        CGrep.Search
 74 |                        CGrep.Common
 75 |                        CGrep.Boundary
 76 |                        CGrep.Parser.Char
 77 |                        CGrep.Parser.Chunk
 78 |                        CGrep.Parser.Token
 79 |                        CGrep.Parser.Atom
 80 |                        CGrep.Parser.Line
 81 |                        CGrep.Strategy.Semantic
 82 |                        CGrep.Strategy.Tokenizer
 83 |                        CGrep.Strategy.Levenshtein
 84 |                        CGrep.Strategy.BoyerMoore
 85 |                        CGrep.Strategy.Regex
 86 |                        Paths_cgrep
 87 |   Autogen-modules:     Paths_cgrep
 88 | 
 89 |   Build-Depends:       base < 5.0,
 90 |                        cmdargs,
 91 |                        bytestring,
 92 |                        directory,
 93 |                        filepath,
 94 |                        stm,
 95 |                        containers,
 96 |                        vector,
 97 |                        array,
 98 |                        ghc-prim,
 99 |                        dlist,
100 |                        ansi-terminal,
101 |                        split,
102 |                        safe,
103 |                        stringsearch,
104 |                        unordered-containers,
105 |                        regex-base,
106 |                        regex-posix,
107 |                        regex-pcre,
108 |                        either,
109 |                        mtl,
110 |                        unix-compat,
111 |                        async,
112 |                        utf8-string,
113 |                        unicode-show,
114 |                        transformers,
115 |                        process,
116 |                        aeson,
117 |                        yaml,
118 |                        exceptions,
119 |                        mono-traversable,
120 |                        bytestring-strict-builder,
121 |                        bitwise,
122 |                        mmap,
123 |                        unagi-chan,
124 |                        posix-paths,
125 |                        rawfilepath,
126 |                        monad-loops,
127 |                        deepseq,
128 |                        bitarray,
129 |                        text,
130 |                        extra
131 | 
132 |   Ghc-options:  -O2 -optc-O3
133 |                 -funbox-strict-fields
134 |                 -fwrite-ide-info
135 |                 -hiedir=.hie
136 |                 -threaded
137 |                 -rtsopts "-with-rtsopts=-N -H1g -qn2"
138 | 
139 |   Default-language:    Haskell2010
140 | 


--------------------------------------------------------------------------------
/cgreprc:
--------------------------------------------------------------------------------
 1 | #
 2 | # Cgrep config file
 3 | #
 4 | # file_types: each entry should name a FileType constructor (see src/CGrep/FileType.hs).
 5 | file_types:   [ Agda , Assembly , Awk , Bash, C , CMake , Cabal , Chapel , Clojure , Coffee , Conf , Cpp  , Csh, Csharp , Css ,
 6 |                 D , Dart , Dhall, Elm , Elixir , Erlang , Fish, Fortran , Fsharp , Go , GoMod, Haskell , Html , Idris , Java , Javascript , Json , Ksh, Kotlin ,
 7 |                 Latex , Lisp , Lua , Make , Nmap , OCaml , ObjectiveC , PHP , Perl , Python , R , Ruby , Rust , Scala , SmallTalk , Swift , Tcl,
 8 |                     Text , Unison, VHDL , Verilog , Yaml, Toml, Ini, Zig, Zsh ]
 9 | 
10 | prune_dirs       : [".svn", ".git", "CMakeFiles", ".stack-work" ]
11 | 
12 | colors           : True
13 | color_filename   : "2:3:5"
14 | color_match      : "5:0:1"
15 | 
16 | threads: 12
17 | 
18 | #
19 | # file_line option: enable edit for file:line
20 | # vim: see 'file-line' plugin
21 | #
22 | #
23 | # file_line : True
24 | #
25 | 


--------------------------------------------------------------------------------
/script/profile.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Profile cgrep: build with profiling, run it with the given arguments, then render and show cgrep.prof.
3 | rm -f cgrep.prof                      # -f: a missing profile from an earlier run is not an error
4 | stack build --profile -v || exit 1    # do not profile a stale binary when the build fails
5 | echo "running: cgrep $*..."
6 | stack exec --profile -- cgrep "$@" +RTS -P | wc -l   # "$@" keeps arguments containing spaces intact
7 | profiteur cgrep.prof
8 | open cgrep.prof.html
9 | cat cgrep.prof


--------------------------------------------------------------------------------
/src/CGrep/Boundary.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module CGrep.Boundary (
20 |     Boundary (..),
21 |     BoundaryType (..),
22 |     pattern Begin,
23 |     pattern End,
24 | ) where
25 | 
26 | import qualified Data.ByteString.Char8 as C
27 | import Data.Word (Word8)
28 | -- | A delimiter pair: the byte string that opens a region and the one that closes it.
29 | data Boundary = Boundary
30 |     { bBegin :: C.ByteString -- ^ opening delimiter
31 |     , bEnd :: C.ByteString -- ^ closing delimiter
32 |     }
33 |     deriving stock (Show, Eq)
34 | 
35 | -- | Two-valued tag ('Begin' = 0, 'End' = 1) wrapped around a 'Word8'.
36 | newtype BoundaryType = BoundaryType {unpackBoundaryType :: Word8}
37 |     deriving newtype (Eq, Ord)
38 | 
39 | pattern Begin, End :: BoundaryType
40 | pattern Begin = BoundaryType 0
41 | pattern End = BoundaryType 1
42 | {-# COMPLETE Begin, End #-}
43 | 
44 | instance Show BoundaryType where
45 |     show tag = case tag of
46 |         Begin -> "begin"
47 |         End -> "end"


--------------------------------------------------------------------------------
/src/CGrep/Common.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module CGrep.Common (
20 |     Text8,
21 |     getTargetName,
22 |     getTargetContents,
23 |     expandMultiline,
24 |     ignoreCase,
25 |     subText,
26 |     trim,
27 |     trim8,
28 |     takeN,
29 | ) where
30 | 
31 | import CGrep.Parser.Char (isSpace)
32 | import CGrep.Types (Offset, Text8)
33 | import Data.Char (toLower)
34 | 
35 | import Options (
36 |     Options (Options, ignore_case, multiline, no_shallow),
37 |  )
38 | 
39 | import Data.Int (Int64)
40 | import System.IO.MMap (mmapFileByteString)
41 | import Util (spanGroup)
42 | 
43 | import Data.List (group, groupBy, sort, sortOn)
44 | import qualified Data.Vector.Unboxed as UV
45 | import System.Posix.FilePath (RawFilePath)
46 | 
47 | import GHC.Exts (groupWith)
48 | 
49 | import qualified Data.ByteString.Char8 as C
50 | -- | Keep at most @n@ characters, appending "..." if anything was cut. Tests @drop n@ rather than @length@, so long or infinite input is never fully walked (a negative @n@ still gives "...").
51 | takeN :: Int -> String -> String
52 | takeN n xs
53 |     | n < 0 || not (null (drop n xs)) = take n xs <> "..."
54 |     | otherwise = xs
55 | {-# INLINE takeN #-}
56 | -- | Remove characters satisfying 'isSpace' from both ends of a 'String' (trailing run first, then leading).
57 | trim :: String -> String
58 | trim str = dropWhile isSpace (reverse (dropWhile isSpace (reverse str)))
59 | {-# INLINE trim #-}
60 | -- | Remove characters satisfying 'isSpace' from both ends of a 'Text8', without building reversed copies.
61 | trim8 :: Text8 -> Text8
62 | trim8 = C.dropWhileEnd isSpace . C.dropWhile isSpace
63 | {-# INLINE trim8 #-}
64 | 
65 | -- | Display name of a search target: the empty path is reported as "<STDIN>".
66 | getTargetName :: RawFilePath -> RawFilePath
67 | getTargetName path = if C.null path then "<STDIN>" else path
68 | {-# INLINE getTargetName #-}
69 | 
70 | -- | Contents of a target: standard input for the empty path, otherwise the memory-mapped file.
71 | getTargetContents :: RawFilePath -> IO Text8
72 | getTargetContents path = if C.null path then C.getContents else mmapFileByteString (C.unpack path) Nothing
73 | {-# INLINE getTargetContents #-}
74 | -- | With @multiline = n@ other than 1, rebuild the text as one line per group returned by @spanGroup n@, each group joined with spaces.
75 | expandMultiline :: Options -> Text8 -> Text8
76 | expandMultiline opts txt = case multiline opts of
77 |     1 -> txt
78 |     n -> (C.unlines . map C.unwords . spanGroup n . C.lines) txt
79 | {-# INLINE expandMultiline #-}
80 | 
81 | -- | Lower-case the text when the @ignore_case@ option is set; otherwise the identity.
82 | ignoreCase :: Options -> Text8 -> Text8
83 | ignoreCase opt =
84 |     if ignore_case opt then C.map toLower else id
85 | {-# INLINE ignoreCase #-}
86 | -- | Cut the text at the first newline found at or after the largest "last offset" of the groups; unchanged when there are no groups or no such newline.
87 | subText :: [[Offset]] -> Text8 -> Text8
88 | subText [] txt = txt
89 | subText groups txt =
90 |     maybe txt (\nl -> C.take (cut + nl) txt) (C.elemIndex '\n' (C.drop cut txt))
91 |   where
92 |     cut = fromIntegral (maximum (map lastOr0 groups))
93 |     lastOr0 [] = 0
94 |     lastOr0 offs = last offs
95 | {-# INLINE subText #-}
96 | 


--------------------------------------------------------------------------------
/src/CGrep/ContextFilter.hs:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | 
 19 | module CGrep.ContextFilter where
 20 | 
 21 | import CGrep.Parser.Char (chr, isSpace, ord)
 22 | import CGrep.Types (Text8)
 23 | 
 24 | import qualified Data.ByteString.Char8 as C
 25 | import qualified Data.ByteString.Short as B
 26 | 
 27 | import qualified Data.Map as Map
 28 | 
 29 | import CGrep.Boundary (Boundary (..))
 30 | import qualified Data.Aeson.KeyMap as B
 31 | import Data.List (find, findIndex, nub)
 32 | import Data.Maybe (fromMaybe, isJust)
 33 | import Options (Options (..))
 34 | 
 35 | import Data.Bits (Bits (complement, shiftL, shiftR, xor, (.&.), (.|.)))
 36 | import Data.Int (Int32, Int64)
 37 | 
 38 | import qualified Data.Vector as V
 39 | import qualified Data.Vector.Unboxed as UV
 40 | 
 41 | import qualified Data.ByteString.Unsafe as U
 42 | import Data.HashMap.Internal.Strict (alter)
 43 | import Data.Word (Word64)
 44 | 
 45 | import Util (findWithIndex)
 46 | -- | Shape shared by text filters: a 'ContextFilter' selection applied to a 'Text8'.
 47 | type FilterFunction = ContextFilter -> Text8 -> Text8
 48 | -- | Coarse kind of region a piece of source text belongs to.
 49 | data Context = Code | Comment | Literal
 50 |     deriving stock (Eq, Show)
 51 | -- | Bit mask over the three contexts, stored in an 'Int32'.
 52 | newtype ContextBit = ContextBit Int32
 53 |     deriving stock (Show)
 54 |     deriving newtype (Eq, Bits)
 55 | -- One distinct bit per context; 'contextBitEmpty' has no bit set.
 56 | contextBitEmpty :: ContextBit
 57 | contextBitEmpty = ContextBit 0
 58 | contextBitCode :: ContextBit
 59 | contextBitCode = ContextBit 0x1
 60 | contextBitComment :: ContextBit
 61 | contextBitComment = ContextBit 0x2
 62 | contextBitLiteral :: ContextBit
 63 | contextBitLiteral = ContextBit 0x4
 64 | 
 65 | -- | Keep the bit when the flag is 'True'; otherwise yield the empty mask.
 66 | (~=) :: ContextBit -> Bool -> ContextBit
 67 | b ~= enabled = if enabled then b else contextBitEmpty
 68 | {-# INLINE (~=) #-}
 69 | -- | Does the filter have at least one of the given bits set?
 70 | (~?) :: ContextFilter -> ContextBit -> Bool
 71 | flt ~? bits = contextBitEmpty /= (unFilter flt .&. bits)
 72 | {-# INLINE (~?) #-}
 73 | -- | Clear the given bits in the filter.
 74 | (~!) :: ContextFilter -> ContextBit -> ContextFilter
 75 | flt ~! bits = ContextFilter (complement bits .&. unFilter flt)
 76 | {-# INLINE (~!) #-}
 77 | 
 78 | -- | Set of contexts selected for output, held as a 'ContextBit' mask.
 79 | newtype ContextFilter = ContextFilter {unFilter :: ContextBit}
 80 |     deriving stock (Show)
 81 |     deriving newtype (Eq, Bits)
 82 | 
 83 | -- | The filter with the code, comment and literal bits all set.
 84 | contextFilterAll :: ContextFilter
 85 | contextFilterAll = ContextFilter (foldr (.|.) contextBitEmpty [contextBitCode, contextBitComment, contextBitLiteral])
 86 | {-# NOINLINE contextFilterAll #-}
 87 | 
 88 | -- | True exactly when the filter equals 'contextFilterAll'.
 89 | isContextFilterAll :: ContextFilter -> Bool
 90 | isContextFilterAll f = contextFilterAll == f
 91 | {-# INLINE isContextFilterAll #-}
 92 | 
 93 | -- | Per-context membership tests, each phrased through '~?'.
 94 | codeFilter, commentFilter, literalFilter :: ContextFilter -> Bool
 95 | codeFilter f = f ~? contextBitCode
 96 | commentFilter f = f ~? contextBitComment
 97 | literalFilter f = f ~? contextBitLiteral
 98 | {-# INLINE codeFilter #-}
 99 | {-# INLINE commentFilter #-}
100 | {-# INLINE literalFilter #-}
101 | 
102 | -- | Delimiter tables for the context parser, plus the bytes that can open a delimiter.
103 | data ParConfig = ParConfig
104 |     { commBound :: [Boundary] -- ^ delimiters that enter the comment states
105 |     , litrBound :: [Boundary] -- ^ delimiters that enter the literal states
106 |     , rawBound :: [Boundary] -- ^ delimiters that enter 'RawState'
107 |     , chrBound :: [Boundary] -- ^ delimiters that enter 'ChrState'
108 |     , inits :: B.ShortByteString -- ^ distinct first bytes of all opening delimiters
109 |     , alterBoundary :: Bool -- ^ picks the boundary-marking variant in 'runContextFilter'
110 |     }
111 | 
112 | -- | Assemble a 'ParConfig', deriving 'inits' from the first byte of every opening
113 | -- delimiter. Each 'bBegin' must be non-empty, because 'C.head' is partial.
114 | mkParConfig :: [Boundary] -> [Boundary] -> [Boundary] -> [Boundary] -> Bool -> ParConfig
115 | mkParConfig cs ls rs chs ab =
116 |     ParConfig
117 |         { commBound = cs
118 |         , litrBound = ls
119 |         , rawBound = rs
120 |         , chrBound = chs
121 |         , inits = B.pack (nub (concatMap (map firstByte) [cs, ls, rs, chs]))
122 |         , alterBoundary = ab
123 |         }
124 |   where
125 |     -- First byte of a boundary's opening delimiter.
126 |     firstByte b = fromIntegral (ord (C.head (bBegin b)))
127 | -- | Parser state carried from one input character to the next.
128 | data ParState = ParState
129 |     { ctxState :: !ContextState -- ^ state currently in effect
130 |     , nextState :: !ContextState -- ^ state adopted by 'transState' once 'skip' is 0
131 |     , display :: !Bool -- ^ whether the current character is kept in the output
132 |     , skip :: {-# UNPACK #-} !Int -- ^ characters still to step over (rest of a delimiter, or an escaped character)
133 |     }
134 |     deriving stock (Show)
135 | -- | States of the context parser; an 'Int' payload indexes the matching boundary list of 'ParConfig'.
136 | data ContextState
137 |     = CodeState1 -- entry state for code; 'nextContextState' moves it on to 'CodeStateN'
138 |     | CodeStateN
139 |     | CommState1 {-# UNPACK #-} !Int -- entry state for a comment; followed by 'CommStateN'
140 |     | CommStateN {-# UNPACK #-} !Int
141 |     | ChrState {-# UNPACK #-} !Int
142 |     | LitrState1 {-# UNPACK #-} !Int -- entry state for a literal; followed by 'LitrStateN'
143 |     | LitrStateN {-# UNPACK #-} !Int
144 |     | RawState {-# UNPACK #-} !Int
145 |     deriving stock (Show, Eq, Ord)
146 | -- | Build the filter from the @code@ / @comment@ / @literal@ options; when none is set, everything is selected.
147 | mkContextFilter :: Options -> ContextFilter
148 | mkContextFilter Options{..}
149 |     | code || comment || literal = ContextFilter chosen
150 |     | otherwise = contextFilterAll
151 |   where chosen = (contextBitCode ~= code) .|. (contextBitComment ~= comment) .|. (contextBitLiteral ~= literal)
152 | -- | Both delimiters of a 'Boundary' as plain 'String's, opening one first.
153 | unpackBoundary :: Boundary -> (String, String)
154 | unpackBoundary Boundary{bBegin = open, bEnd = close} = (C.unpack open, C.unpack close)
155 | {-# INLINE unpackBoundary #-}
156 | -- | Collapse a parser state to its coarse 'Context'; literal, raw and char states all count as 'Literal'.
157 | getContext :: ContextState -> Context
158 | getContext CodeState1 = Code
159 | getContext CodeStateN = Code
160 | getContext CommState1{} = Comment
161 | getContext CommStateN{} = Comment
162 | getContext LitrState1{} = Literal
163 | getContext LitrStateN{} = Literal
164 | getContext RawState{} = Literal
165 | getContext ChrState{} = Literal
166 | {-# INLINE getContext #-}
167 | 
168 | -- | Unfold seed used by 'runContextFilter': the text not yet consumed,
169 | -- paired with the parser state reached so far.
170 | 
171 | data ParData = ParData
172 |     { pdText :: {-# UNPACK #-} !Text8
173 |     , pdState :: !ParState
174 |     }
175 | -- | Rewrite @txt@, keeping its length, so that characters outside the contexts selected by @f@ become spaces; whitespace is always kept.
176 | runContextFilter :: ParConfig -> ContextFilter -> Text8 -> Text8
177 | runContextFilter conf@ParConfig{..} f txt
178 |     | alterBoundary = run contextFilter'
179 |     | otherwise = run contextFilter''
180 |   where
181 |     -- One output character per input character, hence the exact length bound; parsing starts in code.
182 |     run step = fst $ C.unfoldrN (C.length txt) (step conf) (ParData txt (ParState CodeState1 CodeState1 (codeFilter f) 0))
183 |     -- Marking variant: a displayed Code->Literal step emits chr 2, a displayed Literal->Code step emits chr 3.
184 |     contextFilter' :: ParConfig -> ParData -> Maybe (Char, ParData)
185 |     contextFilter' c (ParData rest@(C.uncons -> Just (x, xs)) s) =
186 |         let !s' = nextContextState c s rest f
187 |          in if display s'
188 |                 then case (# getContext (ctxState s), getContext (ctxState s') #) of
189 |                     (# Code, Literal #) -> Just (chr 2, ParData xs s')
190 |                     (# Literal, Code #) -> Just (chr 3, ParData xs s')
191 |                     _ -> Just (x, ParData xs s')
192 |                 else Just (if isSpace x then x else ' ', ParData xs s')
193 |     contextFilter' _ (ParData (C.uncons -> Nothing) _) = Nothing
194 |     -- Plain variant: no boundary markers.
195 |     contextFilter'' :: ParConfig -> ParData -> Maybe (Char, ParData)
196 |     contextFilter'' c (ParData rest@(C.uncons -> Just (x, xs)) s) =
197 |         let !s' = nextContextState c s rest f
198 |          in Just (if display s' || isSpace x then x else ' ', ParData xs s')
199 |     -- Exhausted input ends the unfold; without this clause the function is partial, unlike contextFilter'.
200 |     contextFilter'' _ (ParData (C.uncons -> Nothing) _) = Nothing
201 | -- Compute the parser state for the current character: @txt@ is the remaining input starting at that character, @s@ the state left by the previous one.
202 | {-# INLINE nextContextState #-}
203 | nextContextState :: ParConfig -> ParState -> Text8 -> ContextFilter -> ParState
204 | nextContextState c s@ParState{..} txt f
205 |     | skip > 0 = {-# SCC skip #-} transState s{skip = skip - 1} -- inside a delimiter or after an escape: just count down
206 |     | CodeState1 <- ctxState = -- first character after entering code
207 |         {-# SCC next_code1 #-}
208 |         if U.unsafeHead txt `B.elem` inits c -- cheap pre-check: can an opening delimiter start here?
209 |             then case findPrefixBoundary txt (commBound c) of
210 |                 (# i, Just b #) -> {-# SCC next_code1_1 #-} transState s{nextState = CommState1 i, display = commentFilter f, skip = C.length (bBegin b) - 1}
211 |                 _ -> case findPrefixBoundary txt (litrBound c) of
212 |                     (# i, Just b #) -> {-# SCC next_code1_2 #-} transState s{nextState = LitrState1 i, display = codeFilter f, skip = C.length (bBegin b) - 1}
213 |                     _ -> case findPrefixBoundary txt (rawBound c) of
214 |                         (# i, Just b #) -> {-# SCC next_code1_3 #-} transState s{nextState = RawState i, display = codeFilter f, skip = C.length (bBegin b) - 1}
215 |                         _ -> case findPrefixBoundary' txt (chrBound c) of
216 |                             (# i, Just b #) -> transState s{nextState = ChrState i, display = codeFilter f, skip = C.length (bBegin b) - 1}
217 |                             _ -> {-# SCC next_code1_5 #-} s{ctxState = CodeStateN, nextState = CodeStateN, display = codeFilter f, skip = 0}
218 |             else {-# SCC next_code1_0 #-} s{ctxState = CodeStateN, nextState = CodeStateN, display = codeFilter f, skip = 0}
219 |     | CodeStateN <- ctxState = -- steady state in code: same probes, but s is returned untouched when nothing opens
220 |         {-# SCC next_code #-}
221 |         if {-# SCC next_code_if #-} U.unsafeHead txt `B.elem` inits c
222 |             then
223 |                 {-# SCC next_code_then #-}
224 |                 case findPrefixBoundary txt (commBound c) of
225 |                     (# i, Just b #) -> {-# SCC next_code1_1 #-} transState s{nextState = CommState1 i, display = commentFilter f, skip = C.length (bBegin b) - 1}
226 |                     _ -> case findPrefixBoundary txt (litrBound c) of
227 |                         (# i, Just b #) -> {-# SCC next_code1_2 #-} transState s{nextState = LitrState1 i, display = codeFilter f, skip = C.length (bBegin b) - 1}
228 |                         _ -> case findPrefixBoundary txt (rawBound c) of
229 |                             (# i, Just b #) -> {-# SCC next_code1_3 #-} transState s{nextState = RawState i, display = codeFilter f, skip = C.length (bBegin b) - 1}
230 |                             _ -> case findPrefixBoundary' txt (chrBound c) of
231 |                                 (# i, Just b #) -> transState s{nextState = ChrState i, display = codeFilter f, skip = C.length (bBegin b) - 1}
232 |                                 _ -> {-# SCC next_code_5 #-} s
233 |             else {-# SCC next_code_else #-} s
234 |     | CommState1 n <- ctxState = -- n indexes commBound
235 |         let Boundary _ e = commBound c !! n
236 |          in {-# SCC next_comm1 #-}
237 |             if e `C.isPrefixOf` txt
238 |                 then transState $ s{nextState = CodeState1, display = commentFilter f, skip = C.length e - 1}
239 |                 else s{ctxState = CommStateN n, nextState = CommStateN n, display = commentFilter f, skip = 0}
240 |     | CommStateN n <- ctxState =
241 |         let Boundary _ e = commBound c !! n
242 |          in {-# SCC next_comm #-}
243 |             if e `C.isPrefixOf` txt
244 |                 then transState $ s{nextState = CodeState1, display = commentFilter f, skip = C.length e - 1}
245 |                 else s
246 |     | LitrState1 n <- ctxState =
247 |         if C.head txt == '\\' -- backslash: also step over the character it escapes
248 |             then s{display = displayContext ctxState f, skip = 1}
249 |             else
250 |                 let Boundary _ e = litrBound c !! n
251 |                  in {-# SCC next_liter #-}
252 |                     if e `C.isPrefixOf` txt
253 |                         then s{ctxState = CodeState1, nextState = CodeState1, display = codeFilter f, skip = C.length e - 1}
254 |                         else s{ctxState = LitrStateN n, nextState = LitrStateN n, display = literalFilter f, skip = 0}
255 |     | LitrStateN n <- ctxState =
256 |         if C.head txt == '\\'
257 |             then s{display = displayContext ctxState f, skip = 1}
258 |             else
259 |                 let Boundary _ e = litrBound c !! n
260 |                  in {-# SCC next_liter #-}
261 |                     if e `C.isPrefixOf` txt
262 |                         then s{ctxState = CodeState1, nextState = CodeState1, display = codeFilter f, skip = C.length e - 1}
263 |                         else s
264 |     | ChrState n <- ctxState =
265 |         if C.head txt == '\\'
266 |             then s{display = displayContext ctxState f, skip = 1}
267 |             else
268 |                 let Boundary _ e = chrBound c !! n
269 |                  in {-# SCC next_chr #-}
270 |                     if e `C.isPrefixOf` txt
271 |                         then s{ctxState = CodeState1, nextState = CodeState1, display = codeFilter f, skip = C.length e - 1}
272 |                         else s{display = literalFilter f, skip = 0}
273 |     | RawState n <- ctxState = -- no backslash handling in this state
274 |         let Boundary _ e = rawBound c !! n
275 |          in {-# SCC next_raw #-}
276 |             if e `C.isPrefixOf` txt
277 |                 then s{ctxState = CodeState1, nextState = CodeState1, display = codeFilter f, skip = C.length e - 1}
278 |                 else s{display = literalFilter f, skip = 0}
279 | 
280 | -- | Does the filter select the context the given state belongs to?
281 | displayContext :: ContextState -> ContextFilter -> Bool
282 | displayContext st cf = cf ~? wanted
283 |   where
284 |     -- Classify through 'getContext', so every literal-like state maps to 'contextBitLiteral'.
285 |     wanted = case getContext st of
286 |         Code -> contextBitCode
287 |         Comment -> contextBitComment
288 |         Literal -> contextBitLiteral
289 | {-# INLINE displayContext #-}
290 | -- | Switch 'ctxState' to 'nextState' once no characters remain to be skipped; otherwise leave the state alone.
291 | transState :: ParState -> ParState
292 | transState st = case skip st of
293 |     0 -> st{ctxState = nextState st}
294 |     _ -> st
295 | {-# INLINE transState #-}
296 | -- | Look up, via 'findWithIndex', the first boundary whose opening delimiter is a prefix of the text.
297 | findPrefixBoundary :: Text8 -> [Boundary] -> (# Int, Maybe Boundary #)
298 | findPrefixBoundary txt bounds =
299 |     {-# SCC findPrefixBoundary #-}
300 |     findWithIndex ((`C.isPrefixOf` txt) . bBegin) bounds
301 | {-# INLINE findPrefixBoundary #-}
302 | -- | Stricter lookup used for 'chrBound': after dropping one byte for the opening delimiter ('C.tail'), the closing
303 | -- delimiter must follow one character later, or two when that character is a backslash; else @(# 0, Nothing #)@.
304 | findPrefixBoundary' :: Text8 -> [Boundary] -> (# Int, Maybe Boundary #)
305 | findPrefixBoundary' txt bs =
306 |     case findWithIndex ((`C.isPrefixOf` txt) . bBegin) bs of
307 |         (# idx, Just bnd #)
308 |             | Just (y, ys) <- C.uncons (C.tail txt)
309 |             , bEnd bnd `C.isPrefixOf` (if y == '\\' then C.drop 1 ys else ys) ->
310 |                 (# idx, Just bnd #)
311 |         _ -> (# 0, Nothing #)
312 | 


--------------------------------------------------------------------------------
/src/CGrep/Distance.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module CGrep.Distance (distance, (~==)) where
20 | 
21 | -- from http://www.haskell.org/haskellwiki/Edit_distance
22 | -- Edit distance between two lists. The table is built lazily, one diagonal at a
23 | -- time, and the result is the last cell of the diagonal selected by 'lab'.
24 | distance :: (Eq a) => [a] -> [a] -> Int
25 | distance a b =
26 |     last
27 |         ( if lab == 0
28 |             then mainDiag
29 |             else
30 |                 if lab > 0
31 |                     then lowers !! (lab - 1)
32 |                     else {- < 0 -} uppers !! (-1 - lab)
33 |         )
34 |   where
35 |     mainDiag = oneDiag a b (head uppers) (-1 : head lowers)
36 |     uppers = eachDiag a b (mainDiag : uppers) -- upper diagonals
37 |     lowers = eachDiag b a (mainDiag : lowers) -- lower diagonals
38 |     eachDiag _a [] _diags = []
39 |     eachDiag a' (_bch : bs) (lastDiag : diags) = oneDiag a' bs nextDiag lastDiag : eachDiag a' bs diags
40 |       where
41 |         nextDiag = head (tail diags)
42 |     eachDiag _ _ [] = undefined -- the original implementation does not cover this case...
43 |     oneDiag a' b' diagAbove diagBelow = thisdiag
44 |       where
45 |         doDiag [] _b _nw _n _w = []
46 |         doDiag _a [] _nw _n _w = []
47 |         doDiag (ach : as) (bch : bs) nw n w = me : doDiag as bs me (tail n) (tail w)
48 |           where
49 |             me = if ach == bch then nw else 1 + min3 (head w) nw (head n)
50 |         firstelt = 1 + head diagBelow
51 |         thisdiag = firstelt : doDiag a' b' firstelt diagAbove (tail diagBelow)
52 |     lab = length a - length b -- length difference: decides which diagonal holds the result
53 |     min3 x y z = if x < y then x else min y z -- NOTE(review): z is not consulted when x < y; presumably relies on a property of neighbouring cells, confirm against the reference
54 | -- | Fuzzy equality: edit distance below 3 when the shorter string has fewer than 5 characters, otherwise below 40% of the shorter length.
55 | (~==) :: String -> String -> Bool
56 | a ~== b = distance a b < threshold
57 |   where
58 |     shorter = min (length a) (length b)
59 |     threshold
60 |         | shorter < 5 = 3
61 |         | otherwise = (shorter * 40) `div` 100
62 | {-# INLINE (~==) #-}
63 | 


--------------------------------------------------------------------------------
/src/CGrep/FileKind.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module CGrep.FileKind (
20 |     FileKind (..),
21 | ) where
22 | -- | Broad category of a file type.
23 | data FileKind = KindText | KindConfig | KindLanguage | KindData | KindMarkup | KindScript
24 |     deriving stock (Eq, Ord, Enum, Bounded)
25 | 
26 | -- | Shown without the @Kind@ prefix, e.g. @show KindText == "Text"@.
27 | instance Show FileKind where
28 |     show KindText = "Text"
29 |     show KindConfig = "Config"
30 |     show KindLanguage = "Language"
31 |     show KindData = "Data"
32 |     show KindMarkup = "Markup"
33 |     show KindScript = "Script"
34 | 
35 | -- | Inverse of 'show'. The input is tokenised with 'lex', so leading whitespace is
36 | -- skipped and the unconsumed remainder is handed back; this lets 'read' accept
37 | -- padded names and lists such as @"[Text,Data]"@. Unknown names yield no parse.
38 | instance Read FileKind where
39 |     readsPrec _ input =
40 |         [(kind, rest) | (name, rest) <- lex input, kind <- allKinds, show kind == name]
41 |       where allKinds = [minBound .. maxBound]
42 | 


--------------------------------------------------------------------------------
/src/CGrep/FileType.hs:
--------------------------------------------------------------------------------
  1 | ---
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | 
 19 | module CGrep.FileType (
 20 |     FileType (..),
 21 |     FileSelector (..),
 22 |     readTypeList,
 23 |     readKindList,
 24 | )
 25 | where
 26 | 
 27 | import Control.Applicative (Alternative ((<|>)))
 28 | import Control.Monad (forM_)
 29 | import qualified Data.Map as Map
 30 | import Data.Maybe (fromJust)
 31 | 
 32 | import CGrep.FileKind (FileKind)
 33 | import qualified Data.ByteString.Char8 as C
 34 | import Options (Options (Options, type_force))
 35 | import System.Posix.FilePath (RawFilePath)
 36 | import Util (prettyRead)
 37 | -- | One constructor per recognised file type; 'Read' and 'Show' are derived, so the textual form is the constructor name.
 38 | data FileType
 39 |     = Agda
 40 |     | Assembly
 41 |     | Awk
 42 |     | Bash
 43 |     | C
 44 |     | CMake
 45 |     | Cabal
 46 |     | Chapel
 47 |     | Clojure
 48 |     | Coffee
 49 |     | Conf
 50 |     | Cpp
 51 |     | Csh
 52 |     | Csharp
 53 |     | Css
 54 |     | Cql
 55 |     | D
 56 |     | Dart
 57 |     | Dhall
 58 |     | Elm
 59 |     | Elixir
 60 |     | Erlang
 61 |     | Fish
 62 |     | Fortran
 63 |     | Fsharp
 64 |     | Go
 65 |     | GoMod
 66 |     | Haskell
 67 |     | Html
 68 |     | Idris
 69 |     | Java
 70 |     | Javascript
 71 |     | Json
 72 |     | Julia
 73 |     | Kotlin
 74 |     | Ksh
 75 |     | Latex
 76 |     | Lisp
 77 |     | Lua
 78 |     | Make
 79 |     | Nim
 80 |     | Nmap
 81 |     | OCaml
 82 |     | ObjectiveC
 83 |     | PHP
 84 |     | Perl
 85 |     | Python
 86 |     | R
 87 |     | Ruby
 88 |     | Rust
 89 |     | Scala
 90 |     | SmallTalk
 91 |     | Swift
 92 |     | Sql
 93 |     | Tcl
 94 |     | Text
 95 |     | Unison
 96 |     | VHDL
 97 |     | Verilog
 98 |     | Yaml
 99 |     | Toml
100 |     | Ini
101 |     | Zig
102 |     | Zsh
103 |     deriving stock (Read, Show, Eq, Ord, Bounded)
104 | -- | Selector for a file: a 'Name', or an 'Ext' / 'Hdr' suffix; the latter two are both shown as @*.suffix@.
105 | data FileSelector = Name RawFilePath | Ext C.ByteString | Hdr C.ByteString
106 |     deriving stock (Eq, Ord)
107 | instance Show FileSelector where
108 |     show sel = case sel of
109 |         Name file -> C.unpack file
110 |         Ext ext -> "*." <> C.unpack ext
111 |         Hdr ext -> "*." <> C.unpack ext
112 | 
113 | -- utility functions
114 | -- | Sort type names into (unprefixed, @+@-prefixed, @-@-prefixed) lists, parsed with 'prettyRead'; each list ends up in reverse input order.
115 | readTypeList :: [String] -> ([FileType], [FileType], [FileType])
116 | readTypeList = foldl classify ([], [], [])
117 |   where
118 |     classify (plain, added, removed) name = case name of
119 |         '+' : rest -> (plain, parse rest : added, removed)
120 |         '-' : rest -> (plain, added, parse rest : removed)
121 |         _ -> (parse name : plain, added, removed)
122 |     parse str = prettyRead str "Type"
123 | 
-- | Parse every given name into a 'FileKind' with 'prettyRead', preserving
-- the input order.
readKindList :: [String] -> [FileKind]
readKindList names = [prettyRead name "Kind" | name <- names]


--------------------------------------------------------------------------------
/src/CGrep/Output.hs:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | {-# LANGUAGE ExistentialQuantification #-}
 19 | 
 20 | module CGrep.Output (
 21 |     Output (..),
 22 |     mkOutputElements,
 23 |     putOutputElements,
 24 |     runSearch,
 25 |     showFileName,
 26 |     showBold,
 27 | ) where
 28 | 
 29 | import qualified Data.ByteString as B
 30 | import qualified Data.ByteString.Builder as B
 31 | 
 32 | import qualified Data.ByteString.Char8 as C
 33 | import qualified Data.ByteString.Lazy.Char8 as LC
 34 | import qualified Data.ByteString.Unsafe as BU
 35 | 
 36 | import qualified Data.Vector.Unboxed as UV
 37 | 
 38 | import Data.Vector.Unboxed ((!))
 39 | 
 40 | import System.Console.ANSI (
 41 |     ConsoleIntensity (BoldIntensity),
 42 |     SGR (SetConsoleIntensity),
 43 |     setSGRCode,
 44 |  )
 45 | 
 46 | import Control.Monad.IO.Class (MonadIO (liftIO))
 47 | import Control.Monad.Trans.Reader (ask, reader)
 48 | 
 49 | import Data.Function (on)
 50 | import Data.List (
 51 |     foldl',
 52 |     genericLength,
 53 |     groupBy,
 54 |     intersperse,
 55 |     isPrefixOf,
 56 |     nub,
 57 |     sort,
 58 |     sortBy,
 59 |  )
 60 | 
 61 | import CGrep.Parser.Chunk (Chunk (..), MatchLine (..))
 62 | import CGrep.Types (Offset, Text8)
 63 | 
 64 | import Config (Config (configColorFile, configColorMatch))
 65 | import Data.ByteString.Internal (c2w)
 66 | import Data.Int (Int64)
 67 | import qualified Data.Vector.Fusion.Util as VU (Box (..))
 68 | import Data.Word (Word8)
 69 | import Reader (Env (..), ReaderIO)
 70 | 
 71 | import System.Posix.FilePath (RawFilePath)
 72 | 
 73 | import CGrep.Parser.Line (getLineOffsets)
 74 | import qualified Data.Vector.Generic as GV
 75 | import Options (
 76 |     Options (
 77 |         Options,
 78 |         color,
 79 |         count,
 80 |         filename_only,
 81 |         invert_match,
 82 |         json,
 83 |         no_color,
 84 |         no_column,
 85 |         no_filename,
 86 |         no_numbers,
 87 |         no_shallow,
 88 |         show_match
 89 |     ),
 90 |  )
 91 | 
-- | One line of search output: the file it belongs to, its line number, the
-- raw text of the line and the chunks that matched on it.
data Output = Output
    { outFilePath :: RawFilePath
    -- ^ Path of the file the line comes from.
    , outLineNumb :: {-# UNPACK #-} !Int64
    -- ^ Line number; 'mkOutputElements' indexes the line list with @n - 1@,
    -- so it is 1-based.
    , outLine :: {-# UNPACK #-} !Text8
    -- ^ Text of the line.
    , outChunks :: ![Chunk]
    -- ^ Matching chunks on this line (empty for inverted matches).
    }
 98 | 
 99 | outTokens :: Output -> [Text8]
100 | outTokens (Output fp ln l cs) = cToken <$> cs
101 | {-# INLINE outTokens #-}
102 | 
-- | Binary search returning the first index whose element is strictly
-- greater than @x@ (or the vector length if there is none).
--
-- NOTE(review): the result is only meaningful if @vs@ is sorted in
-- ascending order; nothing here checks that.
insertIndex :: UV.Vector Offset -> Offset -> Int
insertIndex vs x = go 0 (UV.length vs)
  where
    -- Invariant: the answer lies in the half-open range [lower, upper].
    go !lower !upper
        | lower == upper = lower
        | x < UV.unsafeIndex vs middle = go lower middle
        | otherwise = go (middle + 1) upper
      where
        middle = (lower + upper) `quot` 2
113 | 
-- | Translate an absolute offset into a line number and the offset
-- relative to the start of that line.
--
-- The line start is read with 'UV.unsafeIndex' at @lineNo - 1@, so the
-- caller must guarantee that 'insertIndex' returns at least 1.
-- NOTE(review): presumably the offsets vector always begins with 0, which
-- would make this hold for every non-negative @x@ -- confirm.
getLineNumberAndOffset :: UV.Vector Offset -> Offset -> (# Int, Offset #)
getLineNumberAndOffset xs x = (# lineNo, x - lineStart #)
  where
    lineNo = insertIndex xs x
    lineStart = UV.unsafeIndex xs (lineNo - 1)
{-# INLINE getLineNumberAndOffset #-}
119 | 
-- | Build the output records for one file.
--
-- The chunks are grouped per line with 'mkMatchLines'; when the
-- @invert_match@ option is set, the lines /without/ a match are reported
-- instead.  Each record carries the text of its line, looked up by
-- (1-based) line number in @C.lines text@.
mkOutputElements :: UV.Vector Int64 -> RawFilePath -> Text8 -> Text8 -> [Chunk] -> ReaderIO [Output]
mkOutputElements lineOffsets f text multi ts = do
    invert <- invert_match <$> reader opt
    let matched = mkMatchLines lineOffsets multi ts
        selected
            | invert = invertLines (length ls) matched
            | otherwise = matched
    -- Note: (!!) walks the line list from the start for every record.
    return [Output f n (ls !! fromIntegral (n - 1)) xs | MatchLine n xs <- selected]
  where
    ls = C.lines text
{-# INLINE mkOutputElements #-}
130 | 
-- | Group chunks by the line they fall on.
--
-- Every chunk is relocated with 'getLineNumberAndOffset' (its offset
-- becomes relative to the line start), the results are sorted by line
-- number and chunks on the same line are merged into one 'MatchLine'.
-- The 'Text8' argument is not used.
mkMatchLines :: UV.Vector Int64 -> Text8 -> [Chunk] -> [MatchLine]
mkMatchLines _ _ [] = []
mkMatchLines lineOffsets _ ts =
    [ MatchLine (lOffset firstOfLine) (concatMap lChunks sameLine)
    | sameLine@(firstOfLine : _) <- groupBy ((==) `on` lOffset) (sortBy (compare `on` lOffset) located)
    ]
  where
    located :: [MatchLine]
    located = map locate ts

    -- One single-chunk 'MatchLine' per input chunk.
    locate :: Chunk -> MatchLine
    locate chunk = case getLineNumberAndOffset lineOffsets (cOffset chunk) of
        (# row, col #) -> MatchLine (fromIntegral row) [Chunk (cTyp chunk) (cToken chunk) col]
140 | 
-- | Given the total number of lines and the lines that matched, produce an
-- empty 'MatchLine' for every line number in @1..n@ that did /not/ match.
invertLines :: Int -> [MatchLine] -> [MatchLine]
invertLines n xs = [MatchLine i [] | i <- take n [1 ..], i `notElem` matchedLines]
  where
    matchedLines = map lOffset xs
{-# INLINE invertLines #-}
146 | 
-- | Render a batch of output records, or 'Nothing' when there is nothing to
-- print.  The renderer is chosen from the options: JSON first, then
-- file-names-only, otherwise the default grep-like format.
putOutputElements :: [Output] -> ReaderIO (Maybe B.Builder)
putOutputElements [] = pure Nothing
putOutputElements out = do
    options <- reader opt
    let render
            | json options = jsonOutput
            | filename_only options = filenameOutput
            | otherwise = defaultOutput
    Just <$> render out
155 | 
-- | Run @doSearch@ only when the file is eligible or the @no_shallow@
-- option is set; otherwise produce the output for an empty file with no
-- matches.
runSearch ::
    Options ->
    RawFilePath ->
    Bool ->
    ReaderIO [Output] ->
    ReaderIO [Output]
runSearch opt filename eligible doSearch
    | eligible || no_shallow opt = doSearch
    | otherwise = mkOutputElements UV.empty filename C.empty C.empty ([] :: [Chunk])
166 | 
-- | Render output records in the default, grep-like text format.
--
-- Without @count@, one line is emitted per record:
--
-- > [file:][line[:col]:][tokens:]text
--
-- where the file prefix is dropped by @no_filename@ and the position prefix
-- by @no_numbers@, independently of each other.  (Previously the position
-- prefix was also lost whenever @no_filename@ was set, because that branch
-- was a copy of the @no_numbers@ one.)
--
-- With @count@, consecutive records of the same file are grouped and one
-- line per group is emitted: @[file:]N@.
--
-- Lines are separated by newlines, with no trailing newline.
defaultOutput :: [Output] -> ReaderIO B.Builder
defaultOutput xs = do
    Env{..} <- ask
    let colon = B.char8 ':'
        joinLines = mconcat . intersperse (B.char8 '\n')

        -- "file:" unless file names are suppressed.
        namePrefix out
            | no_filename opt = mempty
            | otherwise = buildFileName conf opt out <> colon

        -- "line[:col]:" unless numbers are suppressed.
        posPrefix out
            | no_numbers opt = mempty
            | otherwise = buildLineCol opt out <> colon

        matchLine out =
            namePrefix out <> posPrefix out <> buildTokens opt out <> buildLine conf opt out

        -- groupBy never yields an empty group; the first case only keeps
        -- the function total.
        countLine grp = case grp of
            [] -> mempty
            out : _ -> namePrefix out <> B.intDec (length grp)

        fileGroups = groupBy ((==) `on` outFilePath) xs
    pure . joinLines $
        if count opt
            then map countLine fileGroups
            else map matchLine xs
187 | 
-- | Render the records of one file as a JSON object:
--
-- > { "file":"<name>", "matches":[
-- > { "row": N, "tokens":[{ "col":C, "token":"<tok>" },...] },...
-- > ]}
--
-- The file name is taken from the first record.  File names and tokens are
-- escaped before being placed inside JSON string literals, so tokens that
-- contain quotes, backslashes or control characters (string literals found
-- in source code, for instance) no longer produce malformed JSON.
jsonOutput :: [Output] -> ReaderIO B.Builder
jsonOutput [] = pure mempty
jsonOutput outs@(Output fname _ _ _ : _) =
    pure . mconcat . intersperse (B.char8 '\n') $
        [ B.byteString "{ \"file\":\"" <> jsonEscape fname <> B.byteString "\", \"matches\":["
        , mconcat (intersperse (B.char8 ',') (map mkMatch outs))
        , B.byteString "]}"
        ]
  where
    mkJToken chunk =
        B.byteString "{ \"col\":"
            <> B.int64Dec (cOffset chunk)
            <> B.byteString ", \"token\":\""
            <> jsonEscape (cToken chunk)
            <> B.byteString "\" }"

    mkMatch (Output _ n _ ts) =
        B.byteString "{ \"row\": "
            <> B.int64Dec n
            <> B.byteString ", \"tokens\":["
            <> mconcat (intersperse (B.byteString ",") (map mkJToken ts))
            <> B.byteString "] }"

    -- Escape the bytes that may not appear raw inside a JSON string.
    -- Bytes >= 0x80 are passed through untouched (assumed to be UTF-8).
    jsonEscape :: B.ByteString -> B.Builder
    jsonEscape = C.foldr (\c acc -> escapeChar c <> acc) mempty

    escapeChar :: Char -> B.Builder
    escapeChar c = case c of
        '"' -> B.string8 "\\\""
        '\\' -> B.string8 "\\\\"
        '\n' -> B.string8 "\\n"
        '\r' -> B.string8 "\\r"
        '\t' -> B.string8 "\\t"
        _
            | c < ' ' -> B.string8 "\\u" <> B.word16HexFixed (fromIntegral (fromEnum c))
            | otherwise -> B.char8 c
207 | 
-- | Render only the distinct file names of the given records, one per line
-- (first occurrence order, no trailing newline).
filenameOutput :: [Output] -> ReaderIO B.Builder
filenameOutput outs = return (mconcat (intersperse (B.char8 '\n') (map B.byteString uniqueNames)))
  where
    uniqueNames = nub (map outFilePath outs)
{-# INLINE filenameOutput #-}
211 | 
-- | SGR escape sequence that switches on bold intensity.
bold :: C.ByteString
bold = C.pack (setSGRCode [SetConsoleIntensity BoldIntensity])
{-# NOINLINE bold #-}

-- | SGR escape sequence that resets all attributes.
reset :: C.ByteString
reset = C.pack (setSGRCode [])
{-# NOINLINE reset #-}

-- | 'bold' as a ready-made builder.
boldBuilder :: B.Builder
boldBuilder = B.byteString bold
{-# NOINLINE boldBuilder #-}

-- | 'reset' as a ready-made builder.
resetBuilder :: B.Builder
resetBuilder = B.byteString reset
{-# NOINLINE resetBuilder #-}

-- | An SGR escape sequence, already packed as bytes.
type ColorString = C.ByteString
225 | 
-- | The file path of a record, wrapped in the configured file color when
-- coloring is enabled.
buildFileName :: Config -> Options -> Output -> B.Builder
buildFileName conf opt out = buildColoredAs opt fileColor (outFilePath out)
  where
    fileColor :: ColorString
    fileColor = C.pack (setSGRCode (configColorFile conf))
{-# INLINE buildFileName #-}
232 | 
-- | Emit @str@, surrounded by @colorCode@ and a reset sequence when the
-- @color@ option is on and @no_color@ is off; otherwise emit it as is.
buildColoredAs :: Options -> ColorString -> B.ByteString -> B.Builder
buildColoredAs opt colorCode str =
    if color opt && not (no_color opt)
        then B.byteString colorCode <> B.byteString str <> resetBuilder
        else B.byteString str
{-# INLINE buildColoredAs #-}
238 | 
-- | The position part of an output line.
--
--   * nothing when @no_numbers@ is set;
--   * the line number alone when @no_column@ is set or the record has no
--     chunks;
--   * otherwise @line:col@, where @col@ is the offset of the first chunk
--     plus one.
buildLineCol :: Options -> Output -> B.Builder
buildLineCol opt out
    | no_numbers opt = mempty
    | otherwise = case (no_column opt, outChunks out) of
        (False, firstChunk : _) ->
            lineNo <> B.char8 ':' <> B.int64Dec (cOffset firstChunk + 1)
        _ -> lineNo
  where
    lineNo = B.int64Dec (outLineNumb out)
{-# INLINE buildLineCol #-}
245 | 
-- | When @show_match@ is set, the concatenated matching tokens in bold
-- followed by a colon; otherwise nothing.
buildTokens :: Options -> Output -> B.Builder
buildTokens opt out =
    if show_match opt
        then boldBuilder <> mconcat (map B.byteString (outTokens out)) <> resetBuilder <> B.char8 ':'
        else mempty
250 | 
-- | The text of the line.  With coloring enabled (@color@ set and
-- @no_color@ unset) the matching chunks are highlighted; they are handed to
-- 'highlightLine' sorted by decreasing token length.
buildLine :: Config -> Options -> Output -> B.Builder
buildLine conf opt out =
    if color opt && not (no_color opt)
        then highlightLine conf longestFirst (outLine out)
        else B.byteString (outLine out)
  where
    tokenLen = C.length . cToken
    longestFirst = sortBy (\a b -> compare (tokenLen b) (tokenLen a)) (outChunks out)
{-# INLINE buildLine #-}
256 | 
-- | A file path wrapped in the configured file color when coloring is
-- enabled (strict-bytes counterpart of 'buildFileName').
showFileName :: Config -> Options -> RawFilePath -> RawFilePath
showFileName conf opt path = showColoredAs opt fileColor path
  where
    fileColor = C.pack (setSGRCode (configColorFile conf))
{-# INLINE showFileName #-}
260 | 
-- | Wrap a string in the bold escape sequence when coloring is enabled.
showBold :: Options -> C.ByteString -> C.ByteString
showBold opt str = showColoredAs opt bold str
{-# INLINE showBold #-}
264 | 
-- | Return @str@ surrounded by @colorCode@ and the reset sequence when the
-- @color@ option is on and @no_color@ is off; otherwise return it as is.
showColoredAs :: Options -> C.ByteString -> C.ByteString -> C.ByteString
showColoredAs opt colorCode str =
    if color opt && not (no_color opt)
        then colorCode <> str <> reset
        else str
{-# INLINE showColoredAs #-}
270 | 
-- | Render a line with the given chunks highlighted in the configured match
-- color.
--
-- The chunks are turned into inclusive @(first, last)@ offset pairs by
-- 'highlightIndexes'.  The worker then walks the line carrying:
--
--   * @ns@: all the offset pairs (never modified),
--   * @n@:  the offset of the current position in the line,
--   * @bs@: how many highlighted ranges are currently open.
highlightLine :: Config -> [Chunk] -> Text8 -> B.Builder
highlightLine conf ts = highlightLine' (highlightIndexes ts, 0, 0)
  where
    highlightLine' :: ([(Int64, Int64)], Int64, Int) -> C.ByteString -> B.Builder
    -- End of the line: nothing left to emit.
    highlightLine' _ (C.uncons -> Nothing) = mempty
    highlightLine' (ns, !n, !bs) s@(C.uncons -> Just (x, _)) =
        ( if
            -- A boundary after which no range is open: either a one-character
            -- range (it starts and ends here, so it needs the color code too)
            -- or the last character of a range.  Either way, reset afterwards.
            | check && bs' == 0 -> if fst stack > 0 then B.string8 colorMatch <> B.char8 x <> resetBuilder else B.char8 x <> resetBuilder
            -- A boundary after which at least one range is still open:
            -- (re-)emit the color code and the character, without a reset.
            | check && bs' > 0 -> B.string8 colorMatch <> B.char8 x
            -- Not on a boundary: copy the bytes up to the next boundary (or
            -- to the end of the line) in one go.
            | otherwise -> B.byteString next
        )
            <> highlightLine' (ns, n + nn, bs') rest
      where
        -- (number of ranges starting at n, number of ranges ending at n)
        stack = foldr (\(a, b) (c, d) -> (c + fromEnum (a == n), d + fromEnum (b == n))) (0, 0) ns
        -- True when the current offset is a range boundary.
        check = fst stack > 0 || snd stack > 0
        colorMatch = setSGRCode (configColorMatch conf)
        -- Open-range count once the current position has been processed.
        bs' = bs + fst stack - snd stack
        -- Every boundary offset, sorted and without duplicates.
        plain = nub . sort $ foldr (\(a, b) acc -> a : b : acc) [] ns
        -- How far to advance: one character on a boundary, otherwise up to
        -- the next boundary beyond n, or the whole remainder if there is none.
        nn
            | check = 1
            | null plain' = fromIntegral (C.length s)
            | otherwise = head plain' - n
          where
            plain' = dropWhile (<= n) plain
        (next, rest) = C.splitAt (fromIntegral nn) s
    -- The two view patterns above cover every ByteString; this is unreachable.
    highlightLine' _ _ = undefined
297 | 
-- | Inclusive @(first, last)@ offsets of every chunk with a non-empty
-- token, in the order the chunks were given.
highlightIndexes :: [Chunk] -> [(Int64, Int64)]
highlightIndexes chunks =
    [ (fromIntegral start, start + fromIntegral (C.length token) - 1)
    | Chunk _ token start <- chunks
    , not (B.null token)
    ]
{-# INLINE highlightIndexes #-}
301 | 


--------------------------------------------------------------------------------
/src/CGrep/Parser/Atom.hs:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | 
 19 | module CGrep.Parser.Atom (
 20 |     Atom (..),
 21 |     Atoms,
 22 |     mkAtomFromToken,
 23 |     combineAtoms,
 24 |     filterTokensWithAtoms,
 25 |     wildCardMap,
 26 |     wildCardMatch,
 27 |     wildCardsMatch,
 28 | ) where
 29 | 
 30 | import qualified Data.Map as M
 31 | 
 32 | import CGrep.Common (trim, trim8)
 33 | import CGrep.Distance ((~==))
 34 | import CGrep.Parser.Char (isDigit)
 35 | 
 36 | import Data.List (
 37 |     findIndices,
 38 |     isInfixOf,
 39 |     isPrefixOf,
 40 |     isSuffixOf,
 41 |     subsequences,
 42 |  )
 43 | import Options (
 44 |     Options (edit_dist, prefix_match, suffix_match, word_match),
 45 |  )
 46 | import Util (rmQuote8, spanGroup)
 47 | 
 48 | import qualified CGrep.Parser.Chunk as T
 49 | import qualified CGrep.Parser.Token as T
 50 | import qualified Data.ByteString.Char8 as C
 51 | import GHC.Stack (errorWithStackTrace)
 52 | 
-- | One element of a search pattern.
--
-- 'Any', 'Keyword', 'Number', 'Oct', 'Hex', 'String' and 'Literal' are
-- wildcards matching a class of tokens; 'Identifier' is a @$@/@_@ wildcard
-- carrying its own name; 'Raw' is a concrete token that must itself match.
data Atom
    = Any
    | Keyword
    | Number
    | Oct
    | Hex
    | String
    | Literal
    | Identifier C.ByteString
    | Raw T.Token
    deriving stock (Eq, Ord, Show)

-- | A set of alternatives for one pattern position: a token matches if any
-- of the atoms matches (see 'wildCardsMatch').
type Atoms = [Atom]
 66 | 
 67 | wildCardMap :: M.Map C.ByteString Atom
 68 | wildCardMap =
 69 |     M.fromList
 70 |         [ ("ANY", Any)
 71 |         , ("KEY", Keyword)
 72 |         , ("OCT", Oct)
 73 |         , ("HEX", Hex)
 74 |         , ("NUM", Number)
 75 |         , ("STR", String)
 76 |         , ("LIT", String)
 77 |         ]
 78 | 
 79 | mkAtomFromToken :: T.Token -> Atom
 80 | mkAtomFromToken t
 81 |     | T.isTokenIdentifier t = case () of
 82 |         _
 83 |             | Just wc <- M.lookup str wildCardMap -> wc
 84 |             | isAtomIdentifier str -> Identifier str
 85 |             | otherwise -> Raw $ T.mkTokenIdentifier (rmAtomEscape str) (T.tOffset t)
 86 |           where
 87 |             str = T.tToken t
 88 |     | otherwise = Raw t
 89 | 
 90 | combineAtoms :: [Atoms] -> [Atoms]
 91 | combineAtoms (m1 : r@(m2 : m3 : ms))
 92 |     | [Raw b] <- m2, T.tToken b == "OR" = combineAtoms $ (m1 <> m3) : ms
 93 |     | otherwise = m1 : combineAtoms r
 94 | combineAtoms [m1, m2] = [m1, m2]
 95 | combineAtoms [m1] = [m1]
 96 | combineAtoms [] = []
 97 | 
 98 | {-# INLINE filterTokensWithAtoms #-}
 99 | filterTokensWithAtoms :: Options -> [Atoms] -> [T.Token] -> [T.Token]
100 | filterTokensWithAtoms opt ws ts = go opt (spanOptionalCards ws) ts
101 |   where
102 |     go :: Options -> [[Atoms]] -> [T.Token] -> [T.Token]
103 |     go _ [] _ = []
104 |     go opt (g : gs) ts =
105 |         {-# SCC "atom_find_total" #-} concatMap (take grpLen . (`drop` ts)) ({-# SCC "atom_find_indices" #-} findIndices (wildCardsCompare opt g) grp) <> {-# SCC atom_find_req #-} go opt gs ts
106 |       where
107 |         grp = {-# SCC "atomSpanGroup" #-} spanGroup grpLen ts
108 |         grpLen = length g
109 | 
-- | Expand a pattern into every variant obtained by dropping a subset of
-- its optional positions.  A position is optional when it is a single
-- 'Identifier' whose name starts with @$@.  The first variant is the full
-- pattern (empty subset dropped).
spanOptionalCards :: [Atoms] -> [[Atoms]]
spanOptionalCards wc = [filterCardIndices dropped indexed | dropped <- subsequences optional]
  where
    indexed = zip [0 ..] wc
    optional = findIndices isOptional wc

    isOptional :: Atoms -> Bool
    isOptional atoms = case atoms of
        [Identifier name] -> C.take 1 name == C.singleton '$'
        _ -> False
122 | 
-- | Keep the atoms whose index is not listed in @ns@.
filterCardIndices :: [Int] -> [(Int, Atoms)] -> [Atoms]
filterCardIndices ns ps = [atoms | (n, atoms) <- ps, n `notElem` ns]
{-# INLINE filterCardIndices #-}
126 | 
-- | A group of tokens matches a pattern when every position matches and
-- the numbered wildcards are bound consistently.
wildCardsCompare :: Options -> [Atoms] -> [T.Token] -> Bool
wildCardsCompare opt l r =
    let compared = wildCardsGroupCompare opt l r
     in wildCardsCompareAll compared && wildCardsCheckOccurrences compared
{-# INLINE wildCardsCompare #-}
133 | 
-- | Whether an identifier is a @$@/@_@ wildcard: either the prefix character
-- alone, or the prefix character followed by a digit (only the second
-- character is inspected; anything after it is ignored).
--
-- Calls 'errorWithoutStackTrace' on the empty string.
isAtomIdentifier :: C.ByteString -> Bool
isAtomIdentifier s = case C.unpack (C.take 2 s) of
    [p, d] -> wprefix p && isDigit d
    [p] -> wprefix p
    _ -> errorWithoutStackTrace "CGrep: isAtomIdentifier"
  where
    wprefix x = x == '$' || x == '_'
142 | 
-- | Drop a single leading @$@ or @_@, if present.
rmAtomEscape :: C.ByteString -> C.ByteString
rmAtomEscape str = case C.uncons str of
    Just (c, rest) | c == '$' || c == '_' -> rest
    _ -> str
{-# INLINE rmAtomEscape #-}
148 | 
-- | True when every position of the comparison succeeded.
wildCardsCompareAll :: [(Bool, (Atoms, [C.ByteString]))] -> Bool
wildCardsCompareAll results = and [matched | (matched, _) <- results]
{-# INLINE wildCardsCompareAll #-}
{-# SCC wildCardsCompareAll #-}
153 | 
154 | -- Note: pattern $ and _ match any token, whereas $1 $2 (_1 _2 etc.) match tokens
155 | --       that must compare equal in the respective occurrences
156 | 
-- | Check that every numbered wildcard (@_0@..@_9@, @$0@..@$9@) was bound
-- to the same token text at each of its occurrences.  Positions held by any
-- other atom are not constrained.
wildCardsCheckOccurrences :: [(Bool, (Atoms, [C.ByteString]))] -> Bool
wildCardsCheckOccurrences ts = all allEqual (M.elems numbered)
  where
    -- Token texts collected per pattern position, keyed by its atoms.
    bindings = M.fromListWith (<>) (map snd ts)
    numbered = M.filterWithKey (\atoms _ -> isNumberedCard atoms) bindings

    isNumberedCard :: Atoms -> Bool
    isNumberedCard atoms = case atoms of
        [Identifier name] -> case C.unpack name of
            [p, d] -> (p == '_' || p == '$') && isDigit d
            _ -> False
        _ -> False

    allEqual :: [C.ByteString] -> Bool
    allEqual [] = True
    allEqual (y : ys) = all (== y) ys
{-# INLINE wildCardsCheckOccurrences #-}
{-# SCC wildCardsCheckOccurrences #-}
189 | 
-- | Compare a pattern with a group of tokens position by position.  When
-- the group is shorter than the pattern, a single failed result is
-- returned instead.
wildCardsGroupCompare :: Options -> [Atoms] -> [T.Token] -> [(Bool, (Atoms, [C.ByteString]))]
wildCardsGroupCompare opt ls rs =
    if length rs < length ls
        then [(False, ([Any], []))]
        else zipWith (tokensZip opt) ls rs
{-# INLINE wildCardsGroupCompare #-}
{-# SCC wildCardsGroupCompare #-}
196 | 
-- | Compare one pattern position with one token.  On success the result
-- records the atoms together with the token text they matched; on failure
-- it is the fixed value @(False, ([Any], []))@.
tokensZip :: Options -> Atoms -> T.Token -> (Bool, (Atoms, [C.ByteString]))
tokensZip opt l r =
    if wildCardsMatch opt l r
        then (True, (l, [T.tToken r]))
        else (False, ([Any], []))
{-# INLINE tokensZip #-}
{-# SCC tokensZip #-}
203 | 
-- | A token matches a set of alternatives when at least one atom matches.
wildCardsMatch :: Options -> Atoms -> T.Token -> Bool
wildCardsMatch opt m t = or [wildCardMatch opt atom t | atom <- m]
{-# INLINE wildCardsMatch #-}
{-# SCC wildCardsMatch #-}
208 | 
{-# SCC wildCardMatch #-}
-- | Decide whether a single atom matches a token.
--
-- 'Raw' atoms compare token text.  When both sides are identifiers, or
-- both are strings (compared after trimming and removing the quotes), the
-- comparison mode is chosen from the options, in this order: @word_match@
-- (equality), @prefix_match@, @suffix_match@, @edit_dist@ (approximate
-- equality via '~=='), otherwise infix.  Any other pair of tokens is
-- compared with 'T.eqToken'.
--
-- The remaining atoms are wildcards on the token class.  'Oct' requires a
-- number starting with @0@ followed by a digit; 'Hex' requires a number
-- starting with @0x@ or @0X@ (the upper-case prefix used to be rejected).
wildCardMatch :: Options -> Atom -> T.Token -> Bool
wildCardMatch opt (Raw l) r
    | T.isTokenIdentifier l && T.isTokenIdentifier r =
        {-# SCC wildcard_raw_0 #-}
        textMatch (T.tToken l) (T.tToken r)
    | T.isTokenString l && T.isTokenString r =
        {-# SCC wildcard_raw_1 #-}
        textMatch ls rs
    | otherwise = {-# SCC wildcard_raw_2 #-} l `T.eqToken` r
  where
    ls = rmQuote8 $ trim8 (T.tToken l)
    rs = rmQuote8 $ trim8 (T.tToken r)

    -- Compare pattern text @a@ with token text @b@ in the selected mode.
    textMatch :: C.ByteString -> C.ByteString -> Bool
    textMatch a b
        | word_match opt = a == b
        | prefix_match opt = a `C.isPrefixOf` b
        | suffix_match opt = a `C.isSuffixOf` b
        | edit_dist opt = C.unpack a ~== C.unpack b
        | otherwise = a `C.isInfixOf` b
wildCardMatch _ Any _ = {-# SCC wildcard_any #-} True
wildCardMatch _ (Identifier _) t = {-# SCC wildcard_identifier #-} T.isTokenIdentifier t
wildCardMatch _ Keyword t = {-# SCC wildcard_keyword #-} T.isTokenKeyword t
wildCardMatch _ String t = {-# SCC wildcard_string #-} T.isTokenString t
wildCardMatch _ Literal t = {-# SCC wildcard_lit #-} T.isTokenString t
wildCardMatch _ Number t = {-# SCC wildcard_number #-} T.isTokenNumber t
wildCardMatch _ Oct t =
    {-# SCC wildcard_octal #-}
    T.isTokenNumber t && case C.unpack (C.take 2 (T.tToken t)) of
        ['0', d] -> isDigit d
        _ -> False
wildCardMatch _ Hex t =
    {-# SCC wildcard_hex #-}
    T.isTokenNumber t && case C.unpack (C.take 2 (T.tToken t)) of
        ['0', x] -> x == 'x' || x == 'X'
        _ -> False
240 | 


--------------------------------------------------------------------------------
/src/CGrep/Parser/Char.hs:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | 
 19 | module CGrep.Parser.Char (
 20 |     chr,
 21 |     ord,
 22 |     isDigit,
 23 |     isSpace,
 24 |     isHexDigit,
 25 |     isCharNumber,
 26 |     isAlphaNum,
 27 |     isAlpha,
 28 |     isAlphaNum_,
 29 |     isAlpha_,
 30 |     isAlpha_',
 31 |     isAlphaNum_',
 32 |     isBracket',
 33 |     isPunctuation,
 34 |     isAlpha_and,
 35 |     isAlphaNum_and,
 36 | ) where
 37 | 
 38 | import GHC.Base (chr#, int2Word#, isTrue#, leWord#)
 39 | import GHC.Exts (Char (C#), Int (I#), ord#)
 40 | 
 41 | ord :: Char -> Int
 42 | ord (C# c#) = I# (ord# c#)
 43 | {-# INLINE ord #-}
 44 | 
 45 | chr :: Int -> Char
 46 | chr i@(I# i#)
 47 |     | isTrue# (int2Word# i# `leWord#` 0x10FFFF##) = C# (chr# i#)
 48 |     | otherwise =
 49 |         errorWithoutStackTrace ("CGrep: chr bad argument: " <> show i)
 50 | {-# INLINE chr #-}
 51 | 
 52 | isDigit :: Char -> Bool
 53 | isDigit c = (fromIntegral (ord c - ord '0') :: Word) <= 9
 54 | {-# INLINE isDigit #-}
 55 | 
 56 | isSpace :: Char -> Bool
 57 | isSpace c = uc == 32 || uc == 0xa0 || (uc - 0x9 <= 4) && not ctrl
 58 |   where
 59 |     uc = ord c
 60 |     ctrl = uc == 2 || uc == 3
 61 | {-# INLINE isSpace #-}
 62 | 
 63 | isHexDigit :: Char -> Bool
 64 | isHexDigit c =
 65 |     isDigit c
 66 |         || (fromIntegral (ord c - ord 'A') :: Word) <= 5
 67 |         || (fromIntegral (ord c - ord 'a') :: Word) <= 5
 68 | {-# INLINE isHexDigit #-}
 69 | 
 70 | isCharNumber :: Char -> Bool
 71 | isCharNumber c = isHexDigit c || c `elem` (".xX" :: String)
 72 | {-# INLINE isCharNumber #-}
 73 | 
 74 | isAlphaNum :: Char -> Bool
 75 | isAlphaNum c =
 76 |     o >= 97 && o <= 122
 77 |         || o >= 65 && o <= 90
 78 |         || o >= 48 && o <= 57
 79 |   where
 80 |     o = ord c
 81 | {-# INLINE isAlphaNum #-}
 82 | 
 83 | isAlpha :: Char -> Bool
 84 | isAlpha c =
 85 |     o >= 97 && o <= 122
 86 |         || o >= 65 && o <= 90
 87 |   where
 88 |     o = ord c
 89 | {-# INLINE isAlpha #-}
 90 | 
 91 | isAlphaNum_ :: Char -> Bool
 92 | isAlphaNum_ c =
 93 |     o >= 97 && o <= 122
 94 |         || o >= 65 && o <= 90
 95 |         || o >= 48 && o <= 57
 96 |         || c == '_'
 97 |   where
 98 |     o = ord c
 99 | {-# INLINE isAlphaNum_ #-}
100 | 
-- | ASCII letter or underscore.
isAlpha_ :: Char -> Bool
isAlpha_ c = lower || upper || c == '_'
  where
    lower = 'a' <= c && c <= 'z'
    upper = 'A' <= c && c <= 'Z'
{-# INLINE isAlpha_ #-}
109 | 
-- | ASCII letter, underscore or apostrophe.  (The underscore is already
-- accepted by 'isAlpha_'.)
isAlpha_' :: Char -> Bool
isAlpha_' c = isAlpha_ c || c == '\''
{-# INLINE isAlpha_' #-}
113 | 
-- | ASCII letter, decimal digit, underscore or apostrophe.  (The underscore
-- is already accepted by 'isAlphaNum_'.)
isAlphaNum_' :: Char -> Bool
isAlphaNum_' c = isAlphaNum_ c || c == '\''
{-# INLINE isAlphaNum_' #-}
117 | 
-- | One of the six bracket characters @[ ] { } ( )@.
isBracket' :: Char -> Bool
isBracket' c = case c of
    '[' -> True
    ']' -> True
    '{' -> True
    '}' -> True
    '(' -> True
    ')' -> True
    _ -> False
{-# INLINE isBracket' #-}
121 | 
-- | One of the four punctuation characters @: ; , .@
isPunctuation :: Char -> Bool
isPunctuation c = case c of
    ':' -> True
    ';' -> True
    ',' -> True
    '.' -> True
    _ -> False
{-# INLINE isPunctuation #-}
125 | 
-- | ASCII letter, underscore, or any of the extra characters in @s@.
-- (The underscore is already accepted by 'isAlpha_'.)
isAlpha_and :: String -> Char -> Bool
isAlpha_and s c = isAlpha_ c || c `elem` s
{-# INLINE isAlpha_and #-}
129 | 
-- | ASCII letter, decimal digit, underscore, or any of the extra characters
-- in @s@.  (The underscore is already accepted by 'isAlphaNum_'.)
isAlphaNum_and :: String -> Char -> Bool
isAlphaNum_and s c = isAlphaNum_ c || c `elem` s
{-# INLINE isAlphaNum_and #-}
133 | 


--------------------------------------------------------------------------------
/src/CGrep/Parser/Chunk.hs:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | 
 19 | module CGrep.Parser.Chunk (
 20 |     parseChunks,
 21 |     Chunk (..),
 22 |     MatchLine (..),
 23 |     ChunkType,
 24 |     pattern ChunkIdentifier,
 25 |     pattern ChunkKeyword,
 26 |     pattern ChunkDigit,
 27 |     pattern ChunkBracket,
 28 |     pattern ChunkString,
 29 |     pattern ChunkNativeType,
 30 |     pattern ChunkOperator,
 31 |     pattern ChunkUnspec,
 32 | ) where
 33 | 
 34 | import CGrep.Parser.Char (
 35 |     isAlphaNum_,
 36 |     isAlpha_,
 37 |     isBracket',
 38 |     isCharNumber,
 39 |     isDigit,
 40 |     isSpace,
 41 |  )
 42 | 
 43 | import CGrep.FileTypeMap (FileTypeInfo (..))
 44 | import CGrep.Types (Offset, Text8)
 45 | import Data.List (genericLength)
 46 | 
 47 | import qualified Data.ByteString.Char8 as C
 48 | import qualified Data.ByteString.Internal as BI
 49 | import qualified Data.ByteString.Lazy as LB
 50 | 
 51 | import qualified ByteString.StrictBuilder as B
 52 | 
 53 | import Data.MonoTraversable (MonoFoldable (oforM_))
 54 | 
 55 | import Control.Monad.ST (ST, runST)
 56 | import Data.STRef (STRef, newSTRef, readSTRef, writeSTRef)
 57 | 
 58 | import Data.Maybe (fromMaybe)
 59 | import Data.Sequence (Seq (Empty, (:<|), (:|>)), (|>))
 60 | import qualified Data.Sequence as S
 61 | import Data.Word (Word8)
 62 | 
-- | The kind of a chunk, stored as a single byte.  The valid values are
-- exactly the @Chunk*@ pattern synonyms declared in this module.
newtype ChunkType = ChunkType {unChunkType :: Word8}
    deriving newtype (Eq, Ord)
 65 | 
 66 | instance Show ChunkType where
 67 |     show ChunkUnspec = "*"
 68 |     show ChunkIdentifier = "identifier"
 69 |     show ChunkKeyword = "keyword"
 70 |     show ChunkDigit = "digit"
 71 |     show ChunkBracket = "bracket"
 72 |     show ChunkOperator = "operator"
 73 |     show ChunkString = "string"
 74 |     show ChunkNativeType = "native-type"
 75 |     {-# INLINE show #-}
 76 | 
-- Pattern synonyms naming the eight 'ChunkType' values (bytes 0..7).

-- | No particular type (value 0).
pattern ChunkUnspec :: ChunkType
pattern ChunkUnspec = ChunkType 0

-- | Identifier (value 1).
pattern ChunkIdentifier :: ChunkType
pattern ChunkIdentifier = ChunkType 1

-- | Keyword (value 2).
pattern ChunkKeyword :: ChunkType
pattern ChunkKeyword = ChunkType 2

-- | Digit (value 3).
pattern ChunkDigit :: ChunkType
pattern ChunkDigit = ChunkType 3

-- | Bracket (value 4).
pattern ChunkBracket :: ChunkType
pattern ChunkBracket = ChunkType 4

-- | Operator (value 5).
pattern ChunkOperator :: ChunkType
pattern ChunkOperator = ChunkType 5

-- | String (value 6).
pattern ChunkString :: ChunkType
pattern ChunkString = ChunkType 6

-- | Native type (value 7).
pattern ChunkNativeType :: ChunkType
pattern ChunkNativeType = ChunkType 7

-- Tell the exhaustiveness checker that these eight synonyms cover the type.
{-# COMPLETE ChunkIdentifier, ChunkKeyword, ChunkDigit, ChunkBracket, ChunkOperator, ChunkString, ChunkNativeType, ChunkUnspec #-}
102 | 
-- | A piece of text extracted from the input together with its category
-- and position. All fields are strict and unpacked.
data Chunk = Chunk
    { -- | Category of the chunk.
      cTyp :: {-# UNPACK #-} !ChunkType
    , -- | The bytes making up the chunk.
      cToken :: {-# UNPACK #-} !Text8
    , -- | Offset of the chunk within the parsed text.
      cOffset :: {-# UNPACK #-} !Offset
    }
    deriving stock (Eq, Show, Ord)
109 | 
-- | A group of chunks paired with an offset.
-- NOTE(review): presumably the offset identifies the line the chunks were
-- matched on; confirm against the callers that build 'MatchLine' values.
data MatchLine = MatchLine
    { -- | Offset associated with this entry (strict, unpacked).
      lOffset :: {-# UNPACK #-} !Offset
    , -- | Chunks attached to this entry.
      lChunks :: [Chunk]
    }
    deriving stock (Eq, Show)
115 | 
-- | State of the chunk scanner, stored as a single byte.
-- The valid values are named by the @State*@ pattern synonyms.
newtype ChunkState = ChunkState {unChunkState :: Word8}
    deriving newtype (Eq, Ord)

-- | Human-readable name of each scanner state.
instance Show ChunkState where
    show st = case st of
        StateSpace -> "space"
        StateAlpha -> "alpha"
        StateDigit -> "digit"
        StateBracket -> "bracket"
        StateOther -> "other"
    {-# INLINE show #-}
126 | 
-- Named values of 'ChunkState'. Signatures are listed first, followed by
-- the numeric encoding of each state.
pattern StateSpace :: ChunkState
pattern StateAlpha :: ChunkState
pattern StateDigit :: ChunkState
pattern StateBracket :: ChunkState
pattern StateOther :: ChunkState

pattern StateSpace = ChunkState 0
pattern StateAlpha = ChunkState 1
pattern StateDigit = ChunkState 2
pattern StateBracket = ChunkState 3
pattern StateOther = ChunkState 4

-- The five synonyms above are declared to cover every 'ChunkState' for the
-- purposes of exhaustiveness checking.
{-# COMPLETE StateSpace, StateAlpha, StateDigit, StateBracket, StateOther #-}
143 | 
-- | Strict assignment: evaluates the value to weak head normal form and
-- then stores it in the reference.
(<~) :: STRef s a -> a -> ST s ()
ref <~ x = x `seq` writeSTRef ref x
{-# INLINE (<~) #-}
147 | 
-- | Splits a text into a sequence of untyped chunks (all tagged
-- 'ChunkUnspec'), each carrying the offset at which it starts.
--
-- The scanner walks the input byte by byte and groups:
--
--   * identifier runs (first character accepted by the "first" predicate,
--     following ones by the "rest" predicate);
--   * number runs (a leading @.@ immediately followed by a digit is merged
--     into the number);
--   * brackets, one chunk per bracket character;
--   * runs of any other non-space characters.
--
-- Whitespace separates chunks and is never emitted.
--
-- When the file type declares its own identifier character predicates they
-- are used; otherwise 'isAlpha_' / 'isAlphaNum_' are the defaults.
{-# INLINE parseChunks #-}
parseChunks :: Maybe FileTypeInfo -> Text8 -> S.Seq Chunk
parseChunks l t = runST $ case l >>= \FileTypeInfo{..} -> ftIdentifierChars of
    -- Use the language-specific identifier alphabet (the previous code bound
    -- these predicates but then ignored them in favour of the defaults).
    Just (isAlpha1, isAlphaN) -> parseChunks' isAlpha1 isAlphaN t
    _ -> parseChunks' isAlpha_ isAlphaNum_ t
  where
    parseChunks' :: (Char -> Bool) -> (Char -> Bool) -> C.ByteString -> ST s (S.Seq Chunk)
    parseChunks' isAlpha1 isAlphaN txt = do
        stateR <- newSTRef StateSpace
        offR <- newSTRef 0
        accR <- newSTRef (mempty :: B.Builder)
        tokensR <- newSTRef S.empty
        oforM_ txt $ \w -> do
            let x = BI.w2c w
            state <- readSTRef stateR
            off <- readSTRef offR
            acc <- readSTRef accR
            tokens <- readSTRef tokensR
            let
                -- Drop the accumulator and wait for the next chunk.
                blank = do stateR <~ StateSpace; accR <~ mempty
                -- Start a fresh accumulator holding only the current character.
                begin s = do stateR <~ s; accR <~ B.asciiChar x
                -- Add the current character to the running accumulator.
                extend s = do stateR <~ s; accR <~ (acc <> B.asciiChar x)
                -- Emit the chunk accumulated before the current character.
                flush = tokensR <~ (tokens |> toChunk off acc)
            case state of
                StateSpace ->
                    if
                        | isSpace x -> blank
                        | isAlpha1 x -> begin StateAlpha
                        | isDigit x -> begin StateDigit
                        | isBracket' x -> begin StateBracket
                        | otherwise -> begin StateOther
                StateAlpha ->
                    if
                        | isAlphaN x -> extend StateAlpha
                        | isSpace x -> blank >> flush
                        | isBracket' x -> begin StateBracket >> flush
                        | otherwise -> begin StateOther >> flush
                StateDigit ->
                    if
                        | isCharNumber x -> extend StateDigit
                        | isSpace x -> blank >> flush
                        | isAlpha1 x -> begin StateAlpha >> flush
                        | isBracket' x -> begin StateBracket >> flush
                        | otherwise -> begin StateOther >> flush
                StateBracket ->
                    -- A bracket is always a chunk of its own, so whatever
                    -- follows flushes it.
                    if
                        | isSpace x -> blank >> flush
                        | isAlpha1 x -> begin StateAlpha >> flush
                        | isDigit x -> begin StateDigit >> flush
                        | isBracket' x -> begin StateBracket >> flush
                        | otherwise -> begin StateOther >> flush
                StateOther ->
                    if
                        | isSpace x -> blank >> flush
                        | isAlpha1 x -> begin StateAlpha >> flush
                        | isDigit x ->
                            -- ".5" style literals: keep the dot with the number.
                            if B.builderBytes acc == "."
                                then extend StateDigit
                                else begin StateDigit >> flush
                        | isBracket' x -> begin StateBracket >> flush
                        | otherwise -> extend StateOther
            offR <~ (off + 1)

        lastAcc <- readSTRef accR
        tokens <- readSTRef tokensR

        -- Emit whatever is still pending once the input is exhausted.
        if B.builderLength lastAcc == 0
            then return tokens
            else do
                off <- readSTRef offR
                return $ tokens |> toChunk off lastAcc
215 | 
-- | Materialises an accumulated builder as an untyped chunk.
-- The first argument is the offset just past the accumulated bytes; the
-- chunk offset is obtained by subtracting the builder length from it.
toChunk :: Offset -> B.Builder -> Chunk
toChunk end builder =
    Chunk
        { cTyp = ChunkUnspec
        , cToken = B.builderBytes builder
        , cOffset = end - fromIntegral (B.builderLength builder)
        }
{-# INLINE toChunk #-}
221 | 


--------------------------------------------------------------------------------
/src/CGrep/Parser/Line.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module CGrep.Parser.Line (
20 |     getLineOffsets,
21 |     getAllLineOffsets,
22 |     getLineByOffset,
23 |     lowerBound,
24 | ) where
25 | 
26 | import qualified Data.ByteString.Char8 as C
27 | import qualified Data.ByteString.Lazy.Char8 as LC
28 | 
29 | import CGrep.Types (LText8, Offset, Text8)
30 | import Data.ByteString.Internal (c2w)
31 | import qualified Data.ByteString.Unsafe as BU
32 | import Data.Int (Int64)
33 | import Data.Vector.Unboxed ((!))
34 | import qualified Data.Vector.Unboxed as UV
35 | 
-- | Returns the offsets at which a given character occurs in a ByteString,
-- considering only positions below the given maximum offset.
--
-- The maximum offset is clamped to the length of the ByteString so that the
-- unchecked indexing below can never read past the end of the buffer.
charOffsets :: Char -> Int64 -> C.ByteString -> UV.Vector Int64
charOffsets c maxOff bs = UV.unfoldrN (fromIntegral limit) next 0
  where
    target = c2w c

    -- Exclusive upper bound on the positions that are inspected.
    limit :: Int64
    limit = min maxOff (fromIntegral (C.length bs))

    -- Scan forward from position i to the next occurrence of the target.
    next :: Int64 -> Maybe (Int64, Int64)
    next i
        | i >= limit = Nothing
        | BU.unsafeIndex bs (fromIntegral i) == target = Just (i, i + 1)
        | otherwise = next (i + 1)
45 | 
-- | Offsets at which lines start, scanning the first @maxOff@ bytes of the
-- text. The first entry is always 0; every newline at position @i@ adds the
-- entry @i + 1@.
--
-- When the text ends with a newline the scan produces a final entry equal
-- to the text length, i.e. the start of a line that does not exist. That
-- phantom entry is dropped. (The previous comparison against
-- @length - 1@ never matched the trailing-newline case and instead removed
-- the start of a genuine one-character last line.)
getLineOffsets :: Int64 -> Text8 -> UV.Vector Offset
getLineOffsets maxOff text
    | UV.null idx = idx
    | UV.last idx == fromIntegral (C.length text) = UV.init idx
    | otherwise = idx
  where
    idx = nlOffsets (fromIntegral maxOff) text
55 | 
-- | Unfolds the vector of line-start offsets, seeding the scan with -1 so
-- that the very first step yields offset 0. At most @limit@ entries are
-- produced.
{-# INLINE nlOffsets #-}
nlOffsets :: Int -> Text8 -> UV.Vector Int64
nlOffsets limit bytes = UV.unfoldrN limit step (-1)
  where
    step = findOffsets limit bytes
59 | 
-- | Single unfold step for the line-start scan.
--
-- A position of -1 is the initial seed and yields offset 0. Otherwise the
-- text is scanned from the given position up to (but excluding) @limit@;
-- on a newline the offset just after it is yielded and becomes the next
-- seed. Indexing is unchecked.
findOffsets :: Int -> Text8 -> Int -> Maybe (Int64, Int)
findOffsets limit ts !pos
    | pos == (-1) = Just (0, 0)
    | pos >= limit = Nothing
    | atNewline = Just (fromIntegral pos + 1, next)
    | otherwise = findOffsets limit ts next
  where
    next = pos + 1
    atNewline = BU.unsafeIndex ts pos == c2w '\n'
66 | 
-- | Line-start offsets for the whole text, i.e. 'getLineOffsets' with the
-- text length as the scan limit.
getAllLineOffsets :: Text8 -> UV.Vector Offset
getAllLineOffsets ts = getLineOffsets total ts
  where
    total = fromIntegral (C.length ts)
{-# INLINE getAllLineOffsets #-}
70 | 
-- | Binary search over the whole vector; see 'lowerBoundGo' for the result
-- that is returned.
lowerBound :: UV.Vector Int64 -> Int64 -> Int64
lowerBound vec v = lowerBoundGo vec v 0 lastIx
  where
    lastIx = UV.length vec - 1
73 | 
-- | Binary search between two inclusive indices.
--
-- Returns the searched value itself when it is present; otherwise the
-- element at the index where the search interval closed from the right, or
-- -1 when that index is negative. Indexing is unchecked.
lowerBoundGo :: UV.Vector Int64 -> Int64 -> Int -> Int -> Int64
lowerBoundGo vec v !lo !hi
    | lo > hi = if hi < 0 then -1 else UV.unsafeIndex vec hi
    | v < pivot = lowerBoundGo vec v lo (mid - 1)
    | v == pivot = pivot
    | otherwise = lowerBoundGo vec v (mid + 1) hi
  where
    mid = (lo + hi) `div` 2
    pivot = UV.unsafeIndex vec mid
84 | 
-- | Looks up the line for a given offset.
--
-- The offset vector is searched with 'lowerBound'; the result is the text
-- starting at the offset that was found and running up to (not including)
-- the next newline, paired with that offset.
--
-- 'C.takeWhile' is used instead of @head . C.lines@: both give the same
-- result on non-empty input, but the latter crashes when nothing is left
-- after dropping (e.g. empty text), whereas this returns an empty line.
getLineByOffset :: Offset -> Text8 -> UV.Vector Int64 -> (# Text8, Offset #)
getLineByOffset off text vec = (# C.takeWhile (/= '\n') (C.drop (fromIntegral lb) text), lb #)
  where
    lb = lowerBound vec off
{-# INLINE getLineByOffset #-}
90 | 


--------------------------------------------------------------------------------
/src/CGrep/Parser/Token.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE DuplicateRecordFields #-}
  2 | --
  3 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  4 | --
  5 | -- This program is free software; you can redistribute it and/or modify
  6 | -- it under the terms of the GNU General Public License as published by
  7 | -- the Free Software Foundation; either version 2 of the License, or
  8 | -- (at your option) any later version.
  9 | --
 10 | -- This program is distributed in the hope that it will be useful,
 11 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | -- GNU General Public License for more details.
 14 | --
 15 | -- You should have received a copy of the GNU General Public License
 16 | -- along with this program; if not, write to the Free Software
 17 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 18 | --
 19 | {-# LANGUAGE OverloadedRecordDot #-}
 20 | 
 21 | module CGrep.Parser.Token (
 22 |     parseTokens,
 23 |     filterToken,
 24 |     Token (..),
 25 |     TokenFilter (..),
 26 |     mkTokenFilter,
 27 |     eqToken,
 28 |     isTokenIdentifier,
 29 |     isTokenKeyword,
 30 |     isTokenNumber,
 31 |     isTokenBracket,
 32 |     isTokenString,
 33 |     isTokenOperator,
 34 |     isTokenUnspecified,
 35 |     tTyp,
 36 |     tToken,
 37 |     tOffset,
 38 |     mkTokenIdentifier,
 39 |     mkTokenKeyword,
 40 |     mkTokenDigit,
 41 |     mkTokenBracket,
 42 |     mkTokenString,
 43 |     mkTokenOperator,
 44 | ) where
 45 | 
 46 | import qualified Data.ByteString.Char8 as C
 47 | import qualified Data.ByteString.Internal as BI
 48 | import qualified Data.ByteString.Lazy as LB
 49 | import qualified Data.DList as DL
 50 | 
 51 | import CGrep.Parser.Char (
 52 |     chr,
 53 |     isAlphaNum_,
 54 |     isAlpha_,
 55 |     isBracket',
 56 |     isCharNumber,
 57 |     isDigit,
 58 |     isPunctuation,
 59 |     isSpace,
 60 |  )
 61 | 
 62 | import CGrep.Types (Offset, Text8)
 63 | import Data.List (genericLength)
 64 | 
 65 | import CGrep.FileTypeMap (
 66 |     CharIdentifierF,
 67 |     FileTypeInfo (ftIdentifierChars, ftKeywords),
 68 |     WordType (..),
 69 |  )
 70 | 
 71 | import qualified Data.HashMap.Strict as HM
 72 | import Data.Sequence (Seq (Empty, (:<|), (:|>)), (|>))
 73 | import qualified Data.Sequence as S
 74 | 
 75 | import Control.Monad.ST (ST, runST)
 76 | import Data.MonoTraversable (MonoFoldable (oforM_))
 77 | import Data.STRef (STRef, modifySTRef, modifySTRef', newSTRef, readSTRef, writeSTRef)
 78 | import Data.Word (Word8)
 79 | 
 80 | import qualified ByteString.StrictBuilder as B
 81 | import CGrep.Parser.Chunk
 82 | 
 83 | import Data.Coerce (coerce)
 84 | import Data.Text.Internal.Read (T)
 85 | import GHC.Exts (inline)
 86 | 
 87 | newtype TokenState = TokenState {unTokenState :: Int}
 88 |     deriving newtype (Eq)
 89 | 
 90 | instance Show TokenState where
 91 |     show StateSpace = "space"
 92 |     show StateIdentifier = "identifier"
 93 |     show StateDigit = "digit"
 94 |     show StateBracket = "bracket"
 95 |     show StateLiteral = "literal"
 96 |     show StateOther = "other"
 97 |     {-# INLINE show #-}
 98 | 
 99 | pattern StateSpace :: TokenState
100 | pattern StateSpace = TokenState 0
101 | 
102 | pattern StateIdentifier :: TokenState
103 | pattern StateIdentifier = TokenState 1
104 | 
105 | pattern StateDigit :: TokenState
106 | pattern StateDigit = TokenState 2
107 | 
108 | pattern StateBracket :: TokenState
109 | pattern StateBracket = TokenState 3
110 | 
111 | pattern StateLiteral :: TokenState
112 | pattern StateLiteral = TokenState 4
113 | 
114 | pattern StateOther :: TokenState
115 | pattern StateOther = TokenState 5
116 | 
-- | A typed token; a thin wrapper around 'Chunk'.
newtype Token = Token Chunk
    deriving newtype (Eq, Ord)

-- | Renders a token as @(type text \@offset)@.
instance Show Token where
    show (Token (Chunk typ bs off)) =
        concat ["(", show typ, " ", C.unpack bs, " @", show off, ")"]
    {-# INLINE show #-}
123 | 
-- | Equality on token text and token type only; offsets are ignored.
eqToken :: Token -> Token -> Bool
eqToken a b = (tToken a, tTyp a) == (tToken b, tTyp b)
{-# INLINE eqToken #-}
129 | 
-- Smart constructors: each one wraps the given text and offset in a token
-- of a fixed category.

-- | Builds an identifier token.
mkTokenIdentifier :: C.ByteString -> Offset -> Token
mkTokenIdentifier txt pos = Token Chunk{cTyp = ChunkIdentifier, cToken = txt, cOffset = pos}
{-# INLINE mkTokenIdentifier #-}

-- | Builds a keyword token.
mkTokenKeyword :: C.ByteString -> Offset -> Token
mkTokenKeyword txt pos = Token Chunk{cTyp = ChunkKeyword, cToken = txt, cOffset = pos}
{-# INLINE mkTokenKeyword #-}

-- | Builds a number token.
mkTokenDigit :: C.ByteString -> Offset -> Token
mkTokenDigit txt pos = Token Chunk{cTyp = ChunkDigit, cToken = txt, cOffset = pos}
{-# INLINE mkTokenDigit #-}

-- | Builds a bracket token.
mkTokenBracket :: C.ByteString -> Offset -> Token
mkTokenBracket txt pos = Token Chunk{cTyp = ChunkBracket, cToken = txt, cOffset = pos}
{-# INLINE mkTokenBracket #-}

-- | Builds an operator token.
mkTokenOperator :: C.ByteString -> Offset -> Token
mkTokenOperator txt pos = Token Chunk{cTyp = ChunkOperator, cToken = txt, cOffset = pos}
{-# INLINE mkTokenOperator #-}

-- | Builds a string-literal token.
mkTokenString :: C.ByteString -> Offset -> Token
mkTokenString txt pos = Token Chunk{cTyp = ChunkString, cToken = txt, cOffset = pos}
{-# INLINE mkTokenString #-}

-- | Builds a native-type token.
mkTokenNativeType :: C.ByteString -> Offset -> Token
mkTokenNativeType txt pos = Token Chunk{cTyp = ChunkNativeType, cToken = txt, cOffset = pos}
{-# INLINE mkTokenNativeType #-}
157 | 
-- | Classifies a word using the keyword table of the file type, if any.
-- A word found in the table becomes a keyword or native-type token
-- according to its entry; every other word (or any word when no file type
-- information is available) becomes an identifier token.
mkTokenFromWord :: Maybe FileTypeInfo -> C.ByteString -> Offset -> Token
mkTokenFromWord info txt off = case info >>= HM.lookup txt . ftKeywords of
    Just Keyword -> mkTokenKeyword txt off
    Just NativeType -> mkTokenNativeType txt off
    Nothing -> mkTokenIdentifier txt off
{-# INLINEABLE mkTokenFromWord #-}
167 | 
-- | Selects the token constructor that corresponds to a scanner state.
-- Only the identifier state consults the file type information.
mkToken :: Maybe FileTypeInfo -> TokenState -> C.ByteString -> Offset -> Token
mkToken info state = case state of
    StateIdentifier -> mkTokenFromWord info
    StateDigit -> mkTokenDigit
    StateBracket -> mkTokenBracket
    StateLiteral -> mkTokenString
    StateSpace -> mkTokenOperator
    StateOther -> mkTokenOperator
175 | 
-- Field accessors that look through the 'Token' wrapper.

-- | Category of the token.
tTyp :: Token -> ChunkType
tTyp (Token chunk) = cTyp chunk
{-# INLINE tTyp #-}

-- | Offset of the token.
tOffset :: Token -> Offset
tOffset (Token chunk) = cOffset chunk
{-# INLINE tOffset #-}

-- | Text of the token.
tToken :: Token -> Text8
tToken (Token chunk) = cToken chunk
{-# INLINE tToken #-}
187 | 
-- Category predicates: each one tests the token's type against a single
-- 'ChunkType' value.

-- | True for identifier tokens.
isTokenIdentifier :: Token -> Bool
isTokenIdentifier (Token chunk) = cTyp chunk == ChunkIdentifier
{-# INLINE isTokenIdentifier #-}

-- | True for keyword tokens.
isTokenKeyword :: Token -> Bool
isTokenKeyword (Token chunk) = cTyp chunk == ChunkKeyword
{-# INLINE isTokenKeyword #-}

-- | True for number tokens.
isTokenNumber :: Token -> Bool
isTokenNumber (Token chunk) = cTyp chunk == ChunkDigit
{-# INLINE isTokenNumber #-}

-- | True for bracket tokens.
isTokenBracket :: Token -> Bool
isTokenBracket (Token chunk) = cTyp chunk == ChunkBracket
{-# INLINE isTokenBracket #-}

-- | True for operator tokens.
isTokenOperator :: Token -> Bool
isTokenOperator (Token chunk) = cTyp chunk == ChunkOperator
{-# INLINE isTokenOperator #-}

-- | True for string-literal tokens.
isTokenString :: Token -> Bool
isTokenString (Token chunk) = cTyp chunk == ChunkString
{-# INLINE isTokenString #-}

-- | True for native-type tokens.
isTokenNativeType :: Token -> Bool
isTokenNativeType (Token chunk) = cTyp chunk == ChunkNativeType
{-# INLINE isTokenNativeType #-}

-- | True for tokens whose category is unspecified.
isTokenUnspecified :: Token -> Bool
isTokenUnspecified (Token chunk) = cTyp chunk == ChunkUnspec
{-# INLINE isTokenUnspecified #-}
219 | 
-- | Per-category switches telling which kinds of tokens are wanted.
-- A 'True' flag means tokens of that category pass 'filterToken'.
data TokenFilter = TokenFilter
    { -- | Accept identifiers.
      tfIdentifier :: !Bool
    , -- | Accept keywords.
      tfKeyword :: !Bool
    , -- | Accept native types.
      tfNativeType :: !Bool
    , -- | Accept string literals.
      tfString :: !Bool
    , -- | Accept numbers.
      tfNumber :: !Bool
    , -- | Accept operators.
      tfOperator :: !Bool
    , -- | Accept brackets.
      tfBracket :: !Bool
    }
    deriving stock (Eq, Show)
230 | 
-- | Tells whether a token's category is enabled in the filter.
-- Tokens of unspecified category are always rejected.
filterToken :: TokenFilter -> Token -> Bool
filterToken f (Token chunk) = case cTyp chunk of
    ChunkUnspec -> False
    ChunkIdentifier -> tfIdentifier f
    ChunkKeyword -> tfKeyword f
    ChunkNativeType -> tfNativeType f
    ChunkString -> tfString f
    ChunkDigit -> tfNumber f
    ChunkOperator -> tfOperator f
    ChunkBracket -> tfBracket f
241 | 
-- | Builds a filter that accepts exactly the listed token categories.
-- Starts from a filter with every flag off and switches on one flag per
-- category found in the container; 'ChunkUnspec' entries are ignored.
--
-- Only folding is required, so the container constraint is 'Foldable'
-- (every 'Traversable' is also 'Foldable', so existing callers are
-- unaffected).
mkTokenFilter :: (Foldable t) => t ChunkType -> TokenFilter
mkTokenFilter = foldr go (TokenFilter False False False False False False False)
  where
    go ChunkIdentifier f = f{tfIdentifier = True}
    go ChunkKeyword f = f{tfKeyword = True}
    go ChunkNativeType f = f{tfNativeType = True}
    go ChunkDigit f = f{tfNumber = True}
    go ChunkOperator f = f{tfOperator = True}
    go ChunkString f = f{tfString = True}
    go ChunkBracket f = f{tfBracket = True}
    go ChunkUnspec f = f
253 | 
-- | Strict assignment: evaluates the value to weak head normal form and
-- then stores it in the reference.
(<~) :: STRef s a -> a -> ST s ()
ref <~ x = x `seq` writeSTRef ref x
{-# INLINE (<~) #-}
257 | 
-- | A slice of the input described by where it starts and how long it is.
-- An offset of -1 with length 0 is what the 'Reset' operation writes.
data TokenIdx = TokenIdx
    { -- | Start position of the slice.
      offset :: {-# UNPACK #-} !Int
    , -- | Number of bytes in the slice.
      len :: {-# UNPACK #-} !Int
    }
262 | 
-- | Extracts the bytes a 'TokenIdx' refers to from the given text.
tkString :: TokenIdx -> C.ByteString -> C.ByteString
tkString (TokenIdx start count) bytes = C.take count (C.drop start bytes)
{-# INLINE tkString #-}
266 | 
-- | Operations applied to the running token slice; see '(<<~)'.
data AccOp
    = -- | Forget the current slice.
      Reset
    | -- | Begin a one-byte slice at the given position.
      Start {-# UNPACK #-} !Int
    | -- | Grow the current slice by one byte (or begin one at the given
      -- position when the slice is in the reset state).
      Append {-# UNPACK #-} !Int
268 | 
-- | Applies an accumulator operation to the slice held in the reference.
--
--   * 'Reset' stores the empty marker (offset -1, length 0).
--   * 'Start' stores a one-byte slice at the given position.
--   * 'Append' lengthens the stored slice by one, unless the stored value
--     is the empty marker, in which case a one-byte slice is started at the
--     given position.
(<<~) :: STRef s TokenIdx -> AccOp -> ST s ()
ref <<~ op = case op of
    Reset -> writeSTRef ref (TokenIdx (-1) 0)
    Start cur -> writeSTRef ref (TokenIdx cur 1)
    Append cur -> modifySTRef' ref (grow cur)
  where
    grow cur (TokenIdx (-1) 0) = TokenIdx cur 1
    grow _ (TokenIdx off n) = TokenIdx off (n + 1)
{-# INLINE (<<~) #-}
276 | 
-- | Tokenises a text into typed tokens.
--
-- The scanner walks the input byte by byte, tracking the current slice of
-- the input as a 'TokenIdx' and emitting a token whenever the category
-- changes. Identifier characters come from the file type when it declares
-- them, otherwise 'isAlpha_' / 'isAlphaNum_' are used. Bytes @chr 2@ and
-- @chr 3@ open and close a literal; the delimiters themselves are not part
-- of the literal token (NOTE(review): presumably they are inserted by an
-- earlier pass over the source; confirm with the caller).
--
-- A token whose category is disabled in the 'TokenFilter' is replaced by
-- the unspecified placeholder token rather than being omitted.
--
-- Fixes relative to the previous version:
--
--   * empty input no longer yields a spurious token (the accumulator now
--     starts in the same state that 'Reset' produces);
--   * a bracket following an operator starts its own slice instead of
--     extending the operator's slice;
--   * an operator followed by a literal is emitted as an operator (it was
--     built and filtered as a bracket).
{-# INLINE parseTokens #-}
parseTokens :: TokenFilter -> Maybe FileTypeInfo -> C.ByteString -> S.Seq Token
parseTokens f@TokenFilter{..} l t =
    runST
        ( case l >>= ftIdentifierChars of
            Nothing -> parseToken' isAlpha_ isAlphaNum_ l t
            Just (isAlpha1, isAlphaN) -> parseToken' isAlpha1 isAlphaN l t
        )
  where
    parseToken' :: CharIdentifierF -> CharIdentifierF -> Maybe FileTypeInfo -> C.ByteString -> ST a (S.Seq Token)
    parseToken' isAlpha1 isAlphaN info txt = do
        stateR <- newSTRef StateSpace
        -- Same value that 'Reset' writes, so the end-of-input check below
        -- sees "nothing pending" for empty input.
        accR <- newSTRef (TokenIdx (-1) 0)
        tokensR <- newSTRef S.empty
        curR <- newSTRef 0

        let
            -- Token builders for each category, already applied to the
            -- relevant filter flags and to the text being scanned.
            wordTok acc = buildToken_ tfIdentifier tfKeyword tfNativeType (mkTokenFromWord info) acc txt
            digitTok acc = buildToken tfNumber mkTokenDigit acc txt
            stringTok acc = buildToken tfString mkTokenString acc txt
            bracketTok acc = buildToken tfBracket mkTokenBracket acc txt
            operatorTok acc = buildToken tfOperator mkTokenOperator acc txt

        oforM_ txt $ \w -> do
            let x = BI.w2c w
            cur <- readSTRef curR
            state <- readSTRef stateR

            case state of
                StateSpace ->
                    {-# SCC "StateSpace" #-}
                    if
                        | isSpace x -> do accR <<~ Reset
                        | inline isAlpha1 x -> do stateR <~ StateIdentifier; accR <<~ Start cur
                        | x == chr 2 -> do stateR <~ StateLiteral; accR <<~ Reset
                        | isDigit x -> do stateR <~ StateDigit; accR <<~ Start cur
                        | isBracket' x -> do stateR <~ StateBracket; accR <<~ Start cur
                        | otherwise -> do stateR <~ StateOther; accR <<~ Start cur
                StateIdentifier ->
                    {-# SCC "StateIdentifier" #-}
                    if isAlphaN x
                        then accR <<~ Append cur
                        else do
                            acc <- readSTRef accR
                            tokens <- readSTRef tokensR
                            if
                                | isSpace x -> do stateR <~ StateSpace; accR <<~ Reset; tokensR <~ (tokens |> wordTok acc)
                                | x == chr 2 -> do stateR <~ StateLiteral; accR <<~ Reset; tokensR <~ (tokens |> wordTok acc)
                                | isBracket' x -> do stateR <~ StateBracket; accR <<~ Start cur; tokensR <~ (tokens |> wordTok acc)
                                | otherwise -> do stateR <~ StateOther; accR <<~ Start cur; tokensR <~ (tokens |> wordTok acc)
                StateDigit ->
                    {-# SCC "StateDigit" #-}
                    if isCharNumber x
                        then accR <<~ Append cur
                        else do
                            acc <- readSTRef accR
                            tokens <- readSTRef tokensR
                            if
                                | isSpace x -> do stateR <~ StateSpace; accR <<~ Reset; tokensR <~ (tokens |> digitTok acc)
                                | x == chr 2 -> do stateR <~ StateLiteral; accR <<~ Reset; tokensR <~ (tokens |> digitTok acc)
                                | inline isAlpha1 x -> do stateR <~ StateIdentifier; accR <<~ Start cur; tokensR <~ (tokens |> digitTok acc)
                                | isBracket' x -> do stateR <~ StateBracket; accR <<~ Start cur; tokensR <~ (tokens |> digitTok acc)
                                | otherwise -> do stateR <~ StateOther; accR <<~ Start cur; tokensR <~ (tokens |> digitTok acc)
                StateLiteral ->
                    {-# SCC "StateLiteral" #-}
                    if x == chr 3
                        then do
                            acc <- readSTRef accR
                            tokens <- readSTRef tokensR
                            stateR <~ StateSpace
                            accR <<~ Reset
                            tokensR <~ (tokens |> stringTok acc)
                        else do accR <<~ Append cur
                StateBracket ->
                    {-# SCC "StateBracket" #-}
                    do
                        -- Every bracket is a token of its own, so whatever
                        -- comes next flushes the pending bracket.
                        acc <- readSTRef accR
                        tokens <- readSTRef tokensR
                        if
                            | isSpace x -> do stateR <~ StateSpace; accR <<~ Reset; tokensR <~ (tokens |> bracketTok acc)
                            | inline isAlpha1 x -> do stateR <~ StateIdentifier; accR <<~ Start cur; tokensR <~ (tokens |> bracketTok acc)
                            | isDigit x -> do stateR <~ StateDigit; accR <<~ Start cur; tokensR <~ (tokens |> bracketTok acc)
                            | isBracket' x -> do accR <<~ Start cur; tokensR <~ (tokens |> bracketTok acc)
                            | x == chr 2 -> do stateR <~ StateLiteral; accR <<~ Reset; tokensR <~ (tokens |> bracketTok acc)
                            | otherwise -> do stateR <~ StateOther; accR <<~ Start cur; tokensR <~ (tokens |> bracketTok acc)
                StateOther ->
                    {-# SCC "StateOther" #-}
                    do
                        acc <- readSTRef accR
                        tokens <- readSTRef tokensR
                        if
                            | isSpace x -> do stateR <~ StateSpace; accR <<~ Reset; tokensR <~ (tokens |> operatorTok acc)
                            | inline isAlpha1 x -> do stateR <~ StateIdentifier; accR <<~ Start cur; tokensR <~ (tokens |> operatorTok acc)
                            | isDigit x ->
                                -- ".5" style literals: keep the dot with the number.
                                if tkString acc txt == "."
                                    then do stateR <~ StateDigit; accR <<~ Append cur
                                    else do stateR <~ StateDigit; accR <<~ Start cur; tokensR <~ (tokens |> operatorTok acc)
                            -- The bracket starts its own slice; appending here
                            -- would make the bracket token include the operator.
                            | isBracket' x -> do stateR <~ StateBracket; accR <<~ Start cur; tokensR <~ (tokens |> operatorTok acc)
                            -- The pending slice is an operator, so flush it as one.
                            | x == chr 2 -> do stateR <~ StateLiteral; accR <<~ Reset; tokensR <~ (tokens |> operatorTok acc)
                            | isPunctuation x -> do accR <<~ Start cur; tokensR <~ (tokens |> operatorTok acc)
                            | otherwise -> do accR <<~ Append cur

            curR <~ (cur + 1)

        lastAcc <- readSTRef accR
        tokens <- readSTRef tokensR

        -- Emit whatever is still pending once the input is exhausted.
        if lastAcc.len <= 0
            then return tokens
            else do
                state <- readSTRef stateR
                return $ tokens |> buildFilteredToken f (mkToken info state) lastAcc txt
383 | 
-- | Builds a token from a slice of the text and keeps it only when its
-- category passes the filter; otherwise the unspecified placeholder token
-- is returned.
buildFilteredToken :: TokenFilter -> (C.ByteString -> Offset -> Token) -> TokenIdx -> C.ByteString -> Token
buildFilteredToken tf f (TokenIdx start len) txt
    | filterToken tf candidate = candidate
    | otherwise = unspecifiedToken
  where
    candidate = f (subByteString start len txt) (fromIntegral start)
{-# INLINE buildFilteredToken #-}
391 | 
-- | Builds a token from a slice of the text when the flag is set; returns
-- the unspecified placeholder token when it is not.
buildToken :: Bool -> (C.ByteString -> Offset -> Token) -> TokenIdx -> C.ByteString -> Token
buildToken enabled f (TokenIdx start len) txt
    | enabled = f (subByteString start len txt) (fromIntegral start)
    | otherwise = unspecifiedToken
{-# INLINE buildToken #-}
396 | 
-- | Builds a word token from a slice of the text and keeps it when its
-- resulting category (identifier, keyword or native type) is enabled by the
-- corresponding flag; otherwise the unspecified placeholder token is
-- returned. The three flags are, in order: identifiers, keywords, native
-- types.
buildToken_ :: Bool -> Bool -> Bool -> (C.ByteString -> Offset -> Token) -> TokenIdx -> C.ByteString -> Token
buildToken_ i k t f (TokenIdx start len) txt
    | i && isTokenIdentifier tok = tok
    | k && isTokenKeyword tok = tok
    | t && isTokenNativeType tok = tok
    | otherwise = unspecifiedToken
  where
    tok = f (subByteString start len txt) (fromIntegral start)
404 | 
-- | Slice of a ByteString: skips the first @i@ bytes, then keeps at most
-- @n@ bytes.
subByteString :: Int -> Int -> C.ByteString -> C.ByteString
subByteString i n bytes = C.take n (C.drop i bytes)
{-# INLINE subByteString #-}
408 | 
-- | Placeholder token: unspecified category, empty text, offset 0.
-- Returned by the token builders for tokens whose category is filtered out.
unspecifiedToken :: Token
unspecifiedToken = Token Chunk{cTyp = ChunkUnspec, cToken = C.empty, cOffset = 0}
411 | 


--------------------------------------------------------------------------------
/src/CGrep/Search.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module CGrep.Search (
20 |     searchStringIndices,
21 |     searchStringTaggedIndices,
22 |     eligibleForSearch,
23 |     TaggedIx (..),
24 | ) where
25 | 
26 | import CGrep.Types (Text8)
27 | import Data.Int (Int64)
28 | import GHC.Exts (groupWith)
29 | 
30 | import qualified Data.ByteString.Char8 as C
31 | import qualified Data.ByteString.Search as BM
32 | import qualified Data.ByteString.Search.DFA as DFA
33 | 
34 | import qualified Data.ByteString.Lazy.Search as LBM
35 | import qualified Data.ByteString.Lazy.Search.DFA as LDFA
36 | import Data.List.Extra (notNull)
37 | 
-- | Positions at which a pattern occurs in a text.
-- Patterns of up to three bytes are searched with the DFA searcher, longer
-- ones with the Boyer-Moore searcher.
findIndices :: Text8 -> Text8 -> [Int]
findIndices p
    | C.length p <= 3 = DFA.indices p
    | otherwise = BM.indices p
{-# INLINE findIndices #-}
44 | 
-- | For every pattern, the list of positions at which it occurs in the
-- text. The result has one entry per pattern, in the same order.
searchStringIndices :: [Text8] -> Text8 -> [[Int64]]
searchStringIndices ps text = [map fromIntegral (findIndices p text) | p <- ps]
{-# INLINE searchStringIndices #-}
48 | 
-- | A position in the text together with the tags of the patterns that
-- matched there.
data TaggedIx a = TaggedIx
    { -- | Position of the match.
      index :: {-# UNPACK #-} !Int
    , -- | Tags attached to this position.
      tags :: [a]
    }
    deriving stock (Show)

-- | Equality considers the position only; tags are ignored.
instance Eq (TaggedIx a) where
    lhs == rhs = index lhs == index rhs

-- | Ordering considers the position only; tags are ignored.
instance Ord (TaggedIx a) where
    compare lhs rhs = compare (index lhs) (index rhs)
61 | -- >>> searchStringTaggedIndices [("a",2),("b",1),("a",0), ("he", 42)] "aheba"
62 | -- [TaggedIx {index = 0, tags = [2,0]},TaggedIx {index = 1, tags = [42]},TaggedIx {index = 3, tags = [1]},TaggedIx {index = 4, tags = [2,0]}]
63 | 
-- | Search @text@ for every tagged pattern and merge the hits by offset.
--
-- Each pattern is searched on its own with 'findIndices'.  Hits that share an
-- offset are fused into a single 'TaggedIx' whose tag list is the
-- concatenation of the tags found at that offset.  'groupWith' sorts the hits
-- by offset before grouping, so the result is in ascending offset order.
--
-- The comprehension binds each group with a non-empty list pattern, so no
-- partial function such as 'head' is needed to read the group's offset.
searchStringTaggedIndices :: [(Text8, a)] -> Text8 -> [TaggedIx a]
searchStringTaggedIndices ps text =
    [ TaggedIx (index firstHit) (concatMap tags grp)
    | grp@(firstHit : _) <- groupWith index hits
    ]
  where
    -- One entry, carrying a single tag, per occurrence of each pattern.
    hits = [TaggedIx i [tag] | (pat, tag) <- ps, i <- findIndices pat text]
77 | 
-- | Predicate over the per-pattern hit lists (the strategies pass its result
-- to @runSearch@).
--
-- When exactly one pattern is given, every hit list must be non-empty; for
-- any other number of patterns it is enough that one hit list is non-empty.
eligibleForSearch :: [a] -> [[Int64]] -> Bool
eligibleForSearch patterns hits = case patterns of
    [_] -> all notNull hits
    _ -> any notNull hits
{-# INLINE eligibleForSearch #-}
82 | 


--------------------------------------------------------------------------------
/src/CGrep/Strategy/BoyerMoore.hs:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | 
 19 | module CGrep.Strategy.BoyerMoore (search) where
 20 | 
 21 | import qualified Data.ByteString.Char8 as C
 22 | import qualified Data.ByteString.Lazy.Char8 as LC
 23 | 
 24 | import Control.Monad.IO.Class (MonadIO (liftIO))
 25 | import Control.Monad.Trans.Reader (ask, reader)
 26 | import Data.List (genericLength, isPrefixOf, isSuffixOf)
 27 | 
 28 | import CGrep.Common (
 29 |     Text8,
 30 |     expandMultiline,
 31 |     getTargetContents,
 32 |     getTargetName,
 33 |     ignoreCase,
 34 |  )
 35 | import CGrep.ContextFilter (mkContextFilter)
 36 | import CGrep.FileType (FileType)
 37 | import CGrep.FileTypeMap (FileTypeInfo, contextFilter, fileTypeLookup)
 38 | import CGrep.Output (Output, mkOutputElements, runSearch)
 39 | import CGrep.Search
 40 | import CGrep.Types (Offset)
 41 | 
 42 | import CGrep.Parser.Chunk
 43 | import Data.Int (Int64)
 44 | import Options (Options (prefix_match, suffix_match, word_match))
 45 | import Reader (Env (..), ReaderIO)
 46 | import Verbose (putMsgLnVerbose)
 47 | 
 48 | import System.IO (stderr)
 49 | import System.Posix.FilePath (RawFilePath)
 50 | 
 51 | import CGrep.Parser.Line (getLineByOffset, getLineOffsets)
 52 | import Data.Array (indices)
 53 | import qualified Data.Vector.Unboxed as UV
 54 | 
 55 | search :: Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output]
 56 | search info f patterns = do
 57 |     Env{..} <- ask
 58 | 
 59 |     text <- liftIO $ getTargetContents f
 60 | 
 61 |     let filename = getTargetName f
 62 | 
 63 |     -- transform text
 64 | 
 65 |     let ctxFilter = mkContextFilter opt
 66 | 
 67 |     let [text''', _, text', _] =
 68 |             scanr
 69 |                 ($)
 70 |                 text
 71 |                 [ expandMultiline opt
 72 |                 , contextFilter (fst <$> info) ctxFilter False
 73 |                 , ignoreCase opt
 74 |                 ]
 75 | 
 76 |     -- make shallow search
 77 | 
 78 |     let indices' = searchStringIndices patterns text'
 79 |     let indices''' = searchStringIndices patterns text'''
 80 | 
 81 |     -- search for matching tokens
 82 | 
 83 |     let ctor = Chunk ChunkUnspec
 84 | 
 85 |     let chunks = concat $ zipWith (\p xs -> (p `ctor`) <$> xs) patterns indices'''
 86 | 
 87 |     -- filter exact/partial matching tokens
 88 | 
 89 |     let lineOffsets = getLineOffsets (fromIntegral $ C.length text) text
 90 | 
 91 |     let chunks' =
 92 |             if word_match opt || prefix_match opt || suffix_match opt
 93 |                 then filter (checkChunk opt lineOffsets (snd <$> info) text''') chunks
 94 |                 else chunks
 95 | 
 96 |     putMsgLnVerbose 2 stderr $ "strategy  : running Boyer-Moore search on " <> filename
 97 |     putMsgLnVerbose 3 stderr $ "---\n" <> text''' <> "\n---"
 98 | 
 99 |     runSearch opt filename (eligibleForSearch patterns indices') $ do
100 |         putMsgLnVerbose 2 stderr $ "chunks'   : " <> show chunks'
101 |         mkOutputElements lineOffsets filename text text''' chunks'
102 | 
-- | Decide whether a substring hit satisfies the requested match mode.
--
-- The line for the chunk's offset is obtained with 'getLineByOffset', which
-- also yields an offset @off'@ (presumably the start of that line within
-- @text@ -- confirm against "CGrep.Parser.Line").  That line is re-parsed
-- with 'parseChunks' and the hit is compared with the parsed chunks:
--
-- * word match: a parsed chunk has the same token and sits at the same
--   position once @off'@ is subtracted from the hit's offset;
-- * prefix match: the hit's token is a prefix of a parsed chunk that starts
--   at the hit's position;
-- * suffix match: the hit's token is a suffix of a parsed chunk and the hit
--   is located at that chunk's tail.
--
-- With none of the three modes enabled there is nothing to enforce, so the
-- chunk is accepted.  The caller applies no filtering in that case either.
checkChunk :: Options -> UV.Vector Int64 -> Maybe FileTypeInfo -> Text8 -> Chunk -> Bool
checkChunk opt vec info text chunk
    | word_match opt =
        let !off = cOffset chunk - off'
         in any (\chunk' -> cOffset chunk' == off && cToken chunk' == cToken chunk) cs
    | prefix_match opt =
        any (\chunk' -> cToken chunk `C.isPrefixOf` cToken chunk' && cOffset chunk' + off' == cOffset chunk) cs
    | suffix_match opt =
        any
            ( \chunk' ->
                cToken chunk `C.isSuffixOf` cToken chunk'
                    && cOffset chunk' + off' + fromIntegral (C.length (cToken chunk') - C.length (cToken chunk)) == cOffset chunk
            )
            cs
    | otherwise = True
  where
    (# line', off' #) = getLineByOffset (cOffset chunk) text vec
    cs = parseChunks info line'
112 | 


--------------------------------------------------------------------------------
/src/CGrep/Strategy/Levenshtein.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module CGrep.Strategy.Levenshtein (search) where
20 | 
21 | import CGrep.Parser.Line (getAllLineOffsets)
22 | 
23 | import qualified Data.ByteString.Char8 as C
24 | 
25 | import Control.Monad.IO.Class (MonadIO (liftIO))
26 | import Control.Monad.Trans.Reader (ask, reader)
27 | 
28 | import CGrep.Common (
29 |     Text8,
30 |     expandMultiline,
31 |     getTargetContents,
32 |     getTargetName,
33 |     ignoreCase,
34 |  )
35 | import CGrep.ContextFilter (mkContextFilter)
36 | import CGrep.Distance ((~==))
37 | import CGrep.FileType (FileType)
38 | import CGrep.FileTypeMap (
39 |     FileTypeInfo,
40 |     contextFilter,
41 |     fileTypeLookup,
42 |  )
43 | import CGrep.Output (Output, mkOutputElements)
44 | import CGrep.Parser.Chunk (Chunk, cToken, parseChunks)
45 | 
46 | import Data.Foldable (Foldable (toList))
47 | import Reader (Env (..), ReaderIO)
48 | import System.IO (stderr)
49 | import System.Posix.FilePath (RawFilePath)
50 | import Verbose (putMsgLnVerbose)
51 | 
-- | Edit-distance search strategy for a single target.
--
-- The target text is passed through @ignoreCase opt@, @contextFilter@ and
-- @expandMultiline opt@ (in that order), parsed into chunks, and every chunk
-- whose token compares equal under '~==' to at least one pattern is reported.
search :: Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output]
search info f patterns = do
    Env{..} <- ask

    text <- liftIO $ getTargetContents f

    let filename = getTargetName f

        -- transform text

        ctxFilter = mkContextFilter opt
        ftype = fst <$> fileTypeLookup opt filename
        text''' = expandMultiline opt (contextFilter ftype ctxFilter False (ignoreCase opt text))

        -- parse the transformed text into chunks

        tokens' = parseChunks (snd <$> info) text'''

        -- keep the chunks that are close enough to some pattern

        patterns' = C.unpack <$> patterns
        closeToPattern t = any (~== C.unpack (cToken t)) patterns'
        matches = filter closeToPattern (toList tokens')

    putMsgLnVerbose 2 stderr $ "strategy  : running edit-distance (Levenshtein) search on " <> filename <> "..."
    putMsgLnVerbose 3 stderr $ "---\n" <> text''' <> "\n---"

    putMsgLnVerbose 2 stderr $ "tokens    : " <> show tokens'
    putMsgLnVerbose 2 stderr $ "matches   : " <> show matches

    let lineOffsets = getAllLineOffsets text

    mkOutputElements lineOffsets filename text text''' matches
91 | 


--------------------------------------------------------------------------------
/src/CGrep/Strategy/Regex.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module CGrep.Strategy.Regex (search) where
20 | 
21 | import qualified Data.ByteString.Char8 as C
22 | 
23 | import Control.Monad.IO.Class (MonadIO (liftIO))
24 | import Control.Monad.Trans.Reader (ask, reader)
25 | 
26 | import Text.Regex.Base (
27 |     AllTextMatches (getAllTextMatches),
28 |     MatchText,
29 |  )
30 | import Text.Regex.PCRE ((=~))
31 | import Text.Regex.Posix ((=~))
32 | 
33 | import Data.Array (Array, elems)
34 | 
35 | import CGrep.Common (
36 |     Text8,
37 |     expandMultiline,
38 |     getTargetContents,
39 |     getTargetName,
40 |     ignoreCase,
41 |  )
42 | import CGrep.ContextFilter (mkContextFilter)
43 | import CGrep.FileType (FileType)
44 | import CGrep.FileTypeMap (FileTypeInfo (..), contextFilter, fileTypeLookup)
45 | import CGrep.Output (Output, mkOutputElements)
46 | 
47 | import Options (Options (regex_pcre))
48 | import Reader (Env (..), ReaderIO)
49 | import Verbose (putMsgLnVerbose)
50 | 
51 | import CGrep.Parser.Chunk
52 | import CGrep.Parser.Line (getAllLineOffsets)
53 | 
54 | import System.IO (stderr)
55 | import System.Posix.FilePath (RawFilePath)
56 | 
-- | Regular-expression search strategy for a single target.
--
-- The target text is passed through @ignoreCase opt@, @contextFilter@ and
-- @expandMultiline opt@ (in that order).  Each pattern is then matched
-- against the result with the PCRE engine when @regex_pcre@ is set and with
-- the POSIX engine otherwise.  Every element of every match array becomes a
-- chunk of unspecified type located at the element's offset.
search :: Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output]
search info f patterns = do
    Env{..} <- ask

    text <- liftIO $ getTargetContents f

    let filename = getTargetName f

        -- transform text

        ctxFilter = mkContextFilter opt
        ftype = fst <$> fileTypeLookup opt filename
        text''' = expandMultiline opt (contextFilter ftype ctxFilter False (ignoreCase opt text))

        -- pick the regex engine

        (=~~~) = if regex_pcre opt then (Text.Regex.PCRE.=~) else (Text.Regex.Posix.=~)

        -- collect the matches of all patterns, pattern by pattern

        tokens =
            [ Chunk ChunkUnspec str (fromIntegral off)
            | p <- patterns
            , matched <- elems (getAllTextMatches (text''' =~~~ p) :: (Array Int) (MatchText Text8))
            , (str, (off, _)) <- elems matched
            ]

    putMsgLnVerbose 2 stderr $ "strategy  : running regex " <> (if regex_pcre opt then "(pcre)" else "(posix)") <> " search on " <> filename <> "..."
    putMsgLnVerbose 3 stderr $ "---\n" <> text''' <> "\n---"
    putMsgLnVerbose 2 stderr $ "tokens    : " <> show tokens

    let lineOffsets = getAllLineOffsets text

    mkOutputElements lineOffsets filename text text''' tokens
94 | 


--------------------------------------------------------------------------------
/src/CGrep/Strategy/Semantic.hs:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | 
 19 | module CGrep.Strategy.Semantic (search) where
 20 | 
 21 | import CGrep.Parser.Token
 22 | import qualified Data.ByteString.Char8 as C
 23 | 
 24 | import CGrep.Common (
 25 |     Text8,
 26 |     expandMultiline,
 27 |     getTargetContents,
 28 |     getTargetName,
 29 |     ignoreCase,
 30 |     subText,
 31 |     trim,
 32 |     trim8,
 33 |  )
 34 | import CGrep.ContextFilter (
 35 |     contextBitComment,
 36 |     mkContextFilter,
 37 |     (~!),
 38 |  )
 39 | 
 40 | import CGrep.Output (Output, mkOutputElements, runSearch)
 41 | import CGrep.Parser.Line (getAllLineOffsets)
 42 | import CGrep.Search (eligibleForSearch, searchStringIndices)
 43 | 
 44 | import CGrep.Parser.Atom (
 45 |     Atom (..),
 46 |     combineAtoms,
 47 |     filterTokensWithAtoms,
 48 |     mkAtomFromToken,
 49 |  )
 50 | 
 51 | import Control.Monad.IO.Class (MonadIO (liftIO))
 52 | import Control.Monad.Trans.Reader (ask, reader)
 53 | 
 54 | import Data.Function (on)
 55 | import Data.List (nub, sortBy)
 56 | import Data.Maybe (mapMaybe)
 57 | 
 58 | import CGrep.Parser.Chunk
 59 | import Reader (Env (..), ReaderIO)
 60 | import Util (rmQuote8)
 61 | import Verbose (putMsgLnVerbose)
 62 | 
 63 | import System.Posix.FilePath (RawFilePath, takeBaseName)
 64 | 
 65 | import CGrep.FileType (FileType)
 66 | import CGrep.FileTypeMap (
 67 |     FileTypeInfo,
 68 |     contextFilter,
 69 |     fileTypeLookup,
 70 |  )
 71 | import System.IO (stderr)
 72 | 
 73 | import Data.Coerce (coerce)
 74 | import Data.Foldable (Foldable (toList))
 75 | import qualified Data.Sequence as S
 76 | 
 77 | search :: Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output]
 78 | search info f ps = do
 79 |     Env{..} <- ask
 80 | 
 81 |     text <- liftIO $ getTargetContents f
 82 | 
 83 |     let filename = getTargetName f
 84 | 
 85 |     let [text''', _, text', _] =
 86 |             scanr
 87 |                 ($)
 88 |                 text
 89 |                 [ expandMultiline opt
 90 |                 , contextFilter (fst <$> fileTypeLookup opt filename) filt True
 91 |                 , ignoreCase opt
 92 |                 ]
 93 | 
 94 |         filt = mkContextFilter opt ~! contextBitComment
 95 | 
 96 |         -- pre-process patterns
 97 | 
 98 |         pfilter =
 99 |             TokenFilter
100 |                 { tfIdentifier = True
101 |                 , tfKeyword = True
102 |                 , tfNativeType = True
103 |                 , tfString = True
104 |                 , tfNumber = True
105 |                 , tfOperator = True
106 |                 , tfBracket = True
107 |                 }
108 | 
109 |         patterns = map (parseTokens pfilter (snd <$> info) . contextFilter (fst <$> fileTypeLookup opt filename) filt True) ps
110 |         patterns' = map (mkAtomFromToken <$>) patterns
111 |         patterns'' = map (combineAtoms . map (: [])) (toList <$> patterns')
112 | 
113 |         identifiers =
114 |             mapMaybe
115 |                 ( \case
116 |                     Raw (Token (Chunk ChunkString xs _)) -> Just (rmQuote8 $ trim8 xs)
117 |                     Raw (Token (Chunk ChunkIdentifier "OR" _)) -> Nothing
118 |                     Raw t -> Just (tToken t)
119 |                     _ -> Nothing
120 |                 )
121 |                 (concatMap toList patterns')
122 | 
123 |     -- put banners...
124 | 
125 |     putMsgLnVerbose 2 stderr $ "strategy  : running generic semantic search on " <> filename <> "..."
126 |     putMsgLnVerbose 2 stderr $ "atoms     : " <> show patterns'' <> " -> identifiers: " <> show identifiers
127 |     putMsgLnVerbose 3 stderr $ "---\n" <> text''' <> "\n---"
128 | 
129 |     let indices' = searchStringIndices identifiers text'
130 | 
131 |     runSearch opt filename (eligibleForSearch identifiers indices') $ do
132 |         -- parse source code, get the Generic.Chunk list...
133 | 
134 |         let tfilter = mkTokenFilter $ cTyp . coerce <$> concatMap toList patterns
135 | 
136 |         let tokens = toList $ parseTokens tfilter (snd <$> info) (subText indices' text''')
137 | 
138 |         -- get matching tokens ...
139 | 
140 |         let tokens' = sortBy (compare `on` tOffset) $ nub $ concatMap (\ms -> filterTokensWithAtoms opt ms tokens) patterns''
141 | 
142 |         -- convert Tokens to Chunks
143 | 
144 |         let matches = coerce tokens' :: [Chunk]
145 | 
146 |         putMsgLnVerbose 2 stderr $ "tokens    : " <> show tokens
147 |         putMsgLnVerbose 2 stderr $ "matches   : " <> show matches
148 | 
149 |         let lineOffsets = getAllLineOffsets text
150 | 
151 |         mkOutputElements lineOffsets filename text text''' matches
152 | 


--------------------------------------------------------------------------------
/src/CGrep/Strategy/Tokenizer.hs:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | 
 19 | module CGrep.Strategy.Tokenizer (search) where
 20 | 
 21 | import Control.Monad.IO.Class (MonadIO (liftIO))
 22 | import Control.Monad.Trans.Reader (ask, reader)
 23 | import qualified Data.ByteString.Char8 as C
 24 | 
 25 | import CGrep.Common (
 26 |     Text8,
 27 |     expandMultiline,
 28 |     getTargetContents,
 29 |     getTargetName,
 30 |     ignoreCase,
 31 |     subText,
 32 |  )
 33 | import CGrep.ContextFilter (
 34 |     contextBitComment,
 35 |     mkContextFilter,
 36 |     (~!),
 37 |  )
 38 | import CGrep.Distance ((~==))
 39 | import CGrep.Output (Output, mkOutputElements, runSearch)
 40 | 
 41 | import CGrep.Parser.Line
 42 | import CGrep.Parser.Token
 43 | 
 44 | import CGrep.FileType (FileType)
 45 | import CGrep.FileTypeMap (
 46 |     FileTypeInfo,
 47 |     contextFilter,
 48 |     fileTypeLookup,
 49 |  )
 50 | 
 51 | import CGrep.Search (eligibleForSearch, searchStringIndices)
 52 | import Data.List (isInfixOf, isPrefixOf, isSuffixOf)
 53 | 
 54 | import Options (
 55 |     Options (
 56 |         edit_dist,
 57 |         identifier,
 58 |         keyword,
 59 |         nativeType,
 60 |         number,
 61 |         operator,
 62 |         prefix_match,
 63 |         string,
 64 |         suffix_match,
 65 |         word_match
 66 |     ),
 67 |  )
 68 | import Reader (Env (..), ReaderIO)
 69 | import Verbose (putMsgLnVerbose)
 70 | 
 71 | import CGrep.Parser.Chunk (Chunk (..))
 72 | import System.IO (stderr)
 73 | import System.Posix.FilePath (RawFilePath)
 74 | 
 75 | import CGrep.Types (Offset)
 76 | import Data.Coerce (coerce)
 77 | import Data.Foldable (Foldable (toList))
 78 | 
 79 | import qualified Data.Sequence as S
 80 | import Util (mapMaybe')
 81 | 
 82 | search :: Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output]
 83 | search info f ps = do
 84 |     Env{..} <- ask
 85 | 
 86 |     text <- liftIO $ getTargetContents f
 87 | 
 88 |     let filename = getTargetName f
 89 | 
 90 |     -- transform text
 91 | 
 92 |     let filt = mkContextFilter opt ~! contextBitComment
 93 | 
 94 |     let [text''', _, text', _] =
 95 |             scanr
 96 |                 ($)
 97 |                 text
 98 |                 [ expandMultiline opt
 99 |                 , contextFilter (fst <$> fileTypeLookup opt filename) filt True
100 |                 , ignoreCase opt
101 |                 ]
102 | 
103 |     putMsgLnVerbose 2 stderr $ "strategy: running token search on " <> filename <> "..."
104 |     putMsgLnVerbose 3 stderr $ "---\n" <> text''' <> "\n---"
105 | 
106 |     let indices' = searchStringIndices ps text'
107 | 
108 |     runSearch opt filename (eligibleForSearch ps indices') $ do
109 |         -- parse source code, get the token list...
110 | 
111 |         let tfilter =
112 |                 TokenFilter
113 |                     { tfIdentifier = identifier opt
114 |                     , tfKeyword = keyword opt
115 |                     , tfNativeType = nativeType opt
116 |                     , tfString = string opt
117 |                     , tfNumber = number opt
118 |                     , tfOperator = operator opt
119 |                     , tfBracket = False
120 |                     }
121 | 
122 |         let tokens = {-# SCC tok_0 #-} parseTokens tfilter (snd <$> info) (subText indices' text''')
123 | 
124 |             -- filter tokens and make chunks
125 | 
126 |             matches = {-# SCC tok_3 #-} mapMaybe' (tokenizerFilter opt ps) tokens
127 | 
128 |         putMsgLnVerbose 2 stderr $ "tokens    : " <> show tokens
129 |         putMsgLnVerbose 2 stderr $ "matches   : " <> show matches
130 | 
131 |         let lineOffsets = getAllLineOffsets text
132 | 
133 |         mkOutputElements lineOffsets filename text text''' matches
134 | 
-- | Turn a token into a chunk when it is a match, 'Nothing' otherwise.
--
-- Tokens for which 'isTokenUnspecified' holds are rejected outright, without
-- consulting 'tokenPredicate'; any other token is kept exactly when
-- 'tokenPredicate' accepts it.
tokenizerFilter :: Options -> [C.ByteString] -> Token -> Maybe Chunk
tokenizerFilter opt patterns token =
    if not (isTokenUnspecified token) && tokenPredicate opt patterns token
        then Just (coerce token)
        else Nothing
{-# INLINE tokenizerFilter #-}
141 | 
-- | Does the token's text match at least one pattern under the active mode?
--
-- The first enabled option wins, in this order: edit distance ('~=='), whole
-- word (equality), prefix, suffix.  With none of them enabled a pattern
-- matches when it occurs anywhere inside the token text.
tokenPredicate :: Options -> [C.ByteString] -> Token -> Bool
tokenPredicate opt patterns token
    | edit_dist opt = any (\p -> C.unpack p ~== C.unpack str) patterns
    | word_match opt = str `elem` patterns
    | prefix_match opt = any (`C.isPrefixOf` str) patterns
    | suffix_match opt = any (`C.isSuffixOf` str) patterns
    | otherwise = any (`C.isInfixOf` str) patterns
  where
    -- text carried by the token under test
    str = tToken token
149 | 


--------------------------------------------------------------------------------
/src/CGrep/Types.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module CGrep.Types (
20 |     Offset,
21 |     Text8,
22 |     LText8,
23 | ) where
24 | 
25 | import Data.ByteString.Char8 as C (ByteString)
26 | import Data.ByteString.Lazy.Char8 as LC (ByteString)
27 | import Data.Int (Int64)
28 | 
-- | An offset, held as a signed 64-bit integer.
type Offset = Int64

-- | Strict 'C.ByteString' (from "Data.ByteString.Char8").
type Text8 = C.ByteString

-- | Lazy 'LC.ByteString' (from "Data.ByteString.Lazy.Char8").
type LText8 = LC.ByteString
32 | 


--------------------------------------------------------------------------------
/src/CmdOptions.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module CmdOptions (
20 |     options,
21 | ) where
22 | 
23 | import Data.Version (showVersion)
24 | import System.Console.CmdArgs (
25 |     CmdArgs,
26 |     Mode,
27 |     args,
28 |     cmdArgsMode,
29 |     explicit,
30 |     groupname,
31 |     help,
32 |     name,
33 |     program,
34 |     summary,
35 |     typ,
36 |     (&=),
37 |  )
38 | 
39 | import Options (Options (..))
40 | import Paths_cgrep (version)
41 | 
-- | Command-line description of cgrep, built with cmdargs.
--
-- Every field of 'Options' is given its default value together with the
-- cmdargs annotations ('help', 'name', 'groupname', ...) that define its
-- flag names and help text.  Non-flag arguments are collected in @others@.
--
-- NOTE: the @&=@ annotations are attached to the expressions exactly as they
-- are written here; keep each default inline with its annotations rather
-- than factoring it out into a shared binding.
--
-- Help texts: the @fileline@ entry refers to the @--vim@ / @--editor@ flags
-- (the @--edit@ flag is the edit-distance switch), and the @jobs@ entry reads
-- "Number of threads".
options :: Mode (CmdArgs Options)
options =
    cmdArgsMode $
        Options
            { file = "" &= typ "FILE" &= groupname "Pattern" &= help "Read PATTERNs from file (one per line)"
            , word_match = False &= help "Force word matching" &= explicit &= name "word" &= name "w"
            , prefix_match = False &= help "Force prefix matching" &= explicit &= name "prefix" &= name "p"
            , suffix_match = False &= help "Force suffix matching" &= explicit &= name "suffix" &= name "s"
            , edit_dist = False &= help "Use edit distance" &= explicit &= name "edit" &= name "e"
            , regex_posix = False &= help "Use regex matching (posix)" &= explicit &= name "G" &= name "regex"
            , regex_pcre = False &= help "Use regex matching (pcre)" &= explicit &= name "P" &= name "pcre"
            , ignore_case = False &= help "Ignore case distinctions"
            , code = False &= groupname "\nContext filters" &= help "Enable search in source code" &= explicit &= name "c" &= name "code"
            , comment = False &= help "Enable search in comments" &= explicit &= name "m" &= name "comment"
            , literal = False &= help "Enable search in string literals" &= explicit &= name "l" &= name "literal"
            , identifier = False &= groupname "\nToken filters" &= help "Identifiers" &= explicit &= name "identifier" &= name "name"
            , nativeType = False &= help "Native Types" &= explicit &= name "native" &= name "type"
            , keyword = False &= help "Keywords" &= explicit &= name "keyword"
            , number = False &= help "Literal numbers" &= explicit &= name "number"
            , string = False &= help "Literal strings" &= explicit &= name "string"
            , operator = False &= help "Operators" &= explicit &= name "op"
            , type_filter = [] &= groupname "\nFile filters" &= help "Specify file types. ie: Cpp, +Haskell, -Makefile"
            , kind_filter = [] &= help "Specify file kinds. Text, Config, Language, Data, Markup or Script"
            , code_only = False &= help "Parse code modules only (skip headers/interfaces)" &= explicit &= name "code-only"
            , hdr_only = False &= help "Parse headers/interfaces only (skip modules)" &= explicit &= name "hdr-only"
            , skip_test = False &= help "Skip files that have 'test' in the name" &= explicit &= name "skip-test" &= name "T"
            , prune_dir = [] &= help "Do not descend into dir" &= explicit &= name "prune-dir"
            , recursive = False &= help "Enable recursive search (don't follow symlinks)" &= explicit &= name "recursive" &= name "r"
            , follow = False &= help "Follow symlinks" &= explicit &= name "follow" &= name "L"
            , semantic = False &= groupname "\nSemantic" &= help "\"code\" pattern: _, _1, _2... (identifiers), $, $1, $2... (optionals), ANY, KEY, STR, LIT, NUM, HEX, OCT, OR" &= explicit &= name "S" &= name "semantic"
            , max_count = maxBound &= groupname "\nControl" &= help "Stop search in files after INT matches" &= explicit &= name "max-count"
            , type_force = Nothing &= help "Force the type of file" &= explicit &= name "force-type"
            , type_map = False &= help "List the supported file types" &= explicit &= name "type-list"
            , invert_match = False &= help "Select non-matching lines" &= explicit &= name "invert-match" &= name "v"
            , multiline = 1 &= help "Enable multi-line matching"
            , jobs = Nothing &= help "Number of threads to run in parallel" &= explicit &= name "threads" &= name "j"
            , show_match = False &= groupname "\nOutput format" &= help "Show list of matching tokens" &= explicit &= name "show-match"
            , color = False &= help "Use colors to highlight the match strings" &= explicit &= name "color"
            , no_color = False &= help "Do not use colors (override config file)" &= explicit &= name "no-color"
            , no_filename = False &= help "Suppress the file name prefix on output" &= explicit &= name "h" &= name "no-filename"
            , no_numbers = False &= help "Suppress both line and column numbers on output" &= explicit &= name "no-numbers"
            , no_column = False &= help "Suppress the column number on output" &= explicit &= name "no-column"
            , count = False &= help "Print only a count of matching lines per file" &= explicit &= name "count"
            , filename_only = False &= help "Print only the name of files containing matches" &= explicit &= name "filename-only"
            , vim = False &= help "Run vim editor passing the files that match" &= explicit &= name "vim"
            , editor = False &= help "Run the editor specified by EDITOR var., passing the files that match" &= explicit &= name "editor"
            , fileline = False &= help "When the vim or editor option is specified, pass the list of matching files in file:line format (e.g. vim 'file-line' plugin)" &= explicit &= name "fileline"
            , json = False &= help "Format output as json object" &= explicit &= name "json"
            , verbose = 0 &= groupname "\nMiscellaneous" &= help "Verbose level: 1, 2 or 3" &= explicit &= name "verbose"
            , no_shallow = False &= help "Disable shallow-search" &= explicit &= name "no-shallow"
            , show_palette = False &= help "Show color palette" &= explicit &= name "palette"
            , others = [] &= args
            }
            &= summary ("Cgrep " <> showVersion version <> ". Usage: cgrep [OPTION] [PATTERN] files...")
            &= program "cgrep"
97 | 


--------------------------------------------------------------------------------
/src/Config.hs:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | {-# LANGUAGE DeriveGeneric #-}
 19 | 
 20 | module Config (
 21 |     Config (..),
 22 |     dumpPalette,
 23 |     getConfig,
 24 | ) where
 25 | 
 26 | import Control.Monad (MonadPlus (mzero), filterM, forM_)
 27 | import System.Console.ANSI (
 28 |     Color (Blue, Cyan, Green, Magenta, Red, White, Yellow),
 29 |     ColorIntensity (Vivid),
 30 |     ConsoleIntensity (BoldIntensity),
 31 |     ConsoleLayer (Foreground),
 32 |     SGR (SetColor, SetConsoleIntensity),
 33 |     setSGRCode,
 34 |  )
 35 | import System.Directory (doesFileExist, getHomeDirectory)
 36 | 
 37 | import System.Console.ANSI.Types (
 38 |     Color (Blue, Cyan, Green, Magenta, Red, White, Yellow),
 39 |     ColorIntensity (Vivid),
 40 |     ConsoleIntensity (BoldIntensity),
 41 |     ConsoleLayer (Foreground),
 42 |     SGR (SetColor, SetConsoleIntensity, SetPaletteColor),
 43 |     xterm6LevelRGB,
 44 |  )
 45 | 
 46 | import Data.Aeson (FromJSON (parseJSON), (.!=), (.:?))
 47 | import Data.Maybe (fromMaybe, mapMaybe)
 48 | import qualified Data.Yaml as Y
 49 | 
 50 | import CGrep.FileType (FileType)
 51 | import GHC.Generics (Generic)
 52 | 
 53 | import qualified Data.ByteString as B
 54 | import qualified Data.ByteString.Char8 as C
 55 | import Data.ByteString.RawFilePath (RawFilePath)
 56 | import Data.List.Split (splitOn)
 57 | import System.FilePath ((</>))
 58 | 
 59 | import CGrep.FileKind (FileKind)
 60 | import Data.List.Extra (notNull)
 61 | import Text.Read (readMaybe)
 62 | 
 63 | cgreprc :: FilePath
 64 | cgreprc = "cgreprc" -- base name of the YAML configuration file that getConfig looks up
 65 | 
 66 | data Config = Config
 67 |     { configFileTypes :: [FileType] -- file types used by main when the command line selects none
 68 |     , configFileKinds :: [FileKind] -- file kinds used by main when no kind filter is given
 69 |     , configPruneDirs :: [RawFilePath] -- directories skipped by the recursive traversal (merged with --prune-dir)
 70 |     , configColors :: Bool -- turns colors on; main only applies it when stdout is a terminal
 71 |     , configColorFile :: [SGR] -- SGR codes built from the "color_filename" setting
 72 |     , configColorMatch :: [SGR] -- SGR codes built from the "color_match" setting
 73 |     , configFileLine :: Bool -- pass matches to the editor as file:line (same effect as --fileline)
 74 |     , configJobs :: Maybe Int -- "threads" setting; the command-line value takes precedence in main
 75 |     }
 76 |     deriving stock (Show, Read)
 77 | 
 78 | defaultConfig :: Config -- configuration returned by getConfig when no configuration file exists
 79 | defaultConfig =
 80 |     Config
 81 |         { configFileTypes = []
 82 |         , configFileKinds = []
 83 |         , configPruneDirs = []
 84 |         , configColors = False
 85 |         , configColorFile = [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Blue] -- bold, vivid blue
 86 |         , configColorMatch = [SetConsoleIntensity BoldIntensity] -- bold only
 87 |         , configFileLine = False
 88 |         , configJobs = Nothing
 89 |         }
 90 | 
 91 | mkConfig :: YamlConfig -> Config -- build the runtime Config from its parsed YAML form
 92 | mkConfig yc = Config
 93 |     { configFileTypes = mapMaybe readMaybe (yamlFileTypes yc) -- names that do not parse are dropped
 94 |     , configFileKinds = mapMaybe readMaybe (yamlFileKinds yc)
 95 |     , configPruneDirs = map C.pack (yamlPruneDirs yc)
 96 |     , configColors = yamlColors yc
 97 |     , configColorFile = maybe [] (fromMaybe [] . readColor) (yamlColorFileName yc) -- missing or unknown color: no SGR codes
 98 |     , configColorMatch = maybe [] (fromMaybe [] . readColor) (yamlColorMatch yc)
 99 |     , configFileLine = yamlFileLine yc
100 |     , configJobs = yamlJobs yc
101 |     }
102 | 
103 | data YamlConfig = YamlConfig
104 |     { yamlFileTypes :: [String] -- "file_types" key
105 |     , yamlFileKinds :: [String] -- "file_kinds" key
106 |     , yamlPruneDirs :: [String] -- "prune_dirs" key
107 |     , yamlColors :: Bool -- "colors" key
108 |     , yamlColorFileName :: Maybe String -- "color_filename" key; a color name or an "R:G:B" triple (see readColor)
109 |     , yamlColorMatch :: Maybe String -- "color_match" key; same syntax as "color_filename"
110 |     , yamlFileLine :: Bool -- "file_line" key
111 |     , yamlJobs :: Maybe Int -- "threads" key
112 |     }
113 |     deriving stock (Show, Generic)
114 | 
115 | instance Y.FromJSON YamlConfig where
116 |     parseJSON (Y.Object v) = -- every key is optional: a missing key takes the value given after (.!=)
117 |         YamlConfig -- NOTE: the order below must follow the field order of YamlConfig
118 |             <$> v .:? "file_types" .!= []
119 |             <*> v .:? "file_kinds" .!= []
120 |             <*> v .:? "prune_dirs" .!= []
121 |             <*> v .:? "colors" .!= False
122 |             <*> v .:? "color_filename" .!= Nothing
123 |             <*> v .:? "color_match" .!= Nothing
124 |             <*> v .:? "file_line" .!= False
125 |             <*> v .:? "threads" .!= Nothing
126 |     parseJSON _ = mzero -- any value that is not an object is rejected
127 | 
128 | -- Load the first configuration file that exists, together with its path; fall back to defaultConfig.
129 | getConfig :: IO (Config, Maybe FilePath)
130 | getConfig = do
131 |     home <- getHomeDirectory
132 |     -- candidates in order of precedence; a file that exists but fails to parse aborts the program
133 |     found <- filterM doesFileExist [cgreprc, "." <> cgreprc, home </> "." <> cgreprc, "/etc" </> cgreprc]
134 |     case found of
135 |         [] -> return (defaultConfig, Nothing)
136 |         path : _ ->
137 |             Y.decodeFileEither path
138 |                 >>= either (errorWithoutStackTrace . ("CGrep:" <>) . Y.prettyPrintParseException) (\yconf -> return (mkConfig yconf, Just path))
139 | 
140 | readColor :: String -> Maybe [SGR] -- parse a color name or an "R:G:B" triple (each level 0..5); Nothing when unrecognized
141 | readColor "Bold" = Just [SetConsoleIntensity BoldIntensity]
142 | readColor "Red" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Red]
143 | readColor "Green" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Green]
144 | readColor "Yellow" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Yellow]
145 | readColor "Blue" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Blue]
146 | readColor "Magenta" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Magenta]
147 | readColor "Cyan" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Cyan]
148 | readColor "White" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid White]
149 | readColor "Orange" = Just [SetConsoleIntensity BoldIntensity, SetPaletteColor Foreground $ xterm6LevelRGB 5 2 0]
150 | readColor "Acqua" = Just [SetConsoleIntensity BoldIntensity, SetPaletteColor Foreground $ xterm6LevelRGB 2 5 4]
151 | readColor xs = case traverse readMaybe (splitOn ":" xs) of -- readMaybe: a non-numeric component yields Nothing instead of a crash
152 |     Just [r, g, b] | all (\c -> c >= 0 && c <= 5) [r, g, b] -> Just [SetConsoleIntensity BoldIntensity, SetPaletteColor Foreground $ xterm6LevelRGB r g b] -- xterm6LevelRGB errors outside 0..5
153 |     _ -> Nothing
154 | 
155 | dumpPalette :: IO () -- print one line per color of the 6x6x6 xterm cube, labelled with its R:G:B triple
156 | dumpPalette =
157 |     forM_ [(r, g, b) | r <- levels, g <- levels, b <- levels] $ \(r, g, b) ->
158 |         putStrLn (setSGRCode [SetConsoleIntensity BoldIntensity, SetPaletteColor Foreground $ xterm6LevelRGB r g b] <> "COLOR " <> show r <> ":" <> show g <> ":" <> show b <> setSGRCode [])
159 |   where levels = [0 .. 5]
160 | 


--------------------------------------------------------------------------------
/src/Main.hs:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | 
 19 | module Main where
 20 | 
 21 | import qualified Codec.Binary.UTF8.String as UC
 22 | import qualified Data.ByteString.Char8 as C
 23 | 
 24 | import Data.Char (toLower)
 25 | import Data.List (elemIndex, genericLength, isInfixOf, isPrefixOf, isSuffixOf, nub, partition, sort, union, (\\))
 26 | import Data.Maybe (catMaybes)
 27 | import Data.Version (showVersion)
 28 | 
 29 | import Control.Monad (void, when)
 30 | import Control.Monad.Trans.Reader (ReaderT (runReaderT), ask)
 31 | 
 32 | import qualified Data.Map as M
 33 | import GHC.Conc (getNumCapabilities, setNumCapabilities)
 34 | import GHC.IO.Handle (hIsTerminalDevice)
 35 | 
 36 | import System.Console.CmdArgs (cmdArgsRun)
 37 | import System.Environment (withArgs)
 38 | import System.Exit (exitSuccess)
 39 | import System.IO (stderr, stdin, stdout)
 40 | 
 41 | import CGrep.Common (trim8)
 42 | import CGrep.FileType (readKindList, readTypeList)
 43 | import CGrep.FileTypeMap (dumpFileTypeInfoMap, fileTypeInfoMap)
 44 | import CGrep.Parser.Atom (wildCardMap)
 45 | 
 46 | import CmdOptions (options)
 47 | import Config (
 48 |     Config (configColors, configFileKinds, configFileTypes, configJobs),
 49 |     dumpPalette,
 50 |     getConfig,
 51 |  )
 52 | import Options (Options (..))
 53 | import Paths_cgrep (version)
 54 | import Verbose (putMsgLnVerbose)
 55 | 
 56 | import Reader (Env (..), ReaderIO)
 57 | import Search (isRegexp, parallelSearch)
 58 | import System.Posix.FilePath (RawFilePath)
 59 | import Util (partitionM)
 60 | 
 61 | import Control.Applicative (Alternative ((<|>)))
 62 | import Data.Functor (void, ($>))
 63 | import Data.List.Extra (notNull)
 64 | 
 65 | main :: IO () -- entry point: read config and options, resolve patterns/paths/filters, then run the parallel search
 66 | main = do
 67 |     -- check whether this is a terminal device
 68 |     isTermIn <- hIsTerminalDevice stdin
 69 |     isTermOut <- hIsTerminalDevice stdout
 70 | 
 71 |     -- read config options
 72 |     (conf, _) <- getConfig
 73 | 
 74 |     -- read command-line options; when stdout is a terminal, colors configured in the config file are enabled too
 75 |     opt@Options{..} <-
 76 |         ( if isTermOut
 77 |                 then \o -> o{color = color o || configColors conf}
 78 |                 else id
 79 |             )
 80 |             <$> cmdArgsRun options
 81 | 
 82 |     -- check for multiple backends... NOTE(review): only json is listed, so the list has at most one element and this guard never fires
 83 |     when (length (catMaybes [if json then Just "" else Nothing]) > 1) $
 84 |         errorWithoutStackTrace "CGrep: you can use one back-end at time!"
 85 | 
 86 |     -- display the file type map and exit...
 87 |     when type_map $
 88 |         dumpFileTypeInfoMap fileTypeInfoMap >> exitSuccess
 89 | 
 90 |     -- display color palette and exit...
 91 |     when show_palette $
 92 |         dumpPalette >> exitSuccess
 93 | 
 94 |     -- no arguments, no pattern file and stdin is a terminal: re-run the option parser with --help
 95 |     when (null others && isTermIn && null file) $
 96 |         withArgs ["--help"] $
 97 |             void (cmdArgsRun options)
 98 | 
 99 |     let others' = C.pack <$> others
100 | 
101 |     -- load patterns: from the positional arguments, or from the pattern file when one is given
102 |     patterns <-
103 |         if null file
104 |             then pure $ readPatternsFromCommandLine others'
105 |             else readPatternsFromFile (C.pack file)
106 | 
107 |     let patterns' = map (if ignore_case then ic else id) patterns -- lower-case the patterns when ignoring case
108 |           where
109 |             ic
110 |                 | (not . isRegexp) opt && semantic = C.unwords . map (\p -> if p `elem` wildCardTokens then p else C.map toLower p) . C.words -- wildcard tokens keep their case
111 |                 | otherwise = C.map toLower
112 |               where
113 |                 wildCardTokens = "OR" : M.keys wildCardMap -- "OR" is not included in wildCardMap
114 | 
115 |     -- display the configuration in use
116 | 
117 |     -- when (isJust confpath) $
118 |     --    hPutStrLn stderr $ showBold opt ("Using '" <> fromJust confpath <> "' configuration file...")
119 | 
120 |     -- load files to parse (with a pattern file, every positional argument is a path):
121 |     let paths = getFilePaths (notNull file) others'
122 | 
123 |     -- parse cmd line file type list:
124 |     let (l0, l1, l2) = readTypeList type_filter
125 | 
126 |     -- file types and kinds enabled: the config values are used when the command line gives none
127 |     let types = (if null l0 then configFileTypes conf else l0 `union` l1) \\ l2
128 |         kinds = if null kind_filter then configFileKinds conf else readKindList kind_filter
129 | 
130 |     runReaderT
131 |         ( do
132 |             putMsgLnVerbose 1 stderr $ "cgrep " <> showVersion version <> "!"
133 |             putMsgLnVerbose 1 stderr $ "File types: " <> show type_filter -- NOTE(review): prints the raw filter, not the resolved `types`
134 |             putMsgLnVerbose 1 stderr $ "File kinds: " <> show kinds
135 |         )
136 |         (Env conf opt)
137 | 
138 |     -- number of jobs: command line first, then config; one extra capability is requested (Search forks the traversal on capability 0)
139 |     cap <- case jobs <|> configJobs conf of
140 |         (Just j) -> setNumCapabilities (j + 1) $> j
141 |         Nothing -> getNumCapabilities
142 | 
143 |     -- run search
144 |     runReaderT (parallelSearch paths patterns' types kinds isTermIn) (Env conf opt{jobs = Just cap})
145 | 
146 | readPatternsFromFile :: RawFilePath -> IO [C.ByteString] -- one pattern per line of the file, each passed through trim8
147 | readPatternsFromFile f | C.null f = pure [] -- empty file name: no patterns
148 | readPatternsFromFile f = fmap trim8 . C.lines <$> C.readFile (C.unpack f)
149 | 
150 | -- Patterns among the positional arguments: all those before the first ":", or just the first argument.
151 | readPatternsFromCommandLine :: [C.ByteString] -> [C.ByteString]
152 | readPatternsFromCommandLine args = case break (== ":") args of
153 |     (_, []) -> take 1 args -- no ":" separator (or no arguments at all)
154 |     (pats, _) -> pats
155 | 
156 | getFilePaths :: Bool -> [RawFilePath] -> [RawFilePath]
157 | getFilePaths True args = args -- patterns come from a file: every argument is a path
158 | getFilePaths False args = case break (== ":") args of
159 |     (_, _ : rest) -> rest -- the paths follow the first ":" separator
160 |     _ -> drop 1 args -- no separator: the first argument is the pattern


--------------------------------------------------------------------------------
/src/Options.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | {-# LANGUAGE DeriveDataTypeable #-}
19 | 
20 | module Options where
21 | 
22 | import Data.Data (Data, Typeable)
23 | 
24 | data Options = Options
25 |     -- Pattern:
26 |     { file :: String -- file to read the patterns from; empty means patterns come from the arguments
27 |     , word_match :: Bool
28 |     , prefix_match :: Bool
29 |     , suffix_match :: Bool
30 |     , edit_dist :: Bool
31 |     , regex_posix :: Bool
32 |     , regex_pcre :: Bool
33 |     , ignore_case :: Bool -- main lower-cases the patterns when set
34 |     , -- Context:
35 |       code :: Bool
36 |     , comment :: Bool
37 |     , literal :: Bool
38 |     , -- Token filters:
39 |       identifier :: Bool
40 |     , nativeType :: Bool
41 |     , keyword :: Bool
42 |     , number :: Bool
43 |     , string :: Bool
44 |     , operator :: Bool
45 |     , -- File filters:
46 |       type_filter :: [String] -- file type selection, parsed by readTypeList in main
47 |     , kind_filter :: [String] -- file kind selection, parsed by readKindList in main
48 |     , code_only :: Bool
49 |     , hdr_only :: Bool
50 |     , skip_test :: Bool -- skip files that have 'test' in the name
51 |     , prune_dir :: [FilePath] -- directories the search does not descend into
52 |     , recursive :: Bool -- recursive search (does not follow symlinks)
53 |     , follow :: Bool -- follow symlinks
54 |     , -- Semantic:
55 |       semantic :: Bool -- "code" patterns with wildcards (_, $, ANY, KEY, STR, ...)
56 |     , -- Control:
57 |       max_count :: Int -- stop searching a file after this many matches
58 |     , type_force :: Maybe String -- force the type of file
59 |     , type_map :: Bool -- list the supported file types and exit
60 |     , invert_match :: Bool -- select non-matching lines
61 |     , multiline :: Int -- multi-line matching
62 |     , jobs :: Maybe Int -- number of threads to run in parallel
63 |     , -- Output format:
64 |       show_match :: Bool -- show the list of matching tokens
65 |     , color :: Bool -- highlight the matches with colors
66 |     , no_color :: Bool -- do not use colors (overrides the config file)
67 |     , no_filename :: Bool -- suppress the file name prefix
68 |     , no_numbers :: Bool -- suppress both line and column numbers
69 |     , no_column :: Bool -- suppress the column number
70 |     , count :: Bool -- print only a count of matching lines per file
71 |     , filename_only :: Bool -- print only the names of files containing matches
72 |     , json :: Bool -- format the output as a json object
73 |     , vim :: Bool -- run vim on the files that match
74 |     , editor :: Bool -- run the editor named by the EDITOR variable on the files that match
75 |     , fileline :: Bool -- pass the matches to the editor in file:line format
76 |     , -- Misc:
77 |       verbose :: Int -- verbose level: 1, 2 or 3
78 |     , no_shallow :: Bool -- disable shallow-search
79 |     , show_palette :: Bool -- show the color palette and exit
80 |     , others :: [String] -- positional arguments: patterns and files
81 |     }
82 |     deriving stock (Data, Typeable, Show)
83 | 


--------------------------------------------------------------------------------
/src/Reader.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module Reader where
20 | 
21 | import Control.Monad.Trans.Reader (ReaderT)
22 | 
23 | import Config (Config)
24 | import Options (Options)
25 | 
26 | data Env = Env -- read-only environment carried by ReaderIO
27 |     { conf :: Config -- settings loaded from the configuration file (or the defaults)
28 |     , opt :: Options -- parsed command-line options
29 |     }
30 | 
31 | type ReaderIO = ReaderT Env IO -- IO actions with access to the shared Env
32 | 


--------------------------------------------------------------------------------
/src/Search.hs:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
  3 | --
  4 | -- This program is free software; you can redistribute it and/or modify
  5 | -- it under the terms of the GNU General Public License as published by
  6 | -- the Free Software Foundation; either version 2 of the License, or
  7 | -- (at your option) any later version.
  8 | --
  9 | -- This program is distributed in the hope that it will be useful,
 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | -- GNU General Public License for more details.
 13 | --
 14 | -- You should have received a copy of the GNU General Public License
 15 | -- along with this program; if not, write to the Free Software
 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 | --
 18 | --
 19 | 
 20 | module Search (
 21 |     parallelSearch,
 22 |     isRegexp,
 23 | ) where
 24 | 
 25 | import Data.Function (fix)
 26 | import Data.List (elemIndex, intersperse, isPrefixOf, isSuffixOf, partition)
 27 | import Data.List.Split (chunksOf)
 28 | import qualified Data.Map as M
 29 | import Data.Maybe (catMaybes, fromJust, fromMaybe, isJust)
 30 | import qualified Data.Set as S
 31 | 
 32 | import Control.Concurrent (MVar, forkIO, forkOn, putMVar, threadDelay)
 33 | import Control.Exception as E (SomeException, catch)
 34 | 
 35 | import Control.Applicative (
 36 |     Alternative ((<|>)),
 37 |     Applicative (liftA2),
 38 |  )
 39 | import Control.Monad (forM, forM_, forever, replicateM_, unless, void, when)
 40 | import Control.Monad.Trans (MonadIO (liftIO))
 41 | import Control.Monad.Trans.Except (runExceptT, throwE)
 42 | import Control.Monad.Trans.Reader (
 43 |     ReaderT (runReaderT),
 44 |     ask,
 45 |     local,
 46 |     reader,
 47 |  )
 48 | 
 49 | import System.Environment (lookupEnv)
 50 | import System.IO (
 51 |     BufferMode (BlockBuffering),
 52 |     hPutStrLn,
 53 |     hSetBinaryMode,
 54 |     hSetBuffering,
 55 |     stderr,
 56 |     stdin,
 57 |     stdout,
 58 |  )
 59 | import System.PosixCompat.Files as PC (
 60 |     FileStatus,
 61 |     getFileStatus,
 62 |     getSymbolicLinkStatus,
 63 |     isDirectory,
 64 |  )
 65 | 
 66 | import System.Process (runProcess, waitForProcess)
 67 | 
 68 | import CGrep.Common (Text8, getTargetName, takeN)
 69 | import CGrep.Output (
 70 |     Output (..),
 71 |     putOutputElements,
 72 |     showFileName,
 73 |  )
 74 | import Config (
 75 |     Config (
 76 |         Config,
 77 |         configColorFile,
 78 |         configColorMatch,
 79 |         configColors,
 80 |         configFileLine,
 81 |         configFileTypes,
 82 |         configPruneDirs
 83 |     ),
 84 |     dumpPalette,
 85 |  )
 86 | import Options (Options (..))
 87 | import Reader (
 88 |     Env (..),
 89 |     ReaderIO,
 90 |  )
 91 | 
 92 | import qualified Data.ByteString as B
 93 | import qualified Data.ByteString.Builder as B
 94 | import qualified Data.ByteString.Builder.Extra as B
 95 | 
 96 | import qualified Codec.Binary.UTF8.String as UC
 97 | import qualified Data.ByteString.Char8 as C
 98 | import qualified Data.ByteString.Lazy.Char8 as LB
 99 | 
100 | import qualified Data.Bifunctor
101 | import Data.Tuple.Extra ()
102 | 
103 | import Data.Vector ((!))
104 | import qualified Data.Vector as V hiding ((!))
105 | 
106 | import Control.Concurrent.Chan.Unagi.Bounded (
107 |     newChan,
108 |     readChan,
109 |     writeChan,
110 |  )
111 | import Data.IORef (
112 |     IORef,
113 |     atomicModifyIORef,
114 |     atomicModifyIORef',
115 |     modifyIORef,
116 |     modifyIORef',
117 |     newIORef,
118 |     readIORef,
119 |  )
120 | 
121 | import Control.Arrow (Arrow ((&&&)))
122 | import Control.Concurrent.Async (Async, async, asyncOn, forConcurrently, forConcurrently_, mapConcurrently_, wait)
123 | import Data.Functor (void, ($>), (<&>))
124 | import RawFilePath.Directory (doesDirectoryExist)
125 | import System.Directory (canonicalizePath, makeAbsolute)
126 | import System.Posix.Directory.Foreign (dtDir)
127 | import System.Posix.Directory.Traversals (getDirectoryContents)
128 | import System.Posix.FilePath (RawFilePath, takeBaseName, (</>))
129 | 
130 | import qualified CGrep.Strategy.BoyerMoore as BoyerMoore
131 | import qualified CGrep.Strategy.Levenshtein as Levenshtein
132 | import qualified CGrep.Strategy.Regex as Regex
133 | import qualified CGrep.Strategy.Semantic as Semantic
134 | import qualified CGrep.Strategy.Tokenizer as Tokenizer
135 | import Control.Monad.Catch (MonadCatch (catch), SomeException)
136 | import Control.Monad.IO.Class (MonadIO (liftIO))
137 | 
138 | import CGrep.FileType (FileType)
139 | import CGrep.FileTypeMap (
140 |     FileTypeInfo,
141 |     fileTypeInfoLookup,
142 |     fileTypeLookup,
143 |  )
144 | 
145 | import CGrep.FileKind (FileKind)
146 | import Control.Concurrent.MVar (newMVar, takeMVar)
147 | import Control.Monad.Loops (whileM_)
148 | import Data.IORef.Extra (atomicWriteIORef')
149 | import qualified Data.List.NonEmpty as NE (unzip)
150 | import Verbose (putMsgLn, putMsgLnVerbose)
151 | 
152 | withRecursiveContents ::
153 |     Options -> -- command-line options (file filter, skip_test)
154 |     RawFilePath -> -- directory to list
155 |     [FileType] -> -- file types handed to fileFilter
156 |     [FileKind] -> -- file kinds handed to fileFilter
157 |     [RawFilePath] -> -- directories that must not be entered
158 |     S.Set RawFilePath -> -- directories already visited on the way down to this one
159 |     IORef Int -> -- shared counter of directory walks (see incrRef / decrRef)
160 |     ([RawFilePath] -> IO ()) -> -- action applied to each batch of accepted files
161 |     IO ()
162 | withRecursiveContents opt@Options{..} dir fTypes fKinds pdirs visited walkers action = do
163 |     xs <- getDirectoryContents dir
164 |     let (dirs, files) = partition ((== dtDir) . fst) xs
165 | 
166 |     -- keep the files accepted by fileFilter; with skip_test, also drop those rejected by isNotTestFile
167 |     let files' = (dir </>) . snd <$> filter (\f -> fileFilter opt fTypes fKinds (snd f) && (not skip_test || isNotTestFile (snd f))) files
168 |     let dirs' = (dir </>) . snd <$> dirs
169 | 
170 |     -- run the IO action on batches of at most 8 files
171 |     mapM_ action (chunksOf 8 files')
172 | 
173 |     -- process directories recursively: concurrently while the counter is below 64, sequentially otherwise
174 |     foreach <-
175 |         readIORef walkers >>= \tot -> do
176 |             if tot < 64
177 |                 then pure (forConcurrently_ @[])
178 |                 else pure forM_
179 | 
180 |     foreach dirs' $ \dirPath -> do
181 |         unless (isPrunableDir dirPath pdirs) $
182 |             -- Not pruned: descend into it, unless it was already visited.
183 |             -- The directory is identified by its absolute path, and `visited`
184 |             -- holds only the directories between the search root and `dir`:
185 |             -- every recursive call receives its own extended copy of the set.
186 | 
187 |             -- NOTE(review): sibling branches do not share `visited`, so this
188 |             -- presumably guards against cycles rather than against reaching
189 |             -- the same directory twice through different branches -- confirm.
190 | 
191 |             -- The walkers counter is bumped with incrRef before recursing and
192 |             -- the callee calls decrRef on its last line.
193 | 
194 |             -- (the caller of the top-level walk does the first incrRef itself)
195 |             makeRawAbsolute dirPath >>= \cpath ->
196 |                 unless (cpath `S.member` visited) $
197 |                     incrRef walkers
198 |                         *> withRecursiveContents opt dirPath fTypes fKinds pdirs (S.insert cpath visited) walkers action
199 | 
200 |     decrRef walkers
201 | 
202 | parallelSearch :: [RawFilePath] -> [C.ByteString] -> [FileType] -> [FileKind] -> Bool -> ReaderIO ()
203 | parallelSearch paths patterns fTypes fKinds isTermIn = do
204 |     Env{..} <- ask
205 | 
206 |     let Config{..} = conf
207 |         Options{..} = opt
208 | 
209 |     let multiplier = 4
210 |         jobs' = fromMaybe 1 jobs
211 |         totalJobs = jobs' * multiplier
212 | 
213 |     -- create channels ...
214 |     fileCh <- liftIO $ newChan 65536
215 | 
216 |     -- recursively traverse the filesystem ...
217 |     _ <- liftIO . forkOn 0 $ do
218 |         walkers <- newIORef (0 :: Int)
219 |         if recursive || follow
220 |             then forM_ (if null paths then ["."] else paths) $ \p ->
221 |                 doesDirectoryExist p >>= \case
222 |                     True ->
223 |                         incrRef walkers
224 |                             *> withRecursiveContents
225 |                                 opt
226 |                                 p
227 |                                 fTypes
228 |                                 fKinds
229 |                                 (mkPrunableDirName <$> configPruneDirs <> (C.pack <$> prune_dir))
230 |                                 (S.singleton p)
231 |                                 walkers
232 |                                 ( do
233 |                                     writeChan (fst fileCh)
234 |                                 )
235 |                     _ -> writeChan (fst fileCh) [p]
236 |             else
237 |                 forM_
238 |                     ( if null paths && not isTermIn
239 |                         then [("", 0)]
240 |                         else paths `zip` [0 ..]
241 |                     )
242 |                     (\(p, idx) -> writeChan (fst fileCh) [p])
243 | 
244 |         -- enqueue EOF messages...
245 |         when (verbose > 0) $ putMsgLn @Text8 stderr "filesystem traversal completed!"
246 |         replicateM_ totalJobs $ writeChan (fst fileCh) []
247 | 
248 |     -- launch the worker threads...
249 |     matchingFiles <- liftIO $ newIORef S.empty
250 | 
251 |     let env = Env conf opt
252 |         runSearch = getSearcher env
253 | 
254 |     workers <- forM ([0 .. totalJobs - 1] :: [Int]) $ \idx -> do
255 |         let processor = 1 + idx `div` multiplier
256 |         liftIO . asyncOn processor $ void . runExceptT $ do
257 |             asRef <- liftIO $ newIORef ([] :: [Async ()])
258 |             forever $ do
259 |                 fs <- liftIO $ readChan (snd fileCh)
260 |                 liftIO $
261 |                     E.catch
262 |                         ( case fs of
263 |                             [] -> liftIO $ readIORef asRef >>= mapM_ wait
264 |                             fs ->
265 |                                 runReaderT
266 |                                     ( do
267 |                                         out <-
268 |                                             catMaybes
269 |                                                 <$> forM
270 |                                                     fs
271 |                                                     ( \f -> do
272 |                                                         out' <- take max_count <$> runSearch (fileTypeInfoLookup opt f) f patterns
273 |                                                         when (vim || editor) $
274 |                                                             liftIO $
275 |                                                                 mapM_ (modifyIORef matchingFiles . S.insert . (outFilePath &&& outLineNumb)) out'
276 |                                                         putOutputElements out'
277 |                                                     )
278 |                                         unless (null out) $
279 |                                             liftIO $
280 |                                                 async
281 |                                                     ( do
282 |                                                         let !dump = LB.toStrict $ B.toLazyByteString (mconcat ((<> B.char8 '\n') <$> out))
283 |                                                         B.hPut stdout dump
284 |                                                     )
285 |                                                     >>= \a -> modifyIORef' asRef (a :)
286 |                                     )
287 |                                     env
288 |                         )
289 |                         ( \e ->
290 |                             let msg = show (e :: SomeException)
291 |                              in C.hPutStrLn stderr (showFileName conf opt (getTargetName (head fs)) <> ": error: " <> C.pack (takeN 120 msg))
292 |                         )
293 |                 when (null fs) $ do
294 |                     when (verbose > 0) $ putMsgLn stderr $ "[" <> C.pack (show idx) <> "]@" <> C.pack (show processor) <> " searcher done!"
295 |                     throwE ()
296 | 
297 |     -- wait for the workers to complete the job
298 |     liftIO $ mapM_ wait workers
299 | 
300 |     -- run editor...
301 |     when (vim || editor) $ liftIO $ do
302 |         editor' <-
303 |             if vim
304 |                 then return (Just "vim")
305 |                 else lookupEnv "EDITOR"
306 | 
307 |         files <- S.toList <$> readIORef matchingFiles
308 |         let filesUnpacked = Data.Bifunctor.first C.unpack <$> files
309 | 
310 |         let editFiles =
311 |                 ( if fileline || configFileLine
312 |                     then fmap (\(a, b) -> a <> ":" <> show b)
313 |                     else fmap fst
314 |                 )
315 |                     filesUnpacked
316 | 
317 |         putStrLn $ "cgrep: open files " <> unwords editFiles <> "..."
318 | 
319 |         void $
320 |             runProcess
321 |                 (fromJust $ editor' <|> Just "vi")
322 |                 editFiles
323 |                 Nothing
324 |                 Nothing
325 |                 (Just stdin)
326 |                 (Just stdout)
327 |                 (Just stderr)
328 |                 >>= waitForProcess
329 | 
330 | -- | Pick the search strategy implied by the options: regex first, then semantic,
331 | -- then tokenizer filters, then edit distance, and plain Boyer-Moore otherwise.
332 | getSearcher :: Env -> (Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output])
333 | getSearcher Env{..}
334 |     | isRegexp opt = Regex.search
335 |     | semantic opt = Semantic.search
336 |     | hasTokenizerOpt opt = Tokenizer.search
337 |     | edit_dist opt = Levenshtein.search
338 |     | otherwise = BoyerMoore.search
339 | 
340 | makeRawAbsolute :: RawFilePath -> IO RawFilePath
341 | makeRawAbsolute = fmap C.pack . makeAbsolute . C.unpack -- goes through String for makeAbsolute
342 | {-# INLINE makeRawAbsolute #-}
343 | 
344 | incrRef :: IORef Int -> IO ()
345 | incrRef ref = atomicModifyIORef' ref bump where bump n = (n + 1, ()) -- atomic +1
346 | {-# INLINE incrRef #-}
347 | 
348 | decrRef :: IORef Int -> IO ()
349 | decrRef ref = atomicModifyIORef' ref drop1 where drop1 n = (n - 1, ()) -- atomic -1
350 | {-# INLINE decrRef #-}
351 | 
352 | fileFilter :: Options -> [FileType] -> [FileKind] -> RawFilePath -> Bool
353 | fileFilter opt fTypes fKinds filename =
354 |     case fileTypeLookup opt filename of
355 |         Just (typ, kin) -> accepts fTypes typ && accepts fKinds kin -- an empty filter list accepts anything
356 |         Nothing -> False -- a failed lookup never passes the filter
357 |   where accepts allowed x = null allowed || x `elem` allowed
358 | 
359 | isNotTestFile :: RawFilePath -> Bool
360 | isNotTestFile f = not (any (`C.isSuffixOf` base) ["_test", "-test"] || any (`C.isPrefixOf` base) ["test-", "test_"] || base == "test")
361 |   where
362 |     base = takeBaseName f -- only the result of takeBaseName is inspected
363 | {-# INLINE isNotTestFile #-}
364 | 
365 | isPrunableDir :: RawFilePath -> [RawFilePath] -> Bool
366 | isPrunableDir dir =
367 |     let normalized = mkPrunableDirName dir -- directory name ending in "/"
368 |      in any (\suffix -> suffix `C.isSuffixOf` normalized)
369 | {-# INLINE isPrunableDir #-}
370 | 
371 | -- | Return the directory name guaranteed to end with a "/" separator;
372 | -- names that already end with one are returned unchanged.
373 | mkPrunableDirName :: RawFilePath -> RawFilePath
374 | mkPrunableDirName xs = if "/" `C.isSuffixOf` xs then xs else xs <> "/"
375 | {-# INLINE mkPrunableDirName #-}
376 | 
377 | (.!.) :: V.Vector a -> Int -> a
378 | (.!.) v i = v ! wrapped where wrapped = i `mod` V.length v -- index wraps modulo the vector length
379 | {-# INLINE (.!.) #-}
380 | 
381 | hasFileType :: RawFilePath -> Options -> [FileType] -> Bool
382 | hasFileType path opt xs = maybe False ((`elem` xs) . fst) (fileTypeLookup opt path) -- failed lookup => False
383 | {-# INLINE hasFileType #-}
384 | 
385 | -- | True when any of the identifier, nativeType, keyword, number, string or
386 | -- operator flags is set in the options.
387 | hasTokenizerOpt :: Options -> Bool
388 | hasTokenizerOpt Options{..} =
389 |     or
390 |         [ identifier, nativeType, keyword
391 |         , number, string, operator
392 |         ]
393 | 
394 | isRegexp :: Options -> Bool
395 | isRegexp Options{..} = regex_posix || regex_pcre -- true when either regex flag is set
396 | {-# INLINE isRegexp #-}
397 | 


--------------------------------------------------------------------------------
/src/Util.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module Util where
20 | 
21 | import Data.Char (toLower)
22 | import Data.Maybe (listToMaybe)
23 | 
24 | import qualified Data.ByteString.Char8 as C
25 | import Data.Sequence (Seq (Empty, (:<|), (:|>)), (|>))
26 | import qualified Data.Sequence as S
27 | import Text.Read (readMaybe)
28 | 
29 | partitionM :: (Monad m) => (a -> m Bool) -> [a] -> m ([a], [a])
30 | partitionM f = foldr step (return ([], []))
31 |   where
32 |     step x rest = do
33 |         keep <- f x -- the predicate runs before the remainder of the list is processed
34 |         (\(yes, no) -> if keep then (x : yes, no) else (yes, x : no)) <$> rest
35 | {-# INLINE partitionM #-}
36 | 
37 | xor :: Bool -> Bool -> Bool
38 | a `xor` b = a /= b -- exclusive or: true when exactly one operand is true
39 | {-# INLINE xor #-}
40 | 
41 | prettyRead :: (Read a) => String -> String -> a
42 | prettyRead xs err = maybe failure id (readMaybe xs)
43 |   where
44 |     -- only the first 40 characters of the offending input are echoed back
45 |     failure = errorWithoutStackTrace $ err <> ": parse error near '" <> take 40 xs <> "'"
46 | 
47 | spanGroup :: Int -> [a] -> [[a]]
48 | spanGroup _ [] = []
49 | spanGroup 1 xs = [[x] | x <- xs]
50 | spanGroup n xs@(_ : rest) = take n xs : spanGroup n rest -- one window per start position
51 | {-# INLINE spanGroup #-}
52 | 
53 | -- | Windows of at most n elements, one per start position; n == 1 now yields singletons like spanGroup (was [xs]).
54 | spanGroupSeq :: Int -> S.Seq a -> [S.Seq a]
55 | spanGroupSeq _ S.Empty = []
56 | spanGroupSeq n xs@(_ :<| rest) = S.take n xs : spanGroupSeq n rest
57 | {-# INLINE spanGroupSeq #-}
58 | 
59 | -- | Strip one pair of matching quotes (double or single) that wraps the
60 | -- whole string. Anything else -- the empty string, a single character, or
61 | -- text whose first and last characters are not the same quote -- is
62 | -- returned unchanged.
63 | rmQuote :: String -> String
64 | rmQuote (x : xs@(_ : _))
65 |     | x == '"' || x == '\''
66 |     , x == last xs = init xs
67 | rmQuote y = y
68 | {-# INLINE rmQuote #-}
69 | 
70 | -- | ByteString counterpart of 'rmQuote': drop one pair of matching quotes
71 | -- wrapping the whole value; inputs shorter than two bytes pass through.
72 | rmQuote8 :: C.ByteString -> C.ByteString
73 | rmQuote8 b
74 |     | C.length b >= 2, (q == '"' || q == '\''), q == C.last b = C.init (C.tail b)
75 |     | otherwise = b
76 |   where q = C.head b
77 | {-# INLINE rmQuote8 #-}
78 | 
79 | -- | Like 'Data.Maybe.mapMaybe', but accepts any Foldable container and
80 | -- always produces a list; elements mapped to Nothing are dropped.
81 | mapMaybe' :: (Foldable f) => (a -> Maybe b) -> f a -> [b]
82 | mapMaybe' f = foldr keep []
83 |   where
84 |     keep x acc = maybe acc (: acc) (f x)
85 | 
86 | findWithIndex :: forall a. (a -> Bool) -> [a] -> (# Int, Maybe a #)
87 | findWithIndex predicate = go 0
88 |   where
89 |     -- the index component is 0 when nothing matches, so inspect the Maybe first
90 |     go _ [] = (# 0, Nothing #)
91 |     go !index (x : xs)
92 |         | predicate x = (# index, Just x #)
93 |         | otherwise = go (index + 1) xs


--------------------------------------------------------------------------------
/src/Verbose.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2013-2023 Nicola Bonelli <nicola@larthia.com>
 3 | --
 4 | -- This program is free software; you can redistribute it and/or modify
 5 | -- it under the terms of the GNU General Public License as published by
 6 | -- the Free Software Foundation; either version 2 of the License, or
 7 | -- (at your option) any later version.
 8 | --
 9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | -- GNU General Public License for more details.
13 | --
14 | -- You should have received a copy of the GNU General Public License
15 | -- along with this program; if not, write to the Free Software
16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | --
18 | 
19 | module Verbose where
20 | 
21 | import Control.Monad.Trans.Reader ( reader )
22 | import Control.Monad.IO.Class ( MonadIO(liftIO) )
23 | import Control.Monad ( when )
24 | 
25 | import Options ( Options(verbose) )
26 | import Reader ( ReaderIO, Env(..) )
27 | 
28 | import qualified Data.ByteString as C (hPutStr, hPut)
29 | import GHC.IO.Handle ( Handle )
30 | import System.IO ( Handle, hPutStrLn, hPutStr )
31 | import Data.String ( IsString )
32 | 
33 | import qualified Data.ByteString.Char8 as C
34 | import qualified Data.Text as T
35 | import qualified Data.Text.IO as T
36 | 
37 | 
38 | class (IsString a) => PutStr a where -- string-like types that can be written to a Handle
39 |     putStringLn :: Handle -> a -> IO () -- the instances in this module map this to an hPutStrLn variant
40 |     putString :: Handle -> a -> IO () -- the instances in this module map this to an hPutStr variant
41 | 
42 | instance PutStr String where -- String output via System.IO
43 |     putStringLn = hPutStrLn
44 |     putString = hPutStr
45 | 
46 | instance PutStr C.ByteString where -- strict ByteString output via the ByteString hPut* functions
47 |     putStringLn = C.hPutStrLn
48 |     putString = C.hPutStr
49 | 
50 | instance PutStr T.Text where -- Text output via Data.Text.IO
51 |     putStringLn = T.hPutStrLn
52 |     putString = T.hPutStr
53 | 
54 | 
55 | -- | Write the message to the handle only when the configured verbosity is at least l.
56 | putMsgLnVerbose :: (PutStr a) => Int -> Handle -> a -> ReaderIO ()
57 | putMsgLnVerbose l h xs =
58 |     reader (verbose . opt) >>= \level ->
59 |         when (level >= l) (liftIO (putStringLn h xs))
60 | {-# INLINE putMsgLnVerbose #-}
61 | 
62 | 
63 | -- | Unconditionally write the message via putStringLn, lifted into any MonadIO.
64 | putMsgLn :: (PutStr a, MonadIO m) => Handle -> a -> m ()
65 | putMsgLn h = liftIO . putStringLn h
66 | {-# INLINE putMsgLn #-}


--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
 1 | # This file was automatically generated by 'stack init'
 2 | #
 3 | # Some commonly used options have been documented as comments in this file.
 4 | # For advanced use and comprehensive documentation of the format, please see:
 5 | # http://docs.haskellstack.org/en/stable/yaml_configuration/
 6 | 
 7 | # Resolver to choose a 'specific' stackage snapshot or a compiler version.
 8 | # A snapshot resolver dictates the compiler version and the set of packages
 9 | # to be used for project dependencies. For example:
10 | #
11 | # resolver: lts-3.5
12 | # resolver: nightly-2015-09-21
13 | # resolver: ghc-7.10.2
14 | # resolver: ghcjs-0.1.0_ghc-7.10.2
15 | # resolver:
16 | #  name: custom-snapshot
17 | #  location: "./custom-snapshot.yaml"
18 | resolver: lts-21.25
19 | 
20 | # User packages to be built.
21 | # Various formats can be used as shown in the example below.
22 | #
23 | # packages:
24 | # - some-directory
25 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz
26 | # - location:
27 | #    git: https://github.com/commercialhaskell/stack.git
28 | #    commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
29 | # - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
30 | #   extra-dep: true
31 | #  subdirs:
32 | #  - auto-update
33 | #  - wai
34 | #
35 | # A package marked 'extra-dep: true' will only be built if demanded by a
36 | # non-dependency (i.e. a user package), and its test suites and benchmarks
37 | # will not be run. This is useful for tweaking upstream packages.
38 | packages:
39 | - '.'
40 | # Dependency packages to be pulled from upstream that are not in the resolver
41 | # (e.g., acme-missiles-0.3)
42 | extra-deps:
43 |   - bitwise-1.0.0.1
44 | 
45 | # Override default flag values for local packages and extra-deps
46 | flags: {}
47 | 
48 | # Extra package databases containing global packages
49 | extra-package-dbs: []
50 | 
51 | # Control whether we use the GHC we find on the path
52 | # system-ghc: true
53 | #
54 | # Require a specific version of stack, using version ranges
55 | # require-stack-version: -any # Default
56 | # require-stack-version: ">=1.3"
57 | #
58 | # Override the architecture used by stack, especially useful on Windows
59 | # arch: i386
60 | # arch: x86_64
61 | #
62 | # Extra directories used by stack for building
63 | # extra-include-dirs: [/path/to/dir]
64 | # extra-lib-dirs: [/path/to/dir]
65 | #
66 | # Allow a newer minor version of GHC than the snapshot specifies
67 | # compiler-check: newer-minor
68 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # 
2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
4 | # 
5 | 
6 | message(STATUS "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"")
7 | 


--------------------------------------------------------------------------------
/test/Makefile:
--------------------------------------------------------------------------------
1 | #
2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam.
3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
4 | #
5 | 
6 | all:
7 | 		@echo "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""
8 | 		@echo 'Sed etiam a suspendisse. "Aliquam nulla erat risus."'
9 | 


--------------------------------------------------------------------------------
/test/test.c:
--------------------------------------------------------------------------------
 1 | /* hello world */
 2 | int
 3 | main(int argc, char *argv[])
 4 | {
 5 |     const char * x = "hello world";
 6 |     int ab = 10;
 7 |      return 0;
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/test/test.chpl:
--------------------------------------------------------------------------------
 1 | //
 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | //
 5 | 
 6 | /*
 7 |  * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 
 8 |  * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 
 9 |  * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 
10 |  * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
11 |  */
12 | 
13 |     writeln("Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"");
14 | 
15 | 


--------------------------------------------------------------------------------
/test/test.coffee:
--------------------------------------------------------------------------------
 1 | #
 2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
 3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | #
 5 | 
 6 | ###
 7 | 
 8 |   Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 
 9 |   Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 
10 |   Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 
11 |   tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
12 | 
13 | ### 
14 | 
15 |   console.log "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\" "
16 | 
17 | 


--------------------------------------------------------------------------------
/test/test.cpp:
--------------------------------------------------------------------------------
 1 | //
 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam.
 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | //
 5 | 
 6 | /*
 7 |  * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus.
 8 |  * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui.
 9 |  * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque
10 |  * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
11 |  */
12 | 
13 | int
14 | main(int argc, char *argv[])
15 | {
16 |     char a = 'a', b = '"', c = '\'', d = '\n', e = '"';
17 | 
18 |     const char * msg = "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"";
19 |     const char * raw = R"P(This is a raw string)P";
20 |     return 0;
21 | }
22 | 
23 | 


--------------------------------------------------------------------------------
/test/test.dhall:
--------------------------------------------------------------------------------
 1 | {- This is an example Dhall configuration file
 2 | 
 3 |    Can you spot the mistake?
 4 | 
 5 |    Fix the typo, then move onto the "Definitions"
 6 |    example
 7 | -}
 8 | 
 9 | { home       = "/home/bill"
10 | , privateKey = "/home/bill/.ssh/id_ed25519"
11 | , publicKey  = "/home/blil/.ssh/id_ed25519.pub"
12 | }
13 | 


--------------------------------------------------------------------------------
/test/test.erl:
--------------------------------------------------------------------------------
 1 | % 
 2 | % Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
 3 | % Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | % 
 5 | 
 6 | 
 7 | io:format("Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"~n").
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/test/test.fs:
--------------------------------------------------------------------------------
 1 | //
 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | //
 5 | 
 6 | (*
 7 |  * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 
 8 |  * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 
 9 |  * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 
10 |  * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
11 |  *)
12 | 
13 |  
14 |  printfn "%s" "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"";
15 |  printfn "%s" @"Sed etiam a suspendisse. ""Aliquam nulla erat risus.""";
16 | 
17 | 


--------------------------------------------------------------------------------
/test/test.go:
--------------------------------------------------------------------------------
 1 | //
 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam.
 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | //
 5 | 
 6 | /*
 7 |  * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus.
 8 |  * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui.
 9 |  * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque
10 |  * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
11 |  */
12 | package main
13 | 
14 | import "fmt"
15 | 
16 | func main() {
17 | 	var a = `a`
18 | 	var b = `"`
19 | 	var c = "'"
20 | 	var d = "\n"
21 | 
22 | 	var msg1 = "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""
23 | 	var msg2 = `Sed etiam a suspendisse. "Aliquam nulla erat risus."\`
24 | 
25 | 	fmt.Print(a, b, c, d, msg1, msg2)
26 | }
27 | 


--------------------------------------------------------------------------------
/test/test.h:
--------------------------------------------------------------------------------
 1 | /* hello world */
 2 | static inline
 3 | int fun(int argc, char *argv[])
 4 | {
 5 |     const char * x = "hello world";
 6 |     int ab = 10;
 7 |     return 0;
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/test/test.hs:
--------------------------------------------------------------------------------
 1 | --
 2 | -- Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam.
 3 | -- Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | --
 5 | 
 6 | {-
 7 |       Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus.
 8 |       Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui.
 9 |       Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque
10 |       tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
11 |  -}
12 | 
13 | {-# LANGUAGE QuasiQuotes #-}
14 | 
15 | import Data.String.Here
16 | 
17 | main = do
18 |        putStrLn "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""
19 |        putStrLn [here| Sed etiam a suspendisse. "Aliquam nulla erat risus." |]
20 | 


--------------------------------------------------------------------------------
/test/test.html:
--------------------------------------------------------------------------------
1 |  
2 | <!-- Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
3 | Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. --> 
4 | 
5 | <meta name="generator" content="Sed etiam a suspendisse. Aliquam nulla erat risus." />
6 | 
7 | 


--------------------------------------------------------------------------------
/test/test.ini:
--------------------------------------------------------------------------------
 1 | ; last modified 1 April 2001 by John Doe
 2 | [owner]
 3 | name = John Doe
 4 | organization = Acme Widgets Inc.
 5 | 
 6 | [database]
 7 | ; use IP address in case network name resolution is not working
 8 | server = 192.0.2.62     
 9 | port = 143
10 | file = "payroll.dat"
11 | 


--------------------------------------------------------------------------------
/test/test.js:
--------------------------------------------------------------------------------
 1 | //
 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | //
 5 | 
 6 | /*
 7 |  * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 
 8 |  * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 
 9 |  * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 
10 |  * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
11 |  */
12 | 
13 |     
14 |   document.write("Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"");
15 | 
16 | 


--------------------------------------------------------------------------------
/test/test.lua:
--------------------------------------------------------------------------------
 1 | -- 
 2 | -- Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
 3 | -- Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | -- 
 5 | 
 6 | --[[
 7 | Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 
 8 | Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 
 9 | Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 
10 | tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
11 | --]]
12 | 
13 |     print "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""
14 |     print 'Sed etiam a suspendisse. "Aliquam nulla erat risus.'
15 | 
16 |     print [===[Sed etiam a suspendisse. [==[ Aliquam nulla erat risus. ]===]
17 |     print [==[Sed etiam a suspendisse. [=[ Aliquam nulla erat risus. ]==]
18 |     print [=[Sed etiam a suspendisse. [[ Aliquam nulla erat risus. ]=]
19 |     print [[Sed etiam a suspendisse. Aliquam nulla erat risus. ]]
20 | 
21 | 


--------------------------------------------------------------------------------
/test/test.ml:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 
 3 |  * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 
 4 |  * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 
 5 |  * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
 6 |  *)
 7 | 
 8 |  
 9 |  print_endline "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"";
10 | 
11 | 


--------------------------------------------------------------------------------
/test/test.php3:
--------------------------------------------------------------------------------
 1 | //
 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | //
 5 | 
 6 | # 
 7 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
 8 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 9 | # 
10 | 
11 | /*
12 |  * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 
13 |  * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 
14 |  * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 
15 |  * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
16 |  */
17 | 
18 | <?php
19 | echo "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""; 
20 | echo 'Sed etiam a suspendisse. "Aliquam nulla erat risus."'; 
21 | ?>
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/test/test.pl:
--------------------------------------------------------------------------------
 1 | # 
 2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
 3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | # 
 5 | 
 6 | =pod
 7 |   Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 
 8 |   Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 
 9 |   Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 
10 |   tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
11 | =cut
12 | 
13 |     print "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"";
14 |     print 'Sed etiam a suspendisse. "Aliquam nulla erat risus."';
15 | 
16 | 


--------------------------------------------------------------------------------
/test/test.py:
--------------------------------------------------------------------------------
 1 | # 
 2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
 3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | # 
 5 | 
 6 | print """Sed etiam a suspendisse. \"Aliquam nulla erat risus. """
 7 | print '''Sed etiam a suspendisse. "Aliquam nulla erat risus. '''
 8 | 
 9 | print "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"";
10 | print 'Sed etiam a suspendisse. "Aliquam nulla erat risus."';
11 | 
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/test/test.rb:
--------------------------------------------------------------------------------
 1 | # 
 2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
 3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
 4 | # 
 5 | 
 6 | =begin
 7 |   Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 
 8 |   Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 
 9 |   Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 
10 |   tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac.
11 | =end
12 | 
13 |     puts "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"";
14 |     puts 'Sed etiam a suspendisse. \'Aliquam nulla erat risus.\'';
15 |     puts %q(Sed etiam a suspendisse. 'Aliquam nulla erat risus.')
16 |     puts %Q(Sed etiam a suspendisse. 'Aliquam nulla erat risus.')
17 |     puts  %|Sed etiam a suspendisse. 'Aliquam nulla erat risus.'|
18 | 


--------------------------------------------------------------------------------
/test/test.rs:
--------------------------------------------------------------------------------
1 | 
2 | pub struct Gateway<S> {
3 |     config: Config,
4 |     drain: drain::Watch,
5 |     stack: Stack<S>,
6 | }
7 | 


--------------------------------------------------------------------------------
/test/test.sh:
--------------------------------------------------------------------------------
1 | # 
2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
4 | # 
5 | 
6 |     echo "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"";
7 | 
8 | 


--------------------------------------------------------------------------------
/test/test.tex:
--------------------------------------------------------------------------------
1 | % 
2 | % Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 
3 | % Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer.
4 | % 
5 | 
6 | Sed etiam a suspendisse. "Aliquam nulla erat risus."
7 | 
8 | 


--------------------------------------------------------------------------------
/test/test.toml:
--------------------------------------------------------------------------------
 1 | # This is a TOML document
 2 | 
 3 | title = "TOML Example"
 4 | 
 5 | [owner]
 6 | name = "Tom Preston-Werner"
 7 | dob = 1979-05-27T07:32:00-08:00
 8 | 
 9 | [database]
10 | enabled = true
11 | ports = [ 8000, 8001, 8002 ]
12 | data = [ ["delta", "phi"], [3.14] ]
13 | temp_targets = { cpu = 79.5, case = 72.0 }
14 | 
15 | [servers]
16 | 
17 | [servers.alpha]
18 | ip = "10.0.0.1"
19 | role = "frontend"
20 | 
21 | [servers.beta]
22 | ip = "10.0.0.2"
23 | role = "backend"
24 | 


--------------------------------------------------------------------------------
/test/test.u:
--------------------------------------------------------------------------------
1 | -- this is an hello world!
2 | 
3 | helloWorld : '{IO, Exception} ()
4 | helloWorld = do
5 |   use Text ++
6 |   name = !readLine
7 |   printLine ("Hello " ++ name)
8 | 


--------------------------------------------------------------------------------
/test/test.utf8:
--------------------------------------------------------------------------------
  1 | Original by Markus Kuhn, adapted for HTML by Martin Dürst.
  2 | 
  3 | UTF-8 encoded sample plain-text file
  4 | ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾
  5 | 
  6 | Markus Kuhn [ˈmaʳkʊs kuːn] <mkuhn@acm.org> — 1999-08-20
  7 | 
  8 | 
  9 | The ASCII compatible UTF-8 encoding of ISO 10646 and Unicode
 10 | plain-text files is defined in RFC 2279 and in ISO 10646-1 Annex R.
 11 | 
 12 | 
 13 | Using Unicode/UTF-8, you can write in emails and source code things such as
 14 | 
 15 | Mathematics and Sciences:
 16 | 
 17 |   ∮ E⋅da = Q,  n → ∞, ∑ f(i) = ∏ g(i), ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β),
 18 | 
 19 |   ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (A ⇔ B),
 20 | 
 21 |   2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm
 22 | 
 23 | Linguistics and dictionaries:
 24 | 
 25 |   ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn
 26 |   Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]
 27 | 
 28 | APL:
 29 | 
 30 |   ((V⍳V)=⍳⍴V)/V←,V    ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈
 31 | 
 32 | Nicer typography in plain text files:
 33 | 
 34 |   ╔══════════════════════════════════════════╗
 35 |   ║                                          ║
 36 |   ║   • ‘single’ and “double” quotes         ║
 37 |   ║                                          ║
 38 |   ║   • Curly apostrophes: “We’ve been here” ║
 39 |   ║                                          ║
 40 |   ║   • Latin-1 apostrophe and accents: '´`  ║
 41 |   ║                                          ║
 42 |   ║   • ‚deutsche‘ „Anführungszeichen“       ║
 43 |   ║                                          ║
 44 |   ║   • †, ‡, ‰, •, 3–4, —, −5/+5, ™, …      ║
 45 |   ║                                          ║
 46 |   ║   • ASCII safety test: 1lI|, 0OD, 8B     ║
 47 |   ║                      ╭─────────╮         ║
 48 |   ║   • the euro symbol: │ 14.95 € │         ║
 49 |   ║                      ╰─────────╯         ║
 50 |   ╚══════════════════════════════════════════╝
 51 | 
 52 | Greek (in Polytonic):
 53 | 
 54 |   The Greek anthem:
 55 | 
 56 |   Σὲ γνωρίζω ἀπὸ τὴν κόψη
 57 |   τοῦ σπαθιοῦ τὴν τρομερή,
 58 |   σὲ γνωρίζω ἀπὸ τὴν ὄψη
 59 |   ποὺ μὲ βία μετράει τὴ γῆ.
 60 | 
 61 |   ᾿Απ᾿ τὰ κόκκαλα βγαλμένη
 62 |   τῶν ῾Ελλήνων τὰ ἱερά
 63 |   καὶ σὰν πρῶτα ἀνδρειωμένη
 64 |   χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά!
 65 | 
 66 |   From a speech of Demosthenes in the 4th century BC:
 67 | 
 68 |   Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι,
 69 |   ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς
 70 |   λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ
 71 |   τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ 
 72 |   εἰς τοῦτο προήκοντα,  ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ
 73 |   πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν
 74 |   οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι,
 75 | xxx  οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν
 76 |   ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον
 77 |   τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι
 78 |   γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν
 79 |   προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους
 80 |   σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ
 81 |   τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ
 82 |   τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς
 83 |   τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον.
 84 | 
 85 |   Δημοσθένους, Γ´ ᾿Ολυνθιακὸς
 86 | 
 87 | Georgian:
 88 | 
 89 |   From a Unicode conference invitation:
 90 | 
 91 |   გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო
 92 |   კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს,
 93 |   ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს
 94 |   ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი,
 95 |   ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება
 96 |   ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში,
 97 |   ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში.
 98 | 
 99 | Russian:
100 | 
101 |   From a Unicode conference invitation:
102 | 
103 |   Зарегистрируйтесь сейчас на Десятую Международную Конференцию по
104 |   Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии.
105 |   Конференция соберет широкий круг экспертов по  вопросам глобального
106 |   Интернета и Unicode, локализации и интернационализации, воплощению и
107 |   применению Unicode в различных операционных системах и программных
108 |   приложениях, шрифтах, верстке и многоязычных компьютерных системах.
109 | 
110 | Thai (UCS Level 2):
111 | 
112 |   Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese
113 |   classic 'San Gua'):
114 | 
115 |   [----------------------------|------------------------]
116 |     ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช  พระปกเกศกองบู๊กู้ขึ้นใหม่
117 |   สิบสองกษัตริย์ก่อนหน้าแลถัดไป       สององค์ไซร้โง่เขลาเบาปัญญา
118 |     ทรงนับถือขันทีเป็นที่พึ่ง           บ้านเมืองจึงวิปริตเป็นนักหนา
119 |   โฮจิ๋นเรียกทัพทั่วหัวเมืองมา         หมายจะฆ่ามดชั่วตัวสำคัญ
120 |     เหมือนขับไสไล่เสือจากเคหา      รับหมาป่าเข้ามาเลยอาสัญ
121 |   ฝ่ายอ้องอุ้นยุแยกให้แตกกัน          ใช้สาวนั้นเป็นชนวนชื่นชวนใจ
122 |     พลันลิฉุยกุยกีกลับก่อเหตุ          ช่างอาเพศจริงหนาฟ้าร้องไห้
123 |   ต้องรบราฆ่าฟันจนบรรลัย           ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ
124 | 
125 |   (The above is a two-column text. If combining characters are handled
126 |   correctly, the lines of the second column should be aligned with the
127 |   | character above.)
128 | 
129 | Ethiopian:
130 | 
131 |   Proverbs in the Amharic language:
132 | 
133 |   ሰማይ አይታረስ ንጉሥ አይከሰስ።
134 |   ብላ ካለኝ እንደአባቴ በቆመጠኝ።
135 |   ጌጥ ያለቤቱ ቁምጥና ነው።
136 |   ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው።
137 |   የአፍ ወለምታ በቅቤ አይታሽም።
138 |   አይጥ በበላ ዳዋ ተመታ።
139 |   ሲተረጉሙ ይደረግሙ።
140 |   ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል።
141 |   ድር ቢያብር አንበሳ ያስር።
142 |   ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም።
143 |   እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም።
144 |   የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ።
145 |   ሥራ ከመፍታት ልጄን ላፋታት።
146 |   ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል።
147 |   የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ።
148 |   ተንጋሎ ቢተፉ ተመልሶ ባፉ።
149 |   ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው።
150 |   እግርህን በፍራሽህ ልክ ዘርጋ።
151 | 
152 | Runes:
153 | 
154 |   ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ
155 | 
156 |   (Old English, which transcribed into Latin reads 'He cwaeth that he
157 |   bude thaem lande northweardum with tha Westsae.' and means 'He said
158 |   that he lived in the northern land near the Western Sea.')
159 | 
160 | Braille:
161 | 
162 |   ⡌⠁⠧⠑ ⠼⠁⠒  ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌
163 | 
164 |   ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞
165 |   ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎
166 |   ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂
167 |   ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙
168 |   ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ 
169 |   ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲
170 | 
171 |   ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
172 | 
173 |   ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹
174 |   ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞
175 |   ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕
176 |   ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ 
177 |   ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ 
178 |   ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎
179 |   ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳
180 |   ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞
181 |   ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
182 | 
183 |   (The first couple of paragraphs of "A Christmas Carol" by Dickens)
184 | 
185 | Compact font selection example text:
186 | 
187 |   ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789
188 |   abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ
189 |   –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд
190 |   ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ ﬁ�⑀₂ἠḂӥẄɐː⍎אԱა
191 | 
192 | Greetings in various languages:
193 | 
194 |   Hello world, Καλημέρα κόσμε, コンニチハ
195 | 
196 | Box drawing alignment tests:                                          █
197 |                                                                       ▉
198 |   ╔══╦══╗  ┌──┬──┐  ╭──┬──╮  ╭──┬──╮  ┏━━┳━━┓  ┎┒┏┑   ╷  ╻ ┏┯┓ ┌┰┐    ▊ ╱╲╱╲╳╳╳
199 |   ║┌─╨─┐║  │╔═╧═╗│  │╒═╪═╕│  │╓─╁─╖│  ┃┌─╂─┐┃  ┗╃╄┙  ╶┼╴╺╋╸┠┼┨ ┝╋┥    ▋ ╲╱╲╱╳╳╳
200 |   ║│╲ ╱│║  │║   ║│  ││ │ ││  │║ ┃ ║│  ┃│ ╿ │┃  ┍╅╆┓   ╵  ╹ ┗┷┛ └┸┘    ▌ ╱╲╱╲╳╳╳
201 |   ╠╡ ╳ ╞╣  ├╢   ╟┤  ├┼─┼─┼┤  ├╫─╂─╫┤  ┣┿╾┼╼┿┫  ┕┛┖┚     ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳
202 |   ║│╱ ╲│║  │║   ║│  ││ │ ││  │║ ┃ ║│  ┃│ ╽ │┃  ░░▒▒▓▓██ ┊  ┆ ╎ ╏  ┇ ┋ ▎
203 |   ║└─╥─┘║  │╚═╤═╝│  │╘═╪═╛│  │╙─╀─╜│  ┃└─╂─┘┃  ░░▒▒▓▓██ ┊  ┆ ╎ ╏  ┇ ┋ ▏
204 |   ╚══╩══╝  └──┴──┘  ╰──┴──╯  ╰──┴──╯  ┗━━┻━━┛           └╌╌┘ ╎ ┗╍╍┛ ┋  ▁▂▃▄▅▆▇█
205 | 


--------------------------------------------------------------------------------
/test/test.zig:
--------------------------------------------------------------------------------
 1 | // this is a comment
 2 | const std = @import("std");
 3 | 
 4 | pub fn main() !void { // program entry point; `!void` lets a failed write propagate as an error
 5 |     const stdout = std.io.getStdOut().writer();
 6 |     // Multiline string literal: every `\\`-prefixed line below contributes one line of text.
 7 |     const hello_world_in_c =
 8 |     \\#include <stdio.h>
 9 |     \\
10 |     \\int main(int argc, char **argv) {
11 |     \\    printf("hello world\n");
12 |     \\    return 0;
13 |     \\}
14 |     ;
15 |     _ = hello_world_in_c; // explicit discard: Zig rejects unused local constants at compile time
16 |     try stdout.print("Hello, {s}!\n", .{"world"});
17 | }
18 | 


--------------------------------------------------------------------------------