├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── Setup.hs ├── cgrep.cabal ├── cgreprc ├── script └── profile.sh ├── src ├── CGrep │ ├── Boundary.hs │ ├── Common.hs │ ├── ContextFilter.hs │ ├── Distance.hs │ ├── FileKind.hs │ ├── FileType.hs │ ├── FileTypeMap.hs │ ├── Output.hs │ ├── Parser │ │ ├── Atom.hs │ │ ├── Char.hs │ │ ├── Chunk.hs │ │ ├── Line.hs │ │ └── Token.hs │ ├── Search.hs │ ├── Strategy │ │ ├── BoyerMoore.hs │ │ ├── Levenshtein.hs │ │ ├── Regex.hs │ │ ├── Semantic.hs │ │ └── Tokenizer.hs │ └── Types.hs ├── CmdOptions.hs ├── Config.hs ├── Main.hs ├── Options.hs ├── Reader.hs ├── Search.hs ├── Util.hs └── Verbose.hs ├── stack.yaml └── test ├── CMakeLists.txt ├── Makefile ├── test.c ├── test.chpl ├── test.coffee ├── test.cpp ├── test.dhall ├── test.erl ├── test.fs ├── test.go ├── test.h ├── test.hs ├── test.html ├── test.ini ├── test.js ├── test.lua ├── test.ml ├── test.php3 ├── test.pl ├── test.py ├── test.rb ├── test.rs ├── test.sh ├── test.tex ├── test.toml ├── test.u ├── test.utf8 └── test.zig /.gitignore: -------------------------------------------------------------------------------- 1 | /.cabal-sandbox/ 2 | /cabal.sandbox.config 3 | /dist/ 4 | .hie 5 | .stack-work 6 | dist-newstyle 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## Changelog 2 | 3 | # cgrep v8.0.0 4 | 5 | - **Performance Enhancements:** In this release, significant improvements have been made to enhance the performance of cgrep. The performance is now more on par with ripgrep, with search speeds ranging from 3 times to 13 times faster than the previous release. 6 | 7 | - **Expanded Language and Configuration Support:** We have expanded the language and configuration support in cgrep. Starting from this version, it includes support for searching within Zig, Unison, Dhall, Fish shell, Toml, and Ini config files. 
This means you can now easily search for patterns and text within files of these formats. 8 | 9 | - **New Semantic Token Filter:** A new semantic token filter has been introduced in this release. This filter is designed to identify and filter out specific native types present in different file types. This enhances the search results by providing more precise and relevant matches based on the semantic meaning of the tokens. 10 | 11 | - **Kind Filter Selector:** We have introduced a new feature called the Kind Filter Selector. With this filter selector, users can now specify the kind of files they want to search within. The available options for the filter include Text, Config, Language, Data, Markup, and Script. This allows for more focused and targeted searches based on the desired file type, especially useful in large codebases. 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 
20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. 
To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 
86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 
340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CGrep: a context-aware grep for source codes 2 | ============================================ 3 | 4 | [![Hackage](https://img.shields.io/hackage/v/cgrep.svg?style=flat)](https://hackage.haskell.org/package/cgrep) 5 | [![Join the chat at https://gitter.im/awgn/cgrep](https://badges.gitter.im/awgn/cgrep.svg)](https://gitter.im/awgn/cgrep?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 6 | 7 | Usage 8 | ----- 9 | 10 | ``` 11 | Cgrep 8.1.3. Usage: cgrep [OPTION] [PATTERN] files... 12 | 13 | cgrep [OPTIONS] [ITEM] 14 | 15 | Pattern: 16 | -f --file=FILE Read PATTERNs from file (one per line) 17 | -w --word Force word matching 18 | -p --prefix Force prefix matching 19 | -s --suffix Force suffix matching 20 | -e --edit Use edit distance 21 | -G --regex Use regex matching (posix) 22 | -P --pcre Use regex matching (pcre) 23 | -i --ignore-case Ignore case distinctions 24 | 25 | Context filters: 26 | -c --code Enable search in source code 27 | -m --comment Enable search in comments 28 | -l --literal Enable search in string literals 29 | 30 | Token filters: 31 | --name --identifier Identifiers 32 | --type --native Native Types 33 | --keyword Keywords 34 | --number Literal numbers 35 | --string Literal strings 36 | --op Operators 37 | 38 | File filters: 39 | -t --type-filter=ITEM Specify file types. ie: Cpp, +Haskell, -Makefile 40 | -k --kind-filter=ITEM Specify file kinds. 
Text, Config, Language, Data, 41 | Markup or Script 42 | --code-only Parse code modules only (skip headers/interfaces) 43 | --hdr-only Parse headers/interfaces only (skip modules) 44 | -T --skip-test Skip files that have 'test' in the name 45 | --prune-dir=ITEM Do not descend into dir 46 | -r --recursive Enable recursive search (don't follow symlinks) 47 | -L --follow Follow symlinks 48 | 49 | Semantic: 50 | -S --semantic "code" pattern: _, _1, _2... (identifiers), $, $1, 51 | $2... (optionals), ANY, KEY, STR, LIT, NUM, HEX, OCT, 52 | OR 53 | 54 | Control: 55 | --max-count=INT Stop search in files after INT matches 56 | --force-type=ITEM Force the type of file 57 | --type-list List the supported file types 58 | -v --invert-match Select non-matching lines 59 | --multiline=INT Enable multi-line matching 60 | -j --threads=INT Number threads to run in parallel 61 | 62 | Output format: 63 | --show-match Show list of matching tokens 64 | --color Use colors to highlight the match strings 65 | --no-color Do not use colors (override config file) 66 | -h --no-filename Suppress the file name prefix on output 67 | --no-numbers Suppress both line and column numbers on output 68 | --no-column Suppress the column number on output 69 | --count Print only a count of matching lines per file 70 | --filename-only Print only the name of files containing matches 71 | --json Format output as json object 72 | --vim Run vim editor passing the files that match 73 | --editor Run the editor specified by EDITOR var., passing 74 | the files that match 75 | --fileline When edit option is specified, pass the list of 76 | matching files in file:line format (e.g. vim 77 | 'file-line' plugin) 78 | 79 | Miscellaneous: 80 | --verbose=INT Verbose level: 1, 2 or 3 81 | --no-shallow Disable shallow-search 82 | --palette Show color palette 83 | -? 
--help Display help message 84 | -V --version Print version information 85 | --numeric-version Print just the version number 86 | ``` 87 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /cgrep.cabal: -------------------------------------------------------------------------------- 1 | Cabal-version: 2.2 2 | Name: cgrep 3 | Description: Cgrep: a context-aware grep for source codes 4 | Version: 8.1.3 5 | Synopsis: Command line tool 6 | Homepage: http://awgn.github.io/cgrep/ 7 | License: GPL-2.0-or-later 8 | License-file: LICENSE 9 | Author: Nicola Bonelli 10 | Maintainer: Nicola Bonelli 11 | Category: Utils 12 | Build-type: Simple 13 | Stability: Experimental 14 | Extra-source-files: README.md 15 | 16 | Common common-options 17 | build-depends: base ^>= 4.15.0.0 18 | 19 | ghc-options: -Wall 20 | -Wcompat 21 | -Widentities 22 | -Wincomplete-uni-patterns 23 | -Wincomplete-record-updates 24 | if impl(ghc >= 8.0) 25 | ghc-options: -Wredundant-constraints 26 | if impl(ghc >= 8.2) 27 | ghc-options: -fhide-source-paths 28 | if impl(ghc >= 8.4) 29 | ghc-options: -Wmissing-export-lists 30 | -Wpartial-fields 31 | if impl(ghc >= 8.8) 32 | ghc-options: -Wmissing-deriving-strategies 33 | 34 | default-language: Haskell2010 35 | 36 | Executable cgrep 37 | Main-Is: Main.hs 38 | Hs-Source-Dirs: src 39 | Default-Extensions: FlexibleContexts 40 | FlexibleInstances 41 | GeneralisedNewtypeDeriving 42 | DerivingStrategies 43 | MultiWayIf 44 | LambdaCase 45 | OverloadedLists 46 | OverloadedRecordDot 47 | OverloadedStrings 48 | PatternSynonyms 49 | RecordWildCards 50 | ScopedTypeVariables 51 | TupleSections 52 | TypeApplications 53 | UnboxedSums 54 | UnboxedTuples 55 | ViewPatterns 56 | BangPatterns 57 | MagicHash 58 | 59 | 
Other-Modules: Options 60 | Verbose 61 | CmdOptions 62 | Util 63 | Config 64 | Reader 65 | Search 66 | CGrep.FileType 67 | CGrep.FileKind 68 | CGrep.FileTypeMap 69 | CGrep.ContextFilter 70 | CGrep.Types 71 | CGrep.Output 72 | CGrep.Distance 73 | CGrep.Search 74 | CGrep.Common 75 | CGrep.Boundary 76 | CGrep.Parser.Char 77 | CGrep.Parser.Chunk 78 | CGrep.Parser.Token 79 | CGrep.Parser.Atom 80 | CGrep.Parser.Line 81 | CGrep.Strategy.Semantic 82 | CGrep.Strategy.Tokenizer 83 | CGrep.Strategy.Levenshtein 84 | CGrep.Strategy.BoyerMoore 85 | CGrep.Strategy.Regex 86 | Paths_cgrep 87 | Autogen-modules: Paths_cgrep 88 | 89 | Build-Depends: base < 5.0, 90 | cmdargs, 91 | bytestring, 92 | directory, 93 | filepath, 94 | stm, 95 | containers, 96 | vector, 97 | array, 98 | ghc-prim, 99 | dlist, 100 | ansi-terminal, 101 | split, 102 | safe, 103 | stringsearch, 104 | unordered-containers, 105 | regex-base, 106 | regex-posix, 107 | regex-pcre, 108 | either, 109 | mtl, 110 | unix-compat, 111 | async, 112 | utf8-string, 113 | unicode-show, 114 | transformers, 115 | process, 116 | aeson, 117 | yaml, 118 | exceptions, 119 | mono-traversable, 120 | bytestring-strict-builder, 121 | bitwise, 122 | mmap, 123 | unagi-chan, 124 | posix-paths, 125 | rawfilepath, 126 | monad-loops, 127 | deepseq, 128 | bitarray, 129 | text, 130 | extra 131 | 132 | Ghc-options: -O2 -optc-O3 133 | -funbox-strict-fields 134 | -fwrite-ide-info 135 | -hiedir=.hie 136 | -threaded 137 | -rtsopts "-with-rtsopts=-N -H1g -qn2" 138 | 139 | Default-language: Haskell2010 140 | -------------------------------------------------------------------------------- /cgreprc: -------------------------------------------------------------------------------- 1 | # 2 | # Cgrep config file 3 | # 4 | 5 | file_types: [ Agda , Assembly , Awk , Bash, C , CMake , Cabal , Chapel , Clojure , Coffee , Conf , Cpp , Csh, Csharp , Css , 6 | D , Dart , Dhall, Elm , Elixir , Erlang , Fish, Fortran , Fsharp , Go , GoMod, Haskell , Html , Idris , Java , 
Javascript , Json , Ksh, Kotlin , 7 | Latex , Lisp , Lua , Make , Nmap , OCaml , ObjectiveC , PHP , Perl , Python , R , Ruby , Rust , Scala , SmallTalk , Shell , Swift , Tcl, 8 | Text , Unison, VHDL , Verilog , Yaml, Toml, Ini, Zig, Zsh ] 9 | 10 | prune_dirs : [".svn", ".git", "CMakeFiles", ".stack-work" ] 11 | 12 | colors : True 13 | color_filename : "2:3:5" 14 | color_match : "5:0:1" 15 | 16 | threads: 12 17 | 18 | # 19 | # file_line option: enable edit for file:line 20 | # vim: see 'file-line' plugin 21 | # 22 | # 23 | # file_line : True 24 | # 25 | -------------------------------------------------------------------------------- /script/profile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm -f cgrep.prof # -f: no error when there is no previous profile to remove 3 | stack build --profile -v 4 | echo "running: cgrep $*..." # $* joins the arguments into a single word for display 5 | stack exec --profile -- cgrep "$@" +RTS -P | wc -l # "$@" forwards every argument intact (patterns may contain spaces or globs) 6 | profiteur cgrep.prof 7 | open cgrep.prof.html 8 | cat cgrep.prof 9 | -------------------------------------------------------------------------------- /src/CGrep/Boundary.hs: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2013-2023 Nicola Bonelli 3 | -- 4 | -- This program is free software; you can redistribute it and/or modify 5 | -- it under the terms of the GNU General Public License as published by 6 | -- the Free Software Foundation; either version 2 of the License, or 7 | -- (at your option) any later version. 8 | -- 9 | -- This program is distributed in the hope that it will be useful, 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | -- GNU General Public License for more details. 13 | -- 14 | -- You should have received a copy of the GNU General Public License 15 | -- along with this program; if not, write to the Free Software 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
17 | -- 18 | 19 | module CGrep.Boundary ( 20 | Boundary (..), 21 | BoundaryType (..), 22 | pattern Begin, 23 | pattern End, 24 | ) where 25 | 26 | import qualified Data.ByteString.Char8 as C 27 | import Data.Word (Word8) 28 | 29 | -- | A pair of byte strings, 'bBegin' and 'bEnd'. 30 | -- NOTE(review): presumably the opening and closing delimiters of a region; confirm against the users of this type. 31 | data Boundary = Boundary 32 | { bBegin :: C.ByteString 33 | , bEnd :: C.ByteString 34 | } 35 | deriving stock (Show, Eq) 36 | 37 | -- | One-byte tag. The pattern synonyms below name the values 0 ('Begin') and 1 ('End'). 38 | newtype BoundaryType = BoundaryType {unpackBoundaryType :: Word8} 39 | deriving newtype (Eq, Ord) 40 | 41 | -- | Total 'Show' instance. The 'BoundaryType' constructor is exported, so callers can build 42 | -- tags other than 0 and 1; those are rendered with the raw byte instead of failing the pattern match. 43 | -- The clauses match on the raw constructor so the fallback is not reported as redundant under the COMPLETE pragma. 44 | instance Show BoundaryType where 45 | show (BoundaryType 0) = "begin" 46 | show (BoundaryType 1) = "end" 47 | show (BoundaryType n) = "BoundaryType " <> show n 48 | 49 | pattern Begin :: BoundaryType 50 | pattern Begin = BoundaryType 0 51 | pattern End :: BoundaryType 52 | pattern End = BoundaryType 1 53 | 54 | -- Tells the exhaustiveness checker that matching on 'Begin' and 'End' covers 'BoundaryType'. 55 | {-# COMPLETE Begin, End #-} -------------------------------------------------------------------------------- /src/CGrep/Common.hs: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2013-2023 Nicola Bonelli 3 | -- 4 | -- This program is free software; you can redistribute it and/or modify 5 | -- it under the terms of the GNU General Public License as published by 6 | -- the Free Software Foundation; either version 2 of the License, or 7 | -- (at your option) any later version. 8 | -- 9 | -- This program is distributed in the hope that it will be useful, 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | -- GNU General Public License for more details. 13 | -- 14 | -- You should have received a copy of the GNU General Public License 15 | -- along with this program; if not, write to the Free Software 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
--

module CGrep.Common (
    Text8,
    getTargetName,
    getTargetContents,
    expandMultiline,
    ignoreCase,
    subText,
    trim,
    trim8,
    takeN,
) where

import CGrep.Parser.Char (isSpace)
import CGrep.Types (Offset, Text8)
import Data.Char (toLower)

import Options (
    Options (Options, ignore_case, multiline, no_shallow),
 )

import Data.Int (Int64)
import System.IO.MMap (mmapFileByteString)
import Util (spanGroup)

import Data.List (group, groupBy, sort, sortOn)
import qualified Data.Vector.Unboxed as UV
import System.Posix.FilePath (RawFilePath)

import GHC.Exts (groupWith)

import qualified Data.ByteString.Char8 as C

-- | Keep at most @n@ characters of a string, appending @"..."@ when
-- something was cut off.
--
-- Only the first @n + 1@ cells of the input are inspected, so the function
-- no longer walks the whole string with 'length' and also terminates on
-- infinite input. A negative @n@ yields @"..."@, exactly as before.
takeN :: Int -> String -> String
takeN n xs
    | n < 0 = "..."
    | otherwise = case splitAt n xs of
        (prefix, []) -> prefix
        (prefix, _) -> prefix <> "..."
{-# INLINE takeN #-}

-- | Remove leading and trailing characters for which 'isSpace' holds.
trim :: String -> String
trim = stripLeading . reverse . stripLeading . reverse
  where
    stripLeading = dropWhile isSpace
{-# INLINE trim #-}

-- | 'trim' for 'Text8'. Both ends are stripped in place; the previous
-- implementation reversed (and therefore copied) the buffer twice.
trim8 :: Text8 -> Text8
trim8 = C.dropWhile isSpace . C.dropWhileEnd isSpace
{-# INLINE trim8 #-}

-- | Name used for a search target. Both branches hand back the path they
-- were given: an empty path yields the empty name, anything else is
-- returned unchanged.
getTargetName :: RawFilePath -> RawFilePath
getTargetName name
    | C.null name = ""
    | otherwise = name
{-# INLINE getTargetName #-}

-- | Contents of a search target: standard input when the path is empty,
-- otherwise the whole file obtained through 'mmapFileByteString'.
getTargetContents :: RawFilePath -> IO Text8
getTargetContents path
    | C.null path = C.getContents
    | otherwise = mmapFileByteString (C.unpack path) Nothing
{-# INLINE getTargetContents #-}

-- | With @multiline == 1@ the text is returned untouched. Otherwise the
-- lines are regrouped with @spanGroup n@ and every group is joined with
-- spaces into one output line.
expandMultiline :: Options -> Text8 -> Text8
expandMultiline Options{multiline = n} xs
    | n == 1 = xs
    | otherwise = C.unlines . map C.unwords . spanGroup n $ C.lines xs
{-# INLINE expandMultiline #-}

-- | Lower-case the text when the @ignore_case@ option is set; identity
-- otherwise.
ignoreCase :: Options -> Text8 -> Text8
ignoreCase opt = if ignore_case opt then C.map toLower else id
{-# INLINE ignoreCase #-}

-- | Cut the text at the first newline found at or after the largest
-- "last offset" among the given offset lists (an empty list counts as 0).
-- The newline itself is not kept. With no offset lists at all, or when no
-- newline follows, the text is returned unchanged.
subText :: [[Offset]] -> Text8 -> Text8
subText [] txt = txt
subText indices txt =
    maybe txt (\n -> C.take (maxOff + n) txt) (C.elemIndex '\n' (C.drop maxOff txt))
  where
    maxOff = fromIntegral $ maximum (lastOr 0 <$> indices)
    lastOr def xs = if null xs then def else last xs
{-# INLINE subText #-}

--------------------------------------------------------------------------------
/src/CGrep/ContextFilter.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module CGrep.ContextFilter where

import CGrep.Parser.Char (chr, isSpace, ord)
import CGrep.Types (Text8)

import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Short as B

import qualified Data.Map as Map

import CGrep.Boundary (Boundary (..))
import qualified Data.Aeson.KeyMap as B
import Data.List (find, findIndex, nub)
import Data.Maybe (fromMaybe, isJust)
import Options (Options (..))

import Data.Bits (Bits (complement, shiftL, shiftR, xor, (.&.), (.|.)))
import Data.Int (Int32, Int64)

import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as UV

import qualified Data.ByteString.Unsafe as U
import Data.HashMap.Internal.Strict (alter)
import Data.Word (Word64)

import Util (findWithIndex)

type FilterFunction = ContextFilter -> Text8 -> Text8

-- | The three kinds of region the filter distinguishes.
data Context = Code | Comment | Literal
    deriving stock (Eq, Show)

-- | One bit per 'Context': code = 0x1, comment = 0x2, literal = 0x4.
newtype ContextBit = ContextBit Int32
    deriving stock (Show)
    deriving newtype (Eq, Bits)

contextBitEmpty :: ContextBit
contextBitEmpty = ContextBit 0

contextBitCode :: ContextBit
contextBitCode = ContextBit 0x1

contextBitComment :: ContextBit
contextBitComment = ContextBit 0x2

contextBitLiteral :: ContextBit
contextBitLiteral = ContextBit 0x4

-- | Keep the bit when the flag is 'True', otherwise yield the empty bit set.
(~=) :: ContextBit -> Bool -> ContextBit
b ~= True = b
_ ~= False = contextBitEmpty
{-# INLINE (~=) #-}

-- | Does the filter have any of the given bits set?
(~?) :: ContextFilter -> ContextBit -> Bool
f ~? b = (unFilter f .&. b) /= contextBitEmpty
{-# INLINE (~?) #-}

-- | Clear the given bits in the filter.
(~!) :: ContextFilter -> ContextBit -> ContextFilter
a ~! b = ContextFilter $ unFilter a .&. complement b
{-# INLINE (~!) #-}

-- | Set of contexts whose characters are kept by 'runContextFilter'.
newtype ContextFilter = ContextFilter {unFilter :: ContextBit}
    deriving stock (Show)
    deriving newtype (Eq, Bits)

-- | Filter with the code, comment and literal bits all set.
contextFilterAll :: ContextFilter
contextFilterAll = ContextFilter (contextBitCode .|. contextBitComment .|. contextBitLiteral)
{-# NOINLINE contextFilterAll #-}

isContextFilterAll :: ContextFilter -> Bool
isContextFilterAll f = f == contextFilterAll
{-# INLINE isContextFilterAll #-}

-- The three predicates below are the single-bit specialisations of '(~?)'.

codeFilter :: ContextFilter -> Bool
codeFilter f = f ~? contextBitCode
{-# INLINE codeFilter #-}

commentFilter :: ContextFilter -> Bool
commentFilter f = f ~? contextBitComment
{-# INLINE commentFilter #-}

literalFilter :: ContextFilter -> Bool
literalFilter f = f ~? contextBitLiteral
{-# INLINE literalFilter #-}

-- | Delimiters of one language plus data derived from them.
--
-- 'inits' holds the distinct first bytes of every opening delimiter; the
-- scanner uses it to skip the delimiter search on bytes that cannot open a
-- region. 'alterBoundary' selects which stepping function
-- 'runContextFilter' uses.
data ParConfig = ParConfig
    { commBound :: [Boundary]
    , litrBound :: [Boundary]
    , rawBound :: [Boundary]
    , chrBound :: [Boundary]
    , inits :: B.ShortByteString
    , alterBoundary :: Bool
    }

-- | Build a 'ParConfig' from the comment, literal, raw-literal and
-- character-literal delimiters (in that order) and the 'alterBoundary' flag.
--
-- NOTE(review): the first byte is taken with 'C.head', so a delimiter with an
-- empty opening sequence makes 'inits' fail when it is first evaluated.
mkParConfig :: [Boundary] -> [Boundary] -> [Boundary] -> [Boundary] -> Bool -> ParConfig
mkParConfig cs ls rs chs ab =
    ParConfig
        { commBound = cs
        , litrBound = ls
        , rawBound = rs
        , chrBound = chs
        , inits = (B.pack . nub) (firstByte <$> (cs <> ls <> rs <> chs))
        , alterBoundary = ab
        }
  where
    firstByte b = fromIntegral (ord (C.head (bBegin b)))

-- | Scanner state.
--
-- 'skip' counts characters still to be consumed before 'nextState' is
-- installed as 'ctxState' (see 'transState'); 'display' says whether the
-- current character is kept in the output.
data ParState = ParState
    { ctxState :: !ContextState
    , nextState :: !ContextState
    , display :: !Bool
    , skip :: {-# UNPACK #-} !Int
    }
    deriving stock (Show)

-- | Region the scanner is in. The 'Int' carried by the non-code states is the
-- index of the matched delimiter inside the corresponding list of
-- 'ParConfig'. The @...1@ variants are the state on the first step after
-- entering a region, the @...N@ variants the state on later steps.
data ContextState
    = CodeState1
    | CodeStateN
    | CommState1 {-# UNPACK #-} !Int
    | CommStateN {-# UNPACK #-} !Int
    | ChrState {-# UNPACK #-} !Int
    | LitrState1 {-# UNPACK #-} !Int
    | LitrStateN {-# UNPACK #-} !Int
    | RawState {-# UNPACK #-} !Int
    deriving stock (Show, Eq, Ord)

-- | Translate the @code@/@comment@/@literal@ options into a filter. When none
-- of the three is set, every context is selected.
mkContextFilter :: Options -> ContextFilter
mkContextFilter Options{..} =
    if not (code || comment || literal)
        then contextFilterAll
        else ContextFilter $ contextBitCode ~= code .|. contextBitComment ~= comment .|. contextBitLiteral ~= literal

unpackBoundary :: Boundary -> (String, String)
unpackBoundary (Boundary a b) = (C.unpack a, C.unpack b)
{-# INLINE unpackBoundary #-}

-- | Coarse context of a state; raw and character literals count as 'Literal'.
getContext :: ContextState -> Context
getContext CodeState1 = Code
getContext CodeStateN = Code
getContext (CommState1 _) = Comment
getContext (CommStateN _) = Comment
getContext (LitrState1 _) = Literal
getContext (LitrStateN _) = Literal
getContext (RawState _) = Literal
getContext (ChrState _) = Literal
{-# INLINE getContext #-}

-- contextFilterFun:
--

-- | Unfold seed: the text still to be scanned and the scanner state.
data ParData = ParData
    { pdText :: {-# UNPACK #-} !Text8
    , pdState :: !ParState
    }

-- | Produce a text of the same length as the input in which every character
-- outside the selected contexts is replaced by a blank; characters for which
-- 'isSpace' holds are always kept.
--
-- When 'alterBoundary' is set, the character at a displayed Code -> Literal
-- transition is emitted as @chr 2@ and the one at a displayed
-- Literal -> Code transition as @chr 3@.
runContextFilter :: ParConfig -> ContextFilter -> Text8 -> Text8
runContextFilter conf@ParConfig{..} f txt =
    fst $ C.unfoldrN (C.length txt) step initial
  where
    initial = ParData txt (ParState CodeState1 CodeState1 (codeFilter f) 0)

    step
        | alterBoundary = contextFilter' conf
        | otherwise = contextFilter'' conf

    contextFilter' :: ParConfig -> ParData -> Maybe (Char, ParData)
    contextFilter' c (ParData txt@(C.uncons -> Just (x, xs)) s) =
        let !s' = nextContextState c s txt f
         in if display s'
                then case (# getContext (ctxState s), getContext (ctxState s') #) of
                    (# Code, Literal #) -> Just (chr 2, ParData xs s')
                    (# Literal, Code #) -> Just (chr 3, ParData xs s')
                    _ -> Just (x, ParData xs s')
                else
                    if isSpace x
                        then Just (x, ParData xs s')
                        else Just (' ', ParData xs s')
    contextFilter' _ (ParData (C.uncons -> Nothing) _) = Nothing

    contextFilter'' :: ParConfig -> ParData -> Maybe (Char, ParData)
    contextFilter'' c (ParData txt@(C.uncons -> Just (x, xs)) s) =
        let !s' = nextContextState c s txt f
         in if display s' || isSpace x
                then Just (x, ParData xs s')
                else Just (' ', ParData xs s')
    -- Exhausted input ends the unfold, as in contextFilter'. Without this
    -- clause the function was partial and relied on the unfold never calling
    -- it on the empty remainder.
    contextFilter'' _ (ParData (C.uncons -> Nothing) _) = Nothing

-- | One step of the scanner: given the state and the text starting at the
-- current character, return the state that applies to that character.
--
-- Outside a region the opening delimiters are tried in the order comment,
-- literal, raw literal, character literal, and only when the current byte is
-- in 'inits'. Inside a literal or character literal a backslash keeps the
-- state and skips the following character.
{-# INLINE nextContextState #-}
nextContextState :: ParConfig -> ParState -> Text8 -> ContextFilter -> ParState
nextContextState c s@ParState{..} txt f
    -- Still consuming a multi-byte delimiter or an escaped character.
    | skip > 0 = {-# SCC skip #-} transState s{skip = skip - 1}
    | CodeState1 <- ctxState =
        {-# SCC next_code1 #-}
        if U.unsafeHead txt `B.elem` inits c
            then case findPrefixBoundary txt (commBound c) of
                (# i, Just b #) -> {-# SCC next_code1_1 #-} transState s{nextState = CommState1 i, display = commentFilter f, skip = C.length (bBegin b) - 1}
                _ -> case findPrefixBoundary txt (litrBound c) of
                    (# i, Just b #) -> {-# SCC next_code1_2 #-} transState s{nextState = LitrState1 i, display = codeFilter f, skip = C.length (bBegin b) - 1}
                    _ -> case findPrefixBoundary txt (rawBound c) of
                        (# i, Just b #) -> {-# SCC next_code1_3 #-} transState s{nextState = RawState i, display = codeFilter f, skip = C.length (bBegin b) - 1}
                        _ -> case findPrefixBoundary' txt (chrBound c) of
                            (# i, Just b #) -> transState s{nextState = ChrState i, display = codeFilter f, skip = C.length (bBegin b) - 1}
                            _ -> {-# SCC next_code1_5 #-} s{ctxState = CodeStateN, nextState = CodeStateN, display = codeFilter f, skip = 0}
            else {-# SCC next_code1_0 #-} s{ctxState = CodeStateN, nextState = CodeStateN, display = codeFilter f, skip = 0}
    | CodeStateN <- ctxState =
        {-# SCC next_code #-}
        if {-# SCC next_code_if #-} U.unsafeHead txt `B.elem` inits c
            then
                {-# SCC next_code_then #-}
                case findPrefixBoundary txt (commBound c) of
                    (# i, Just b #) -> {-# SCC next_code1_1 #-} transState s{nextState = CommState1 i, display = commentFilter f, skip = C.length (bBegin b) - 1}
                    _ -> case findPrefixBoundary txt (litrBound c) of
                        (# i, Just b #) -> {-# SCC next_code1_2 #-} transState s{nextState = LitrState1 i, display = codeFilter f, skip = C.length (bBegin b) - 1}
                        _ -> case findPrefixBoundary txt (rawBound c) of
                            (# i, Just b #) -> {-# SCC next_code1_3 #-} transState s{nextState = RawState i, display = codeFilter f, skip = C.length (bBegin b) - 1}
                            _ -> case findPrefixBoundary' txt (chrBound c) of
                                (# i, Just b #) -> transState s{nextState = ChrState i, display = codeFilter f, skip = C.length (bBegin b) - 1}
                                -- Steady state: nothing to recompute.
                                _ -> {-# SCC next_code_5 #-} s
            else {-# SCC next_code_else #-} s
    | CommState1 n <- ctxState =
        let Boundary _ e = commBound c !! n
         in {-# SCC next_comm1 #-}
            if e `C.isPrefixOf` txt
                then transState $ s{nextState = CodeState1, display = commentFilter f, skip = C.length e - 1}
                else s{ctxState = CommStateN n, nextState = CommStateN n, display = commentFilter f, skip = 0}
    | CommStateN n <- ctxState =
        let Boundary _ e = commBound c !! n
         in {-# SCC next_comm #-}
            if e `C.isPrefixOf` txt
                then transState $ s{nextState = CodeState1, display = commentFilter f, skip = C.length e - 1}
                else s
    | LitrState1 n <- ctxState =
        if C.head txt == '\\'
            then s{display = displayContext ctxState f, skip = 1}
            else
                let Boundary _ e = litrBound c !! n
                 in {-# SCC next_liter #-}
                    if e `C.isPrefixOf` txt
                        then s{ctxState = CodeState1, nextState = CodeState1, display = codeFilter f, skip = C.length e - 1}
                        else s{ctxState = LitrStateN n, nextState = LitrStateN n, display = literalFilter f, skip = 0}
    | LitrStateN n <- ctxState =
        if C.head txt == '\\'
            then s{display = displayContext ctxState f, skip = 1}
            else
                let Boundary _ e = litrBound c !! n
                 in {-# SCC next_liter #-}
                    if e `C.isPrefixOf` txt
                        then s{ctxState = CodeState1, nextState = CodeState1, display = codeFilter f, skip = C.length e - 1}
                        else s
    | ChrState n <- ctxState =
        if C.head txt == '\\'
            then s{display = displayContext ctxState f, skip = 1}
            else
                let Boundary _ e = chrBound c !! n
                 in {-# SCC next_chr #-}
                    if e `C.isPrefixOf` txt
                        then s{ctxState = CodeState1, nextState = CodeState1, display = codeFilter f, skip = C.length e - 1}
                        else s{display = literalFilter f, skip = 0}
    | RawState n <- ctxState =
        -- Raw literals have no escape handling.
        let Boundary _ e = rawBound c !! n
         in {-# SCC next_raw #-}
            if e `C.isPrefixOf` txt
                then s{ctxState = CodeState1, nextState = CodeState1, display = codeFilter f, skip = C.length e - 1}
                else s{display = literalFilter f, skip = 0}

-- | Is the context of the given state selected by the filter?
displayContext :: ContextState -> ContextFilter -> Bool
displayContext CodeState1 cf = cf ~? contextBitCode
displayContext CodeStateN cf = cf ~? contextBitCode
displayContext (CommState1 _) cf = cf ~? contextBitComment
displayContext (CommStateN _) cf = cf ~? contextBitComment
displayContext (LitrState1 _) cf = cf ~? contextBitLiteral
displayContext (LitrStateN _) cf = cf ~? contextBitLiteral
displayContext (RawState _) cf = cf ~? contextBitLiteral
displayContext (ChrState _) cf = cf ~? contextBitLiteral
{-# INLINE displayContext #-}

-- | Install 'nextState' as the current state once 'skip' has reached zero;
-- leave the state untouched while characters remain to be skipped.
transState :: ParState -> ParState
transState s@ParState{..}
    | skip == 0 = s{ctxState = nextState}
    | otherwise = s
{-# INLINE transState #-}

-- | First delimiter (with its index) whose opening sequence is a prefix of
-- the text.
findPrefixBoundary :: Text8 -> [Boundary] -> (# Int, Maybe Boundary #)
findPrefixBoundary xs vb =
    {-# SCC findPrefixBoundary #-}
    findWithIndex (\(Boundary b _) -> b `C.isPrefixOf` xs) vb
{-# INLINE findPrefixBoundary #-}

-- | Like 'findPrefixBoundary', with a look-ahead used for character
-- literals: after dropping the first byte of the text, the closing sequence
-- must follow exactly one character, or one character preceded by a
-- backslash. Any other shape is reported as no match.
findPrefixBoundary' :: Text8 -> [Boundary] -> (# Int, Maybe Boundary #)
findPrefixBoundary' txt bs =
    case findWithIndex (\(Boundary beg _) -> beg `C.isPrefixOf` txt) bs of
        elm@(# idx, Just b@(Boundary _ end) #) -> case C.tail txt of
            (C.uncons -> Just (y, ys)) ->
                let skip = if y == '\\' then 1 else 0
                 in if end `C.isPrefixOf` C.drop skip ys then elm else (# 0, Nothing #)
            _ -> (# 0, Nothing #)
        _ -> (# 0, Nothing #)

--------------------------------------------------------------------------------
/src/CGrep/Distance.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-- See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module CGrep.Distance (distance, (~==)) where

-- from http://www.haskell.org/haskellwiki/Edit_distance
--

-- | Edit distance between two lists, computed lazily diagonal by diagonal.
--
-- The answer is the last cell of the diagonal selected by the difference of
-- the two lengths: the main diagonal when they are equal, a lower diagonal
-- when the first list is longer, an upper one otherwise.
distance :: (Eq a) => [a] -> [a] -> Int
distance a b = last chosenDiag
  where
    lenDiff = length a - length b

    chosenDiag
        | lenDiff == 0 = mainDiag
        | lenDiff > 0 = lowers !! (lenDiff - 1)
        | otherwise = uppers !! (negate lenDiff - 1)

    -- The diagonals are defined in terms of each other and are only
    -- evaluated as far as the selected diagonal requires.
    mainDiag = oneDiag a b (head uppers) (-1 : head lowers)
    uppers = eachDiag a b (mainDiag : uppers) -- upper diagonals
    lowers = eachDiag b a (mainDiag : lowers) -- lower diagonals

    eachDiag _ [] _ = []
    eachDiag xs (_ : ys) (prevDiag : diags) =
        oneDiag xs ys (head (tail diags)) prevDiag : eachDiag xs ys diags
    eachDiag _ _ [] = undefined -- the original implementation does not cover this case...

    oneDiag xs ys diagAbove diagBelow = thisDiag
      where
        start = 1 + head diagBelow
        thisDiag = start : walk xs ys start diagAbove (tail diagBelow)

        walk [] _ _ _ _ = []
        walk _ [] _ _ _ = []
        walk (x : xs') (y : ys') nw n w = cell : walk xs' ys' cell (tail n) (tail w)
          where
            cell
                | x == y = nw
                | otherwise = 1 + min3 (head w) nw (head n)

    -- The third argument is deliberately left unevaluated when the first is
    -- already smaller than the second.
    -- NOTE(review): presumably this relies on a property of neighbouring
    -- cells in the referenced wiki algorithm; confirm before replacing it
    -- with a plain three-way minimum.
    min3 x y z
        | x < y = x
        | otherwise = min y z

-- | Approximate string equality. With @shorter@ the length of the shorter
-- string: below five characters the strings match when their distance is
-- under 3, otherwise when it is under 40% of @shorter@ (integer division).
(~==) :: String -> String -> Bool
a ~== b
    | shorter < 5 = dist < 3
    | otherwise = dist < (shorter * 40 `div` 100)
  where
    shorter = min (length a) (length b)
    dist = distance a b
{-# INLINE (~==) #-}

--------------------------------------------------------------------------------
/src/CGrep/FileKind.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module CGrep.FileKind (
    FileKind (..),
) where

-- | Broad category of a file.
data FileKind = KindText | KindConfig | KindLanguage | KindData | KindMarkup | KindScript
    deriving stock (Eq, Ord, Enum, Bounded)

-- | Shows the constructor name without its @Kind@ prefix.
instance Show FileKind where
    show KindText = "Text"
    show KindConfig = "Config"
    show KindLanguage = "Language"
    show KindData = "Data"
    show KindMarkup = "Markup"
    show KindScript = "Script"

-- | Inverse of the 'Show' instance.
--
-- The input is tokenised with 'lex', so leading whitespace is skipped and
-- the unconsumed remainder is returned. The previous instance compared the
-- whole input string, which made 'read' fail on surrounding whitespace and
-- on lists such as @"[Text,Data]"@. The six exact names parse as before.
instance Read FileKind where
    readsPrec _ input =
        [ (kind, rest)
        | (token, rest) <- lex input
        , kind <- [minBound .. maxBound]
        , show kind == token
        ]

--------------------------------------------------------------------------------
/src/CGrep/FileType.hs:
--------------------------------------------------------------------------------
---
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module CGrep.FileType (
    FileType (..),
    FileSelector (..),
    readTypeList,
    readKindList,
)
where

import Control.Applicative (Alternative ((<|>)))
import Control.Monad (forM_)
import qualified Data.Map as Map
import Data.Maybe (fromJust)

import CGrep.FileKind (FileKind)
import qualified Data.ByteString.Char8 as C
import Options (Options (Options, type_force))
import System.Posix.FilePath (RawFilePath)
import Util (prettyRead)

-- | File types known to the tool. The derived 'Ord' and 'Bounded' instances
-- follow the declaration order below.
data FileType
    = Agda
    | Assembly
    | Awk
    | Bash
    | C
    | CMake
    | Cabal
    | Chapel
    | Clojure
    | Coffee
    | Conf
    | Cpp
    | Csh
    | Csharp
    | Css
    | Cql
    | D
    | Dart
    | Dhall
    | Elm
    | Elixir
    | Erlang
    | Fish
    | Fortran
    | Fsharp
    | Go
    | GoMod
    | Haskell
    | Html
    | Idris
    | Java
    | Javascript
    | Json
    | Julia
    | Kotlin
    | Ksh
    | Latex
    | Lisp
    | Lua
    | Make
    | Nim
    | Nmap
    | OCaml
    | ObjectiveC
    | PHP
    | Perl
    | Python
    | R
    | Ruby
    | Rust
    | Scala
    | SmallTalk
    | Swift
    | Sql
    | Tcl
    | Text
    | Unison
    | VHDL
    | Verilog
    | Yaml
    | Toml
    | Ini
    | Zig
    | Zsh
    deriving stock (Read, Show, Eq, Ord, Bounded)

-- | How a file is selected: by full name, by extension, or through the 'Hdr'
-- selector.
data FileSelector = Name RawFilePath | Ext C.ByteString | Hdr C.ByteString
    deriving stock (Eq, Ord)

-- | A 'Name' is shown verbatim; 'Ext' and 'Hdr' are both shown as @*.@
-- followed by their payload.
instance Show FileSelector where
    show selector = case selector of
        Name path -> C.unpack path
        Ext ext -> "*." <> C.unpack ext
        Hdr hdr -> "*." <> C.unpack hdr

-- utility functions

-- | Split a list of type names into three lists: plain names, names prefixed
-- with @+@ and names prefixed with @-@ (prefix removed). Every name is parsed
-- with 'prettyRead'. Each list is built by prepending, so it ends up in
-- reverse input order.
readTypeList :: [String] -> ([FileType], [FileType], [FileType])
readTypeList = foldl classify ([], [], [])
  where
    classify :: ([FileType], [FileType], [FileType]) -> String -> ([FileType], [FileType], [FileType])
    classify (plain, added, removed) entry = case entry of
        '+' : name -> (plain, parseType name : added, removed)
        '-' : name -> (plain, added, parseType name : removed)
        _ -> (parseType entry : plain, added, removed)

    parseType :: String -> FileType
    parseType name = prettyRead name "Type"

-- | Parse every name as a 'FileKind' with 'prettyRead'.
readKindList :: [String] -> [FileKind]
readKindList = map (\name -> prettyRead name "Kind")

--------------------------------------------------------------------------------
/src/CGrep/Output.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--
{-# LANGUAGE ExistentialQuantification #-}

module CGrep.Output (
    Output (..),
    mkOutputElements,
    putOutputElements,
    runSearch,
    showFileName,
    showBold,
) where

import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as B

import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Lazy.Char8 as LC
import qualified Data.ByteString.Unsafe as BU

import qualified Data.Vector.Unboxed as UV

import Data.Vector.Unboxed ((!))

import System.Console.ANSI (
    ConsoleIntensity (BoldIntensity),
    SGR (SetConsoleIntensity),
    setSGRCode,
 )

import Control.Monad.IO.Class (MonadIO (liftIO))
import Control.Monad.Trans.Reader (ask, reader)

import Data.Function (on)
import Data.List (
    foldl',
    genericLength,
    groupBy,
    intersperse,
    isPrefixOf,
    nub,
    sort,
    sortBy,
 )

import CGrep.Parser.Chunk (Chunk (..), MatchLine (..))
import CGrep.Types (Offset, Text8)

import Config (Config (configColorFile, configColorMatch))
import Data.ByteString.Internal (c2w)
import Data.Int (Int64)
import qualified Data.Vector.Fusion.Util as VU (Box (..))
import Data.Word (Word8)
import Reader (Env (..), ReaderIO)

import System.Posix.FilePath (RawFilePath)

import CGrep.Parser.Line (getLineOffsets)
import qualified Data.Vector.Generic as GV
import Options (
    Options (
        Options,
        color,
        count,
        filename_only,
        invert_match,
        json,
        no_color,
        no_column,
        no_filename,
        no_numbers,
        no_shallow,
        show_match
    ),
 )

-- | One reported line: the file, the 1-based line number, the text of the
-- line and the chunks matched on it.
data Output = Output
    { outFilePath :: RawFilePath
    , outLineNumb :: {-# UNPACK #-} !Int64
    , outLine :: {-# UNPACK #-} !Text8
    , outChunks :: ![Chunk]
    }

-- | Tokens of the chunks matched on the line.
outTokens :: Output -> [Text8]
outTokens (Output fp ln l cs) = cToken <$> cs
{-# INLINE outTokens #-}

-- | Binary search: number of elements of the (ascending) vector that are
-- less than or equal to @x@.
insertIndex :: UV.Vector Offset -> Offset -> Int
insertIndex vs x = search vs 0 (UV.length vs)
  where
    search xs !lo !hi
        | lo == hi = lo
        | otherwise =
            let !mid = (lo + hi) `quot` 2
             in if x < VU.unBox (xs `GV.basicUnsafeIndexM` mid)
                    then search xs lo mid
                    else search xs (mid + 1) hi

-- | Line number of an absolute offset and the offset relative to the start
-- of that line.
--
-- NOTE(review): when @x@ is smaller than the first entry (or the vector is
-- empty) the index is 0 and the unchecked read at @idx - 1@ is out of
-- bounds; presumably the offsets vector always starts at 0 - confirm.
getLineNumberAndOffset :: UV.Vector Offset -> Offset -> (# Int, Offset #)
getLineNumberAndOffset xs x =
    let idx = insertIndex xs x
     in (# idx, x - xs `UV.unsafeIndex` (idx - 1) #)
{-# INLINE getLineNumberAndOffset #-}

-- | Turn matched chunks into 'Output' records, one per line. With
-- @invert_match@ the records are produced for the lines that have no match.
mkOutputElements :: UV.Vector Int64 -> RawFilePath -> Text8 -> Text8 -> [Chunk] -> ReaderIO [Output]
mkOutputElements lineOffsets f text multi ts = do
    invert <- invert_match <$> reader opt
    return $
        if invert
            then map (\(MatchLine n xs) -> Output f n (ls !! fromIntegral (n - 1)) xs) . invertLines (length ls) $ mkMatchLines lineOffsets multi ts
            else map (\(MatchLine n xs) -> Output f n (ls !! fromIntegral (n - 1)) xs) $ mkMatchLines lineOffsets multi ts
  where
    ls = C.lines text
{-# INLINE mkOutputElements #-}

-- | Group chunks by the line they fall on. Chunk offsets are rewritten to be
-- relative to the start of their line, and lines come out in ascending order.
mkMatchLines :: UV.Vector Int64 -> Text8 -> [Chunk] -> [MatchLine]
mkMatchLines lineOffsets _ [] = []
mkMatchLines lineOffsets text ts =
    map mergeGroup $
        groupBy ((==) `on` lOffset) . sortBy (compare `on` lOffset) $
            (\chunk -> let (# r, c #) = getLineNumberAndOffset lineOffsets (cOffset chunk) in MatchLine (fromIntegral r) [Chunk (cTyp chunk) (cToken chunk) c]) <$> ts
  where
    mergeGroup :: [MatchLine] -> MatchLine
    mergeGroup ls = MatchLine ((lOffset . head) ls) (foldl' (\l m -> l <> lChunks m) [] ls)

-- | Lines @1..n@ that do not appear among the given match lines, each with an
-- empty chunk list.
invertLines :: Int -> [MatchLine] -> [MatchLine]
invertLines n xs = filter (\(MatchLine i _) -> i `notElem` idx) $ take n [MatchLine i [] | i <- [1 ..]]
  where
    idx = lOffset <$> xs
{-# INLINE invertLines #-}

-- | Render the records in the format selected by the options (JSON, file
-- names only, or the default). No records, no output.
putOutputElements :: [Output] -> ReaderIO (Maybe B.Builder)
putOutputElements [] = pure Nothing
putOutputElements out = do
    Env{..} <- ask
    if
        | json opt -> Just <$> jsonOutput out
        | filename_only opt -> Just <$> filenameOutput out
        | otherwise -> Just <$> defaultOutput out

-- | Run the search when the file is eligible or @no_shallow@ is set;
-- otherwise produce the (empty) result of a search with no matches.
runSearch ::
    Options ->
    RawFilePath ->
    Bool ->
    ReaderIO [Output] ->
    ReaderIO [Output]
runSearch opt filename eligible doSearch =
    if eligible || no_shallow opt
        then doSearch
        else mkOutputElements UV.empty filename C.empty C.empty ([] :: [Chunk])

-- | Default rendering: @file:line[:col]:text@, one record per line, with the
-- file name and/or the line numbers omitted according to @no_filename@ and
-- @no_numbers@. With @count@, one number per file (consecutive records of
-- the same file are counted), optionally prefixed by the file name.
defaultOutput :: [Output] -> ReaderIO B.Builder
defaultOutput xs = do
    Env{..} <- ask
    let joinLines = mconcat . intersperse (B.char8 '\n')
        colon = B.char8 ':'
        body out = buildTokens opt out <> buildLine conf opt out
        byFile = groupBy (\(Output f1 _ _ _) (Output f2 _ _ _) -> f1 == f2) xs
    if
        | Options{no_filename = False, no_numbers = False, count = False} <- opt ->
            pure . joinLines $ map (\out -> buildFileName conf opt out <> colon <> buildLineCol opt out <> colon <> body out) xs
        | Options{no_filename = False, no_numbers = True, count = False} <- opt ->
            pure . joinLines $ map (\out -> buildFileName conf opt out <> colon <> body out) xs
        | Options{no_filename = True, no_numbers = False, count = False} <- opt ->
            -- Line numbers were requested: this branch used to be a copy of
            -- the no-numbers branch below and silently dropped them.
            pure . joinLines $ map (\out -> buildLineCol opt out <> colon <> body out) xs
        | Options{no_filename = True, no_numbers = True, count = False} <- opt ->
            pure . joinLines $ map body xs
        | Options{no_filename = False, count = True} <- opt ->
            pure . joinLines $ (\ys@(y : _) -> buildFileName conf opt y <> colon <> B.intDec (length ys)) <$> byFile
        | Options{count = True} <- opt ->
            pure . joinLines $ (\ys -> B.intDec (length ys)) <$> byFile

-- | Escape a byte string for use inside a JSON string literal: the double
-- quote, the backslash and control characters below 0x20 are escaped, every
-- other byte is copied unchanged. Strings needing no escaping are emitted in
-- one piece.
jsonEscape :: C.ByteString -> B.Builder
jsonEscape str
    | C.all isPlain str = B.byteString str
    | otherwise = C.foldr (\c acc -> escape c <> acc) mempty str
  where
    isPlain c = c >= ' ' && c /= '"' && c /= '\\'
    escape '"' = B.string8 "\\\""
    escape '\\' = B.string8 "\\\\"
    escape '\n' = B.string8 "\\n"
    escape '\r' = B.string8 "\\r"
    escape '\t' = B.string8 "\\t"
    escape c
        | c < ' ' = B.string8 "\\u00" <> B.word8HexFixed (c2w c)
        | otherwise = B.char8 c

-- | JSON rendering: one object carrying the file name (taken from the first
-- record) and the list of matching rows with their tokens. File names and
-- tokens are escaped, so a quote or backslash in a match no longer produces
-- invalid JSON.
jsonOutput :: [Output] -> ReaderIO B.Builder
jsonOutput [] = pure mempty
jsonOutput outs@(Output fname _ _ _ : _) =
    pure $
        mconcat . intersperse (B.char8 '\n') $
            [ B.byteString "{ \"file\":\"" <> jsonEscape fname <> B.byteString "\", \"matches\":["
            , mconcat $ intersperse (B.char8 ',') (mkMatch <$> outs)
            , B.byteString "]}"
            ]
  where
    mkJToken chunk = B.byteString "{ \"col\":" <> B.int64Dec (cOffset chunk) <> B.byteString ", \"token\":\"" <> jsonEscape (cToken chunk) <> B.byteString "\" }"
    mkMatch (Output _ n _ ts) =
        B.byteString "{ \"row\": "
            <> B.int64Dec n
            <> B.byteString ", \"tokens\":["
            <> mconcat (intersperse (B.byteString ",") (map mkJToken ts))
            <> B.byteString "] }"

-- | Distinct file names of the records, one per line, in first-seen order.
filenameOutput :: [Output] -> ReaderIO B.Builder
filenameOutput outs = return $ mconcat . intersperse (B.char8 '\n') $ B.byteString <$> nub ((\(Output fname _ _ _) -> fname) <$> outs)
{-# INLINE filenameOutput #-}

-- SGR escape sequences for bold text and for resetting attributes.
bold, reset :: C.ByteString
bold = C.pack $ setSGRCode [SetConsoleIntensity BoldIntensity]
reset = C.pack $ setSGRCode []
{-# NOINLINE bold #-}
{-# NOINLINE reset #-}

boldBuilder, resetBuilder :: B.Builder
boldBuilder = B.byteString bold
resetBuilder = B.byteString reset
{-# NOINLINE boldBuilder #-}
{-# NOINLINE resetBuilder #-}

type ColorString = C.ByteString

-- | File name of a record, coloured with the configured file colour when
-- colouring is enabled.
buildFileName :: Config -> Options -> Output -> B.Builder
buildFileName conf opt = buildFileName' conf opt . outFilePath
  where
    buildFileName' :: Config -> Options -> B.ByteString -> B.Builder
    buildFileName' conf opt = buildColoredAs opt $ C.pack (setSGRCode (configColorFile conf))
{-# INLINE buildFileName #-}

-- | Wrap the string in the colour code and a reset when @color@ is set and
-- @no_color@ is not; otherwise emit it unchanged.
buildColoredAs :: Options -> ColorString -> B.ByteString -> B.Builder
buildColoredAs Options{color = c, no_color = c'} colorCode str
    | c && not c' = B.byteString colorCode <> B.byteString str <> resetBuilder
    | otherwise = B.byteString str
{-# INLINE buildColoredAs #-}

-- | @line@ or @line:column@. The column is the offset of the first chunk
-- plus one and is omitted with @no_column@ or when the record has no chunks.
-- Nothing is emitted with @no_numbers@.
buildLineCol :: Options -> Output -> B.Builder
buildLineCol Options{no_numbers = True} _ = mempty
buildLineCol Options{no_numbers = False, no_column = True} (Output _ n _ _) = B.int64Dec n
buildLineCol Options{no_numbers = False, no_column = False} (Output _ n _ []) = B.int64Dec n
buildLineCol Options{no_numbers = False, no_column = False} (Output _ n _ ts) = B.int64Dec n <> B.char8 ':' <> B.int64Dec ((+ 1) . cOffset . head $ ts)
{-# INLINE buildLineCol #-}

-- | With @show_match@: the matched tokens in bold followed by a colon.
buildTokens :: Options -> Output -> B.Builder
buildTokens Options{show_match = st} out
    | st = boldBuilder <> mconcat (B.byteString <$> outTokens out) <> resetBuilder <> B.char8 ':'
    | otherwise = mempty

-- | Text of the line, with the matched chunks highlighted when colouring is
-- enabled. Chunks are handed to the highlighter longest token first.
buildLine :: Config -> Options -> Output -> B.Builder
buildLine conf Options{color = c, no_color = c'} out
    | c && not c' = highlightLine conf (sortBy (flip compare `on` (C.length . cToken)) (outChunks out)) (outLine out)
    | otherwise = B.byteString $ outLine out
{-# INLINE buildLine #-}

showFileName :: Config -> Options -> RawFilePath -> RawFilePath
showFileName conf opt = showColoredAs opt $ C.pack (setSGRCode (configColorFile conf))
{-# INLINE showFileName #-}

showBold :: Options -> C.ByteString -> C.ByteString
showBold opt = showColoredAs opt bold
{-# INLINE showBold #-}

-- | Strict-bytestring counterpart of 'buildColoredAs'.
showColoredAs :: Options -> C.ByteString -> C.ByteString -> C.ByteString
showColoredAs Options{color = c, no_color = c'} colorCode str
    | c && not c' = colorCode <> str <> reset
    | otherwise = str
{-# INLINE showColoredAs #-}

-- | Emit the line with the match colour switched on over the chunk ranges.
--
-- The worker carries the inclusive (start, end) ranges, the current offset
-- and the number of ranges currently open. At a range boundary it emits one
-- character (with colour and/or reset as needed); elsewhere it copies
-- everything up to the next boundary in one piece.
highlightLine :: Config -> [Chunk] -> Text8 -> B.Builder
highlightLine conf ts = highlightLine' (highlightIndexes ts, 0, 0)
  where
    highlightLine' :: ([(Int64, Int64)], Int64, Int) -> C.ByteString -> B.Builder
    highlightLine' _ (C.uncons -> Nothing) = mempty
    highlightLine' (ns, !n, !bs) s@(C.uncons -> Just (x, _)) =
        ( if
            | check && bs' == 0 -> if fst stack > 0 then B.string8 colorMatch <> B.char8 x <> resetBuilder else B.char8 x <> resetBuilder
            | check && bs' > 0 -> B.string8 colorMatch <> B.char8 x
            | otherwise -> B.byteString next
        )
            <> highlightLine' (ns, n + nn, bs') rest
      where
        -- (ranges starting at n, ranges ending at n)
        stack = foldr (\(a, b) (c, d) -> (c + fromEnum (a == n), d + fromEnum (b == n))) (0, 0) ns
        check = fst stack > 0 || snd stack > 0
        colorMatch = setSGRCode (configColorMatch conf)
        bs' = bs + fst stack - snd stack
        -- all range boundaries, ascending and without duplicates
        plain = nub . sort $ foldr (\(a, b) acc -> a : b : acc) [] ns
        -- how many characters this step consumes
        nn
            | check = 1
            | null plain' = fromIntegral (C.length s)
            | otherwise = head plain' - n
          where
            plain' = dropWhile (<= n) plain
        (next, rest) = C.splitAt (fromIntegral nn) s
    highlightLine' _ _ = undefined

-- | Inclusive (first, last) offsets of every chunk with a non-empty token.
highlightIndexes :: [Chunk] -> [(Int64, Int64)]
highlightIndexes = foldr (\chunk a -> let b = cOffset chunk in (fromIntegral b, b + fromIntegral (C.length (cToken chunk)) - 1) : a) [] . filter (not . B.null . cToken)
{-# INLINE highlightIndexes #-}

--------------------------------------------------------------------------------
/src/CGrep/Parser/Atom.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
snd stack > 0 286 | colorMatch = setSGRCode (configColorMatch conf) 287 | bs' = bs + fst stack - snd stack 288 | plain = nub . sort $ foldr (\(a, b) acc -> a : b : acc) [] ns 289 | nn 290 | | check = 1 291 | | null plain' = fromIntegral (C.length s) 292 | | otherwise = head plain' - n 293 | where 294 | plain' = dropWhile (<= n) plain 295 | (next, rest) = C.splitAt (fromIntegral nn) s 296 | highlightLine' _ _ = undefined 297 | 298 | highlightIndexes :: [Chunk] -> [(Int64, Int64)] 299 | highlightIndexes = foldr (\chunk a -> let b = cOffset chunk in (fromIntegral b, b + fromIntegral (C.length (cToken chunk)) - 1) : a) [] . filter (not . B.null . cToken) 300 | {-# INLINE highlightIndexes #-} 301 | -------------------------------------------------------------------------------- /src/CGrep/Parser/Atom.hs: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2013-2023 Nicola Bonelli 3 | -- 4 | -- This program is free software; you can redistribute it and/or modify 5 | -- it under the terms of the GNU General Public License as published by 6 | -- the Free Software Foundation; either version 2 of the License, or 7 | -- (at your option) any later version. 8 | -- 9 | -- This program is distributed in the hope that it will be useful, 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | -- GNU General Public License for more details. 13 | -- 14 | -- You should have received a copy of the GNU General Public License 15 | -- along with this program; if not, write to the Free Software 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
17 | -- 18 |
module CGrep.Parser.Atom (
    Atom (..),
    Atoms,
    mkAtomFromToken,
    combineAtoms,
    filterTokensWithAtoms,
    wildCardMap,
    wildCardMatch,
    wildCardsMatch,
) where

import qualified Data.Map as M

import CGrep.Common (trim, trim8)
import CGrep.Distance ((~==))
import CGrep.Parser.Char (isDigit)

import Data.List (
    findIndices,
    isInfixOf,
    isPrefixOf,
    isSuffixOf,
    subsequences,
 )
import Options (
    Options (edit_dist, prefix_match, suffix_match, word_match),
 )
import Util (rmQuote8, spanGroup)

import qualified CGrep.Parser.Chunk as T
import qualified CGrep.Parser.Token as T
import qualified Data.ByteString.Char8 as C
import GHC.Stack (errorWithStackTrace)

-- | One element of a search pattern: a wildcard class, a placeholder
-- ('Identifier', e.g. @$@, @_@, @$1@) or a concrete token ('Raw').
data Atom
    = Any
    | Keyword
    | Number
    | Oct
    | Hex
    | String
    | Literal
    | Identifier C.ByteString
    | Raw T.Token
    deriving stock (Eq, Ord, Show)

-- | Alternatives for one pattern position: a token matches the position when
-- it matches any of the atoms (see 'wildCardsMatch').
type Atoms = [Atom]

-- | Reserved pattern words and the wildcard atom each one stands for.
-- NOTE(review): @LIT@ maps to 'String', so the 'Literal' constructor is never
-- produced by this table; both are matched identically by 'wildCardMatch'.
wildCardMap :: M.Map C.ByteString Atom
wildCardMap =
    M.fromList
        [ ("ANY", Any)
        , ("KEY", Keyword)
        , ("OCT", Oct)
        , ("HEX", Hex)
        , ("NUM", Number)
        , ("STR", String)
        , ("LIT", String)
        ]

-- | Turn a pattern token into an atom.
--
-- Identifier tokens become the wildcard named in 'wildCardMap', or an
-- 'Identifier' placeholder when 'isAtomIdentifier' accepts them, or else a
-- 'Raw' identifier with one leading @$@ / @_@ removed ('rmAtomEscape').
-- Every other token is wrapped in 'Raw' unchanged.
mkAtomFromToken :: T.Token -> Atom
mkAtomFromToken t
    | not (T.isTokenIdentifier t) = Raw t
    | Just wc <- M.lookup str wildCardMap = wc
    | isAtomIdentifier str = Identifier str
    | otherwise = Raw $ T.mkTokenIdentifier (rmAtomEscape str) (T.tOffset t)
  where
    str = T.tToken t

-- | Merge positions joined by a raw @OR@ token: @a OR b@ becomes the single
-- position @a <> b@. Chains (@a OR b OR c@) collapse into one position; an
-- @OR@ without a right-hand side is left in place.
combineAtoms :: [Atoms] -> [Atoms]
combineAtoms (m1 : r@(m2 : m3 : ms))
    | [Raw b] <- m2, T.tToken b == "OR" = combineAtoms $ (m1 <> m3) : ms
    | otherwise = m1 : combineAtoms r
combineAtoms ms = ms

{-# INLINE filterTokensWithAtoms #-}

-- | Return the tokens of every window of @ts@ that matches the pattern @ws@.
--
-- The pattern is expanded by 'spanOptionalCards' into its variants; for each
-- variant every window of the variant's length (as produced by 'spanGroup')
-- is tested with 'wildCardsCompare' and the tokens of the matching windows
-- are concatenated, variant after variant.
filterTokensWithAtoms :: Options -> [Atoms] -> [T.Token] -> [T.Token]
filterTokensWithAtoms opt ws ts = concatMap matchesOf (spanOptionalCards ws)
  where
    matchesOf :: [Atoms] -> [T.Token]
    matchesOf g =
        {-# SCC "atom_find_total" #-}
        concatMap (take grpLen . (`drop` ts)) hits
      where
        grpLen = length g
        grp = {-# SCC "atomSpanGroup" #-} spanGroup grpLen ts
        hits = {-# SCC "atom_find_indices" #-} findIndices (wildCardsCompare opt g) grp

-- | All variants of a pattern obtained by dropping any subset of its
-- optional positions, i.e. positions that consist of exactly one
-- 'Identifier' whose name starts with @$@.
spanOptionalCards :: [Atoms] -> [[Atoms]]
spanOptionalCards wc = map (`filterCardIndices` wc') idx
  where
    wc' = zip [0 ..] wc
    idx =
        subsequences $
            findIndices
                ( \case
                    [Identifier (C.uncons -> Just ('$', _))] -> True
                    _ -> False
                )
                wc

-- | Drop the positions whose index is listed in @ns@.
filterCardIndices :: [Int] -> [(Int, Atoms)] -> [Atoms]
filterCardIndices ns ps = [atoms | (n, atoms) <- ps, n `notElem` ns]
{-# INLINE filterCardIndices #-}

-- | A window matches when every position matches its token and every
-- numbered placeholder is bound consistently.
wildCardsCompare :: Options -> [Atoms] -> [T.Token] -> Bool
wildCardsCompare opt l r =
    wildCardsCompareAll ts && wildCardsCheckOccurrences ts
  where
    ts = wildCardsGroupCompare opt l r
{-# INLINE wildCardsCompare #-}

-- | Is this identifier a placeholder? True for a lone @$@ or @_@ and for
-- @$@ / @_@ immediately followed by a digit (only the first two characters
-- are inspected). Raises an error on the empty string.
isAtomIdentifier :: C.ByteString -> Bool
isAtomIdentifier s =
    if
        | Just (x, C.uncons -> Just (y, xs)) <- C.uncons s -> wprefix x && isDigit y
        | Just (x, "") <- C.uncons s -> wprefix x
        | otherwise -> errorWithoutStackTrace "CGrep: isAtomIdentifier"
  where
    wprefix x = x == '$' || x == '_'

-- | Remove one leading @$@ or @_@, if present.
rmAtomEscape :: C.ByteString -> C.ByteString
rmAtomEscape (C.uncons -> Just ('$', xs)) = xs
rmAtomEscape (C.uncons -> Just ('_', xs)) = xs
rmAtomEscape xs = xs
{-# INLINE rmAtomEscape #-}

-- | True when every position of the window matched.
wildCardsCompareAll :: [(Bool, (Atoms, [C.ByteString]))] -> Bool
wildCardsCompareAll = all fst
{-# INLINE wildCardsCompareAll #-}
{-# SCC wildCardsCompareAll #-}

-- Note: pattern $ and _ match any token, whereas $1 $2 (_1 _2 etc.) match tokens
-- that must compare equal in the respective occurrences

-- | True when each numbered placeholder (@$0@..@$9@, @_0@..@_9@) was bound
-- to the same token text at every position where it occurs.
wildCardsCheckOccurrences :: [(Bool, (Atoms, [C.ByteString]))] -> Bool
wildCardsCheckOccurrences ts = all allEqual numbered
  where
    -- token texts collected per position key, numbered placeholders only
    numbered = M.filterWithKey (\k _ -> isNumberedPlaceholder k) (M.fromListWith (<>) (map snd ts))
    allEqual [] = True
    allEqual (x : xs) = all (== x) xs
{-# INLINE wildCardsCheckOccurrences #-}
{-# SCC wildCardsCheckOccurrences #-}

-- | Is the position exactly one 'Identifier' named @$d@ or @_d@ with a
-- single decimal digit @d@?
isNumberedPlaceholder :: Atoms -> Bool
isNumberedPlaceholder [Identifier name]
    | Just (p, C.uncons -> Just (d, rest)) <- C.uncons name =
        (p == '$' || p == '_') && isDigit d && C.null rest
isNumberedPlaceholder _ = False
{-# INLINE isNumberedPlaceholder #-}

-- | Pair each pattern position with the token at the same index. A window
-- shorter than the pattern yields a single failed entry.
wildCardsGroupCompare :: Options -> [Atoms] -> [T.Token] -> [(Bool, (Atoms, [C.ByteString]))]
wildCardsGroupCompare opt ls rs
    | length rs >= length ls = zipWith (tokensZip opt) ls rs
    | otherwise = [(False, ([Any], []))]
{-# INLINE wildCardsGroupCompare #-}
{-# SCC wildCardsGroupCompare #-}

-- | Match one position against one token, recording the token text under
-- the position on success.
tokensZip :: Options -> Atoms -> T.Token -> (Bool, (Atoms, [C.ByteString]))
tokensZip opt l r
    | wildCardsMatch opt l r = (True, (l, [T.tToken r]))
    | otherwise = (False, ([Any], []))
{-# INLINE tokensZip #-}
{-# SCC tokensZip #-}

-- | Does the token match any of the alternatives of a position?
wildCardsMatch :: Options -> Atoms -> T.Token -> Bool
wildCardsMatch opt m t = any (\w -> wildCardMatch opt w t) m
{-# INLINE wildCardsMatch #-}
{-# SCC wildCardsMatch #-}

{-# SCC wildCardMatch #-}

-- | Does the token match a single atom?
--
-- 'Raw' identifiers and 'Raw' strings (quotes removed) are compared with
-- 'compareByMode'; any other 'Raw' token must be equal in text and type.
-- The wildcard atoms test the token class; 'Oct' requires a number starting
-- with @0@ and a digit, 'Hex' a number starting with @0x@ or @0X@.
wildCardMatch :: Options -> Atom -> T.Token -> Bool
wildCardMatch opt (Raw l) r
    | T.isTokenIdentifier l && T.isTokenIdentifier r =
        {-# SCC wildcard_raw_0 #-}
        compareByMode opt (T.tToken l) (T.tToken r)
    | T.isTokenString l && T.isTokenString r =
        {-# SCC wildcard_raw_1 #-}
        compareByMode opt (unquoted l) (unquoted r)
    | otherwise = {-# SCC wildcard_raw_2 #-} l `T.eqToken` r
  where
    unquoted = rmQuote8 . trim8 . T.tToken
wildCardMatch _ Any _ = {-# SCC wildcard_any #-} True
wildCardMatch _ (Identifier _) t = {-# SCC wildcard_identifier #-} T.isTokenIdentifier t
wildCardMatch _ Keyword t = {-# SCC wildcard_keyword #-} T.isTokenKeyword t
wildCardMatch _ String t = {-# SCC wildcard_string #-} T.isTokenString t
wildCardMatch _ Literal t = {-# SCC wildcard_lit #-} T.isTokenString t
wildCardMatch _ Number t = {-# SCC wildcard_number #-} T.isTokenNumber t
wildCardMatch _ Oct t =
    {-# SCC wildcard_octal #-}
    T.isTokenNumber t && case C.uncons (T.tToken t) of
        Just ('0', C.uncons -> Just (d, _)) -> isDigit d
        _ -> False
wildCardMatch _ Hex t =
    {-# SCC wildcard_hex #-}
    T.isTokenNumber t && case C.uncons (T.tToken t) of
        -- accept both spellings of the prefix: 0x.. and 0X..
        Just ('0', C.uncons -> Just (x, _)) -> x == 'x' || x == 'X'
        _ -> False

-- | Compare a pattern text with a token text using the first enabled mode:
-- whole word, prefix, suffix, edit distance ('~=='), else substring.
compareByMode :: Options -> C.ByteString -> C.ByteString -> Bool
compareByMode opt pat txt
    | word_match opt = pat == txt
    | prefix_match opt = pat `C.isPrefixOf` txt
    | suffix_match opt = pat `C.isSuffixOf` txt
    | edit_dist opt = C.unpack pat ~== C.unpack txt
    | otherwise = pat `C.isInfixOf` txt
{-# INLINE compareByMode #-}
-------------------------------------------------------------------------------- /src/CGrep/Parser/Char.hs:
-------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2013-2023 Nicola Bonelli 3 | -- 4 | -- This program is free software; you can redistribute it and/or modify 5 | -- it under the terms of the GNU General Public License as published by 6 | -- the Free Software Foundation; either version 2 of the License, or 7 | -- (at your option) any later version. 8 | -- 9 | -- This program is distributed in the hope that it will be useful, 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | -- GNU General Public License for more details. 13 | -- 14 | -- You should have received a copy of the GNU General Public License 15 | -- along with this program; if not, write to the Free Software 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 | -- 18 |
module CGrep.Parser.Char (
    chr,
    ord,
    isDigit,
    isSpace,
    isHexDigit,
    isCharNumber,
    isAlphaNum,
    isAlpha,
    isAlphaNum_,
    isAlpha_,
    isAlpha_',
    isAlphaNum_',
    isBracket',
    isPunctuation,
    isAlpha_and,
    isAlphaNum_and,
) where

import GHC.Base (chr#, int2Word#, isTrue#, leWord#)
import GHC.Exts (Char (C#), Int (I#), ord#)

-- | Code point of a character.
ord :: Char -> Int
ord (C# c#) = I# (ord# c#)
{-# INLINE ord #-}

-- | Character for a code point; raises an error outside @0..0x10FFFF@.
chr :: Int -> Char
chr i@(I# i#)
    | isTrue# (int2Word# i# `leWord#` 0x10FFFF##) = C# (chr# i#)
    | otherwise =
        errorWithoutStackTrace ("CGrep: chr bad argument: " <> show i)
{-# INLINE chr #-}

-- | ASCII decimal digit. The difference is reinterpreted as an unsigned
-- 'Word' so that a single comparison covers both ends of the range.
isDigit :: Char -> Bool
isDigit c = (fromIntegral (ord c - ord '0') :: Word) <= 9
{-# INLINE isDigit #-}

-- | Space, no-break space (0xA0) or one of the code points 9..13
-- (tab, line feed, vertical tab, form feed, carriage return).
--
-- The range test is done on 'Word', like 'isDigit': with signed arithmetic
-- @uc - 9 <= 4@ would also hold for the code points 0..8.
isSpace :: Char -> Bool
isSpace c = uc == 32 || uc == 0xa0 || (fromIntegral (uc - 0x9) :: Word) <= 4
  where
    uc = ord c
{-# INLINE isSpace #-}

-- | ASCII hexadecimal digit (@0-9@, @A-F@, @a-f@).
isHexDigit :: Char -> Bool
isHexDigit c =
    isDigit c
        || (fromIntegral (ord c - ord 'A') :: Word) <= 5
        || (fromIntegral (ord c - ord 'a') :: Word) <= 5
{-# INLINE isHexDigit #-}

-- | Character accepted inside a number: a hex digit, @.@, @x@ or @X@.
isCharNumber :: Char -> Bool
isCharNumber c = isHexDigit c || c `elem` (".xX" :: String)
{-# INLINE isCharNumber #-}

-- | ASCII letter or digit.
isAlphaNum :: Char -> Bool
isAlphaNum c = isAlpha c || between '0' '9' c
{-# INLINE isAlphaNum #-}

-- | ASCII letter.
isAlpha :: Char -> Bool
isAlpha c = between 'a' 'z' c || between 'A' 'Z' c
{-# INLINE isAlpha #-}

-- | ASCII letter, digit or underscore.
isAlphaNum_ :: Char -> Bool
isAlphaNum_ c = isAlphaNum c || c == '_'
{-# INLINE isAlphaNum_ #-}

-- | ASCII letter or underscore.
isAlpha_ :: Char -> Bool
isAlpha_ c = isAlpha c || c == '_'
{-# INLINE isAlpha_ #-}

-- | 'isAlpha_' or an apostrophe.
isAlpha_' :: Char -> Bool
isAlpha_' c = isAlpha_ c || c == '\''
{-# INLINE isAlpha_' #-}

-- | 'isAlphaNum_' or an apostrophe.
isAlphaNum_' :: Char -> Bool
isAlphaNum_' c = isAlphaNum_ c || c == '\''
{-# INLINE isAlphaNum_' #-}

-- | One of @[ ] { } ( )@.
isBracket' :: Char -> Bool
isBracket' c = c `elem` ("[]{}()" :: String)
{-# INLINE isBracket' #-}

-- | One of @: ; , .@
isPunctuation :: Char -> Bool
isPunctuation c = c `elem` (":;,." :: String)
{-# INLINE isPunctuation #-}

-- | 'isAlpha_' or any of the extra characters in @s@.
isAlpha_and :: String -> Char -> Bool
isAlpha_and s c = isAlpha_ c || c `elem` s
{-# INLINE isAlpha_and #-}

-- | 'isAlphaNum_' or any of the extra characters in @s@.
isAlphaNum_and :: String -> Char -> Bool
isAlphaNum_and s c = isAlphaNum_ c || c `elem` s
{-# INLINE isAlphaNum_and #-}

-- | Inclusive range test on characters (private helper).
between :: Char -> Char -> Char -> Bool
between lo hi c = c >= lo && c <= hi
{-# INLINE between #-}
-------------------------------------------------------------------------------- /src/CGrep/Parser/Chunk.hs: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2013-2023 Nicola Bonelli 3 | -- 4 | -- This program is free software; you can redistribute it and/or modify 5 | -- it under the terms of the GNU General Public License as published by 6 | -- the Free Software Foundation; either version 2 of the License, or 7 | -- (at your option) any later version. 8 | -- 9 | -- This program is distributed in the hope that it will be useful, 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | -- GNU General Public License for more details. 13 | -- 14 | -- You should have received a copy of the GNU General Public License 15 | -- along with this program; if not, write to the Free Software 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | -- 18 |
module CGrep.Parser.Chunk (
    parseChunks,
    Chunk (..),
    MatchLine (..),
    ChunkType,
    pattern ChunkIdentifier,
    pattern ChunkKeyword,
    pattern ChunkDigit,
    pattern ChunkBracket,
    pattern ChunkString,
    pattern ChunkNativeType,
    pattern ChunkOperator,
    pattern ChunkUnspec,
) where

import CGrep.Parser.Char (
    isAlphaNum_,
    isAlpha_,
    isBracket',
    isCharNumber,
    isDigit,
    isSpace,
 )

import CGrep.FileTypeMap (FileTypeInfo (..))
import CGrep.Types (Offset, Text8)
import Data.List (genericLength)

import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as LB

import qualified ByteString.StrictBuilder as B

import Data.MonoTraversable (MonoFoldable (oforM_))

import Control.Monad.ST (ST, runST)
import Data.STRef (STRef, newSTRef, readSTRef, writeSTRef)

import Data.Maybe (fromMaybe)
import Data.Sequence (Seq (Empty, (:<|), (:|>)), (|>))
import qualified Data.Sequence as S
import Data.Word (Word8)

-- | Classification of a chunk, stored as a single byte. The values are
-- named by the pattern synonyms below.
newtype ChunkType = ChunkType {unChunkType :: Word8}
    deriving newtype (Eq, Ord)

instance Show ChunkType where
    show ChunkUnspec = "*"
    show ChunkIdentifier = "identifier"
    show ChunkKeyword = "keyword"
    show ChunkDigit = "digit"
    show ChunkBracket = "bracket"
    show ChunkOperator = "operator"
    show ChunkString = "string"
    show ChunkNativeType = "native-type"
    {-# INLINE show #-}

pattern ChunkUnspec :: ChunkType
pattern ChunkUnspec = ChunkType 0

pattern ChunkIdentifier :: ChunkType
pattern ChunkIdentifier = ChunkType 1

pattern ChunkKeyword :: ChunkType
pattern ChunkKeyword = ChunkType 2

pattern ChunkDigit :: ChunkType
pattern ChunkDigit = ChunkType 3

pattern ChunkBracket :: ChunkType
pattern ChunkBracket = ChunkType 4

pattern ChunkOperator :: ChunkType
pattern ChunkOperator = ChunkType 5

pattern ChunkString :: ChunkType
pattern ChunkString = ChunkType 6

pattern ChunkNativeType :: ChunkType
pattern ChunkNativeType = ChunkType 7

{-# COMPLETE ChunkIdentifier, ChunkKeyword, ChunkDigit, ChunkBracket, ChunkOperator, ChunkString, ChunkNativeType, ChunkUnspec #-}

-- | A piece of text together with its type and its offset in the input.
data Chunk = Chunk
    { cTyp :: {-# UNPACK #-} !ChunkType
    , cToken :: {-# UNPACK #-} !Text8
    , cOffset :: {-# UNPACK #-} !Offset
    }
    deriving stock (Eq, Show, Ord)

-- | An offset paired with a list of chunks.
data MatchLine = MatchLine
    { lOffset :: {-# UNPACK #-} !Offset
    , lChunks :: [Chunk]
    }
    deriving stock (Eq, Show)

-- | State of the scanner in 'parseChunks': the class of the chunk that is
-- currently being accumulated.
newtype ChunkState = ChunkState {unChunkState :: Word8}
    deriving newtype (Eq, Ord)

instance Show ChunkState where
    show StateSpace = "space"
    show StateAlpha = "alpha"
    show StateDigit = "digit"
    show StateBracket = "bracket"
    show StateOther = "other"
    {-# INLINE show #-}

pattern StateSpace :: ChunkState
pattern StateSpace = ChunkState 0

pattern StateAlpha :: ChunkState
pattern StateAlpha = ChunkState 1

pattern StateDigit :: ChunkState
pattern StateDigit = ChunkState 2

pattern StateBracket :: ChunkState
pattern StateBracket = ChunkState 3

pattern StateOther :: ChunkState
pattern StateOther = ChunkState 4

{-# COMPLETE StateSpace, StateAlpha, StateDigit, StateBracket, StateOther #-}

-- | Strict write to an 'STRef': the value is forced before it is stored.
(<~) :: STRef s a -> a -> ST s ()
ref <~ !x = writeSTRef ref x
{-# INLINE (<~) #-}

{-# INLINE parseChunks #-}

-- | Split a text into chunks (all typed 'ChunkUnspec') with their offsets.
--
-- Identifier characters are taken from the file type's
-- 'ftIdentifierChars' when available and default to 'isAlpha_' /
-- 'isAlphaNum_' otherwise.
-- NOTE(review): this passes the file-type predicates to the scanner, which
-- assumes 'ftIdentifierChars' holds @Char -> Bool@ functions - confirm
-- against CGrep.FileTypeMap.
--
-- The scanner walks the bytes once, keeping the current state, the offset
-- and the bytes of the chunk in progress. A chunk is emitted whenever the
-- character class changes; brackets are always emitted one by one, and a
-- lone @.@ followed by a digit starts a number (e.g. @.5@).
parseChunks :: Maybe FileTypeInfo -> Text8 -> S.Seq Chunk
parseChunks l t = runST $ case l >>= \FileTypeInfo{..} -> ftIdentifierChars of
    Just (isAlpha1, isAlphaN) -> parseChunks' isAlpha1 isAlphaN t
    _ -> parseChunks' isAlpha_ isAlphaNum_ t
  where
    parseChunks' :: (Char -> Bool) -> (Char -> Bool) -> C.ByteString -> ST s (S.Seq Chunk)
    parseChunks' isAlpha1 isAlphaN txt = do
        stateR <- newSTRef StateSpace
        offR <- newSTRef 0
        accR <- newSTRef (mempty :: B.Builder)
        tokensR <- newSTRef S.empty
        oforM_ txt $ \w -> do
            let x = BI.w2c w
            state <- readSTRef stateR
            off <- readSTRef offR
            acc <- readSTRef accR
            tokens <- readSTRef tokensR
            let
                -- emit the chunk accumulated so far (it ends just before off)
                flush = tokensR <~ (tokens |> toChunk off acc)
                -- whitespace: no chunk in progress
                skip = do stateR <~ StateSpace; accR <~ mempty
                -- begin a new chunk with the current character
                start st = do stateR <~ st; accR <~ B.asciiChar x
                -- append the current character to the chunk in progress
                extend st = do stateR <~ st; accR <~ (acc <> B.asciiChar x)
            case state of
                StateSpace
                    | isSpace x -> skip
                    | isAlpha1 x -> start StateAlpha
                    | isDigit x -> start StateDigit
                    | isBracket' x -> start StateBracket
                    | otherwise -> start StateOther
                StateAlpha
                    | isAlphaN x -> extend StateAlpha
                    | isSpace x -> flush >> skip
                    | isBracket' x -> flush >> start StateBracket
                    | otherwise -> flush >> start StateOther
                StateDigit
                    | isCharNumber x -> extend StateDigit
                    | isSpace x -> flush >> skip
                    | isAlpha1 x -> flush >> start StateAlpha
                    | isBracket' x -> flush >> start StateBracket
                    | otherwise -> flush >> start StateOther
                StateBracket
                    | isSpace x -> flush >> skip
                    | isAlpha1 x -> flush >> start StateAlpha
                    | isDigit x -> flush >> start StateDigit
                    | isBracket' x -> flush >> start StateBracket
                    | otherwise -> flush >> start StateOther
                StateOther
                    | isSpace x -> flush >> skip
                    | isAlpha1 x -> flush >> start StateAlpha
                    | isDigit x ->
                        -- a lone "." directly before a digit belongs to the number
                        if B.builderBytes acc == "."
                            then extend StateDigit
                            else flush >> start StateDigit
                    | isBracket' x -> flush >> start StateBracket
                    | otherwise -> extend StateOther
            offR <~ (off + 1)

        lastAcc <- readSTRef accR
        tokens <- readSTRef tokensR

        -- emit the chunk still in progress at end of input, if any
        if B.builderLength lastAcc == 0
            then return tokens
            else do
                off <- readSTRef offR
                return $ tokens |> toChunk off lastAcc

-- | Build an untyped chunk from accumulated bytes. @off@ is the offset just
-- past the chunk, so the chunk starts at @off@ minus its length.
toChunk :: Offset -> B.Builder -> Chunk
toChunk off b = Chunk ChunkUnspec str (off - fromIntegral (B.builderLength b))
  where
    str = B.builderBytes b
{-# INLINE toChunk #-}
-------------------------------------------------------------------------------- /src/CGrep/Parser/Line.hs: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2013-2023 Nicola Bonelli 3 | -- 4 | -- This program is free software; you can redistribute it and/or modify 5 | -- it under the terms of the GNU General Public License
as published by 6 | -- the Free Software Foundation; either version 2 of the License, or 7 | -- (at your option) any later version. 8 | -- 9 | -- This program is distributed in the hope that it will be useful, 10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | -- GNU General Public License for more details. 13 | -- 14 | -- You should have received a copy of the GNU General Public License 15 | -- along with this program; if not, write to the Free Software 16 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 | -- 18 |
module CGrep.Parser.Line (
    getLineOffsets,
    getAllLineOffsets,
    getLineByOffset,
    lowerBound,
) where

import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Lazy.Char8 as LC

import CGrep.Types (LText8, Offset, Text8)
import Data.ByteString.Internal (c2w)
import qualified Data.ByteString.Unsafe as BU
import Data.Int (Int64)
import Data.Vector.Unboxed ((!))
import qualified Data.Vector.Unboxed as UV

-- | Offsets of every occurrence of @c@ among the first @maxOff@ bytes of
-- @bs@. The scan never goes past the end of the byte string.
charOffsets :: Char -> Int64 -> C.ByteString -> UV.Vector Int64
charOffsets c maxOff bs = UV.unfoldrN (fromIntegral limit) next 0
  where
    -- clamp so that the unchecked index below stays inside the buffer
    limit = max 0 (min maxOff (fromIntegral (C.length bs)))
    next :: Int64 -> Maybe (Int64, Int64)
    next i
        | i >= limit = Nothing
        | BU.unsafeIndex bs (fromIntegral i) == c2w c = Just (i, i + 1)
        | otherwise = next (i + 1)

-- | Start offsets of the lines of @text@, found by scanning at most the
-- first @maxOff@ bytes: @0@ followed by the offset after each newline.
--
-- When the whole text was scanned and it ends with a newline, the last
-- offset equals the text length and does not start a line, so it is
-- dropped. For an empty text the result is therefore empty.
getLineOffsets :: Int64 -> Text8 -> UV.Vector Offset
getLineOffsets maxOff text
    | UV.null idx = idx
    | UV.last idx == fromIntegral len = UV.init idx
    | otherwise = idx
  where
    len = C.length text
    -- clamp so that the unchecked index in 'findOffsets' stays in bounds
    limit = max 0 (min (fromIntegral maxOff) len)
    idx = nlOffsets limit text

{-# INLINE nlOffsets #-}

-- | @0@ followed by the offset after each newline found among the first
-- @limit@ bytes. At most @limit + 1@ entries can exist: the initial @0@
-- plus one per scanned byte.
nlOffsets :: Int -> Text8 -> UV.Vector Int64
nlOffsets limit bs = UV.unfoldrN (limit + 1) (findOffsets limit bs) (-1)

-- | Unfold step of 'nlOffsets'. The seed @-1@ yields the first line start
-- (@0@); any other seed is the position at which to resume looking for the
-- next newline. @limit@ must not exceed the length of the text.
findOffsets :: Int -> Text8 -> Int -> Maybe (Int64, Int)
findOffsets limit ts !i
    | i == -1 = Just (0, 0)
    | i >= limit = Nothing
    | BU.unsafeIndex ts i == c2w '\n' = Just (fromIntegral i + 1, i + 1)
    | otherwise = findOffsets limit ts (i + 1)

-- | Start offsets of all lines of the text.
getAllLineOffsets :: Text8 -> UV.Vector Offset
getAllLineOffsets ts = getLineOffsets (fromIntegral $ C.length ts) ts
{-# INLINE getAllLineOffsets #-}

-- | Binary search in a sorted vector for the greatest element that is less
-- than or equal to @v@. Returns @-1@ when there is none (empty vector, or
-- @v@ smaller than the first element).
lowerBound :: UV.Vector Int64 -> Int64 -> Int64
lowerBound vec v = lowerBoundGo vec v 0 (UV.length vec - 1)

-- | Worker of 'lowerBound' on the inclusive index range @left..right@.
lowerBoundGo :: UV.Vector Int64 -> Int64 -> Int -> Int -> Int64
lowerBoundGo vec v !left !right
    -- range exhausted: right is the last index whose value is below v
    | left > right = if right >= 0 then vec `UV.unsafeIndex` right else -1
    | otherwise = case v `compare` midValue of
        LT -> lowerBoundGo vec v left (mid - 1)
        EQ -> midValue
        _ -> lowerBoundGo vec v (mid + 1) right
  where
    mid = (left + right) `div` 2
    midValue = vec `UV.unsafeIndex` mid

-- | The line containing offset @off@ (without its newline) together with
-- the start offset of that line, looked up in the line-offset vector with
-- 'lowerBound'. An empty remainder yields an empty line.
getLineByOffset :: Offset -> Text8 -> UV.Vector Int64 -> (# Text8, Offset #)
getLineByOffset off text vec = (# C.takeWhile (/= '\n') (C.drop (fromIntegral lb) text), lb #)
  where
    lb = lowerBound vec off
{-# INLINE getLineByOffset #-}
-------------------------------------------------------------------------------- /src/CGrep/Parser/Token.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DuplicateRecordFields #-} 2 | -- 3 | -- Copyright (c) 2013-2023 Nicola Bonelli 4 | -- 5 | -- This program is free software; you can redistribute it and/or modify 6 | -- it under the terms of the GNU General Public License as published by 7 | -- the Free Software Foundation; either version 2 of the License, or 8 | -- (at your option) any later version. 9 | -- 10 | -- This program is distributed in the hope that it will be useful, 11 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | -- GNU General Public License for more details. 14 | -- 15 | -- You should have received a copy of the GNU General Public License 16 | -- along with this program; if not, write to the Free Software 17 | -- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | --
{-# LANGUAGE OverloadedRecordDot #-}

module CGrep.Parser.Token (
    parseTokens,
    filterToken,
    Token (..),
    TokenFilter (..),
    mkTokenFilter,
    eqToken,
    isTokenIdentifier,
    isTokenKeyword,
    isTokenNumber,
    isTokenBracket,
    isTokenString,
    isTokenOperator,
    isTokenUnspecified,
    tTyp,
    tToken,
    tOffset,
    mkTokenIdentifier,
    mkTokenKeyword,
    mkTokenDigit,
    mkTokenBracket,
    mkTokenString,
    mkTokenOperator,
) where

import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as LB
import qualified Data.DList as DL

import CGrep.Parser.Char (
    chr,
    isAlphaNum_,
    isAlpha_,
    isBracket',
    isCharNumber,
    isDigit,
    isPunctuation,
    isSpace,
 )

import CGrep.Types (Offset, Text8)
import Data.List (genericLength)

import CGrep.FileTypeMap (
    CharIdentifierF,
    FileTypeInfo (ftIdentifierChars, ftKeywords),
    WordType (..),
 )

import qualified Data.HashMap.Strict as HM
import Data.Sequence (Seq (Empty, (:<|), (:|>)), (|>))
import qualified Data.Sequence as S

import Control.Monad.ST (ST, runST)
import Data.MonoTraversable (MonoFoldable (oforM_))
import Data.STRef (STRef, modifySTRef, modifySTRef', newSTRef, readSTRef, writeSTRef)
import Data.Word (Word8)

import qualified ByteString.StrictBuilder as B
import CGrep.Parser.Chunk

import Data.Coerce (coerce)
import Data.Text.Internal.Read (T)
import GHC.Exts (inline)

-- | State of the tokenizer, stored as an 'Int' and named by the pattern
-- synonyms below.
newtype TokenState = TokenState {unTokenState :: Int}
    deriving newtype (Eq)

instance Show TokenState where
    show StateSpace = "space"
    show StateIdentifier = "identifier"
    show StateDigit = "digit"
    show StateBracket = "bracket"
    show StateLiteral = "literal"
    show StateOther = "other"
    {-# INLINE show #-}

pattern StateSpace :: TokenState
pattern StateSpace = TokenState 0

pattern StateIdentifier :: TokenState
pattern StateIdentifier = TokenState 1

pattern StateDigit :: TokenState
pattern StateDigit = TokenState 2

pattern StateBracket :: TokenState
pattern StateBracket = TokenState 3

pattern StateLiteral :: TokenState
pattern StateLiteral = TokenState 4

pattern StateOther :: TokenState
pattern StateOther = TokenState 5

-- The six synonyms above are the only states used in this part of the
-- module; declaring them complete lets the matches on them ('show',
-- 'mkToken') be checked as exhaustive.
{-# COMPLETE StateSpace, StateIdentifier, StateDigit, StateBracket, StateLiteral, StateOther #-}

-- | A 'Chunk' whose type has been determined by the tokenizer.
newtype Token = Token Chunk
    deriving newtype (Eq, Ord)

instance Show Token where
    show (Token (Chunk typ bs off)) = "(" ++ show typ ++ " " ++ C.unpack bs ++ " @" ++ show off ++ ")"
    {-# INLINE show #-}

-- | Equality on text and type only; the offsets are not compared.
eqToken :: Token -> Token -> Bool
eqToken a b = tToken a == tToken b && tTyp a == tTyp b
{-# INLINE eqToken #-}

-- | Build an identifier token from its text and offset.
mkTokenIdentifier :: C.ByteString -> Offset -> Token
mkTokenIdentifier bs off = Token $ Chunk ChunkIdentifier bs off
{-# INLINE mkTokenIdentifier #-}

-- | Build a keyword token from its text and offset.
mkTokenKeyword :: C.ByteString -> Offset -> Token
mkTokenKeyword bs off = Token $ Chunk ChunkKeyword bs off
{-# INLINE mkTokenKeyword #-}

-- | Build a number token from its text and offset.
mkTokenDigit :: C.ByteString -> Offset -> Token
mkTokenDigit bs off = Token $ Chunk ChunkDigit bs off
{-# INLINE mkTokenDigit #-}

-- | Build a bracket token from its text and offset.
mkTokenBracket :: C.ByteString -> Offset -> Token
mkTokenBracket bs off = Token $ Chunk ChunkBracket bs off
{-# INLINE mkTokenBracket #-}

-- | Build an operator token from its text and offset.
mkTokenOperator :: C.ByteString -> Offset -> Token
mkTokenOperator bs off = Token $ Chunk ChunkOperator bs off
{-# INLINE mkTokenOperator #-}

-- | Build a string token from its text and offset.
mkTokenString :: C.ByteString -> Offset -> Token
mkTokenString bs off = Token $ Chunk ChunkString bs off
{-# INLINE mkTokenString #-}

-- | Build a native-type token from its text and offset.
mkTokenNativeType :: C.ByteString -> Offset -> Token
mkTokenNativeType bs off = Token $ Chunk ChunkNativeType bs off
{-# INLINE mkTokenNativeType #-}

-- | Classify a word using the keyword table of the file type: keywords and
-- native types get their own token type, every other word (and every word
-- when no file type is known) is an identifier.
mkTokenFromWord :: Maybe FileTypeInfo -> C.ByteString -> Offset -> Token
mkTokenFromWord Nothing txt off = mkTokenIdentifier txt off
mkTokenFromWord (Just info) txt off =
    case HM.lookup txt (ftKeywords info) of
        Just Keyword -> mkTokenKeyword txt off
        Just NativeType -> mkTokenNativeType txt off
        _ -> mkTokenIdentifier txt off
{-# INLINEABLE mkTokenFromWord #-}

-- | Token constructor for the text accumulated in the given tokenizer state.
mkToken :: Maybe FileTypeInfo -> TokenState -> C.ByteString -> Offset -> Token
mkToken _ StateSpace = mkTokenOperator
mkToken info StateIdentifier = mkTokenFromWord info
mkToken _ StateDigit = mkTokenDigit
mkToken _ StateBracket = mkTokenBracket
mkToken _ StateLiteral = mkTokenString
mkToken _ StateOther = mkTokenOperator

-- | Type of a token.
tTyp :: Token -> ChunkType
tTyp = cTyp . coerce
{-# INLINE tTyp #-}

-- | Offset of a token.
tOffset :: Token -> Offset
tOffset t = cOffset (coerce t :: Chunk)
{-# INLINE tOffset #-}

-- | Text of a token.
tToken :: Token -> Text8
tToken t = cToken (coerce t :: Chunk)
{-# INLINE tToken #-}

-- | Is the token an identifier?
isTokenIdentifier :: Token -> Bool
isTokenIdentifier t = cTyp (coerce t) == ChunkIdentifier
{-# INLINE isTokenIdentifier #-}

-- | Is the token a keyword?
isTokenKeyword :: Token -> Bool
isTokenKeyword t = cTyp (coerce t) == ChunkKeyword
{-# INLINE isTokenKeyword #-}

-- | Is the token a number?
isTokenNumber :: Token -> Bool
isTokenNumber t = cTyp (coerce t) == ChunkDigit
{-# INLINE isTokenNumber #-}

-- | Is the token a bracket?
isTokenBracket :: Token -> Bool
isTokenBracket t = cTyp (coerce t) == ChunkBracket
{-# INLINE isTokenBracket #-}

-- | Is the token an operator?
isTokenOperator :: Token -> Bool
isTokenOperator t = cTyp (coerce t) == ChunkOperator
{-# INLINE isTokenOperator #-}

208 | isTokenString :: Token -> Bool 209 | isTokenString
t = cTyp (coerce t) == ChunkString 210 | {-# INLINE isTokenString #-} 211 | 212 | isTokenNativeType :: Token -> Bool 213 | isTokenNativeType t = cTyp (coerce t) == ChunkNativeType 214 | {-# INLINE isTokenNativeType #-} 215 | 216 | isTokenUnspecified :: Token -> Bool 217 | isTokenUnspecified t = cTyp (coerce t) == ChunkUnspec 218 | {-# INLINE isTokenUnspecified #-} 219 | 220 | data TokenFilter = TokenFilter 221 | { tfIdentifier :: !Bool 222 | , tfKeyword :: !Bool 223 | , tfNativeType :: !Bool 224 | , tfString :: !Bool 225 | , tfNumber :: !Bool 226 | , tfOperator :: !Bool 227 | , tfBracket :: !Bool 228 | } 229 | deriving stock (Eq, Show) 230 | 231 | filterToken :: TokenFilter -> Token -> Bool 232 | filterToken f t = case cTyp (coerce t :: Chunk) of 233 | ChunkIdentifier -> tfIdentifier f 234 | ChunkKeyword -> tfKeyword f 235 | ChunkDigit -> tfNumber f 236 | ChunkOperator -> tfOperator f 237 | ChunkString -> tfString f 238 | ChunkNativeType -> tfNativeType f 239 | ChunkBracket -> tfBracket f 240 | ChunkUnspec -> False 241 | 242 | mkTokenFilter :: (Traversable t) => t ChunkType -> TokenFilter 243 | mkTokenFilter = foldr go (TokenFilter False False False False False False False) 244 | where 245 | go ChunkIdentifier f = f{tfIdentifier = True} 246 | go ChunkKeyword f = f{tfKeyword = True} 247 | go ChunkNativeType f = f{tfNativeType = True} 248 | go ChunkDigit f = f{tfNumber = True} 249 | go ChunkOperator f = f{tfOperator = True} 250 | go ChunkString f = f{tfString = True} 251 | go ChunkBracket f = f{tfBracket = True} 252 | go ChunkUnspec f = f 253 | 254 | (<~) :: STRef s a -> a -> ST s () 255 | ref <~ !x = writeSTRef ref x 256 | {-# INLINE (<~) #-} 257 | 258 | data TokenIdx = TokenIdx 259 | { offset :: {-# UNPACK #-} !Int 260 | , len :: {-# UNPACK #-} !Int 261 | } 262 | 263 | tkString :: TokenIdx -> C.ByteString -> C.ByteString 264 | tkString (TokenIdx off len) = C.take len . 
C.drop off 265 | {-# INLINE tkString #-} 266 | 267 | data AccOp = Reset | Start {-# UNPACK #-} !Int | Append {-# UNPACK #-} !Int 268 | 269 | (<<~) :: STRef s TokenIdx -> AccOp -> ST s () 270 | ref <<~ Reset = writeSTRef ref (TokenIdx (-1) 0) 271 | ref <<~ Start cur = writeSTRef ref (TokenIdx cur 1) 272 | ref <<~ Append cur = modifySTRef' ref $ \case 273 | TokenIdx (-1) 0 -> TokenIdx cur 1 274 | TokenIdx off len -> TokenIdx off (len + 1) 275 | {-# INLINE (<<~) #-} 276 | 277 | {-# INLINE parseTokens #-} 278 | parseTokens :: TokenFilter -> Maybe FileTypeInfo -> C.ByteString -> S.Seq Token 279 | parseTokens f@TokenFilter{..} l t = 280 | runST 281 | ( case l >>= ftIdentifierChars of 282 | Nothing -> parseToken' isAlpha_ isAlphaNum_ l t 283 | Just (isAlpha1, isAlphaN) -> parseToken' isAlpha1 isAlphaN l t 284 | ) 285 | where 286 | parseToken' :: CharIdentifierF -> CharIdentifierF -> Maybe FileTypeInfo -> C.ByteString -> ST a (S.Seq Token) 287 | parseToken' isAlpha1 isAlphaN info txt = do 288 | stateR <- newSTRef StateSpace 289 | accR <- newSTRef (TokenIdx (-1) (-1)) 290 | tokensR <- newSTRef S.empty 291 | curR <- newSTRef 0 292 | 293 | oforM_ txt $ \w -> do 294 | let x = BI.w2c w 295 | cur <- readSTRef curR 296 | state <- readSTRef stateR 297 | 298 | case state of 299 | StateSpace -> 300 | {-# SCC "StateSpace" #-} 301 | if 302 | | isSpace x -> do accR <<~ Reset 303 | | inline isAlpha1 x -> do stateR <~ StateIdentifier; accR <<~ Start cur 304 | | x == chr 2 -> do stateR <~ StateLiteral; accR <<~ Reset 305 | | isDigit x -> do stateR <~ StateDigit; accR <<~ Start cur 306 | | isBracket' x -> do stateR <~ StateBracket; accR <<~ Start cur 307 | | otherwise -> do stateR <~ StateOther; accR <<~ Start cur 308 | StateIdentifier -> 309 | {-# SCC "StateIdentifier" #-} 310 | if isAlphaN x 311 | then accR <<~ Append cur 312 | else do 313 | acc <- readSTRef accR 314 | tokens <- readSTRef tokensR 315 | if 316 | | isSpace x -> do stateR <~ StateSpace; accR <<~ Reset; tokensR <~ (tokens |> 
buildToken_ tfIdentifier tfKeyword tfNativeType (mkTokenFromWord info) acc txt) 317 | | x == chr 2 -> do stateR <~ StateLiteral; accR <<~ Reset; tokensR <~ (tokens |> buildToken_ tfIdentifier tfKeyword tfNativeType (mkTokenFromWord info) acc txt) 318 | | isBracket' x -> do stateR <~ StateBracket; accR <<~ Start cur; tokensR <~ (tokens |> buildToken_ tfIdentifier tfKeyword tfNativeType (mkTokenFromWord info) acc txt) 319 | | otherwise -> do stateR <~ StateOther; accR <<~ Start cur; tokensR <~ (tokens |> buildToken_ tfIdentifier tfKeyword tfNativeType (mkTokenFromWord info) acc txt) 320 | StateDigit -> 321 | {-# SCC "StateDigit" #-} 322 | if isCharNumber x 323 | then accR <<~ Append cur 324 | else do 325 | acc <- readSTRef accR 326 | tokens <- readSTRef tokensR 327 | if 328 | | isSpace x -> do stateR <~ StateSpace; accR <<~ Reset; tokensR <~ (tokens |> buildToken tfNumber mkTokenDigit acc txt) 329 | | x == chr 2 -> do stateR <~ StateLiteral; accR <<~ Reset; tokensR <~ (tokens |> buildToken tfNumber mkTokenDigit acc txt) 330 | | inline isAlpha1 x -> do stateR <~ StateIdentifier; accR <<~ Start cur; tokensR <~ (tokens |> buildToken tfNumber mkTokenDigit acc txt) 331 | | isBracket' x -> do stateR <~ StateBracket; accR <<~ Start cur; tokensR <~ (tokens |> buildToken tfNumber mkTokenDigit acc txt) 332 | | otherwise -> do stateR <~ StateOther; accR <<~ Start cur; tokensR <~ (tokens |> buildToken tfNumber mkTokenDigit acc txt) 333 | StateLiteral -> 334 | {-# SCC "StateLiteral" #-} 335 | if x == chr 3 336 | then do 337 | acc <- readSTRef accR 338 | tokens <- readSTRef tokensR 339 | stateR <~ StateSpace 340 | accR <<~ Reset 341 | tokensR <~ (tokens |> buildToken tfString mkTokenString acc txt) 342 | else do accR <<~ Append cur 343 | StateBracket -> 344 | {-# SCC "StateBracket" #-} 345 | do 346 | acc <- readSTRef accR 347 | tokens <- readSTRef tokensR 348 | if 349 | | isSpace x -> do stateR <~ StateSpace; accR <<~ Reset; tokensR <~ (tokens |> buildToken tfBracket 
mkTokenBracket acc txt) 350 | | inline isAlpha1 x -> do stateR <~ StateIdentifier; accR <<~ Start cur; tokensR <~ (tokens |> buildToken tfBracket mkTokenBracket acc txt) 351 | | isDigit x -> do stateR <~ StateDigit; accR <<~ Start cur; tokensR <~ (tokens |> buildToken tfBracket mkTokenBracket acc txt) 352 | | isBracket' x -> do accR <<~ Start cur; tokensR <~ (tokens |> buildToken tfBracket mkTokenBracket acc txt) 353 | | x == chr 2 -> do stateR <~ StateLiteral; accR <<~ Reset; tokensR <~ (tokens |> buildToken tfBracket mkTokenBracket acc txt) 354 | | otherwise -> do stateR <~ StateOther; accR <<~ Start cur; tokensR <~ (tokens |> buildToken tfBracket mkTokenBracket acc txt) 355 | StateOther -> 356 | {-# SCC "StateOther" #-} 357 | do 358 | acc <- readSTRef accR 359 | tokens <- readSTRef tokensR 360 | if 361 | | isSpace x -> do stateR <~ StateSpace; accR <<~ Reset; tokensR <~ (tokens |> buildToken tfOperator mkTokenOperator acc txt) 362 | | inline isAlpha1 x -> do stateR <~ StateIdentifier; accR <<~ Start cur; tokensR <~ (tokens |> buildToken tfOperator mkTokenOperator acc txt) 363 | | isDigit x -> 364 | if tkString acc txt == "." 
365 | then do stateR <~ StateDigit; accR <<~ Append cur 366 | else do stateR <~ StateDigit; accR <<~ Start cur; tokensR <~ (tokens |> buildToken tfOperator mkTokenOperator acc txt) 367 | | isBracket' x -> do stateR <~ StateBracket; accR <<~ Append cur; tokensR <~ (tokens |> buildToken tfOperator mkTokenOperator acc txt) 368 | | x == chr 2 -> do stateR <~ StateLiteral; accR <<~ Reset; tokensR <~ (tokens |> buildToken tfBracket mkTokenBracket acc txt) 369 | | isPunctuation x -> do accR <<~ Start cur; tokensR <~ (tokens |> buildToken tfOperator mkTokenOperator acc txt) 370 | | otherwise -> do accR <<~ Append cur 371 | 372 | curR <~ (cur + 1) 373 | 374 | lastAcc <- readSTRef accR 375 | tokens <- readSTRef tokensR 376 | 377 | if lastAcc.len == 0 378 | then return tokens 379 | else do 380 | state <- readSTRef stateR 381 | cur <- readSTRef curR 382 | return $ tokens |> buildFilteredToken f (mkToken info state) lastAcc txt 383 | 384 | buildFilteredToken :: TokenFilter -> (C.ByteString -> Offset -> Token) -> TokenIdx -> C.ByteString -> Token 385 | buildFilteredToken tf f (TokenIdx start len) txt = 386 | let t = f (subByteString start len txt) (fromIntegral start) 387 | in if filterToken tf t 388 | then t 389 | else unspecifiedToken 390 | {-# INLINE buildFilteredToken #-} 391 | 392 | buildToken :: Bool -> (C.ByteString -> Offset -> Token) -> TokenIdx -> C.ByteString -> Token 393 | buildToken True f (TokenIdx start len) txt = f (subByteString start len txt) (fromIntegral start) 394 | buildToken False f (TokenIdx start len) txt = unspecifiedToken 395 | {-# INLINE buildToken #-} 396 | 397 | buildToken_ :: Bool -> Bool -> Bool -> (C.ByteString -> Offset -> Token) -> TokenIdx -> C.ByteString -> Token 398 | buildToken_ i k t f (TokenIdx start len) txt = 399 | if i && isTokenIdentifier tok || k && isTokenKeyword tok || t && isTokenNativeType tok 400 | then tok 401 | else unspecifiedToken 402 | where 403 | tok = f (subByteString start len txt) (fromIntegral start) 404 | 405 | 
-- | @subByteString i n bs@ skips the first @i@ bytes of @bs@ and returns at
-- most the next @n@ bytes.
subByteString :: Int -> Int -> C.ByteString -> C.ByteString
subByteString i n bs = C.take n (C.drop i bs)
{-# INLINE subByteString #-}

-- | Placeholder token: unspecified type, empty text, offset 0.
unspecifiedToken :: Token
unspecifiedToken = Token (Chunk ChunkUnspec C.empty 0)

--------------------------------------------------------------------------------
/src/CGrep/Search.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module CGrep.Search (
    searchStringIndices,
    searchStringTaggedIndices,
    eligibleForSearch,
    TaggedIx (..),
) where

import CGrep.Types (Text8)
import Data.Int (Int64)
import GHC.Exts (groupWith)

import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Search as BM
import qualified Data.ByteString.Search.DFA as DFA

import qualified Data.ByteString.Lazy.Search as LBM
import qualified Data.ByteString.Lazy.Search.DFA as LDFA
import Data.List.Extra (notNull)

-- | Offsets of every occurrence of the pattern in the text. Patterns of at
-- most three bytes go through the DFA searcher, longer ones through
-- Boyer-Moore.
findIndices :: Text8 -> Text8 -> [Int]
findIndices p
    | C.length p <= 3 = DFA.indices p
    | otherwise = BM.indices p
{-# INLINE findIndices #-}

-- | For each pattern, in order, the list of offsets at which it occurs.
searchStringIndices :: [Text8] -> Text8 -> [[Int64]]
searchStringIndices ps text = [fromIntegral <$> findIndices p text | p <- ps]
{-# INLINE searchStringIndices #-}

-- | An offset together with the tags of all patterns matching at that offset.
-- Equality and ordering look at the offset only.
data TaggedIx a = TaggedIx
    { index :: {-# UNPACK #-} !Int
    , tags :: [a]
    }
    deriving stock (Show)

instance Eq (TaggedIx a) where
    (TaggedIx i1 _) == (TaggedIx i2 _) = i1 == i2

instance Ord (TaggedIx a) where
    compare (TaggedIx i1 _) (TaggedIx i2 _) = compare i1 i2

-- >>> searchStringTaggedIndices [("a",2),("b",1),("a",0), ("he", 42)] "aheba"
-- [TaggedIx {index = 0, tags = [2,0]},TaggedIx {index = 1, tags = [42]},TaggedIx {index = 3, tags = [1]},TaggedIx {index = 4, tags = [2,0]}]

-- | Search all tagged patterns and merge the hits by offset: the result is
-- grouped by offset via 'groupWith', each entry carrying the tags of every
-- pattern found there.
searchStringTaggedIndices :: [(Text8, a)] -> Text8 -> [TaggedIx a]
searchStringTaggedIndices ps text =
    [ TaggedIx (index x) (concatMap tags grp)
    | grp@(x : _) <- groupWith index hits
    ]
  where
    -- one single-tag entry per (pattern, occurrence)
    hits = [TaggedIx i [tag] | (pat, tag) <- ps, i <- findIndices pat text]

-- | Decide from the per-pattern hit lists whether a file deserves the full
-- search: with exactly one pattern every hit list must be non-empty,
-- otherwise at least one must be.
eligibleForSearch :: [a] -> [[Int64]] -> Bool
eligibleForSearch ps = case ps of
    [_] -> all notNull
    _ -> any notNull
{-# INLINE eligibleForSearch #-}

--------------------------------------------------------------------------------
/src/CGrep/Strategy/BoyerMoore.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module CGrep.Strategy.BoyerMoore (search) where

import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Lazy.Char8 as LC

import Control.Monad.IO.Class (MonadIO (liftIO))
import Control.Monad.Trans.Reader (ask, reader)
import Data.List (genericLength, isPrefixOf, isSuffixOf)

import CGrep.Common (
    Text8,
    expandMultiline,
    getTargetContents,
    getTargetName,
    ignoreCase,
 )
import CGrep.ContextFilter (mkContextFilter)
import CGrep.FileType (FileType)
import CGrep.FileTypeMap (FileTypeInfo, contextFilter, fileTypeLookup)
import CGrep.Output (Output, mkOutputElements, runSearch)
import CGrep.Search
import CGrep.Types (Offset)

import CGrep.Parser.Chunk
import Data.Int (Int64)
import Options (Options (prefix_match, suffix_match, word_match))
import Reader (Env (..), ReaderIO)
import Verbose (putMsgLnVerbose)

import System.IO (stderr)
import System.Posix.FilePath (RawFilePath)

import CGrep.Parser.Line (getLineByOffset, getLineOffsets)
import Data.Array (indices)
import qualified Data.Vector.Unboxed as UV

-- | Plain string search: locate every occurrence of each pattern in the
-- transformed text and, when word/prefix/suffix matching is requested, keep
-- only the occurrences that line up with a parsed chunk of the same line.
search :: Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output]
search info f patterns = do
    Env{..} <- ask

    text <- liftIO $ getTargetContents f

    let filename = getTargetName f

    -- transform text: case folding, then context filtering, then multiline
    -- expansion

    let ctxFilter = mkContextFilter opt
        text' = ignoreCase opt text
        text''' = expandMultiline opt (contextFilter (fst <$> info) ctxFilter False text')

    -- make shallow search

    let indices' = searchStringIndices patterns text'
        indices''' = searchStringIndices patterns text'''

    -- one untyped chunk per (pattern, occurrence)

    let chunks = [Chunk ChunkUnspec p ix | (p, ixs) <- zip patterns indices''', ix <- ixs]

    -- filter exact/partial matching tokens

    let lineOffsets = getLineOffsets (fromIntegral $ C.length text) text

    let needsCheck = word_match opt || prefix_match opt || suffix_match opt
        chunks'
            | needsCheck = filter (checkChunk opt lineOffsets (snd <$> info) text''') chunks
            | otherwise = chunks

    putMsgLnVerbose 2 stderr $ "strategy : running Boyer-Moore search on " <> filename
    putMsgLnVerbose 3 stderr $ "---\n" <> text''' <> "\n---"

    runSearch opt filename (eligibleForSearch patterns indices') $ do
        putMsgLnVerbose 2 stderr $ "chunks' : " <> show chunks'
        mkOutputElements lineOffsets filename text text''' chunks'

-- | Check a raw hit against the chunks parsed from the line that contains it.
-- The line and its offset come from 'getLineByOffset'; chunk offsets are
-- compared after shifting by that line offset. Only meant to be called when
-- one of word/prefix/suffix matching is enabled (the fallback is 'undefined').
checkChunk :: Options -> UV.Vector Int64 -> Maybe FileTypeInfo -> Text8 -> Chunk -> Bool
checkChunk opt vec info text chunk
    | word_match opt =
        let !off = pos - off'
         in any (\c -> cOffset c == off && cToken c == tok) cs
    | prefix_match opt =
        any (\c -> tok `C.isPrefixOf` cToken c && cOffset c + off' == pos) cs
    | suffix_match opt =
        any (\c -> tok `C.isSuffixOf` cToken c && cOffset c + off' + fromIntegral (C.length (cToken c) - C.length tok) == pos) cs
    | otherwise = undefined
  where
    pos = cOffset chunk
    tok = cToken chunk
    (# line', off' #) = getLineByOffset pos text vec
    cs = parseChunks info line'

--------------------------------------------------------------------------------
/src/CGrep/Strategy/Levenshtein.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module CGrep.Strategy.Levenshtein (search) where

import CGrep.Parser.Line (getAllLineOffsets)

import qualified Data.ByteString.Char8 as C

import Control.Monad.IO.Class (MonadIO (liftIO))
import Control.Monad.Trans.Reader (ask, reader)

import CGrep.Common (
    Text8,
    expandMultiline,
    getTargetContents,
    getTargetName,
    ignoreCase,
 )
import CGrep.ContextFilter (mkContextFilter)
import CGrep.Distance ((~==))
import CGrep.FileType (FileType)
import CGrep.FileTypeMap (
    FileTypeInfo,
    contextFilter,
    fileTypeLookup,
 )
import CGrep.Output (Output, mkOutputElements)
import CGrep.Parser.Chunk (Chunk, cToken, parseChunks)

import Data.Foldable (Foldable (toList))
import Reader (Env (..), ReaderIO)
import System.IO (stderr)
import System.Posix.FilePath (RawFilePath)
import Verbose (putMsgLnVerbose)

-- | Approximate search: parse the transformed text into chunks and keep those
-- whose text matches at least one pattern under the '~==' relation.
search :: Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output]
search info f patterns = do
    Env{..} <- ask

    text <- liftIO $ getTargetContents f

    let filename = getTargetName f

    -- transform text: case folding, then context filtering, then multiline
    -- expansion

    let ctxFilter = mkContextFilter opt
        text''' =
            expandMultiline opt
                . contextFilter (fst <$> fileTypeLookup opt filename) ctxFilter False
                . ignoreCase opt
                $ text

        -- parse source code, get the chunk list...

        tokens' = parseChunks (snd <$> info) text'''

        -- filter tokens...

        patterns' = map C.unpack patterns
        matches = [t | t <- toList tokens', any (~== C.unpack (cToken t)) patterns']

    putMsgLnVerbose 2 stderr $ "strategy : running edit-distance (Levenshtein) search on " <> filename <> "..."
    putMsgLnVerbose 3 stderr $ "---\n" <> text''' <> "\n---"

    putMsgLnVerbose 2 stderr $ "tokens : " <> show tokens'
    putMsgLnVerbose 2 stderr $ "matches : " <> show matches

    let lineOffsets = getAllLineOffsets text

    mkOutputElements lineOffsets filename text text''' matches

--------------------------------------------------------------------------------
/src/CGrep/Strategy/Regex.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module CGrep.Strategy.Regex (search) where

import qualified Data.ByteString.Char8 as C

import Control.Monad.IO.Class (MonadIO (liftIO))
import Control.Monad.Trans.Reader (ask, reader)

import Text.Regex.Base (
    AllTextMatches (getAllTextMatches),
    MatchText,
 )
import Text.Regex.PCRE ((=~))
import Text.Regex.Posix ((=~))

import Data.Array (Array, elems)

import CGrep.Common (
    Text8,
    expandMultiline,
    getTargetContents,
    getTargetName,
    ignoreCase,
 )
import CGrep.ContextFilter (mkContextFilter)
import CGrep.FileType (FileType)
import CGrep.FileTypeMap (FileTypeInfo (..), contextFilter, fileTypeLookup)
import CGrep.Output (Output, mkOutputElements)

import Options (Options (regex_pcre))
import Reader (Env (..), ReaderIO)
import Verbose (putMsgLnVerbose)

import CGrep.Parser.Chunk
import CGrep.Parser.Line (getAllLineOffsets)

import System.IO (stderr)
import System.Posix.FilePath (RawFilePath)

-- | Regex search: run every pattern (PCRE or POSIX, depending on the
-- @regex_pcre@ option) over the transformed text and turn each matched
-- (sub)expression into an untyped chunk located at the match offset.
search :: Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output]
search info f patterns = do
    Env{..} <- ask

    text <- liftIO $ getTargetContents f

    let filename = getTargetName f

    -- transform text: case folding, then context filtering, then multiline
    -- expansion

    let ctxFilter = mkContextFilter opt
        text''' =
            expandMultiline opt
                . contextFilter (fst <$> fileTypeLookup opt filename) ctxFilter False
                . ignoreCase opt
                $ text

        -- search for matching tokens

        (=~~~) = if regex_pcre opt then (Text.Regex.PCRE.=~) else (Text.Regex.Posix.=~)

        tokens =
            [ Chunk ChunkUnspec str (fromIntegral off)
            | p <- patterns
            , matchText <- elems (getAllTextMatches $ text''' =~~~ p :: (Array Int) (MatchText Text8))
            , (str, (off, _)) <- elems matchText
            ]

    putMsgLnVerbose 2 stderr $ "strategy : running regex " <> (if regex_pcre opt then "(pcre)" else "(posix)") <> " search on " <> filename <> "..."
    putMsgLnVerbose 3 stderr $ "---\n" <> text''' <> "\n---"
    putMsgLnVerbose 2 stderr $ "tokens : " <> show tokens

    let lineOffsets = getAllLineOffsets text

    mkOutputElements lineOffsets filename text text''' tokens

--------------------------------------------------------------------------------
/src/CGrep/Strategy/Semantic.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module CGrep.Strategy.Semantic (search) where

import CGrep.Parser.Token
import qualified Data.ByteString.Char8 as C

import CGrep.Common (
    Text8,
    expandMultiline,
    getTargetContents,
    getTargetName,
    ignoreCase,
    subText,
    trim,
    trim8,
 )
import CGrep.ContextFilter (
    contextBitComment,
    mkContextFilter,
    (~!),
 )

import CGrep.Output (Output, mkOutputElements, runSearch)
import CGrep.Parser.Line (getAllLineOffsets)
import CGrep.Search (eligibleForSearch, searchStringIndices)

import CGrep.Parser.Atom (
    Atom (..),
    combineAtoms,
    filterTokensWithAtoms,
    mkAtomFromToken,
 )

import Control.Monad.IO.Class (MonadIO (liftIO))
import Control.Monad.Trans.Reader (ask, reader)

import Data.Function (on)
import Data.List (nub, sortBy)
import Data.Maybe (mapMaybe)

import CGrep.Parser.Chunk
import Reader (Env (..), ReaderIO)
import Util (rmQuote8)
import Verbose (putMsgLnVerbose)

import System.Posix.FilePath (RawFilePath, takeBaseName)

import CGrep.FileType (FileType)
import CGrep.FileTypeMap (
    FileTypeInfo,
    contextFilter,
    fileTypeLookup,
 )
import System.IO (stderr)

import Data.Coerce (coerce)
import Data.Foldable (Foldable (toList))
import qualified Data.Sequence as S

-- | Semantic search: the patterns are tokenized with the same parser as the
-- source, turned into atoms, and matched against the token stream of the
-- transformed text. A cheap string search on the literal parts of the
-- patterns decides first whether the file is worth tokenizing at all.
search :: Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output]
search info f ps = do
    Env{..} <- ask

    text <- liftIO $ getTargetContents f

    let filename = getTargetName f

    let filt = mkContextFilter opt ~! contextBitComment
        ftype = fst <$> fileTypeLookup opt filename

        -- transform text: case folding, then context filtering, then
        -- multiline expansion
        text' = ignoreCase opt text
        text''' = expandMultiline opt (contextFilter ftype filt True text')

        -- pre-process patterns: every token category is kept

        pfilter =
            TokenFilter
                { tfIdentifier = True
                , tfKeyword = True
                , tfNativeType = True
                , tfString = True
                , tfNumber = True
                , tfOperator = True
                , tfBracket = True
                }

        patterns = [parseTokens pfilter (snd <$> info) (contextFilter ftype filt True p) | p <- ps]
        patterns' = map (mkAtomFromToken <$>) patterns
        patterns'' = [combineAtoms (map (: []) (toList atoms)) | atoms <- patterns']

        -- literal texts to look for in the shallow search: string atoms lose
        -- their quotes, the "OR" identifier and non-raw atoms are skipped
        identifiers =
            mapMaybe
                ( \case
                    Raw (Token (Chunk ChunkString xs _)) -> Just (rmQuote8 $ trim8 xs)
                    Raw (Token (Chunk ChunkIdentifier "OR" _)) -> Nothing
                    Raw t -> Just (tToken t)
                    _ -> Nothing
                )
                (concatMap toList patterns')

    -- put banners...

    putMsgLnVerbose 2 stderr $ "strategy : running generic semantic search on " <> filename <> "..."
    putMsgLnVerbose 2 stderr $ "atoms : " <> show patterns'' <> " -> identifiers: " <> show identifiers
    putMsgLnVerbose 3 stderr $ "---\n" <> text''' <> "\n---"

    let indices' = searchStringIndices identifiers text'

    runSearch opt filename (eligibleForSearch identifiers indices') $ do
        -- parse source code, keeping only the token categories that occur in
        -- the patterns

        let tfilter = mkTokenFilter $ cTyp . coerce <$> concatMap toList patterns

        let tokens = toList $ parseTokens tfilter (snd <$> info) (subText indices' text''')

        -- get matching tokens, without duplicates, ordered by offset

        let found = [tok | ms <- patterns'', tok <- filterTokensWithAtoms opt ms tokens]
            tokens' = sortBy (compare `on` tOffset) (nub found)

        -- convert Tokens to Chunks

        let matches = coerce tokens' :: [Chunk]

        putMsgLnVerbose 2 stderr $ "tokens : " <> show tokens
        putMsgLnVerbose 2 stderr $ "matches : " <> show matches

        let lineOffsets = getAllLineOffsets text

        mkOutputElements lineOffsets filename text text''' matches

--------------------------------------------------------------------------------
/src/CGrep/Strategy/Tokenizer.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module CGrep.Strategy.Tokenizer (search) where

import Control.Monad.IO.Class (MonadIO (liftIO))
import Control.Monad.Trans.Reader (ask, reader)
import qualified Data.ByteString.Char8 as C

import CGrep.Common (
    Text8,
    expandMultiline,
    getTargetContents,
    getTargetName,
    ignoreCase,
    subText,
 )
import CGrep.ContextFilter (
    contextBitComment,
    mkContextFilter,
    (~!),
 )
import CGrep.Distance ((~==))
import CGrep.Output (Output, mkOutputElements, runSearch)

import CGrep.Parser.Line
import CGrep.Parser.Token

import CGrep.FileType (FileType)
import CGrep.FileTypeMap (
    FileTypeInfo,
    contextFilter,
    fileTypeLookup,
 )

import CGrep.Search (eligibleForSearch, searchStringIndices)
import Data.List (isInfixOf, isPrefixOf, isSuffixOf)

import Options (
    Options (
        edit_dist,
        identifier,
        keyword,
        nativeType,
        number,
        operator,
        prefix_match,
        string,
        suffix_match,
        word_match
    ),
 )
import Reader (Env (..), ReaderIO)
import Verbose (putMsgLnVerbose)

import CGrep.Parser.Chunk (Chunk (..))
import System.IO (stderr)
import System.Posix.FilePath (RawFilePath)

import CGrep.Types (Offset)
import Data.Coerce (coerce)
import Data.Foldable (Foldable (toList))

import qualified Data.Sequence as S
import Util (mapMaybe')

{- | Token-based search strategy.

Reads the target, derives two views of its text, tokenizes the regions
around the raw pattern hits and keeps the tokens accepted by
'tokenizerFilter'.

  * @info@ - optional file type information; only its 'FileTypeInfo' half is
    handed to the tokenizer.
  * @f@    - the target to read.
  * @ps@   - the patterns to look for.

The result is built by 'mkOutputElements' from the matching chunks.
-}
search :: Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output]
search info f ps = do
    Env{..} <- ask

    text <- liftIO $ getTargetContents f

    let filename = getTargetName f

    -- Text transformation pipeline, applied innermost first:
    --   ignoreCase -> contextFilter -> expandMultiline
    -- Only the first stage (used for the raw index lookup) and the last stage
    -- (used for tokenizing and output) are consumed below.
    let filt = mkContextFilter opt ~! contextBitComment
        caseFolded = ignoreCase opt text
        contextOnly = contextFilter (fst <$> fileTypeLookup opt filename) filt True caseFolded
        expanded = expandMultiline opt contextOnly

    putMsgLnVerbose 2 stderr $ "strategy: running token search on " <> filename <> "..."
    putMsgLnVerbose 3 stderr $ "---\n" <> expanded <> "\n---"

    let hits = searchStringIndices ps caseFolded

    runSearch opt filename (eligibleForSearch ps hits) $ do
        -- Token classes are selected by the command-line token filters;
        -- brackets are never requested by this strategy.
        let tfilter =
                TokenFilter
                    { tfIdentifier = identifier opt
                    , tfKeyword = keyword opt
                    , tfNativeType = nativeType opt
                    , tfString = string opt
                    , tfNumber = number opt
                    , tfOperator = operator opt
                    , tfBracket = False
                    }

        -- parse the source code around the hits, then keep the matching tokens
        let tokens = {-# SCC tok_0 #-} parseTokens tfilter (snd <$> info) (subText hits expanded)
            matches = {-# SCC tok_3 #-} mapMaybe' (tokenizerFilter opt ps) tokens

        putMsgLnVerbose 2 stderr $ "tokens    : " <> show tokens
        putMsgLnVerbose 2 stderr $ "matches   : " <> show matches

        let lineOffsets = getAllLineOffsets text

        mkOutputElements lineOffsets filename text expanded matches

{- | Turn a token into a 'Chunk' when it is a specified token that satisfies
'tokenPredicate'; every other token yields 'Nothing'.
-}
tokenizerFilter :: Options -> [C.ByteString] -> Token -> Maybe Chunk
tokenizerFilter opt patterns token
    | not (isTokenUnspecified token) && tokenPredicate opt patterns token = Just (coerce token)
    | otherwise = Nothing
{-# INLINE tokenizerFilter #-}

{- | Decide whether a token matches any of the patterns.

The first enabled option wins, in this order: edit distance ('~=='),
whole-word equality, prefix, suffix; with none of them set the pattern only
has to occur somewhere inside the token.
-}
tokenPredicate :: Options -> [C.ByteString] -> Token -> Bool
tokenPredicate opt patterns tokens
    | edit_dist opt = any (\p -> C.unpack p ~== C.unpack tokenText) patterns
    | word_match opt = tokenText `elem` patterns
    | prefix_match opt = any (`C.isPrefixOf` tokenText) patterns
    | suffix_match opt = any (`C.isSuffixOf` tokenText) patterns
    | otherwise = any (`C.isInfixOf` tokenText) patterns
  where
    -- despite the plural parameter name, a single token is inspected
    tokenText = tToken tokens

--------------------------------------------------------------------------------
/src/CGrep/Types.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

-- | Shared type synonyms used across the CGrep modules.
module CGrep.Types (
    Offset,
    Text8,
    LText8,
) where

import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Lazy.Char8 as LC
import Data.Int (Int64)

-- | A 64-bit signed offset.
type Offset = Int64

-- | Strict 'C.ByteString' accessed through the @Char8@ interface.
type Text8 = C.ByteString

-- | Lazy 'LC.ByteString' accessed through the @Char8@ interface.
type LText8 = LC.ByteString

--------------------------------------------------------------------------------
/src/CmdOptions.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module CmdOptions (
    options,
) where

import Data.Version (showVersion)
import System.Console.CmdArgs (
    CmdArgs,
    Mode,
    args,
    cmdArgsMode,
    explicit,
    groupname,
    help,
    name,
    program,
    summary,
    typ,
    (&=),
 )

import Options (Options (..))
import Paths_cgrep (version)

{- | Command-line interface of cgrep, expressed as a cmdargs 'Mode'.

Each field of 'Options' carries its default value together with the
annotations (flag names, help text, help group) that cmdargs uses to build
the parser and the @--help@ output. Positional arguments end up in 'others'.

NOTE(review): the @&=@ annotations are attached to the values as written
here; cmdargs' documentation advises keeping annotated values inline in the
record expression, so avoid hoisting them into shared bindings - confirm
before refactoring.
-}
options :: Mode (CmdArgs Options)
options =
    cmdArgsMode $
        Options
            { file = "" &= typ "FILE" &= groupname "Pattern" &= help "Read PATTERNs from file (one per line)"
            , word_match = False &= help "Force word matching" &= explicit &= name "word" &= name "w"
            , prefix_match = False &= help "Force prefix matching" &= explicit &= name "prefix" &= name "p"
            , suffix_match = False &= help "Force suffix matching" &= explicit &= name "suffix" &= name "s"
            , edit_dist = False &= help "Use edit distance" &= explicit &= name "edit" &= name "e"
            , regex_posix = False &= help "Use regex matching (posix)" &= explicit &= name "G" &= name "regex"
            , regex_pcre = False &= help "Use regex matching (pcre)" &= explicit &= name "P" &= name "pcre"
            , ignore_case = False &= help "Ignore case distinctions"
            , code = False &= groupname "\nContext filters" &= help "Enable search in source code" &= explicit &= name "c" &= name "code"
            , comment = False &= help "Enable search in comments" &= explicit &= name "m" &= name "comment"
            , literal = False &= help "Enable search in string literals" &= explicit &= name "l" &= name "literal"
            , identifier = False &= groupname "\nToken filters" &= help "Identifiers" &= explicit &= name "identifier" &= name "name"
            , nativeType = False &= help "Native Types" &= explicit &= name "native" &= name "type"
            , keyword = False &= help "Keywords" &= explicit &= name "keyword"
            , number = False &= help "Literal numbers" &= explicit &= name "number"
            , string = False &= help "Literal strings" &= explicit &= name "string"
            , operator = False &= help "Operators" &= explicit &= name "op"
            , type_filter = [] &= groupname "\nFile filters" &= help "Specify file types. ie: Cpp, +Haskell, -Makefile"
            , kind_filter = [] &= help "Specify file kinds. Text, Config, Language, Data, Markup or Script"
            , code_only = False &= help "Parse code modules only (skip headers/interfaces)" &= explicit &= name "code-only"
            , hdr_only = False &= help "Parse headers/interfaces only (skip modules)" &= explicit &= name "hdr-only"
            , skip_test = False &= help "Skip files that have 'test' in the name" &= explicit &= name "skip-test" &= name "T"
            , prune_dir = [] &= help "Do not descend into dir" &= explicit &= name "prune-dir"
            , recursive = False &= help "Enable recursive search (don't follow symlinks)" &= explicit &= name "recursive" &= name "r"
            , follow = False &= help "Follow symlinks" &= explicit &= name "follow" &= name "L"
            , semantic = False &= groupname "\nSemantic" &= help "\"code\" pattern: _, _1, _2... (identifiers), $, $1, $2... (optionals), ANY, KEY, STR, LIT, NUM, HEX, OCT, OR" &= explicit &= name "S" &= name "semantic"
            , max_count = maxBound &= groupname "\nControl" &= help "Stop search in files after INT matches" &= explicit &= name "max-count"
            , type_force = Nothing &= help "Force the type of file" &= explicit &= name "force-type"
            , type_map = False &= help "List the supported file types" &= explicit &= name "type-list"
            , invert_match = False &= help "Select non-matching lines" &= explicit &= name "invert-match" &= name "v"
            , multiline = 1 &= help "Enable multi-line matching"
            , -- help text fixed: was "Number threads to run in parallel"
              jobs = Nothing &= help "Number of threads to run in parallel" &= explicit &= name "threads" &= name "j"
            , show_match = False &= groupname "\nOutput format" &= help "Show list of matching tokens" &= explicit &= name "show-match"
            , color = False &= help "Use colors to highlight the match strings" &= explicit &= name "color"
            , no_color = False &= help "Do not use colors (override config file)" &= explicit &= name "no-color"
            , no_filename = False &= help "Suppress the file name prefix on output" &= explicit &= name "h" &= name "no-filename"
            , no_numbers = False &= help "Suppress both line and column numbers on output" &= explicit &= name "no-numbers"
            , no_column = False &= help "Suppress the column number on output" &= explicit &= name "no-column"
            , count = False &= help "Print only a count of matching lines per file" &= explicit &= name "count"
            , filename_only = False &= help "Print only the name of files containing matches" &= explicit &= name "filename-only"
            , vim = False &= help "Run vim editor passing the files that match" &= explicit &= name "vim"
            , editor = False &= help "Run the editor specified by EDITOR var., passing the files that match" &= explicit &= name "editor"
            , fileline = False &= help "When edit option is specified, pass the list of matching files in file:line format (e.g. vim 'file-line' plugin)" &= explicit &= name "fileline"
            , json = False &= help "Format output as json object" &= explicit &= name "json"
            , verbose = 0 &= groupname "\nMiscellaneous" &= help "Verbose level: 1, 2 or 3" &= explicit &= name "verbose"
            , no_shallow = False &= help "Disable shallow-search" &= explicit &= name "no-shallow"
            , show_palette = False &= help "Show color palette" &= explicit &= name "palette"
            , others = [] &= args
            }
            &= summary ("Cgrep " <> showVersion version <> ". Usage: cgrep [OPTION] [PATTERN] files...")
            &= program "cgrep"

--------------------------------------------------------------------------------
/src/Config.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--
{-# LANGUAGE DeriveGeneric #-}

module Config (
    Config (..),
    dumpPalette,
    getConfig,
) where

import Control.Monad (MonadPlus (mzero), filterM, forM_)
import System.Console.ANSI (
    Color (Blue, Cyan, Green, Magenta, Red, White, Yellow),
    ColorIntensity (Vivid),
    ConsoleIntensity (BoldIntensity),
    ConsoleLayer (Foreground),
    SGR (SetColor, SetConsoleIntensity),
    setSGRCode,
 )
import System.Directory (doesFileExist, getHomeDirectory)

import System.Console.ANSI.Types (
    Color (Blue, Cyan, Green, Magenta, Red, White, Yellow),
    ColorIntensity (Vivid),
    ConsoleIntensity (BoldIntensity),
    ConsoleLayer (Foreground),
    SGR (SetColor, SetConsoleIntensity, SetPaletteColor),
    xterm6LevelRGB,
 )

import Data.Aeson (FromJSON (parseJSON), (.!=), (.:?))
import Data.Maybe (fromMaybe, mapMaybe)
import qualified Data.Yaml as Y

import CGrep.FileType (FileType)
import GHC.Generics (Generic)

import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as C
import Data.ByteString.RawFilePath (RawFilePath)
import Data.List.Split (splitOn)
import System.FilePath ((</>))

import CGrep.FileKind (FileKind)
import Data.List.Extra (notNull)
import Text.Read (readMaybe)

-- | Base name of the configuration file looked up by 'getConfig'.
cgreprc :: FilePath
cgreprc = "cgreprc"

-- | Run-time configuration, as consumed by the rest of the program.
data Config = Config
    { configFileTypes :: [FileType]
    , configFileKinds :: [FileKind]
    , configPruneDirs :: [RawFilePath]
    , configColors :: Bool
    , configColorFile :: [SGR]
    , configColorMatch :: [SGR]
    , configFileLine :: Bool
    , configJobs :: Maybe Int
    }
    deriving stock (Show, Read)

-- | Configuration used when no configuration file is found.
defaultConfig :: Config
defaultConfig =
    Config
        { configFileTypes = []
        , configFileKinds = []
        , configPruneDirs = []
        , configColors = False
        , configColorFile = [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Blue]
        , configColorMatch = [SetConsoleIntensity BoldIntensity]
        , configFileLine = False
        , configJobs = Nothing
        }

{- | Convert the raw YAML representation into a 'Config'.

File types and kinds that fail to parse with 'readMaybe' are dropped; a
missing or unrecognised colour specification yields an empty SGR list.
-}
mkConfig :: YamlConfig -> Config
mkConfig YamlConfig{..} =
    let configFileTypes = mapMaybe readMaybe yamlFileTypes
        configFileKinds = mapMaybe readMaybe yamlFileKinds
        configPruneDirs = C.pack <$> yamlPruneDirs
        configColors = yamlColors
        configColorFile = fromMaybe [] (yamlColorFileName >>= readColor)
        configColorMatch = fromMaybe [] (yamlColorMatch >>= readColor)
        configFileLine = yamlFileLine
        configJobs = yamlJobs
     in Config{..}

-- | Configuration exactly as it appears in the YAML file.
data YamlConfig = YamlConfig
    { yamlFileTypes :: [String]
    , yamlFileKinds :: [String]
    , yamlPruneDirs :: [String]
    , yamlColors :: Bool
    , yamlColorFileName :: Maybe String
    , yamlColorMatch :: Maybe String
    , yamlFileLine :: Bool
    , yamlJobs :: Maybe Int
    }
    deriving stock (Show, Generic)

-- | Every key is optional; absent keys fall back to the defaults below.
instance Y.FromJSON YamlConfig where
    parseJSON (Y.Object v) =
        YamlConfig
            <$> v .:? "file_types" .!= []
            <*> v .:? "file_kinds" .!= []
            <*> v .:? "prune_dirs" .!= []
            <*> v .:? "colors" .!= False
            <*> v .:? "color_filename" .!= Nothing
            <*> v .:? "color_match" .!= Nothing
            <*> v .:? "file_line" .!= False
            <*> v .:? "threads" .!= Nothing
    parseJSON _ = mzero

{- | Locate and load the configuration file.

Candidates are probed in this order and the first existing one is used:
@cgreprc@ and @.cgreprc@ (relative to the working directory), @.cgreprc@ in
the home directory, and @/etc/cgreprc@. Returns the parsed configuration and
the path it came from, or 'defaultConfig' and 'Nothing' when no candidate
exists. A file that exists but cannot be decoded aborts the program with the
YAML parse error.
-}
getConfig :: IO (Config, Maybe FilePath)
getConfig = do
    home <- getHomeDirectory
    confs <-
        filterM
            doesFileExist
            [ cgreprc
            , "." <> cgreprc
            , home </> ("." <> cgreprc)
            , "/etc" </> cgreprc
            ]
    case confs of
        [] -> return (defaultConfig, Nothing)
        (path : _) -> do
            conf <- Y.decodeFileEither path
            case conf of
                Left e -> errorWithoutStackTrace $ "CGrep:" <> Y.prettyPrintParseException e
                Right yconf -> return (mkConfig yconf, Just path)

{- | Parse a colour specification from the configuration file.

Accepts one of the fixed names below, or an @r:g:b@ triple addressing the
xterm 6x6x6 colour cube. Returns 'Nothing' for anything else, including
triples whose components are not integers or fall outside @0..5@
('xterm6LevelRGB' raises an error for out-of-range levels, so they are
rejected here instead of crashing later).
-}
readColor :: String -> Maybe [SGR]
readColor "Bold" = Just [SetConsoleIntensity BoldIntensity]
readColor "Red" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Red]
readColor "Green" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Green]
readColor "Yellow" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Yellow]
readColor "Blue" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Blue]
readColor "Magenta" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Magenta]
readColor "Cyan" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid Cyan]
readColor "White" = Just [SetConsoleIntensity BoldIntensity, SetColor Foreground Vivid White]
readColor "Orange" = Just [SetConsoleIntensity BoldIntensity, SetPaletteColor Foreground $ xterm6LevelRGB 5 2 0]
readColor "Acqua" = Just [SetConsoleIntensity BoldIntensity, SetPaletteColor Foreground $ xterm6LevelRGB 2 5 4]
readColor xs = case mapM readMaybe (splitOn ":" xs) of
    Just [r, g, b]
        | all inCube [r, g, b] ->
            Just [SetConsoleIntensity BoldIntensity, SetPaletteColor Foreground $ xterm6LevelRGB r g b]
    _ -> Nothing
  where
    -- valid level of the 6-level colour cube
    inCube :: Int -> Bool
    inCube level = level >= 0 && level <= 5

-- | Print every colour of the 6x6x6 cube, labelled with its @r:g:b@ triple.
dumpPalette :: IO ()
dumpPalette = do
    let palette = [(r, g, b) | r <- [0 .. 5], g <- [0 .. 5], b <- [0 .. 5]]
    forM_ palette $ \(r, g, b) -> do
        putStrLn $ setSGRCode [SetConsoleIntensity BoldIntensity, SetPaletteColor Foreground $ xterm6LevelRGB r g b] <> "COLOR " <> show r <> ":" <> show g <> ":" <> show b <> setSGRCode []

--------------------------------------------------------------------------------
/src/Main.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module Main where

import qualified Codec.Binary.UTF8.String as UC
import qualified Data.ByteString.Char8 as C

import Data.Char (toLower)
import Data.List (elemIndex, genericLength, isInfixOf, isPrefixOf, isSuffixOf, nub, partition, sort, union, (\\))
import Data.Maybe (catMaybes)
import Data.Version (showVersion)

import Control.Monad (void, when)
import Control.Monad.Trans.Reader (ReaderT (runReaderT), ask)

import qualified Data.Map as M
import GHC.Conc (getNumCapabilities, setNumCapabilities)
import GHC.IO.Handle (hIsTerminalDevice)

import System.Console.CmdArgs (cmdArgsRun)
import System.Environment (withArgs)
import System.Exit (exitSuccess)
import System.IO (stderr, stdin, stdout)

import CGrep.Common (trim8)
import CGrep.FileType (readKindList, readTypeList)
import CGrep.FileTypeMap (dumpFileTypeInfoMap, fileTypeInfoMap)
import CGrep.Parser.Atom (wildCardMap)

import CmdOptions (options)
import Config (
    Config (configColors, configFileKinds, configFileTypes, configJobs),
    dumpPalette,
    getConfig,
 )
import Options (Options (..))
import Paths_cgrep (version)
import Verbose (putMsgLnVerbose)

import Reader (Env (..), ReaderIO)
import Search (isRegexp, parallelSearch)
import System.Posix.FilePath (RawFilePath)
import Util (partitionM)

import Control.Applicative (Alternative ((<|>)))
import Data.Functor (void, ($>))
import Data.List.Extra (notNull)

{- | Program entry point.

Loads the configuration file and the command-line options, handles the
informational modes (type list, palette, help), resolves patterns, file
paths, file types and kinds, sets the number of capabilities and finally
runs 'parallelSearch'.
-}
main :: IO ()
main = do
    -- check whether this is a terminal device
    isTermIn <- hIsTerminalDevice stdin
    isTermOut <- hIsTerminalDevice stdout

    -- read config options
    (conf, _) <- getConfig

    -- read command-line options; colours from the config file are honoured
    -- only when stdout is a terminal
    opt@Options{..} <-
        ( if isTermOut
            then \o -> o{color = color o || configColors conf}
            else id
        )
            <$> cmdArgsRun options

    -- check for multiple backends...
    -- (with json as the only entry this cannot trigger; the list form is kept
    -- so further back-ends can be added in place)
    when (length (catMaybes [if json then Just "" else Nothing]) > 1) $
        errorWithoutStackTrace "CGrep: you can use one back-end at time!"

    -- display lang-map and exit...
    when type_map $
        dumpFileTypeInfoMap fileTypeInfoMap >> exitSuccess

    -- display color palette and exit...
    when show_palette $
        dumpPalette >> exitSuccess

    -- check whether the pattern list is empty, display help message if it's the case
    when (null others && isTermIn && null file) $
        withArgs ["--help"] $
            void (cmdArgsRun options)

    let others' = C.pack <$> others

    -- load patterns
    patterns <-
        if null file
            then pure $ readPatternsFromCommandLine others'
            else readPatternsFromFile (C.pack file)

    -- with --ignore-case patterns are lower-cased; in semantic (non-regex)
    -- mode the wildcard tokens keep their spelling
    let patterns' = map (if ignore_case then ic else id) patterns
          where
            ic
                | (not . isRegexp) opt && semantic = C.unwords . map (\p -> if p `elem` wildCardTokens then p else C.map toLower p) . C.words
                | otherwise = C.map toLower
              where
                wildCardTokens = "OR" : M.keys wildCardMap -- "OR" is not included in wildCardMap

    -- display the configuration in use

    -- when (isJust confpath) $
    --    hPutStrLn stderr $ showBold opt ("Using '" <> fromJust confpath <> "' configuration file...")

    -- load files to parse:
    let paths = getFilePaths (notNull file) others'

    -- parse cmd line language list:
    let (l0, l1, l2) = readTypeList type_filter

    -- file type enabled:
    let types = (if null l0 then configFileTypes conf else l0 `union` l1) \\ l2
        kinds = if null kind_filter then configFileKinds conf else readKindList kind_filter

    runReaderT
        ( do
            putMsgLnVerbose 1 stderr $ "cgrep " <> showVersion version <> "!"
            -- report the resolved selection, like the kinds line below
            putMsgLnVerbose 1 stderr $ "File types: " <> show types
            putMsgLnVerbose 1 stderr $ "File kinds: " <> show kinds
        )
        (Env conf opt)

    -- specify number of cores: the command line wins over the config file;
    -- one extra capability is requested on top of the job count
    cap <- case jobs <|> configJobs conf of
        (Just j) -> setNumCapabilities (j + 1) $> j
        Nothing -> getNumCapabilities

    -- run search
    runReaderT (parallelSearch paths patterns' types kinds isTermIn) (Env conf opt{jobs = Just cap})

{- | Read one pattern per line from the given file, trimming each line with
'trim8'. An empty file name yields no patterns.
-}
readPatternsFromFile :: RawFilePath -> IO [C.ByteString]
readPatternsFromFile "" = return []
readPatternsFromFile f = map trim8 . C.lines <$> C.readFile (C.unpack f)

{- | Extract the patterns from the positional arguments.

When a literal @":"@ argument is present, everything before it is a pattern;
otherwise only the first argument is.
-}
readPatternsFromCommandLine :: [C.ByteString] -> [C.ByteString]
readPatternsFromCommandLine [] = []
readPatternsFromCommandLine xs@(firstArg : _)
    | ":" `elem` xs = takeWhile (/= ":") xs
    | otherwise = [firstArg]

{- | Extract the file paths from the positional arguments.

The flag tells whether patterns come from a file: if so every argument is a
path. Otherwise the paths are whatever follows the @":"@ separator or, when
there is none, whatever follows the first argument (the pattern).
-}
getFilePaths :: Bool -> [RawFilePath] -> [RawFilePath]
getFilePaths True xs = xs
getFilePaths False xs = case ":" `elemIndex` xs of
    Just n -> drop (n + 1) xs
    Nothing -> drop 1 xs

--------------------------------------------------------------------------------
/src/Options.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--
{-# LANGUAGE DeriveDataTypeable #-}

module Options where

import Data.Data (Data, Typeable)

{- | Command-line options of cgrep.

The fields are grouped as they appear in the help output; defaults, flag
names and help texts are attached in "CmdOptions". The field order is
significant and must stay in sync with the record built there.
-}
data Options = Options
    { -- Pattern
      file :: String
    , word_match :: Bool
    , prefix_match :: Bool
    , suffix_match :: Bool
    , edit_dist :: Bool
    , regex_posix :: Bool
    , regex_pcre :: Bool
    , ignore_case :: Bool
    -- Context filters
    , code :: Bool
    , comment :: Bool
    , literal :: Bool
    -- Token filters
    , identifier :: Bool
    , nativeType :: Bool
    , keyword :: Bool
    , number :: Bool
    , string :: Bool
    , operator :: Bool
    -- File filters
    , type_filter :: [String]
    , kind_filter :: [String]
    , code_only :: Bool
    , hdr_only :: Bool
    , skip_test :: Bool
    , prune_dir :: [FilePath]
    , recursive :: Bool
    , follow :: Bool
    -- Semantic
    , semantic :: Bool
    -- Control
    , max_count :: Int
    , type_force :: Maybe String
    , type_map :: Bool
    , invert_match :: Bool
    , multiline :: Int
    , jobs :: Maybe Int
    -- Output format
    , show_match :: Bool
    , color :: Bool
    , no_color :: Bool
    , no_filename :: Bool
    , no_numbers :: Bool
    , no_column :: Bool
    , count :: Bool
    , filename_only :: Bool
    , json :: Bool
    , vim :: Bool
    , editor :: Bool
    , fileline :: Bool
    -- Miscellaneous
    , verbose :: Int
    , no_shallow :: Bool
    , show_palette :: Bool
    -- Positional arguments
    , others :: [String]
    }
    deriving stock (Data, Typeable, Show)

--------------------------------------------------------------------------------
/src/Reader.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module Reader where

import Control.Monad.Trans.Reader (ReaderT)

import Config (Config)
import Options (Options)

-- | Read-only environment shared by the search code.
data Env = Env
    { conf :: Config
    -- ^ settings loaded from the configuration file
    , opt :: Options
    -- ^ options given on the command line
    }

-- | 'IO' extended with read access to the 'Env'.
type ReaderIO = ReaderT Env IO

--------------------------------------------------------------------------------
/src/Search.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module Search (
    parallelSearch,
    isRegexp,
) where

import Data.Function (fix)
import Data.List (elemIndex, intersperse, isPrefixOf, isSuffixOf, partition)
import Data.List.Split (chunksOf)
import qualified Data.Map as M
import Data.Maybe (catMaybes, fromJust, fromMaybe, isJust)
import qualified Data.Set as S

import Control.Concurrent (MVar, forkIO, forkOn, putMVar, threadDelay)
import Control.Exception as E (SomeException, catch)

import Control.Applicative (
    Alternative ((<|>)),
    Applicative (liftA2),
 )
import Control.Monad (forM, forM_, forever, replicateM_, unless, void, when)
import Control.Monad.Trans (MonadIO (liftIO))
import Control.Monad.Trans.Except (runExceptT, throwE)
import Control.Monad.Trans.Reader (
    ReaderT (runReaderT),
    ask,
    local,
    reader,
 )

import System.Environment (lookupEnv)
import System.IO (
    BufferMode (BlockBuffering),
    hPutStrLn,
    hSetBinaryMode,
    hSetBuffering,
    stderr,
    stdin,
    stdout,
 )
import System.PosixCompat.Files as PC (
    FileStatus,
    getFileStatus,
    getSymbolicLinkStatus,
    isDirectory,
 )

import System.Process (runProcess, waitForProcess)

import CGrep.Common (Text8, getTargetName, takeN)
import CGrep.Output (
    Output (..),
    putOutputElements,
    showFileName,
 )
import Config (
    Config (
        Config,
        configColorFile,
        configColorMatch,
        configColors,
        configFileLine,
        configFileTypes,
        configPruneDirs
    ),
    dumpPalette,
 )
import Options (Options (..))
import Reader (
    Env (..),
    ReaderIO,
 )

import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Extra as B

import qualified Codec.Binary.UTF8.String as UC
import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Lazy.Char8 as LB

import qualified Data.Bifunctor
import Data.Tuple.Extra ()

import Data.Vector ((!))
import qualified Data.Vector as V hiding ((!))

import Control.Concurrent.Chan.Unagi.Bounded (
    newChan,
    readChan,
    writeChan,
 )
import Data.IORef (
    IORef,
    atomicModifyIORef,
    atomicModifyIORef',
    modifyIORef,
    modifyIORef',
    newIORef,
    readIORef,
 )

import Control.Arrow (Arrow ((&&&)))
import Control.Concurrent.Async (Async, async, asyncOn, forConcurrently, forConcurrently_, mapConcurrently_, wait)
import Data.Functor (void, ($>), (<&>))
import RawFilePath.Directory (doesDirectoryExist)
import System.Directory (canonicalizePath, makeAbsolute)
import System.Posix.Directory.Foreign (dtDir)
import System.Posix.Directory.Traversals (getDirectoryContents)
import System.Posix.FilePath (RawFilePath, takeBaseName, (</>))

import qualified CGrep.Strategy.BoyerMoore as BoyerMoore
import qualified CGrep.Strategy.Levenshtein as Levenshtein
import qualified CGrep.Strategy.Regex as Regex
import qualified CGrep.Strategy.Semantic as Semantic
import qualified CGrep.Strategy.Tokenizer as Tokenizer
import Control.Monad.Catch (MonadCatch (catch), SomeException)
import Control.Monad.IO.Class (MonadIO (liftIO))

import CGrep.FileType (FileType)
import CGrep.FileTypeMap (
    FileTypeInfo,
    fileTypeInfoLookup,
    fileTypeLookup,
 )

import CGrep.FileKind (FileKind)
import Control.Concurrent.MVar (newMVar, takeMVar)
import Control.Monad.Loops (whileM_)
import Data.IORef.Extra (atomicWriteIORef')
import qualified Data.List.NonEmpty as NE (unzip)
import Verbose (putMsgLn, putMsgLnVerbose)

{- | Walk @dir@ and feed the files found to @action@.

The entries of @dir@ are split into directories and everything else.  The
non-directory entries that pass 'fileFilter' (and, when @skip_test@ is set,
'isNotTestFile') are prefixed with @dir@ and handed to @action@ in batches of
at most 8 paths.  Sub-directories are then visited recursively unless they
are prunable ('isPrunableDir') or their absolute path is already in
@visited@.

@walkers@ counts the traversals in flight: the caller increments it before
calling this function, and this function decrements it just before
returning.  While fewer than 64 walkers are registered the sub-directories
of one level are visited concurrently, otherwise sequentially.
-}
withRecursiveContents ::
    Options ->
    RawFilePath ->
    [FileType] ->
    [FileKind] ->
    [RawFilePath] ->
    S.Set RawFilePath ->
    IORef Int ->
    ([RawFilePath] -> IO ()) ->
    IO ()
withRecursiveContents opt@Options{..} dir fTypes fKinds pdirs visited walkers action = do
    xs <- getDirectoryContents dir
    let (dirs, files) = partition ((== dtDir) . fst) xs

    -- filter the list of files
    let files' = (dir </>) . snd <$> filter (\f -> fileFilter opt fTypes fKinds (snd f) && (not skip_test || isNotTestFile (snd f))) files
    let dirs' = (dir </>) . snd <$> dirs

    -- run IO action
    mapM_ action (chunksOf 8 files')

    -- process directories recursively: fan out concurrently only while the
    -- number of registered walkers is small
    tot <- readIORef walkers
    let foreach
            | tot < 64 = forConcurrently_ @[]
            | otherwise = forM_

    foreach dirs' $ \dirPath -> do
        unless (isPrunableDir dirPath pdirs) $
            -- this is a good directory, unless already visited...
            makeRawAbsolute dirPath >>= \cpath ->
                unless (cpath `S.member` visited) $
                    incrRef walkers
                        *> withRecursiveContents opt dirPath fTypes fKinds pdirs (S.insert cpath visited) walkers action

    decrRef walkers

{- | Search @patterns@ in @paths@ using a pool of worker threads.

A producer thread enqueues batches of file paths on a bounded channel: with
@recursive@ or @follow@ set it walks every given directory (defaulting to
@"."@) via 'withRecursiveContents', otherwise it enqueues the given paths
as they are (or a single empty path when no path is given and @isTermIn@ is
'False').  It finally enqueues one empty batch per worker as end-of-stream
marker.

@jobs * 4@ workers (with @jobs@ defaulting to 1) dequeue the batches, run
the searcher selected by 'getSearcher' on every file and write the rendered
output to stdout.  Errors raised while processing a batch are reported on
stderr and do not stop the worker.

When @vim@ or @editor@ is set, the (file, line) pairs of the matches are
collected and, once all workers are done, an editor is launched on them.
-}
parallelSearch :: [RawFilePath] -> [C.ByteString] -> [FileType] -> [FileKind] -> Bool -> ReaderIO ()
parallelSearch paths patterns fTypes fKinds isTermIn = do
    Env{..} <- ask

    let Config{..} = conf
        Options{..} = opt

    let multiplier = 4
        jobs' = fromMaybe 1 jobs
        totalJobs = jobs' * multiplier

    -- bounded channel carrying the batches of files; an empty batch means EOF
    (inCh, outCh) <- liftIO $ newChan 65536

    -- recursively traverse the filesystem ...
    _ <- liftIO . forkOn 0 $ do
        walkers <- newIORef (0 :: Int)
        if recursive || follow
            then forM_ (if null paths then ["."] else paths) $ \p ->
                doesDirectoryExist p >>= \case
                    True ->
                        incrRef walkers
                            *> withRecursiveContents
                                opt
                                p
                                fTypes
                                fKinds
                                (mkPrunableDirName <$> configPruneDirs <> (C.pack <$> prune_dir))
                                (S.singleton p)
                                walkers
                                (writeChan inCh)
                    _ -> writeChan inCh [p]
            else
                forM_
                    ( if null paths && not isTermIn
                        then [""] -- presumably stands for standard input; TODO confirm against the searchers
                        else paths
                    )
                    (\p -> writeChan inCh [p])

        -- enqueue EOF messages (one per worker)...
        when (verbose > 0) $ putMsgLn @Text8 stderr "filesystem traversal completed!"
        replicateM_ totalJobs $ writeChan inCh []

    -- launch the worker threads...
    matchingFiles <- liftIO $ newIORef S.empty

    let env = Env conf opt
        runSearch = getSearcher env

    workers <- forM ([0 .. totalJobs - 1] :: [Int]) $ \idx -> do
        let processor = 1 + idx `div` multiplier
        liftIO . asyncOn processor $ void . runExceptT $ do
            -- asynchronous stdout writers started by this worker
            asRef <- liftIO $ newIORef ([] :: [Async ()])
            forever $ do
                fs <- liftIO $ readChan outCh
                liftIO $
                    E.catch
                        ( case fs of
                            -- EOF: wait for the pending writers of this worker
                            [] -> readIORef asRef >>= mapM_ wait
                            batch ->
                                runReaderT
                                    ( do
                                        out <-
                                            catMaybes
                                                <$> forM
                                                    batch
                                                    ( \f -> do
                                                        out' <- take max_count <$> runSearch (fileTypeInfoLookup opt f) f patterns
                                                        when (vim || editor) $
                                                            liftIO $
                                                                -- 'matchingFiles' is shared by all the workers:
                                                                -- the update must be atomic or matches get lost
                                                                atomicModifyIORef' matchingFiles $ \seen ->
                                                                    (foldr (S.insert . (outFilePath &&& outLineNumb)) seen out', ())
                                                        putOutputElements out'
                                                    )
                                        unless (null out) $
                                            liftIO $
                                                async
                                                    ( do
                                                        let !dump = LB.toStrict $ B.toLazyByteString (mconcat ((<> B.char8 '\n') <$> out))
                                                        B.hPut stdout dump
                                                    )
                                                    >>= \a -> modifyIORef' asRef (a :)
                                    )
                                    env
                        )
                        ( \e ->
                            let msg = show (e :: SomeException)
                                -- the batch is empty when the failure comes from a
                                -- pending writer awaited at EOF: no file to blame
                                target = case fs of
                                    (f : _) -> showFileName conf opt (getTargetName f)
                                    [] -> "cgrep"
                             in C.hPutStrLn stderr (target <> ": error: " <> C.pack (takeN 120 msg))
                        )
                -- leave the 'forever' loop once the EOF marker has been handled
                when (null fs) $ do
                    when (verbose > 0) $ putMsgLn stderr $ "[" <> C.pack (show idx) <> "]@" <> C.pack (show processor) <> " searcher done!"
                    throwE ()

    -- wait workers to complete the job
    liftIO $ mapM_ wait workers

    -- run editor...
    when (vim || editor) $ liftIO $ do
        editor' <-
            if vim
                then return (Just "vim")
                else lookupEnv "EDITOR"

        files <- S.toList <$> readIORef matchingFiles
        let filesUnpacked = Data.Bifunctor.first C.unpack <$> files

        let editFiles =
                ( if fileline || configFileLine
                    then fmap (\(a, b) -> a <> ":" <> show b)
                    else fmap fst
                )
                    filesUnpacked

        putStrLn $ "cgrep: open files " <> unwords editFiles <> "..."

        void $
            runProcess
                (fromMaybe "vi" editor') -- fall back to vi when $EDITOR is unset
                editFiles
                Nothing
                Nothing
                (Just stdin)
                (Just stdout)
                (Just stderr)
                >>= waitForProcess

{- | Select the search strategy from the options in the environment:

* a regex option ('isRegexp') selects the regex searcher;
* otherwise @semantic@ selects the semantic searcher;
* otherwise any tokenizer option ('hasTokenizerOpt') selects the tokenizer;
* otherwise @edit_dist@ selects the Levenshtein searcher;
* otherwise the Boyer-Moore searcher is used.
-}
getSearcher :: Env -> (Maybe (FileType, FileTypeInfo) -> RawFilePath -> [Text8] -> ReaderIO [Output])
getSearcher Env{..}
    | isRegexp opt = Regex.search
    | semantic opt = Semantic.search
    | hasTokenizerOpt opt = Tokenizer.search
    | edit_dist opt = Levenshtein.search
    | otherwise = BoyerMoore.search

-- | 'makeAbsolute' for raw (ByteString) file paths.
makeRawAbsolute :: RawFilePath -> IO RawFilePath
makeRawAbsolute p = makeAbsolute (C.unpack p) <&> C.pack
{-# INLINE makeRawAbsolute #-}

-- | Atomically add one to the counter.
incrRef :: IORef Int -> IO ()
incrRef ref = atomicModifyIORef' ref (\n -> (n + 1, ()))
{-# INLINE incrRef #-}

-- | Atomically subtract one from the counter.
decrRef :: IORef Int -> IO ()
decrRef ref = atomicModifyIORef' ref (\n -> (n - 1, ()))
{-# INLINE decrRef #-}

{- | Tell whether @filename@ passes the type and kind filters.

A file for which 'fileTypeLookup' finds nothing is always rejected.  An
empty filter list accepts every type (respectively kind).
-}
fileFilter :: Options -> [FileType] -> [FileKind] -> RawFilePath -> Bool
fileFilter opt fTypes fKinds filename = fileFilterTypes typ && fileFilterKinds kin
  where
    (typ, kin) = NE.unzip $ fileTypeLookup opt filename
    fileFilterTypes = maybe False (\t -> null fTypes || t `elem` fTypes)
    fileFilterKinds = maybe False (\k -> null fKinds || k `elem` fKinds)

{- | 'False' when the base name of the file is @test@, starts with @test-@ or
@test_@, or ends with @_test@ or @-test@.
-}
isNotTestFile :: RawFilePath -> Bool
isNotTestFile f =
    let fs = [("_test" `C.isSuffixOf`), ("-test" `C.isSuffixOf`), ("test-" `C.isPrefixOf`), ("test_" `C.isPrefixOf`), ("test" ==)] :: [C.ByteString -> Bool]
     in not $ any ($ takeBaseName f) fs
{-# INLINE isNotTestFile #-}

{- | Tell whether @dir@ (with a trailing slash appended if missing) ends with
one of the given prunable names.

NOTE(review): this is a plain suffix match, so a prunable name @dist/@ also
matches a directory such as @mydist/@ -- confirm whether matching on whole
path components is what is wanted here.
-}
isPrunableDir :: RawFilePath -> [RawFilePath] -> Bool
isPrunableDir dir = any (`C.isSuffixOf` pdir)
  where
    pdir = mkPrunableDirName dir
{-# INLINE isPrunableDir #-}

-- | Append a trailing @/@ to the path unless it already ends with one.
mkPrunableDirName :: RawFilePath -> RawFilePath
mkPrunableDirName xs
    | "/" `C.isSuffixOf` xs = xs
    | otherwise = xs <> "/"
{-# INLINE mkPrunableDirName #-}

-- | Index a vector modulo its length (partial: fails on an empty vector).
(.!.) :: V.Vector a -> Int -> a
v .!. i = v ! (i `mod` V.length v)
{-# INLINE (.!.) #-}

-- | Tell whether the type found by 'fileTypeLookup' for @path@ is one of @xs@.
hasFileType :: RawFilePath -> Options -> [FileType] -> Bool
hasFileType path opt xs = isJust $ fileTypeLookup opt path >>= (\(typ, _) -> typ `elemIndex` xs)
{-# INLINE hasFileType #-}

-- | 'True' when at least one of the token-class options is set.
hasTokenizerOpt :: Options -> Bool
hasTokenizerOpt Options{..} =
    identifier
        || nativeType
        || keyword
        || number
        || string
        || operator

-- | 'True' when either the POSIX or the PCRE regex option is set.
isRegexp :: Options -> Bool
isRegexp opt = regex_posix opt || regex_pcre opt
{-# INLINE isRegexp #-}
--------------------------------------------------------------------------------
/src/Util.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module Util where

import Data.Char (toLower)
import Data.Maybe (listToMaybe)

import qualified Data.ByteString.Char8 as C
import Data.Sequence (Seq (Empty, (:<|), (:|>)), (|>))
import qualified Data.Sequence as S
import Text.Read (readMaybe)

{- | Monadic counterpart of 'Data.List.partition': the elements for which the
predicate yields 'True' go to the first list, the others to the second one.
The predicate is run on the elements from left to right.
-}
partitionM :: (Monad m) => (a -> m Bool) -> [a] -> m ([a], [a])
partitionM _ [] = return ([], [])
partitionM f (x : xs) = do
    res <- f x
    (as, bs) <- partitionM f xs
    return ([x | res] <> as, [x | not res] <> bs)
{-# INLINE partitionM #-}

-- | Exclusive or: 'True' when exactly one of the two arguments is 'True'.
xor :: Bool -> Bool -> Bool
xor = (/=)
{-# INLINE xor #-}

{- | 'read' the first string, or fail (without stack trace) with a message
made of the second string and the first 40 characters of the input.
-}
prettyRead :: (Read a) => String -> String -> a
prettyRead xs err =
    case readMaybe xs of
        Just v -> v
        _ -> errorWithoutStackTrace $ err <> ": parse error near '" <> take 40 xs <> "'"

{- | Sliding windows of (at most) @n@ elements, one window starting at each
element of the list; the windows at the end of the list are shorter than @n@.

> spanGroup 2 "abc" == ["ab", "bc", "c"]
-}
spanGroup :: Int -> [a] -> [[a]]
spanGroup _ [] = []
spanGroup 1 xs = map (: []) xs
spanGroup n xs = take n xs : spanGroup n (drop 1 xs)
{-# INLINE spanGroup #-}

{- | 'spanGroup' for sequences.

A window size of 1 yields one singleton per element, like 'spanGroup' (and
like the general case of this very function would): it used to return the
whole sequence as a single window.
-}
spanGroupSeq :: Int -> S.Seq a -> [S.Seq a]
spanGroupSeq _ S.Empty = []
spanGroupSeq 1 xs = foldr (\x acc -> S.singleton x : acc) [] xs
spanGroupSeq n xs = S.take n xs : spanGroupSeq n (S.drop 1 xs)
{-# INLINE spanGroupSeq #-}

{- | Strip one pair of matching quotes (single or double) enclosing the
string; any other string is returned unchanged.
-}
rmQuote :: String -> String
rmQuote [] = []
rmQuote [x] = [x]
rmQuote y@(x : xs)
    | x == '"' || x == '\'' =
        if x == last xs
            then init xs
            else y
    | otherwise = y
{-# INLINE rmQuote #-}

-- | 'rmQuote' for ByteStrings.
rmQuote8 :: C.ByteString -> C.ByteString
rmQuote8 b
    | C.length b < 2 = b
    | otherwise =
        case C.uncons b of
            Just (x, xs) -> if (x == '"' || x == '\'') && (x == C.last b) then C.init xs else b
            _ -> b
{-# INLINE rmQuote8 #-}

{- | 'Data.Maybe.mapMaybe' over any 'Foldable': keep, in order, the results
for which the function returns 'Just'.
-}
mapMaybe' :: (Foldable f) => (a -> Maybe b) -> f a -> [b]
mapMaybe' f = foldr g []
  where
    g x rest
        | Just y <- f x = y : rest
        | otherwise = rest

{- | Find the first element satisfying the predicate together with its
0-based position.  When no element matches the result is @(# 0, Nothing #)@.
-}
findWithIndex :: forall a. (a -> Bool) -> [a] -> (# Int, Maybe a #)
findWithIndex predicate = scan 0
  where
    -- the position is kept strict while walking the list
    scan _ [] = (# 0, Nothing #)
    scan !pos (y : ys)
        | predicate y = (# pos, Just y #)
        | otherwise = scan (pos + 1) ys
--------------------------------------------------------------------------------
/src/Verbose.hs:
--------------------------------------------------------------------------------
--
-- Copyright (c) 2013-2023 Nicola Bonelli
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
--

module Verbose where

import Control.Monad.Trans.Reader ( reader )
import Control.Monad.IO.Class ( MonadIO(liftIO) )
import Control.Monad ( when )

import Options ( Options(verbose) )
import Reader ( ReaderIO, Env(..) )

import qualified Data.ByteString as C (hPutStr, hPut)
import GHC.IO.Handle ( Handle )
import System.IO ( Handle, hPutStrLn, hPutStr )
import Data.String ( IsString )

import qualified Data.ByteString.Char8 as C
import qualified Data.Text as T
import qualified Data.Text.IO as T


-- | String-like types that can be written to a 'Handle', with or without a
-- trailing newline.
class (IsString a) => PutStr a where
    putStringLn :: Handle -> a -> IO ()
    putString :: Handle -> a -> IO ()

instance PutStr String where
    putStringLn = hPutStrLn
    putString = hPutStr

instance PutStr C.ByteString where
    putStringLn = C.hPutStrLn
    putString = C.hPutStr

instance PutStr T.Text where
    putStringLn = T.hPutStrLn
    putString = T.hPutStr


-- | Write the message (plus newline) to the handle, but only when the
-- @verbose@ option is at least the given level.
putMsgLnVerbose :: (PutStr a) => Int -> Handle -> a -> ReaderIO ()
putMsgLnVerbose l h xs =
    reader (verbose . opt) >>= \level ->
        when (level >= l) (liftIO (putStringLn h xs))
{-# INLINE putMsgLnVerbose #-}


-- | Write the message (plus newline) to the handle unconditionally.
putMsgLn :: (PutStr a, MonadIO m) => Handle -> a -> m ()
putMsgLn h = liftIO . putStringLn h
{-# INLINE putMsgLn #-}
--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# http://docs.haskellstack.org/en/stable/yaml_configuration/

# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies.
For example: 10 | # 11 | # resolver: lts-3.5 12 | # resolver: nightly-2015-09-21 13 | # resolver: ghc-7.10.2 14 | # resolver: ghcjs-0.1.0_ghc-7.10.2 15 | # resolver: 16 | # name: custom-snapshot 17 | # location: "./custom-snapshot.yaml" 18 | resolver: lts-21.25 19 | 20 | # User packages to be built. 21 | # Various formats can be used as shown in the example below. 22 | # 23 | # packages: 24 | # - some-directory 25 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz 26 | # - location: 27 | # git: https://github.com/commercialhaskell/stack.git 28 | # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a 29 | # - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a 30 | # extra-dep: true 31 | # subdirs: 32 | # - auto-update 33 | # - wai 34 | # 35 | # A package marked 'extra-dep: true' will only be built if demanded by a 36 | # non-dependency (i.e. a user package), and its test suites and benchmarks 37 | # will not be run. This is useful for tweaking upstream packages. 38 | packages: 39 | - '.' 
40 | # Dependency packages to be pulled from upstream that are not in the resolver 41 | # (e.g., acme-missiles-0.3) 42 | extra-deps: 43 | - bitwise-1.0.0.1 44 | 45 | # Override default flag values for local packages and extra-deps 46 | flags: {} 47 | 48 | # Extra package databases containing global packages 49 | extra-package-dbs: [] 50 | 51 | # Control whether we use the GHC we find on the path 52 | # system-ghc: true 53 | # 54 | # Require a specific version of stack, using version ranges 55 | # require-stack-version: -any # Default 56 | # require-stack-version: ">=1.3" 57 | # 58 | # Override the architecture used by stack, especially useful on Windows 59 | # arch: i386 60 | # arch: x86_64 61 | # 62 | # Extra directories used by stack for building 63 | # extra-include-dirs: [/path/to/dir] 64 | # extra-lib-dirs: [/path/to/dir] 65 | # 66 | # Allow a newer minor version of GHC than the snapshot specifies 67 | # compiler-check: newer-minor 68 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | # 5 | 6 | message(STATUS "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"") 7 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | # 5 | 6 | all: 7 | @echo "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"" 8 | @echo 'Sed etiam a suspendisse. 
"Aliquam nulla erat risus."' 9 | -------------------------------------------------------------------------------- /test/test.c: -------------------------------------------------------------------------------- 1 | /* hello world */ 2 | int 3 | main(int argc, char *argv[]) 4 | { 5 | const char * x = "hello world"; 6 | int ab = 10; 7 | return 0; 8 | } 9 | 10 | -------------------------------------------------------------------------------- /test/test.chpl: -------------------------------------------------------------------------------- 1 | // 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | // 5 | 6 | /* 7 | * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 8 | * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 9 | * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 10 | * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 11 | */ 12 | 13 | writeln("Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""); 14 | 15 | -------------------------------------------------------------------------------- /test/test.coffee: -------------------------------------------------------------------------------- 1 | # 2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | # 5 | 6 | ### 7 | 8 | Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 9 | Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 10 | Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 11 | tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 
12 | 13 | ### 14 | 15 | console.log "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\" " 16 | 17 | -------------------------------------------------------------------------------- /test/test.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | // 5 | 6 | /* 7 | * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 8 | * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 9 | * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 10 | * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 11 | */ 12 | 13 | int 14 | main(int argc, char *argv[]) 15 | { 16 | char a = 'a', b = '"', c = '\'', d = '\n', e = '"'; 17 | 18 | const char * msg = "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""; 19 | const char * raw = R"P(This is a raw string)P"; 20 | return 0; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /test/test.dhall: -------------------------------------------------------------------------------- 1 | {- This is an example Dhall configuration file 2 | 3 | Can you spot the mistake? 4 | 5 | Fix the typo, then move onto the "Definitions" 6 | example 7 | -} 8 | 9 | { home = "/home/bill" 10 | , privateKey = "/home/bill/.ssh/id_ed25519" 11 | , publicKey = "/home/blil/.ssh/id_ed25519.pub" 12 | } 13 | -------------------------------------------------------------------------------- /test/test.erl: -------------------------------------------------------------------------------- 1 | % 2 | % Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | % Morbi vivamus dolor nisl mauris ultricies molestie lacus. 
Proin ad nullam id integer. 4 | % 5 | 6 | 7 | io:format("Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"~n"). 8 | 9 | 10 | -------------------------------------------------------------------------------- /test/test.fs: -------------------------------------------------------------------------------- 1 | // 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | // 5 | 6 | (* 7 | * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 8 | * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 9 | * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 10 | * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 11 | *) 12 | 13 | 14 | printfn "%s" "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""; 15 | printfn "%s" @"Sed etiam a suspendisse. ""Aliquam nulla erat risus."""; 16 | 17 | -------------------------------------------------------------------------------- /test/test.go: -------------------------------------------------------------------------------- 1 | // 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | // 5 | 6 | /* 7 | * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 8 | * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 9 | * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 10 | * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 
11 | */ 12 | package main 13 | 14 | import "fmt" 15 | 16 | func main() { 17 | var a = `a` 18 | var b = `"` 19 | var c = "'" 20 | var d = "\n" 21 | 22 | var msg1 = "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"" 23 | var msg2 = `Sed etiam a suspendisse. "Aliquam nulla erat risus."\` 24 | 25 | fmt.Print(a, b, c, d, msg1, msg2) 26 | } 27 | -------------------------------------------------------------------------------- /test/test.h: -------------------------------------------------------------------------------- 1 | /* hello world */ 2 | static inline 3 | int fun(int argc, char *argv[]) 4 | { 5 | const char * x = "hello world"; 6 | int ab = 10; 7 | return 0; 8 | } 9 | 10 | -------------------------------------------------------------------------------- /test/test.hs: -------------------------------------------------------------------------------- 1 | -- 2 | -- Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | -- Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | -- 5 | 6 | {- 7 | Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 8 | Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 9 | Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 10 | tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 11 | -} 12 | 13 | {-# LANGUAGE QuasiQuotes #-} 14 | 15 | import Data.String.Here 16 | 17 | main = do 18 | putStrLn "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"" 19 | putStrLn [here| Sed etiam a suspendisse. "Aliquam nulla erat risus." 
|] 20 | -------------------------------------------------------------------------------- /test/test.html: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /test/test.ini: -------------------------------------------------------------------------------- 1 | ; last modified 1 April 2001 by John Doe 2 | [owner] 3 | name = John Doe 4 | organization = Acme Widgets Inc. 5 | 6 | [database] 7 | ; use IP address in case network name resolution is not working 8 | server = 192.0.2.62 9 | port = 143 10 | file = "payroll.dat" 11 | -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | // 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | // 5 | 6 | /* 7 | * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 8 | * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 9 | * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 10 | * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 11 | */ 12 | 13 | 14 | document.write("Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""); 15 | 16 | -------------------------------------------------------------------------------- /test/test.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | -- Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | -- 5 | 6 | --[[ 7 | Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. 
Maecenas risus dictum risus. 8 | Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 9 | Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 10 | tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 11 | --]] 12 | 13 | print "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\"" 14 | print 'Sed etiam a suspendisse. "Aliquam nulla erat risus.' 15 | 16 | print [===[Sed etiam a suspendisse. [==[ Aliquam nulla erat risus. ]===] 17 | print [==[Sed etiam a suspendisse. [=[ Aliquam nulla erat risus. ]==] 18 | print [=[Sed etiam a suspendisse. [[ Aliquam nulla erat risus. ]=] 19 | print [[Sed etiam a suspendisse. Aliquam nulla erat risus. ]] 20 | 21 | -------------------------------------------------------------------------------- /test/test.ml: -------------------------------------------------------------------------------- 1 | (* 2 | * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 3 | * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 4 | * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 5 | * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 6 | *) 7 | 8 | 9 | print_endline "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""; 10 | 11 | -------------------------------------------------------------------------------- /test/test.php3: -------------------------------------------------------------------------------- 1 | // 2 | // Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | // Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | // 5 | 6 | # 7 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 8 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 
9 | # 10 | 11 | /* 12 | * Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 13 | * Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 14 | * Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 15 | * tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 16 | */ 17 | 18 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /test/test.pl: -------------------------------------------------------------------------------- 1 | # 2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | # 5 | 6 | =pod 7 | Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 8 | Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 9 | Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 10 | tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 11 | =cut 12 | 13 | print "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""; 14 | print 'Sed etiam a suspendisse. "Aliquam nulla erat risus."'; 15 | 16 | -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- 1 | # 2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | # 5 | 6 | print """Sed etiam a suspendisse. \"Aliquam nulla erat risus. """ 7 | print '''Sed etiam a suspendisse. "Aliquam nulla erat risus. ''' 8 | 9 | print "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""; 10 | print 'Sed etiam a suspendisse. 
"Aliquam nulla erat risus."'; 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /test/test.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | # 5 | 6 | =begin 7 | Eget orci rutrum vel. Elit nullam amet integer. Fusce tellus ut massa. Maecenas risus dictum risus. 8 | Augue aliquam molestie id. Commodo ultricies pede massa fusce ullamcorper dapibus dui. 9 | Maecenas elementum duis porttitor facilisis lectus eleifend nec. Arcu et pellentesque 10 | tellus non tristique suscipit nec. Tempor iaculis orci nec enim ac. 11 | =end 12 | 13 | puts "Sed etiam a suspendisse. \"Aliquam nulla erat risus.\""; 14 | puts 'Sed etiam a suspendisse. \'Aliquam nulla erat risus.\''; 15 | puts %q(Sed etiam a suspendisse. 'Aliquam nulla erat risus.') 16 | puts %Q(Sed etiam a suspendisse. 'Aliquam nulla erat risus.') 17 | puts %|Sed etiam a suspendisse. 'Aliquam nulla erat risus.'| 18 | -------------------------------------------------------------------------------- /test/test.rs: -------------------------------------------------------------------------------- 1 | 2 | pub struct Gateway { 3 | config: Config, 4 | drain: drain::Watch, 5 | stack: Stack, 6 | } 7 | -------------------------------------------------------------------------------- /test/test.sh: -------------------------------------------------------------------------------- 1 | # 2 | # Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | # Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | # 5 | 6 | echo "Sed etiam a suspendisse. 
\"Aliquam nulla erat risus.\""; 7 | 8 | -------------------------------------------------------------------------------- /test/test.tex: -------------------------------------------------------------------------------- 1 | % 2 | % Lorem ipsum dolor sit. Amet perferendis metus feugiat. Suspendisse massa egestas quam. 3 | % Morbi vivamus dolor nisl mauris ultricies molestie lacus. Proin ad nullam id integer. 4 | % 5 | 6 | Sed etiam a suspendisse. "Aliquam nulla erat risus." 7 | 8 | -------------------------------------------------------------------------------- /test/test.toml: -------------------------------------------------------------------------------- 1 | # This is a TOML document 2 | 3 | title = "TOML Example" 4 | 5 | [owner] 6 | name = "Tom Preston-Werner" 7 | dob = 1979-05-27T07:32:00-08:00 8 | 9 | [database] 10 | enabled = true 11 | ports = [ 8000, 8001, 8002 ] 12 | data = [ ["delta", "phi"], [3.14] ] 13 | temp_targets = { cpu = 79.5, case = 72.0 } 14 | 15 | [servers] 16 | 17 | [servers.alpha] 18 | ip = "10.0.0.1" 19 | role = "frontend" 20 | 21 | [servers.beta] 22 | ip = "10.0.0.2" 23 | role = "backend" 24 | -------------------------------------------------------------------------------- /test/test.u: -------------------------------------------------------------------------------- 1 | -- this is an hello world! 2 | 3 | helloWorld : '{IO, Exception} () 4 | helloWorld = do 5 | use Text ++ 6 | name = !readLine 7 | printLine ("Hello " ++ name) 8 | -------------------------------------------------------------------------------- /test/test.utf8: -------------------------------------------------------------------------------- 1 | Original by Markus Kuhn, adapted for HTML by Martin Dürst. 2 | 3 | UTF-8 encoded sample plain-text file 4 | ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ 5 | 6 | Markus Kuhn [ˈmaʳkʊs kuːn] — 1999-08-20 7 | 8 | 9 | The ASCII compatible UTF-8 encoding of ISO 10646 and Unicode 10 | plain-text files is defined in RFC 2279 and in ISO 10646-1 Annex R. 
11 | 12 | 13 | Using Unicode/UTF-8, you can write in emails and source code things such as 14 | 15 | Mathematics and Sciences: 16 | 17 | ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), 18 | 19 | ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (A ⇔ B), 20 | 21 | 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm 22 | 23 | Linguistics and dictionaries: 24 | 25 | ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn 26 | Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] 27 | 28 | APL: 29 | 30 | ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ 31 | 32 | Nicer typography in plain text files: 33 | 34 | ╔══════════════════════════════════════════╗ 35 | ║ ║ 36 | ║ • ‘single’ and “double” quotes ║ 37 | ║ ║ 38 | ║ • Curly apostrophes: “We’ve been here” ║ 39 | ║ ║ 40 | ║ • Latin-1 apostrophe and accents: '´` ║ 41 | ║ ║ 42 | ║ • ‚deutsche‘ „Anführungszeichen“ ║ 43 | ║ ║ 44 | ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ 45 | ║ ║ 46 | ║ • ASCII safety test: 1lI|, 0OD, 8B ║ 47 | ║ ╭─────────╮ ║ 48 | ║ • the euro symbol: │ 14.95 € │ ║ 49 | ║ ╰─────────╯ ║ 50 | ╚══════════════════════════════════════════╝ 51 | 52 | Greek (in Polytonic): 53 | 54 | The Greek anthem: 55 | 56 | Σὲ γνωρίζω ἀπὸ τὴν κόψη 57 | τοῦ σπαθιοῦ τὴν τρομερή, 58 | σὲ γνωρίζω ἀπὸ τὴν ὄψη 59 | ποὺ μὲ βία μετράει τὴ γῆ. 60 | 61 | ᾿Απ᾿ τὰ κόκκαλα βγαλμένη 62 | τῶν ῾Ελλήνων τὰ ἱερά 63 | καὶ σὰν πρῶτα ἀνδρειωμένη 64 | χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! 65 | 66 | From a speech of Demosthenes in the 4th century BC: 67 | 68 | Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, 69 | ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς 70 | λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ 71 | τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ 72 | εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ 73 | πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν 74 | οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, 75 | xxx οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. 
ἐγὼ δέ, ὅτι μέν 76 | ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον 77 | τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι 78 | γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν 79 | προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους 80 | σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ 81 | τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ 82 | τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς 83 | τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. 84 | 85 | Δημοσθένους, Γ´ ᾿Ολυνθιακὸς 86 | 87 | Georgian: 88 | 89 | From a Unicode conference invitation: 90 | 91 | გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო 92 | კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, 93 | ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს 94 | ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, 95 | ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება 96 | ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, 97 | ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. 98 | 99 | Russian: 100 | 101 | From a Unicode conference invitation: 102 | 103 | Зарегистрируйтесь сейчас на Десятую Международную Конференцию по 104 | Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. 105 | Конференция соберет широкий круг экспертов по вопросам глобального 106 | Интернета и Unicode, локализации и интернационализации, воплощению и 107 | применению Unicode в различных операционных системах и программных 108 | приложениях, шрифтах, верстке и многоязычных компьютерных системах. 
109 | 110 | Thai (UCS Level 2): 111 | 112 | Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese 113 | classic 'San Gua'): 114 | 115 | [----------------------------|------------------------] 116 | ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ 117 | สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา 118 | ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา 119 | โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ 120 | เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ 121 | ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ 122 | พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ 123 | ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ 124 | 125 | (The above is a two-column text. If combining characters are handled 126 | correctly, the lines of the second column should be aligned with the 127 | | character above.) 128 | 129 | Ethiopian: 130 | 131 | Proverbs in the Amharic language: 132 | 133 | ሰማይ አይታረስ ንጉሥ አይከሰስ። 134 | ብላ ካለኝ እንደአባቴ በቆመጠኝ። 135 | ጌጥ ያለቤቱ ቁምጥና ነው። 136 | ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። 137 | የአፍ ወለምታ በቅቤ አይታሽም። 138 | አይጥ በበላ ዳዋ ተመታ። 139 | ሲተረጉሙ ይደረግሙ። 140 | ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። 141 | ድር ቢያብር አንበሳ ያስር። 142 | ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። 143 | እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። 144 | የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። 145 | ሥራ ከመፍታት ልጄን ላፋታት። 146 | ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። 147 | የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። 148 | ተንጋሎ ቢተፉ ተመልሶ ባፉ። 149 | ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። 150 | እግርህን በፍራሽህ ልክ ዘርጋ። 151 | 152 | Runes: 153 | 154 | ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ 155 | 156 | (Old English, which transcribed into Latin reads 'He cwaeth that he 157 | bude thaem lande northweardum with tha Westsae.' 
and means 'He said 158 | that he lived in the northern land near the Western Sea.') 159 | 160 | Braille: 161 | 162 | ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ 163 | 164 | ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ 165 | ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ 166 | ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ 167 | ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ 168 | ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ 169 | ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ 170 | 171 | ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ 172 | 173 | ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ 174 | ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ 175 | ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ 176 | ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ 177 | ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ 178 | ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ 179 | ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ 180 | ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ 181 | ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ 182 | 183 | (The first couple of paragraphs of "A Christmas Carol" by Dickens) 184 | 185 | Compact font selection example text: 186 | 187 | ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 188 | abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ 189 | –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд 190 | ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა 191 | 192 | Greetings in various languages: 193 | 194 | Hello world, Καλημέρα κόσμε, コンニチハ 195 | 196 | Box drawing alignment tests: █ 197 | ▉ 198 | ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ 199 | ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ 200 | ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ 201 | ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ 202 | ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ 203 | ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ 204 | ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ 205 | -------------------------------------------------------------------------------- /test/test.zig: 
-------------------------------------------------------------------------------- 1 | // this is a comment 2 | const std = @import("std"); 3 | 4 | pub fn main() !void { 5 | const stdout = std.io.getStdOut().writer(); 6 | 7 | const hello_world_in_c = 8 | \\#include 9 | \\ 10 | \\int main(int argc, char **argv) { 11 | \\ printf("hello world\n"); 12 | \\ return 0; 13 | \\} 14 | ; 15 | 16 | try stdout.print("Hello, {s}!\n", .{"world"}); 17 | } 18 | --------------------------------------------------------------------------------