├── .gitignore ├── .ocamlformat ├── CHANGES.md ├── CONTRIBUTING.md ├── LICENSE.md ├── Makefile ├── README.md ├── bin-tmp ├── dune ├── sexp_group-help-for-review.org ├── sexp_group.ml └── sexp_group.mli ├── bin ├── CHANGELOG.txt ├── TODO.txt ├── dune ├── main.ml └── main.mli ├── doc ├── change_by_example.md ├── change_semantics.md ├── dune ├── query_by_example.md └── query_semantics.md ├── dune ├── dune-project ├── lazy_list └── src │ ├── dune │ ├── lazy_list.ml │ └── lazy_list.mli ├── sexp.opam ├── sexp_app ├── pattern │ ├── compiled_query.ml │ ├── compiled_query.mli │ ├── dune │ ├── engine.ml │ ├── engine.mli │ ├── help.ml │ ├── help.mli │ ├── internal_parser.mly │ ├── lexer.mli │ ├── lexer.mll │ ├── output_method.ml │ ├── output_method.mli │ ├── parser.ml │ ├── parser.mli │ ├── query.ml │ ├── query.mli │ └── sexp_app_pattern.ml ├── pattern_test │ ├── dune │ ├── sexp_app_pattern_test.ml │ ├── test.ml │ └── test.mli ├── src │ ├── csv_file.ml │ ├── csv_file.mli │ ├── csv_lexeme.mli │ ├── csv_lexeme.mll │ ├── csv_record.ml │ ├── csv_record.mli │ ├── dune │ ├── manifest.ml │ ├── manifest.mli │ ├── parse_everything.ml │ ├── parse_everything.mli │ ├── parts.ml │ ├── parts.mli │ ├── semantics.ml │ ├── semantics.mli │ ├── sexp_app.ml │ ├── sexp_ext.ml │ ├── sexp_ext.mli │ ├── sexps.ml │ ├── sexps.mli │ ├── string_pad.ml │ ├── string_pad.mli │ ├── syntax.ml │ ├── syntax.mli │ ├── test.ml │ ├── test.mli │ ├── to_csv.ml │ ├── to_csv.mli │ ├── to_json.ml │ ├── to_json.mli │ ├── utils.ml │ └── utils.mli └── test │ ├── dune │ ├── import.ml │ ├── sexp_app_test.ml │ ├── test_parts.ml │ └── test_parts.mli └── src ├── dune ├── grammar.ml ├── grammar.mli ├── key_extractor.ml ├── key_extractor.mli ├── located.ml ├── located.mli ├── main_atom.ml ├── main_atom.mli ├── main_diff.ml ├── main_diff.mli ├── main_fzf.ml ├── main_fzf.mli ├── main_get.ml ├── main_get.mli ├── main_group.ml ├── main_group.mli ├── main_json.ml ├── main_json.mli ├── main_of_xml.ml ├── main_of_xml.mli ├── main_parts.ml ├── main_parts.mli ├── main_pattern.ml ├── main_pattern.mli ├── main_pp.ml ├── main_pp.mli ├── main_print.ml ├── main_print.mli ├── main_query.ml ├── main_query.mli ├── main_resolve_macros.ml ├── main_resolve_macros.mli ├── main_restructure.ml ├── main_restructure.mli ├── main_select.ml ├── main_select.mli ├── main_sexpify.ml ├── main_sexpify.mli ├── main_sort.ml ├── main_sort.mli ├── main_to_csv.ml ├── main_to_csv.mli ├── main_validate.ml ├── main_validate.mli ├── pat_query.ml ├── pat_query.mli ├── query.ml ├── query.mli ├── quine.ml ├── quine.mli ├── readme.ml ├── readme.mli ├── sexp_cmds.ml ├── sexp_cmds.mli ├── shared_params.ml └── shared_params.mli /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | *.install 3 | *.merlin 4 | _opam 5 | 6 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | profile=janestreet 2 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | 2 | ## Release v0.17.0 3 | 4 | - [index] expressions in [sexp query] now support negative indexes. -1 selects the last 5 | element in a list, -2 selects the second-to-last element, and so on. 6 | 7 | - added [sexp pretty] as an alias for [sexp pp] 8 | 9 | - [sexp select] now allows selecting on strings with spaces 10 | 11 | - The [pp] subcommand can now take a file argument rather than always having to read from 12 | stdin. 13 | 14 | - Similarly, [sexp validate] can now take one or more filenames as arguments 15 | 16 | - Added a subcommand [sort] for sorting a sequence of sexps. 17 | 18 | - [sexp to-json] time complexity when merging arrays and objects went from O(n^2) to 19 | O(n log n). 20 | 21 | - Fixed a [sexp change] bug that was disabling the variable syntax escaping in [rewrite] 22 | expressions. 23 | 24 | - Added code for a [sexp-group] command for grouping a sequence of sexps by some 25 | subexpression. This will eventually become a subcommand of the [sexp] executable. For 26 | now it lives in tmp-bin/ 27 | 28 | - added a -drop flag to [sexp select] and [sexp multi-select] that will output the 29 | original sexp with the matching fields removed, rather than printing out the matches 30 | themselves. 31 | 32 | ## Release v0.16.0 33 | 34 | - added a subcommand [sexp validate] that checks whether an input parses as a sexp 35 | - exposed a new value [Sexp_sort.command]. We are testing this now and it will eventually 36 | become the [sexp sort] subcommand. 37 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | This repository contains open source software that is developed and 2 | maintained by [Jane Street][js]. 3 | 4 | Contributions to this project are welcome and should be submitted via 5 | GitHub pull requests. 6 | 7 | Signing contributions 8 | --------------------- 9 | 10 | We require that you sign your contributions. Your signature certifies 11 | that you wrote the patch or otherwise have the right to pass it on as 12 | an open-source patch. The rules are pretty simple: if you can certify 13 | the below (from [developercertificate.org][dco]): 14 | 15 | ``` 16 | Developer Certificate of Origin 17 | Version 1.1 18 | 19 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 20 | 1 Letterman Drive 21 | Suite D4700 22 | San Francisco, CA, 94129 23 | 24 | Everyone is permitted to copy and distribute verbatim copies of this 25 | license document, but changing it is not allowed. 26 | 27 | 28 | Developer's Certificate of Origin 1.1 29 | 30 | By making a contribution to this project, I certify that: 31 | 32 | (a) The contribution was created in whole or in part by me and I 33 | have the right to submit it under the open source license 34 | indicated in the file; or 35 | 36 | (b) The contribution is based upon previous work that, to the best 37 | of my knowledge, is covered under an appropriate open source 38 | license and I have the right under that license to submit that 39 | work with modifications, whether created in whole or in part 40 | by me, under the same open source license (unless I am 41 | permitted to submit under a different license), as indicated 42 | in the file; or 43 | 44 | (c) The contribution was provided directly to me by some other 45 | person who certified (a), (b) or (c) and I have not modified 46 | it. 47 | 48 | (d) I understand and agree that this project and the contribution 49 | are public and that a record of the contribution (including all 50 | personal information I submit with it, including my sign-off) is 51 | maintained indefinitely and may be redistributed consistent with 52 | this project or the open source license(s) involved. 53 | ``` 54 | 55 | Then you just add a line to every git commit message: 56 | 57 | ``` 58 | Signed-off-by: Joe Smith 59 | ``` 60 | 61 | Use your real name (sorry, no pseudonyms or anonymous contributions.) 62 | 63 | If you set your `user.name` and `user.email` git configs, you can sign 64 | your commit automatically with git commit -s. 65 | 66 | [dco]: http://developercertificate.org/ 67 | [js]: https://opensource.janestreet.com/ 68 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2005--2025 Jane Street Group, LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | INSTALL_ARGS := $(if $(PREFIX),--prefix $(PREFIX),) 2 | 3 | default: 4 | dune build 5 | 6 | install: 7 | dune install $(INSTALL_ARGS) 8 | 9 | uninstall: 10 | dune uninstall $(INSTALL_ARGS) 11 | 12 | reinstall: uninstall install 13 | 14 | clean: 15 | dune clean 16 | 17 | .PHONY: default install uninstall reinstall clean 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Sexp - The s-expression toolkit 2 | =============================== 3 | 4 | A suite of tools for working with s-expressions from the command line. 5 | It contains subcommands for pretty printing, querying, and modifying 6 | sexps, as well as some conversions to and from other formats. 7 | 8 | See also: 9 | 10 | - [Sexp query by example](doc/query_by_example.md) 11 | - [Sexp query formal semantics](doc/query_semantics.md) 12 | - [Sexp change formal semantics](doc/change_semantics.md) 13 | - [Sexp change by example](doc/change_by_example.md) 14 | 15 | ```sh 16 | $ sexp -help 17 | the s-expression toolkit 18 | 19 | sexp SUBCOMMAND 20 | 21 | === subcommands === 22 | 23 | assemble Assemble a lists of parts into sexps. Sexp part lists are 24 | separated by newlines. 25 | change transform an s-expression 26 | flatten Flatten a list of sexp into its parts. Each part on its own 27 | line. 28 | get extract parts of an s-expression 29 | of-xml convert XML from stdin into an s-expression 30 | pp Pretty print S expressions in a human-friendly way. 31 | print pretty-print an s-expression 32 | query query an s-expression 33 | resolve-macros resolve macros in a sexp 34 | restructure recover structure of an s-expression 35 | select Use CSS-style selectors to traverse sexp trees 36 | to-csv converts a list of record s-expressions into CSV format 37 | validate validate a sequence of s-expressions on stdin 38 | version print version information 39 | help explain a given subcommand (perhaps recursively) 40 | ``` 41 | 42 | ## Examples 43 | 44 | Colorize the output of `ocamlc -dlambda`: 45 | 46 | 47 | 67 |
68 | $ ocamlc -dlambda -c fact.ml 2>&1 | sexp pp -color
69 | (setglobal Fact! (
70 |   letrec
71 |   (fact/1008 (
72 |     function n/1009 (
73 |       if (== n/1009 0) 1 (* n/1009 (apply fact/1008 (- n/1009 1))))))
74 |   (makeblock 0 fact/1008)))
75 | 
76 | 77 | Extract the list of command run by jenga: 78 | 79 | ``` 80 | $ cat query.sexp 81 | (pipe 82 | (variant Job_started 1) 83 | (index 1) 84 | (wrap (cat (field prog) (pipe (field args) each))) 85 | ) 86 | $ sexp query -file query.sexp < .jenga/.jenga.debug 87 | (ocamlc -c foo.mli) 88 | (ocamlc -c foo.ml) 89 | ... 90 | ``` 91 | -------------------------------------------------------------------------------- /bin-tmp/dune: -------------------------------------------------------------------------------- 1 | (executables 2 | (modes byte exe) 3 | (names sexp_group) 4 | (public_names sexp-group) 5 | (libraries sexp_cmds core_unix.command_unix) 6 | (flags :standard -g) 7 | (preprocess 8 | (pps ppx_jane))) 9 | -------------------------------------------------------------------------------- /bin-tmp/sexp_group-help-for-review.org: -------------------------------------------------------------------------------- 1 | * sexp_group.exe 2 | 3 | : group input sexps by an arbitrary key 4 | : 5 | : sexp_group.exe 6 | : 7 | : Groups a list of s-expressions. The group key can be specified in a variety of formats: 8 | : a field name, a query as used for sexp query, a pattern as used for sexp pat-query, 9 | : a path as used for sexp get, or a CSS-style program as used for sexp select. 10 | : 11 | : For each unique key in the input, one record-style sexp will be outputted with three 12 | : keys: "key", containing the common key for the grouped values; "values", containing 13 | : the list of grouped values; and "count", the number of grouped values. 14 | : 15 | : The output groups will be sorted in order of appearance of each key, and the list of 16 | : values for each key will be in the same order as in the input. 17 | : 18 | : You can group by multiple keys, in which case the "key" in the output will be a list 19 | : containing each individual key. You can specify multiple keys by passing the same 20 | : selector format multiple times, e.g., "-field foo -field bar", or, if you want to use 21 | : multiple kinds of selectors, by using the special "-key" flag (see below). 22 | : 23 | : Missing keys can be handled in various ways by using the "-if-no-key" flag. By default 24 | : input sexps with missing keys will cause an error. By passing "-if-no-key drop", inputs 25 | : with missing keys will be dropped from the input. Missing keys can also just be treated 26 | : as units ( "()" ) by passing "-if-no-key unit". 27 | : 28 | : When using "-if-no-key unit", there is no way to disambiguate between a missing key 29 | : and an actual unit. If an actual unit is seen in the input, an error will be thrown. 30 | : To resolve this, there are two options: 31 | : - "-if-no-key wrap" will wrap all keys in an outer sexp, so that actual unit keys 32 | : become "(())", while missing keys are "()". 33 | : - "-if-no-key force-unit" will simply treat all missing keys as units, even if some 34 | : keys are actual units. 35 | : 36 | : To specify multiple keys that use different access formats, or if you want to specify 37 | : different handling of missing values for each key, use the "-key" flag and pass a 38 | : string of the form ":", where: 39 | : - is one of (field|index|query|pat-query|get|select) 40 | : - are optional strings to control specific sort behavior for that column. 41 | : Each modifier is prefixed with a '/'. Options are: 42 | : mfail -> raise an error if the key is missing (default) 43 | : mdrop -> drop sexps with missing keys from the output 44 | : munit -> treat missing keys as unit ( "()" ) 45 | : mwrap -> wrap all keys in an outer sexp; missing keys are units ( "()" ) 46 | : mforceunit -> treat missing keys as unit; even if there are actual unit keys 47 | : - is the arg you would pass to the equivalent "-" flag. 48 | : 49 | : === flags === 50 | : 51 | : [-field FIELD] ... . Group by the value associated with this field 52 | : [-get PATH] ... . Group by the values referenced by this path, as 53 | : used in sexp get 54 | : [-if-no-key _] . (can be: drop, fail, force-unit, unit, wrap) 55 | : [-index INDEX] ... . Group by the value at this index in the top-level 56 | : of a sexp 57 | : [-key KEY] ... . Group by the key 58 | : [-pat-query PATTERN] ... . Group by the values reference by this query, as 59 | : used in sexp pat-query 60 | : [-query QUERY] ... . Group by the values referenced by this query, as 61 | : used in sexp query 62 | : [-select PATH] ... . Group by the values referenced by this path, as 63 | : used in sexp select 64 | : [-build-info] . print info about this build and exit 65 | : [-version] . print the version of this build and exit 66 | : [-help], -? . print this help text and exit 67 | -------------------------------------------------------------------------------- /bin-tmp/sexp_group.ml: -------------------------------------------------------------------------------- 1 | let () = Command_unix.run Sexp_cmds.Main_group.command 2 | -------------------------------------------------------------------------------- /bin-tmp/sexp_group.mli: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janestreet/sexp/05d436d6c541fd515919d9908768d98b69af04a1/bin-tmp/sexp_group.mli -------------------------------------------------------------------------------- /bin/CHANGELOG.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | released ____________ from trunk on 2009-12-10 3 | 4 | * new change language and subcommand for transforming s-expressions using 5 | rewrite rules 6 | * bug fix: overhauled implementation is obviously tail-recursive 7 | (no more segfaults!) 8 | * bug fix: equals query now works with non-atomic argument 9 | * new flags for sexp query 10 | * -allow-empty-output 11 | * -quine flag 12 | * new output modes -count and -quiet 13 | * sexp query handles multiple filenames on the command line the way grep does 14 | * query results optionally labelled with the file they came from 15 | * added (test E1 E2 ...) as syntactic sugar for (test (pipe E1 E2 ...)) 16 | * pipe is now assumed as a default top-level connective 17 | * split out a sexpquery library for OCaml programs (thanks to Michael O'Connor) 18 | * various improvements to Command module, in anticipation of wider use 19 | 20 | ------------------------------------------------------------------------------- 21 | released 051af2d59b95 from trunk on 2009-03-20 22 | 23 | * added a subcommand facility with auto-help 24 | * added a revision subcommand 25 | * added a to-csv subcommand 26 | * fixed a non-tail-recursion in sexp_ext.ml 27 | * moved to using internally lazy S-expressions 28 | * moved to long flag names 29 | * new flags for sexpquery 30 | -quiet Silent mode (use when running only for exit status) 31 | -group Tread incoming sequence of sexps as a single list sexp 32 | 33 | ------------------------------------------------------------------------------- 34 | released ??? from trunk on 2009-02-19 (again) 35 | 36 | * added "branch" and "lowercase" commands 37 | * reimplemented "or" in terms of "branch" for expected output behavior 38 | 39 | ------------------------------------------------------------------------------- 40 | released 3b224e82263d from trunk on 2009-02-19 41 | 42 | * redefined "and" and "if" with more straightforward semantics 43 | * fixed a bug in the implementation of "and" 44 | * cut empty EXAMPLES section from README 45 | 46 | ------------------------------------------------------------------------------- 47 | released 77a502743af1 from trunk on 2009-02-19 48 | 49 | * removed complicating distinction between printed and selected 50 | sub-expressions so that the 'print' command is no longer necessary. 51 | * lazy input of multiple s-expressions from standard input 52 | * template-based printing via scheme-inspired quasi-quotation 53 | * command line options 54 | -d dump README 55 | -g display grammar for sexpquery programs 56 | -f FILE load programs from a file 57 | -s FILE run standalone scripts (using #! convention) 58 | -m machine output mode: one sexp per line 59 | * exit with status 1 on failed search, just like grep 60 | * various bugs fixed 61 | - wrap now works 62 | - multiple sexps on stdin are no longer handled in reverse order 63 | 64 | ------------------------------------------------------------------------------- 65 | 66 | * initial release on 2009-02-03 67 | 68 | -------------------------------------------------------------------------------- /bin/TODO.txt: -------------------------------------------------------------------------------- 1 | 2 | add more tests for to-csv 3 | 4 | new query commands 5 | (split ,) "one,two,three" => { ("one" "two" "three") } 6 | (merge ,) ("one" "two" "three") => "one,two,three" 7 | (match PAT) (match ".*\.PK") matches 8 | 9 | variations on existing query command 10 | (smash NUM) never remove any more than N parens 11 | (equal A B C) (or (equal A) (equal B) (equal C)) 12 | (index -2) (a b c d) => c 13 | 14 | add commands 15 | sexp set bundled-in sexpset.exe as requested by ralph 16 | sexp print basically just 'sexp query this' (same back-end) 17 | sexp of-csv reverse of to-csv 18 | 19 | add additional options for (of/to)-csv commands 20 | 21 | support for "2D array" sexps ((a b c) (1 2 3)) 22 | optional header record in the CSV 23 | 24 | rethink command names 25 | s/test/grep/ (?) 26 | ... I think I like this better the longer I think about it 27 | 28 | work out the algebra of commands (what laws hold) 29 | 30 | some recursion combinator? 31 | one that doesn't introduce non-termination? 32 | -------------------------------------------------------------------------------- /bin/dune: -------------------------------------------------------------------------------- 1 | (executables 2 | (modes byte exe) 3 | (names main) 4 | (public_names sexp) 5 | (libraries sexp_cmds async core core_unix.command_unix) 6 | (flags :standard -g) 7 | (preprocess 8 | (pps ppx_jane))) 9 | -------------------------------------------------------------------------------- /bin/main.ml: -------------------------------------------------------------------------------- 1 | open Async 2 | 3 | let () = 4 | Writer.behave_nicely_in_pipeline (); 5 | Command_unix.run Sexp_cmds.command 6 | ;; 7 | -------------------------------------------------------------------------------- /bin/main.mli: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janestreet/sexp/05d436d6c541fd515919d9908768d98b69af04a1/bin/main.mli -------------------------------------------------------------------------------- /doc/change_by_example.md: -------------------------------------------------------------------------------- 1 | # Sexp change by example 2 | 3 | Sexp change is a command-line tool for manipulating S-expressions, 4 | useful if you want to do some data munging without having to write a 5 | full-fledged program to parse and process your sexps. 6 | 7 | This doc collects examples of sexp change in action. For a more 8 | complete overview of the language, see 9 | [this doc](./change_semantics.md), 10 | which includes many small examples showing how to use each keyword. 11 | 12 | Applying a rewrite rule throughout a file 13 | ----------------------------------------- 14 | 15 | Perhaps the most common way to use `sexp change` is just to barrel 16 | through a file attempting to apply a rewrite rule. You can accomplish 17 | this with the following: 18 | 19 | ``` 20 | (topdown (try (rewrite LHS RHS))) 21 | ``` 22 | 23 | The `topdown` keyword ensures you cover the entire file, while `try` 24 | ensures that if you can't apply the rewrite rule to a given expression 25 | LHS, you leave LHS as it is. 26 | 27 | For convenience, this functionality is exposed with the `sexp rewrite` command. 28 | 29 | Using sexp query and sexp change together 30 | ----------------------------------------- 31 | 32 | Here are a pair of examples making use of sexp query's `index`, 33 | `pipe`, `quote` and `unquote` commands, and sexp change's `seq`, 34 | `rewrite`, and `concat` commands. 35 | 36 | Suppose your input is something like: 37 | 38 | ``` 39 | (AMZN ( ... )) 40 | (MSFT ( ... )) 41 | ``` 42 | 43 | and you want to change it to: 44 | 45 | ``` 46 | ("AMZN US" ( ... )) 47 | ("MSFT US" ( ... )) 48 | ``` 49 | 50 | Here's how you could do it using the `change` subcommand of `sexp query`: 51 | 52 | ```sh 53 | sexp query ' 54 | (quote ( 55 | (unquote (pipe (index 0) (change (seq (rewrite $X ($X " US")) concat)))) 56 | (unquote (index 1)))) 57 | ' < bool 11 | val length : 'a t -> int 12 | val decons : 'a t -> ('a * 'a t) option 13 | val cons : 'a -> 'a t -> 'a t 14 | val snoc : 'a t -> 'a -> 'a t 15 | val append : 'a t -> 'a t -> 'a t 16 | val map : 'a t -> f:('a -> 'b) -> 'b t 17 | val concat : 'a t t -> 'a t 18 | val nth : 'a t -> int -> 'a option 19 | val concat_list : 'a list t -> 'a t 20 | val find : f:('a -> bool) -> 'a t -> 'a option 21 | val filter : f:('a -> bool) -> 'a t -> 'a t 22 | val filter_opt : 'a option t -> 'a t 23 | val filter_map : f:('a -> 'b option) -> 'a t -> 'b t 24 | val fold_left : f:('a -> 'b -> 'a) -> init:'a -> 'b t -> 'a 25 | val fold_right : f:('a -> 'b -> 'b) -> 'a t -> init:'b -> 'b 26 | 27 | (* [foldr ~f t ~init] is a lazy version of [fold_right] that doesn't 28 | necessarily force a traversal of the entire list, as is more 29 | natural for a lazy list. 30 | *) 31 | 32 | val foldr : 'a t -> f:('a -> 'b Lazy.t -> 'b) -> init:'b -> 'b Lazy.t 33 | val iter : 'a t -> f:('a -> unit) -> unit 34 | val of_iterator : curr:('a -> 'b option) -> next:('a -> 'a) -> init:'a -> 'b t 35 | val build : f:('s -> ('a * 's) option) -> seed:'s -> 'a t 36 | val unfold : f:('a -> 'a option) -> init:'a -> 'a t 37 | val uniter : f:(unit -> 'a option) -> 'a t 38 | val of_list : 'a list -> 'a t 39 | val to_rev_list : 'a t -> 'a list 40 | val to_list : 'a t -> 'a list 41 | val of_array : 'a array -> 'a t 42 | val to_array : 'a t -> 'a array 43 | val cartesian_product : 'a t t -> 'a t t 44 | val merge : cmp:('a -> 'a -> int) -> 'a t -> 'a t -> 'a t 45 | 46 | val unify 47 | : cmp:('a -> 'b -> int) 48 | -> 'a t 49 | -> 'b t 50 | -> [ `Left of 'a | `Right of 'b | `Both of 'a * 'b ] t 51 | 52 | val sort : cmp:('a -> 'a -> int) -> 'a t -> 'a t 53 | val lazy_sort : cmp:('a -> 'a -> int) -> 'a t -> 'a t 54 | 55 | (* constructing lazy lists from values of container types *) 56 | module Of_container : sig 57 | module type T = sig 58 | type 'a t 59 | 60 | val lazy_fold : 'a t -> f:('a -> 'b Lazy.t -> 'b) -> last:'b -> 'b 61 | end 62 | 63 | (* (applications of) this module are meant to be included into *) 64 | module Make (X : T) : sig 65 | val lazy_list_of_t : 'a X.t -> 'a t 66 | end 67 | end 68 | 69 | (* Iterators are useful when you're trying to avoid closing over the head of 70 | a lazy list to avoid a space leak. Just create one of these outside said 71 | closure and close over the iterator instead. 72 | *) 73 | module Iterator : sig 74 | type 'a lazy_list = 'a t 75 | type 'a t 76 | 77 | val create : 'a lazy_list -> 'a t 78 | 79 | (* Produces the next element in the list and updates the iterator *) 80 | 81 | val next : 'a t -> 'a option 82 | val iter : 'a t -> f:('a -> unit) -> unit 83 | end 84 | -------------------------------------------------------------------------------- /sexp.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "Jane Street developers" 3 | authors: ["Jane Street Group, LLC"] 4 | homepage: "https://github.com/janestreet/sexp" 5 | bug-reports: "https://github.com/janestreet/sexp/issues" 6 | dev-repo: "git+https://github.com/janestreet/sexp.git" 7 | doc: "https://ocaml.janestreet.com/ocaml-core/latest/doc/sexp/index.html" 8 | license: "MIT" 9 | build: [ 10 | ["dune" "build" "-p" name "-j" jobs] 11 | ] 12 | depends: [ 13 | "ocaml" {>= "5.1.0"} 14 | "async" 15 | "base" 16 | "core" 17 | "core_unix" 18 | "csvfields" 19 | "jsonaf" 20 | "parsexp" 21 | "ppx_jane" 22 | "re2" 23 | "sexp_diff" 24 | "sexp_macro" 25 | "sexp_pretty" 26 | "sexp_select" 27 | "sexplib" 28 | "shell" 29 | "angstrom" {>= "0.15.0"} 30 | "dune" {>= "3.17.0"} 31 | ] 32 | available: arch != "arm32" & arch != "x86_32" 33 | synopsis: "S-expression swiss knife" 34 | description: " 35 | A suite of tools for working with s-expressions from the command line. 36 | It contains subcommands for pretty printing, querying, and modifying 37 | sexps, as well as some conversions to and from other formats. 38 | " 39 | -------------------------------------------------------------------------------- /sexp_app/pattern/compiled_query.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | type t = 4 | | Capture of t * int 5 | | Any 6 | | Atom of string 7 | | Atom_regex of Re2.t 8 | | Sequence of t list 9 | | Star of t 10 | | Star_greedy of t 11 | | Plus of t 12 | | Plus_greedy of t 13 | | Maybe of t 14 | | Maybe_greedy of t 15 | | List of t 16 | | Set of (t * Query.Set_kind.t) list 17 | | Subsearch of t 18 | | And of t list 19 | | Or_shortcircuiting of t list 20 | | Or_all of t list 21 | | First_match_only of t 22 | [@@deriving sexp_of] 23 | 24 | let of_query query ~idx_of_unlabeled_capture ~idx_of_number_capture ~idx_of_named_capture = 25 | let rec compile_list = List.map ~f:compile 26 | and compile_set_arg = List.map ~f:(fun (query, mode) -> compile query, mode) 27 | and compile query : t = 28 | match (query : Query.t) with 29 | (* capture lookup *) 30 | | Capture_unlabeled sub -> Capture (compile sub, idx_of_unlabeled_capture ()) 31 | | Capture_to_number (i, sub) -> Capture (compile sub, idx_of_number_capture i) 32 | | Capture_to_name (name, sub) -> Capture (compile sub, idx_of_named_capture name) 33 | (* regex compilation *) 34 | | Atom_regex s -> Atom_regex (Re2.create_exn s) 35 | (* traversal *) 36 | | Any -> Any 37 | | Atom s -> Atom s 38 | | Sequence subs -> Sequence (compile_list subs) 39 | | Star sub -> Star (compile sub) 40 | | Star_greedy sub -> Star_greedy (compile sub) 41 | | Plus sub -> Plus (compile sub) 42 | | Plus_greedy sub -> Plus_greedy (compile sub) 43 | | Maybe sub -> Maybe (compile sub) 44 | | Maybe_greedy sub -> Maybe_greedy (compile sub) 45 | | List sub -> List (compile sub) 46 | | Set subs -> Set (compile_set_arg subs) 47 | | Subsearch sub -> Subsearch (compile sub) 48 | | And subs -> And (compile_list subs) 49 | | Or_shortcircuiting subs -> Or_shortcircuiting (compile_list subs) 50 | | Or_all subs -> Or_all (compile_list subs) 51 | | First_match_only sub -> First_match_only (compile sub) 52 | in 53 | compile query 54 | ;; 55 | 56 | let create (type a) (uncompiled_query : Query.t) (output_method : a Output_method.t) = 57 | Query.validate_all_captures_labeled_or_all_unlabeled_exn uncompiled_query; 58 | let number_captures = Queue.create () in 59 | let named_captures = Queue.create () in 60 | let num_unlabeled_captures = ref 0 in 61 | Query.iter uncompiled_query ~f:(function 62 | | Capture_unlabeled _ -> incr num_unlabeled_captures 63 | | Capture_to_number (number, _) -> Queue.enqueue number_captures number 64 | | Capture_to_name (name, _) -> Queue.enqueue named_captures name 65 | | _ -> ()); 66 | let fail_unsupported_capture ~kind = 67 | raise_s 68 | (let query_pattern = uncompiled_query in 69 | [%message 70 | (sprintf 71 | "Query pattern contains %s capture, but they are not allowed when using this \ 72 | output method" 73 | kind) 74 | (query_pattern : Query.t) 75 | (output_method : _ Output_method.t)]) 76 | in 77 | let fail_unlabeled_capture () = fail_unsupported_capture ~kind:"unlabeled" in 78 | let fail_numbered_capture () = fail_unsupported_capture ~kind:"numbered" in 79 | let fail_named_capture () = fail_unsupported_capture ~kind:"named" in 80 | let pick_indices_for_named_and_number_captures () = 81 | let last_idx = ref (-1) in 82 | let get_idx () = 83 | incr last_idx; 84 | !last_idx 85 | in 86 | let idx_of_label = String.Table.create () in 87 | let compiled_query = 88 | of_query 89 | uncompiled_query 90 | ~idx_of_unlabeled_capture:(fun () -> fail_unlabeled_capture ()) 91 | ~idx_of_number_capture:(fun n -> 92 | Hashtbl.find_or_add idx_of_label (Int.to_string n) ~default:get_idx) 93 | ~idx_of_named_capture:(fun name -> 94 | Hashtbl.find_or_add idx_of_label name ~default:get_idx) 95 | in 96 | let label_of_idx = 97 | Hashtbl.to_alist idx_of_label 98 | |> List.map ~f:(fun (label, idx) -> idx, label) 99 | |> List.sort ~compare:(fun (idx0, _) (idx1, _) -> Int.compare idx0 idx1) 100 | in 101 | List.iteri label_of_idx ~f:(fun i (idx, _) -> assert (i = idx)); 102 | compiled_query, Array.of_list (List.map label_of_idx ~f:snd) 103 | in 104 | let compiled_query, labels_of_captures = 105 | match output_method with 106 | | Formats (_, formats) -> 107 | let used_labels = String.Hash_set.create () in 108 | Queue.iter named_captures ~f:(fun name -> Hash_set.add used_labels name); 109 | Queue.iter number_captures ~f:(fun number -> 110 | Hash_set.add used_labels (Int.to_string number)); 111 | List.iter formats ~f:(fun format -> 112 | List.iter (Output_method.Format.all_captures format) ~f:(fun c -> 113 | if not (Hash_set.mem used_labels c) 114 | then 115 | failwithf 116 | "Output or replacement expression uses capture not present in pattern: %s" 117 | c 118 | ())); 119 | pick_indices_for_named_and_number_captures () 120 | | Record _ -> pick_indices_for_named_and_number_captures () 121 | | Map -> pick_indices_for_named_and_number_captures () 122 | | List _ -> 123 | if Queue.length number_captures > 0 124 | then ( 125 | let used_idxs = Int.Hash_set.create () in 126 | Queue.iter number_captures ~f:(fun number -> 127 | if number < 0 128 | then 129 | raise_s 130 | [%message 131 | "Attempted to use negative number as index for a numbered capture" 132 | (number : int)]; 133 | Hash_set.add used_idxs number); 134 | let max_used_idx = Hash_set.to_list used_idxs |> List.reduce_exn ~f:Int.max in 135 | for i = 0 to max_used_idx do 136 | if not (Hash_set.mem used_idxs i) 137 | then 138 | if i = 0 139 | then 140 | failwithf 141 | "Match pattern uses captures up to %%%d but is missing %%%d (reminder: \ 142 | numbered captures should be zero-indexed)" 143 | max_used_idx 144 | i 145 | () 146 | else 147 | failwithf 148 | "Match pattern uses captures up to %%%d but is missing %%%d" 149 | max_used_idx 150 | i 151 | () 152 | done; 153 | let compiled_query = 154 | of_query 155 | uncompiled_query 156 | ~idx_of_unlabeled_capture:(fun () -> fail_unlabeled_capture ()) 157 | ~idx_of_number_capture:(fun n -> n) 158 | ~idx_of_named_capture:(fun _ -> fail_named_capture ()) 159 | in 160 | compiled_query, Array.init (max_used_idx + 1) ~f:Int.to_string) 161 | else ( 162 | (* Assign incrementing integer index by default for unlabeled captures *) 163 | let num_used_idxs = ref 0 in 164 | let compiled_query = 165 | of_query 166 | uncompiled_query 167 | ~idx_of_unlabeled_capture:(fun () -> 168 | let idx = !num_used_idxs in 169 | incr num_used_idxs; 170 | idx) 171 | ~idx_of_number_capture:(fun _ -> fail_numbered_capture ()) 172 | ~idx_of_named_capture:(fun _ -> fail_named_capture ()) 173 | in 174 | compiled_query, Array.init !num_used_idxs ~f:Int.to_string) 175 | | Single_capture _ -> 176 | if !num_unlabeled_captures <> 1 177 | then 178 | raise_s 179 | (let query_pattern = uncompiled_query in 180 | [%message 181 | "Query pattern has 0 or multiple unlabeled captures, which is not allowed \ 182 | if using this output method" 183 | (query_pattern : Query.t) 184 | (output_method : _ Output_method.t)]); 185 | let compiled_query = 186 | of_query 187 | uncompiled_query 188 | ~idx_of_unlabeled_capture:(fun () -> 0) 189 | ~idx_of_number_capture:(fun _ -> fail_numbered_capture ()) 190 | ~idx_of_named_capture:(fun _ -> fail_named_capture ()) 191 | in 192 | compiled_query, [| "0" |] 193 | in 194 | compiled_query, `Labels_of_captures labels_of_captures 195 | ;; 196 | -------------------------------------------------------------------------------- /sexp_app/pattern/compiled_query.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** Same as Query.t, except that Re2 regexes have been compiled and captures have been 4 | boiled down into simply an index into an array where that capture should be stored. *) 5 | type t = 6 | | Capture of t * int 7 | | Any 8 | | Atom of string 9 | | Atom_regex of Re2.t 10 | | Sequence of t list 11 | | Star of t 12 | | Star_greedy of t 13 | | Plus of t 14 | | Plus_greedy of t 15 | | Maybe of t 16 | | Maybe_greedy of t 17 | | List of t 18 | | Set of (t * Query.Set_kind.t) list 19 | | Subsearch of t 20 | | And of t list 21 | | Or_shortcircuiting of t list 22 | | Or_all of t list 23 | | First_match_only of t 24 | [@@deriving sexp_of] 25 | 26 | (** Compiles a query. Returns [t, `Labels_of_captures labels]. 27 | 28 | [labels] is the list of all unique keys that expect to receive a capture, a 29 | stringified integer in the case of a numbered capture, the string name in the case of 30 | a named capture, and unique default-created labels in the case of captures that the 31 | user did not themselves label. 32 | 33 | The array indices of [labels] correspond one-to-one with the integers returned in the 34 | [Capture] variant of [t]. *) 35 | val create : Query.t -> _ Output_method.t -> t * [ `Labels_of_captures of string array ] 36 | -------------------------------------------------------------------------------- /sexp_app/pattern/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name sexp_app_pattern) 3 | (public_name sexp.sexp_app_pattern) 4 | (libraries core re2 sexplib str) 5 | (flags :standard -w -68) 6 | (preprocess 7 | (pps ppx_jane))) 8 | 9 | (ocamllex lexer) 10 | 11 | (ocamlyacc internal_parser) 12 | -------------------------------------------------------------------------------- /sexp_app/pattern/engine.mli: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | (** {v 4 | Match a query against a sexp. 5 | 6 | Calls [f] once for every match found, passing the captured results in the format 7 | specified by [output_method] as an argument. 8 | 9 | [wrap_mode] controls what happens when a capture consumes multiple sexps during a 10 | single match. It specifies whether to wrap them together as a single sexp list or 11 | return all the results separately. 12 | 13 | For example: 14 | "(a %[.*])" tries to unwrap exactly one set of parens, match an 'a', and then capture 15 | all the sexps that follow that 'a'. Here are the three behaviors. 16 | 17 | `Unwrap_always: 18 | (a) -> [] (* *) 19 | (a b) -> [ Sexp.Atom b ] (* b *) 20 | (a b c) -> [ Sexp.Atom b; Sexp.Atom c ] (* b c *) 21 | 22 | `Wrap_non_singletons: 23 | (a) -> [Sexp.List []] (* () *) 24 | (a b) -> [ Sexp.Atom b ] (* b *) 25 | (a b c) -> [ Sexp.List [ Sexp.Atom b; Sexp.Atom c ] ] (* (b c) *) 26 | 27 | `Wrap_always: 28 | (a) -> [Sexp.List []] (* () *) 29 | (a b) -> [ Sexp.List [ Sexp.Atom b ] ] (* (b) *) 30 | (a b c) -> [ Sexp.List [ Sexp.Atom b; Sexp.Atom c ] ] (* (b c) *) 31 | 32 | This wrapping (or not) occurs before packing the result into whatever format specified 33 | by [output_method]. 34 | v} *) 35 | val iter_matches 36 | : query:Query.t 37 | -> output_method:'output_type Output_method.t 38 | -> Sexp.t 39 | -> f:('output_type -> unit) 40 | -> unit 41 | 42 | (** Match a query against a sexp. Then, for each successful match, perform a replacement 43 | where the subsexp or subsequence of sexps in the match corresponding to the [replace] 44 | label gets replaced according to [with_] and [wrap_mode]. 45 | 46 | The replacement is the result of substituting all of the captures of that match into 47 | the formats specified by [with_]. I.e. it replaces the sexp sequences of each capture 48 | of [replace] with what would be output by [Output_method.Formats (wrap_mode, with_)]. 49 | In most common use cases, [with_] will have length 1, but it is also okay for it to be 50 | length 0 (delete the labeled sexp) or of greater length (replace the labeled sexp(s) 51 | with multiple sexps). 52 | 53 | If replacements would happen at both a sexp and one of its subsexps, the replacement 54 | only occurs for the outer sexp. If two replacements would overlap at the same level 55 | (e.g. in "a b c" one replacement removes "a b" and another removes "b c"), an 56 | arbitrary one of them will happen and the other will not. 57 | 58 | The return type is a list because if the *entire* sexp itself is the target being 59 | replaced, then the result of replacement will have length equal to [with_] upon any 60 | successful match, for example returning an empty list if [with_] specifies to delete 61 | the whole sexp upon a match. *) 62 | val replace 63 | : query:Query.t 64 | -> replace:string 65 | -> with_:Output_method.Format.t list 66 | -> wrap_mode:_ Output_method.Wrap_mode.t 67 | -> Sexp.t 68 | -> Sexp.t list 69 | 70 | (** [replace'] is like [replace] except that instead of a simple replacement, one can 71 | specify a function to compute replacements. 72 | 73 | [f] is passed a map from capture label to the sexp sequence captured by that label. 74 | [f] should return a map from capture label to the new sexp sequence that should be 75 | used as a replacement. 76 | 77 | If no replacement is desired for a given label, [f] can omit that label entirely in 78 | the map it returns - that part of the original sexp will then remain unchanged. 79 | 80 | If there are multiple matches and two different calls to [f] overlap in what subsexps 81 | they indicate should be replaced in the original underlying sexp, only the outermost 82 | and/or the first of those [f]'s replacements happen. 83 | 84 | Note that this means that there is a difference between [f] acting as the identity on 85 | a given label and [f] omitting returning that label entirely. In the first case, the 86 | identity replacement will happen, excluding any overlapping replacements, while the 87 | second will not exclude overlapping replacements. *) 88 | val replace' 89 | : query:Query.t 90 | -> f:(Sexp.t list String.Map.t -> Sexp.t list String.Map.t) 91 | -> Sexp.t 92 | -> Sexp.t list 93 | -------------------------------------------------------------------------------- /sexp_app/pattern/help.ml: -------------------------------------------------------------------------------- 1 | let pat_query_readme = 2 | {| 3 | === Pattern syntax summary === 4 | For a longer explanation with examples, run 'sexp pat-query' with '-examples'. 5 | 6 | For a more precise specification of the semantics and the underlying type that corresponds 7 | to the grammar, see: 8 | https://ocaml.janestreet.com/ocaml-core/latest/doc/sexp_app_pattern/Sexp_app_pattern/Query/index.html 9 | 10 | . : Matches any sexp 11 | abc : Matches atom 'abc' 12 | /regex/ : Matches atom via regular expression 13 | foo* : Matches foo zero or more times 14 | foo+ : Matches foo one or more times 15 | foo? : Matches foo zero or one times 16 | ( ) : Matches a sexp list as a list, enforcing order and exact length 17 | { } : Matches a sexp list as a set, not enforcing order AND allowing extra elements. 18 | [ ] : Limit the scope of things (see examples below) 19 | 20 | .. foo : Searches for foo anywhere within any subexpression of the sexp. 21 | : This is a lot like sexp query's "smash". 22 | 23 | %. : Simple capture 24 | %(a %. c) : Simple captures can also be put on other subpatterns, or nested. 25 | %0 %1 : Numbered captures 26 | %abc %def : Named captures 27 | %abc=(a %. c) : Named or numbered captures can also be used on subpatterns or nested. 28 | 29 | a bc | de f : Matches 'a bc' OR 'de f'. 30 | a [bc | de] f : Matches 'a', then 'bc' OR 'de', then 'f'. 31 | a b & c d : Matches any sexp that matches both 'a b' AND 'c d'. 32 | 33 | [.. a & .. b] : Matches any sexp that has 'a' anywhere within, AND has 'b' anywhere within. 34 | 35 | !foo : Stop matching after the first match that foo finds. 36 | { foo? } : Matches foo to elements of the sexp list but accepts the lists where none match foo 37 | { !foo } : Stops matching foo once it matches for some element of the list 38 | 39 | foo*+ : Matches foo zero or more times, longer matches first (greedy) 40 | foo++ : Matches foo one or more times, longer matches first (greedy) 41 | foo?+ : Matches foo zero or one times, longer matches first (greedy) 42 | |} 43 | ;; 44 | 45 | let pat_query_examples = 46 | {| 47 | EXAMPLES OF HOW TO USE: 48 | 49 | Write a sexp-like expression for the pattern you're trying to match and use "%." to 50 | capture a value you want. 51 | 52 | Example pattern: (a b %.) 53 | Effect: 54 | (a b c) -> c 55 | (a b ()) -> () 56 | (a b (c d)) -> (c d) 57 | (a b c d) -> <> 58 | (a b) -> <> 59 | 60 | To capture multiple values into a list, you can use numbered captures (zero-indexed). 61 | Example pattern: (%0 b %1) 62 | Effect: 63 | (a b c) -> (a c) 64 | 65 | Or you can capture them into records by using named captures. 66 | Example pattern: (%foo b %bar) 67 | Effect: 68 | (a b c) -> ((foo a) (bar c)) 69 | 70 | You can also use -format to specify an explicit output format, like: 71 | Example pattern: (%foo b %bar), with -format (%foo (abc %bar)) 72 | Effect: 73 | (a b c) -> (a (abc c)) 74 | 75 | Regular parens require an exact match in order and length, but you can use curly brackets 76 | if you need to be robust to the order that things appear in something like a record, and 77 | you don't care if there are other fields: 78 | Example pattern: { (name %0) (time %1) } 79 | Effect: 80 | ((name Alice) (time 9:00) (qty 3)) -> (Alice 9:00) 81 | ((time 10:00) (name Bob)) -> (Bob 10:00) 82 | 83 | Most of the time, if you have a big sexp and you only want some deep piece of it, 84 | you can use '..', which will descend deep into any subsexp and return all matches. 85 | Example pattern: .. (sym %.) 86 | Effect: 87 | (my giant sexp ... (( more stuff ... (sym foo)...)) ... (sym bar) ... ) 88 | -> 89 | foo <---- Two separate matches 90 | bar <---/ 91 | 92 | '..' also works within subexpressions. For example, the following would search for a 93 | subrecord with a field "id", and anywhere deeper in it for a field "routes", and capture 94 | all the routes by id: 95 | 96 | Example pattern: .. { (id %0) .. (routes { %1 }) } 97 | Effect: 98 | (some giant sexp... ( ((id FOO) ((... (routes (A B))))) 99 | ((id BAR) ((... (routes (A E))))) )) 100 | -> 101 | (FOO A) 102 | (FOO B) 103 | (BAR A) 104 | (BAR E) 105 | 106 | You can use '.' to match any value, and * to match something zero or more times. 107 | For example, the following would grab the second element out of any subsexp: 108 | Example pattern: .. (. %. .*) 109 | Effect: 110 | (a (b c) (d e f)) 111 | -> 112 | (b c) 113 | c 114 | e 115 | 116 | You can use '?' to express that a value might or might not be there. For example, 117 | if you have two fields that are both optional: 118 | Example pattern: { (start (%0?)) (stop (%1?)) } 119 | Effect: 120 | ((start (3)) (stop (4))) -> (3 4) 121 | ((start ()) (stop (4))) -> (() 4) 122 | ((start (3)) (stop ())) -> (3 ()) 123 | ((start ()) (stop ())) -> (() ()) 124 | 125 | Within curly braces, '?' may be used to optionally match items in the list. This is useful 126 | for [%sexp.option] fields. 127 | 128 | For example this pattern: 129 | 130 | {(a %a)? (b %b)} 131 | 132 | will match records with a [%sexp.option] field "a" and a required field "b", capturing 133 | those fields. It will still succeed if the "a" field doesn't exist (while capturing 134 | nothing for "%a"). 135 | 136 | Example pattern: { (foo %0) (bar %1)? (baz %2)? } 137 | Effect: 138 | ((foo x)) -> (x () ()) 139 | ((foo x)(baz z)) -> (x () z) 140 | ((baz z)(foo x)(bar y)) -> (x y z) 141 | ((baz z)) -> <> 142 | 143 | You can use '&' for AND and '|' for OR. 144 | Use square brackets to delimit the scope of things where needed. 145 | Examples: 146 | 147 | Search anywhere for a triple of a, then b OR c, then d, and capture the b OR c: 148 | .. (a %[b | c] d) 149 | 150 | Search separately anywhere for a field "foo" and anywhere for a field "bar", and return 151 | the cross product of all the things they match: 152 | .. (foo %foo) & .. (bar %bar) 153 | 154 | You can use % on whole sexps as well. For example, to capture all records that have 155 | the field "count" anywhere ("." matches any sexp): 156 | .. %{(count .)} 157 | 158 | Or to capture it in a named or numbered fashion: 159 | .. %foo={(count .)} 160 | 161 | That's most of the basic things you can do! 162 | 163 | |} 164 | ;; 165 | -------------------------------------------------------------------------------- /sexp_app/pattern/help.mli: -------------------------------------------------------------------------------- 1 | val pat_query_readme : string 2 | val pat_query_examples : string 3 | -------------------------------------------------------------------------------- /sexp_app/pattern/internal_parser.mly: -------------------------------------------------------------------------------- 1 | %{ 2 | open Core 3 | module Q = Query 4 | %} 5 | 6 | %token WHITESPACE 7 | %token UNNAMEDCAPTURE 8 | %token NUMBERCAPTURE 9 | %token FIELDCAPTURE 10 | %token ATOM 11 | %token ATOM_REGEX 12 | %token DOT 13 | %token EQUAL 14 | 15 | %token LPAREN 16 | %token RPAREN 17 | %token LCURLY 18 | %token RCURLY 19 | %token LSQUARE 20 | %token RSQUARE 21 | 22 | %token TWODOTS 23 | %token STAR 24 | %token QUESTION 25 | %token BANG 26 | %token SEMI 27 | %token PLUS 28 | %token STARPLUS 29 | %token QUESTIONPLUS 30 | %token PLUSPLUS 31 | 32 | %token AND_ 33 | %token OR_ 34 | 35 | %token EOF 36 | 37 | %nonassoc BANG 38 | %nonassoc CURLY_BANG 39 | 40 | %start parse 41 | %type < Query.t > parse 42 | 43 | %% 44 | 45 | parse: 46 | | Spc Query EOF { $2 } 47 | 48 | Epsilon : { } 49 | 50 | Spc: 51 | | Epsilon {} 52 | | Spc WHITESPACE {} 53 | 54 | Query_base_wrapped: 55 | | LPAREN Spc RPAREN { Q.List (Q.Sequence []) } 56 | | LSQUARE Spc RSQUARE { Q.Sequence [] } 57 | | LCURLY Spc RCURLY { Q.Set [] } 58 | | LPAREN Spc Query RPAREN { Q.List $3 } 59 | | LSQUARE Spc Query RSQUARE { $3 } 60 | | LCURLY Spc Curly_queries RCURLY { Q.Set $3 } 61 | 62 | Query_base_no_bang: 63 | | Query_base_wrapped { $1 } 64 | | ATOM { Atom $1 } 65 | | ATOM_REGEX { Atom_regex $1 } 66 | | DOT { Any } 67 | 68 | Query_base: 69 | | Query_base_no_bang { $1 } 70 | | BANG Query_base_no_bang %prec BANG { Q.First_match_only $2 } 71 | 72 | Query_capture_no_bang: 73 | | Query_base_no_bang { $1 } 74 | | NUMBERCAPTURE { Q.Capture_to_number (Int.of_string $1,Any) } 75 | | FIELDCAPTURE { Q.Capture_to_name ($1,Any) } 76 | | UNNAMEDCAPTURE Query_base_no_bang { Q.Capture_unlabeled $2 } 77 | 78 | Query_capture: 79 | | Query_base { $1 } 80 | | NUMBERCAPTURE { Q.Capture_to_number (Int.of_string $1,Any) } 81 | | FIELDCAPTURE { Q.Capture_to_name ($1,Any) } 82 | | UNNAMEDCAPTURE Query_base { Q.Capture_unlabeled $2 } 83 | 84 | Query_equals_capture_no_bang: 85 | | Query_capture_no_bang { $1 } 86 | | NUMBERCAPTURE EQUAL Query_capture_no_bang { Q.Capture_to_number (Int.of_string $1, $3) } 87 | | FIELDCAPTURE EQUAL Query_capture_no_bang { Q.Capture_to_name ($1, $3) } 88 | 89 | Query_equals_capture: 90 | | Query_capture { $1 } 91 | | NUMBERCAPTURE EQUAL Query_capture { Q.Capture_to_number (Int.of_string $1, $3) } 92 | | FIELDCAPTURE EQUAL Query_capture { Q.Capture_to_name ($1, $3) } 93 | 94 | Query_term_within_curly: 95 | | Query_equals_capture_no_bang { $1 } 96 | | Query_term_within_curly STAR { Q.Star $1 } 97 | | Query_term_within_curly PLUS { Q.Plus $1 } 98 | | Query_term_within_curly STARPLUS { Q.Star_greedy $1 } 99 | | Query_term_within_curly PLUSPLUS { Q.Plus_greedy $1 } 100 | 101 | Query_term_no_space: 102 | | Query_equals_capture { $1 } 103 | | Query_term_no_space STAR { Q.Star $1 } 104 | | Query_term_no_space PLUS { Q.Plus $1 } 105 | | Query_term_no_space QUESTION { Q.Maybe $1 } 106 | | Query_term_no_space STARPLUS { Q.Star_greedy $1 } 107 | | Query_term_no_space PLUSPLUS { Q.Plus_greedy $1 } 108 | | Query_term_no_space QUESTIONPLUS { Q.Maybe_greedy $1 } 109 | 110 | Query_term: 111 | | Query_term_no_space Spc { $1 } 112 | 113 | Query_sequence: 114 | | Query_term { $1 } 115 | | Query_term Query_sequence { 116 | let q1 = $1 in 117 | let q2 = $2 in 118 | match q2 with 119 | | Q.Sequence list -> Q.Sequence (q1 :: list) 120 | | _ -> Q.Sequence [ q1; q2 ] 121 | } 122 | 123 | Query_dots: 124 | | Query_sequence { $1 } 125 | | TWODOTS Spc Query_dots { Q.Subsearch $3 } 126 | | Query_sequence TWODOTS Spc Query_dots { Q.Sequence [ $1; Q.Subsearch $4] } 127 | 128 | Query_and: 129 | | Query_dots { $1 } 130 | | Query_dots AND_ Spc Query_and { 131 | let q1 = $1 in 132 | let q2 = $4 in 133 | match q2 with 134 | | Q.And list -> Q.And (q1 :: list) 135 | | _ -> Q.And [ q1; q2 ] 136 | } 137 | 138 | Query_or: 139 | | Query_and { $1 } 140 | | Query_and OR_ Spc Query_or { 141 | let q1 = $1 in 142 | let q2 = $4 in 143 | match q2 with 144 | | Q.Or_all list -> Q.Or_all (q1 :: list) 145 | | _ -> Q.Or_all [ q1; q2 ] 146 | } 147 | 148 | Query: 149 | | Query_or { $1 } 150 | 151 | Curly_item: 152 | | BANG Query_term_within_curly QUESTION Spc %prec CURLY_BANG { ($2, {Q.Set_kind.optional=true; first_only=true}) } 153 | | BANG Query_term_within_curly Spc %prec CURLY_BANG { ($2, {Q.Set_kind.optional=false; first_only=true}) } 154 | | Query_term_within_curly QUESTION Spc { ($1, {Q.Set_kind.optional=true; first_only=false}) } 155 | | Query_term_within_curly Spc { ($1, {Q.Set_kind.optional=false; first_only=false}) } 156 | 157 | Curly_queries: 158 | | TWODOTS Spc Query_dots { [ Q.Subsearch $3, {optional=false; first_only=false}] } 159 | | Curly_item { [ $1 ] } 160 | | Curly_item Curly_queries { $1 :: $2 } 161 | -------------------------------------------------------------------------------- /sexp_app/pattern/lexer.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val next_token : Lexing.lexbuf -> Internal_parser.token 4 | -------------------------------------------------------------------------------- /sexp_app/pattern/lexer.mll: -------------------------------------------------------------------------------- 1 | { 2 | open Core 3 | open Stdlib.Lexing 4 | 5 | module Token = struct 6 | type t = Internal_parser.token = 7 | | WHITESPACE 8 | 9 | | UNNAMEDCAPTURE 10 | | NUMBERCAPTURE of string 11 | | FIELDCAPTURE of string 12 | | ATOM of string 13 | | ATOM_REGEX of string 14 | | DOT 15 | | EQUAL 16 | 17 | | LPAREN 18 | | RPAREN 19 | | LCURLY 20 | | RCURLY 21 | | LSQUARE 22 | | RSQUARE 23 | 24 | | TWODOTS 25 | | STAR 26 | | QUESTION 27 | | BANG 28 | | SEMI 29 | | PLUS 30 | | STARPLUS 31 | | QUESTIONPLUS 32 | | PLUSPLUS 33 | 34 | | AND_ 35 | | OR_ 36 | 37 | | EOF 38 | [@@deriving sexp, variants] 39 | end 40 | 41 | let char_for_backslash = function 42 | | 'n' -> '\010' 43 | | 'r' -> '\013' 44 | | 'b' -> '\008' 45 | | 't' -> '\009' 46 | | c -> c 47 | 48 | let lf = '\010' 49 | 50 | let ascii_0 = Stdlib.Char.code '0' 51 | let ascii_a = Stdlib.Char.code 'a' 52 | let ascii_A = Stdlib.Char.code 'A' 53 | 54 | let dec_code c1 c2 c3 = 55 | 100 * (Stdlib.Char.code c1 - ascii_0) 56 | + 10 * (Stdlib.Char.code c2 - ascii_0) 57 | + (Stdlib.Char.code c3 - ascii_0) 58 | 59 | let hex_offset = function 60 | | 'a' .. 'f' -> ascii_a - 10 61 | | 'A' .. 'F' -> ascii_A - 10 62 | | _ -> ascii_0 63 | 64 | let hex_code c1 c2 = 65 | let v1 = Stdlib.Char.code c1 - hex_offset c1 in 66 | let v2 = Stdlib.Char.code c2 - hex_offset c2 in 67 | 16 * v1 + v2 68 | 69 | let found_newline ({ lex_curr_p; _ } as lexbuf) diff = 70 | lexbuf.lex_curr_p <- 71 | { 72 | lex_curr_p with 73 | pos_lnum = lex_curr_p.pos_lnum + 1; 74 | pos_bol = lex_curr_p.pos_cnum - diff; 75 | } 76 | 77 | (* same length computation as in [Lexing.lexeme] *) 78 | let lexeme_len { lex_start_pos; lex_curr_pos; _ } = lex_curr_pos - lex_start_pos 79 | 80 | } 81 | 82 | let lf = '\010' 83 | let dos_newline = "\013\010" 84 | let digit = ['0'-'9'] 85 | let hexdigit = digit | ['a'-'f' 'A'-'F'] 86 | let alphanumericunderscore = digit | ['a'-'z' 'A'-'Z'] | '_' 87 | let alpha = ['a'-'z' 'A'-'Z'] 88 | 89 | let atomchars_nodot = 90 | alphanumericunderscore | '-' | ':' | '~' | '\'' | '`' | '#' | '$' | '^' | ',' 91 | 92 | let atomchars = 93 | atomchars_nodot | '.' 94 | 95 | rule next_token = parse 96 | | [' ' '\t']+ { Token.WHITESPACE } 97 | | "\r\n" | "\n\r" { Lexing.new_line lexbuf; Token.WHITESPACE } 98 | | "\n" | "\r" { Lexing.new_line lexbuf; Token.WHITESPACE } 99 | | '\"' { 100 | let buf = Buffer.create 32 in 101 | let pos = Lexing.lexeme_start_p lexbuf in 102 | let start = lexbuf.Lexing.lex_start_p in 103 | scan_string buf pos lexbuf; 104 | lexbuf.Lexing.lex_start_p <- start; 105 | Token.ATOM (Buffer.contents buf) 106 | } 107 | 108 | | '/' { 109 | let buf = Buffer.create 32 in 110 | let pos = Lexing.lexeme_start_p lexbuf in 111 | let start = lexbuf.Lexing.lex_start_p in 112 | scan_regex buf pos lexbuf; 113 | lexbuf.Lexing.lex_start_p <- start; 114 | Token.ATOM_REGEX (Buffer.contents buf) 115 | } 116 | 117 | | '%' (digit digit* as str) { Token.NUMBERCAPTURE str } 118 | | '%' (atomchars_nodot atomchars* as str) { Token.FIELDCAPTURE str } 119 | | '%' (atomchars_nodot atomchars atomchars* as str) { Token.FIELDCAPTURE str } 120 | | '%' (atomchars atomchars_nodot atomchars* as str) { Token.FIELDCAPTURE str } 121 | | '%' (atomchars atomchars atomchars atomchars* as str) { Token.FIELDCAPTURE str } 122 | | '%' { Token.UNNAMEDCAPTURE } 123 | 124 | | (atomchars_nodot atomchars*) as str { Token.ATOM str } 125 | | (atomchars_nodot atomchars atomchars*) as str { Token.ATOM str } 126 | | (atomchars atomchars_nodot atomchars*) as str { Token.ATOM str } 127 | | (atomchars atomchars atomchars atomchars*) as str { Token.ATOM str } 128 | 129 | | "&" { Token.AND_ } 130 | | "|" { Token.OR_ } 131 | | "*" { Token.STAR } 132 | | "?+" { Token.QUESTIONPLUS } 133 | | "*+" { Token.STARPLUS } 134 | | "++" { Token.PLUSPLUS } 135 | | "?" { Token.QUESTION } 136 | | "+" { Token.PLUS } 137 | | "!" { Token.BANG } 138 | | ";" { Token.SEMI } 139 | | ".." { Token.TWODOTS } 140 | | "." { Token.DOT } 141 | | "=" { Token.EQUAL } 142 | 143 | | '(' { Token.LPAREN } 144 | | ')' { Token.RPAREN } 145 | | '{' { Token.LCURLY } 146 | | '}' { Token.RCURLY } 147 | | '[' { Token.LSQUARE } 148 | | ']' { Token.RSQUARE } 149 | 150 | | eof { Token.EOF } 151 | 152 | and scan_regex buf start = parse 153 | | "\\" (_ as c) { Buffer.add_char buf c; scan_regex buf start lexbuf } 154 | | eof { 155 | failwithf "Error parsing unterminated regex at line %d char %d" 156 | start.pos_lnum (start.pos_cnum - start.pos_bol) () 157 | } 158 | | '/' { () } 159 | | _ { Buffer.add_string buf (Lexing.lexeme lexbuf); scan_regex buf start lexbuf } 160 | 161 | 162 | (* Mostly copied from sexplib's parsing for quoted strings *) 163 | and scan_string buf start = parse 164 | | '"' { () } 165 | | '\\' lf [' ' '\t']* 166 | { 167 | let len = lexeme_len lexbuf - 2 in 168 | found_newline lexbuf len; 169 | scan_string buf start lexbuf 170 | } 171 | | '\\' dos_newline [' ' '\t']* 172 | { 173 | let len = lexeme_len lexbuf - 3 in 174 | found_newline lexbuf len; 175 | scan_string buf start lexbuf 176 | } 177 | | '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c) 178 | { 179 | Buffer.add_char buf (char_for_backslash c); 180 | scan_string buf start lexbuf 181 | } 182 | | '\\' (digit as c1) (digit as c2) (digit as c3) 183 | { 184 | let v = dec_code c1 c2 c3 in 185 | if v > 255 then ( 186 | let { pos_lnum; pos_bol; pos_cnum; pos_fname = _ } = lexeme_end_p lexbuf in 187 | let msg = 188 | sprintf 189 | "Sexplib.Lexer.scan_string: \ 190 | illegal escape at line %d char %d: `\\%c%c%c'" 191 | pos_lnum (pos_cnum - pos_bol - 3) 192 | c1 c2 c3 in 193 | failwith msg); 194 | Buffer.add_char buf (Stdlib.Char.chr v); 195 | scan_string buf start lexbuf 196 | } 197 | | '\\' 'x' (hexdigit as c1) (hexdigit as c2) 198 | { 199 | let v = hex_code c1 c2 in 200 | Buffer.add_char buf (Stdlib.Char.chr v); 201 | scan_string buf start lexbuf 202 | } 203 | | '\\' (_ as c) 204 | { 205 | Buffer.add_char buf '\\'; 206 | Buffer.add_char buf c; 207 | scan_string buf start lexbuf 208 | } 209 | | lf 210 | { 211 | found_newline lexbuf 0; 212 | Buffer.add_char buf lf; 213 | scan_string buf start lexbuf 214 | } 215 | | ([^ '\\' '"'] # lf)+ 216 | { 217 | let ofs = lexbuf.lex_start_pos in 218 | let len = lexbuf.lex_curr_pos - ofs in 219 | Buffer.add_subbytes buf lexbuf.lex_buffer ~pos:ofs ~len; 220 | scan_string buf start lexbuf 221 | } 222 | | eof 223 | { 224 | let msg = 225 | sprintf 226 | "Match.Lexer.scan_string: unterminated string at line %d char %d" 227 | start.pos_lnum (start.pos_cnum - start.pos_bol) 228 | in 229 | failwith msg 230 | } 231 | 232 | -------------------------------------------------------------------------------- /sexp_app/pattern/output_method.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | module Format = struct 4 | type t = 5 | | Atom of string 6 | | Capture of string 7 | | List of t list 8 | 9 | let escape_percents = 10 | unstage (String.Escaping.escape ~escapeworthy:[ '%' ] ~escape_char:'\\') 11 | ;; 12 | 13 | let unescape_percents = unstage (String.Escaping.unescape ~escape_char:'\\') 14 | 15 | let rec t_of_sexp sexp = 16 | match (sexp : Sexp.t) with 17 | | Atom s -> 18 | if String.is_prefix s ~prefix:"%" 19 | then Capture (String.chop_prefix_exn s ~prefix:"%") 20 | else Atom (unescape_percents s) 21 | | List list -> List (List.map list ~f:t_of_sexp) 22 | ;; 23 | 24 | let rec sexp_of_t t : Sexp.t = 25 | match t with 26 | | Atom s -> Atom (escape_percents s) 27 | | Capture s -> Atom ("%" ^ s) 28 | | List list -> List (List.map list ~f:sexp_of_t) 29 | ;; 30 | 31 | let ts_of_string s = Sexp.scan_sexps (Lexing.from_string s) |> List.map ~f:t_of_sexp 32 | 33 | let rec all_captures_aux t acc = 34 | match t with 35 | | Atom _ -> acc 36 | | Capture s -> s :: acc 37 | | List list -> List.fold_right list ~init:acc ~f:all_captures_aux 38 | ;; 39 | 40 | let all_captures t = all_captures_aux t [] 41 | 42 | let rec embed_captures t ~f : Sexp.t list = 43 | match t with 44 | | Atom s -> [ Atom s ] 45 | | Capture s -> f s 46 | | List list -> [ List (List.concat_map list ~f:(embed_captures ~f)) ] 47 | ;; 48 | end 49 | 50 | module Wrap_mode = struct 51 | type 'query_result t = 52 | | Wrap_always : Sexp.t t 53 | | Wrap_non_singletons : Sexp.t t 54 | | Unwrap_always : Sexp.t list t 55 | [@@deriving sexp_of] 56 | 57 | type some_wrap_mode = T : _ t -> some_wrap_mode 58 | end 59 | 60 | type 'query_result t = 61 | | Formats : _ Wrap_mode.t * Format.t list -> Sexp.t list t 62 | | List : 'query_result Wrap_mode.t -> Sexp.t t 63 | | Record : 'query_result Wrap_mode.t -> Sexp.t t 64 | | Single_capture : 'query_result Wrap_mode.t -> 'query_result t 65 | | Map : Sexp.t list String.Map.t t (** Return a map from capture name to captures *) 66 | [@@deriving sexp_of] 67 | 68 | type some_output_method = T : _ t -> some_output_method 69 | 70 | let default_method query ~wrap_mode = 71 | let { Query.Capture_count.num_number_captures 72 | ; num_named_captures 73 | ; num_unlabeled_captures 74 | } 75 | = 76 | Query.count_captures query 77 | in 78 | if num_named_captures > 0 79 | then T (Record wrap_mode) 80 | else if num_number_captures > 0 || num_unlabeled_captures > 1 81 | then T (List wrap_mode) 82 | else if num_unlabeled_captures = 1 83 | then T (Single_capture wrap_mode) 84 | else failwith "No captures % were specified in pattern" 85 | ;; 86 | -------------------------------------------------------------------------------- /sexp_app/pattern/output_method.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | module Format : sig 4 | (** A format specifies how the user wants their captured results to be formatted by 5 | providing a sexp structure in which the captures will get embedded. *) 6 | type t = 7 | | Atom of string 8 | | Capture of string 9 | | List of t list 10 | [@@deriving sexp] 11 | 12 | val ts_of_string : string -> t list 13 | val all_captures : t -> string list 14 | val embed_captures : t -> f:(string -> Sexp.t list) -> Sexp.t list 15 | end 16 | 17 | module Wrap_mode : sig 18 | (** {v 19 | This controls what happens when a single capture expression consumes multiple sexps 20 | during a single match. It specifies whether to wrap them together as a single sexp 21 | list or return all the results separately. 22 | 23 | For example: 24 | "(a %[.*])" tries to unwrap exactly one set of parens, match an 'a', and then 25 | capture all the sexps that follow that 'a'. Here are the three behaviors. 26 | 27 | Wrap_always: 28 | (a) -> Sexp.List [] (* () *) 29 | (a b) -> Sexp.List [ Sexp.Atom b ] (* (b) *) 30 | (a b c) -> Sexp.List [ Sexp.Atom b; Sexp.Atom c ] (* (b c) *) 31 | 32 | Wrap_non_singletons: 33 | (a) -> Sexp.List [] (* () *) 34 | (a b) -> Sexp.Atom b (* b *) 35 | (a b c) -> Sexp.List [ Sexp.Atom b; Sexp.Atom c ] (* (b c) *) 36 | 37 | Unwrap_always: 38 | (a) -> [] (* *) 39 | (a b) -> [ Sexp.Atom b ] (* b *) 40 | (a b c) -> [ Sexp.Atom b; Sexp.Atom c ] (* b c *) 41 | 42 | This wrapping (or not) occurs before packing the result into whatever format 43 | specified by [Output_method.t] below. 44 | 45 | Since [Unwrap_always] has the possiblity of returning multiple sexps separately, 46 | the [`capture] type for it is a list of sexps instead of a single sexp. 47 | v} *) 48 | type 'capture t = 49 | | Wrap_always : Sexp.t t 50 | | Wrap_non_singletons : Sexp.t t 51 | | Unwrap_always : Sexp.t list t 52 | [@@deriving sexp_of] 53 | 54 | type some_wrap_mode = T : _ t -> some_wrap_mode 55 | end 56 | 57 | type _ t = 58 | | Formats : _ Wrap_mode.t * Format.t list -> Sexp.t list t 59 | (** Embed captures in the specified formats *) 60 | | List : _ Wrap_mode.t -> Sexp.t t 61 | (** Return different capture expressions' results as a Sexp.List. In the case of 62 | [Unwrap_always], the sequences consumed by each capture expression are concatenated, 63 | so the list may be longer (or shorter) than the number of capture expressions. *) 64 | | Record : _ Wrap_mode.t -> Sexp.t t 65 | (** Return captures as a sexp record where the field names are the labels of the 66 | capturing expressions. In the case of [Unwrap_always], the sequences consumed by 67 | each capture expression have the field name consed onto them, so the result may not 68 | actually be a list of pairs! *) 69 | | Single_capture : 'query_result Wrap_mode.t -> 'query_result t 70 | (** Expect exactly one capture in the pattern, and return its captured contents. *) 71 | | Map : Sexp.t list String.Map.t t 72 | (** Return a map from capture name to captures. Similar to doing [Record Wrap_always] 73 | and then [[%of_sexp: Sexp.t list String.Map.t]] *) 74 | [@@deriving sexp_of] 75 | 76 | type some_output_method = T : _ t -> some_output_method 77 | 78 | (** Determine a default output method to use based on whether the query contains numbered 79 | or named captures. *) 80 | val default_method : Query.t -> wrap_mode:_ Wrap_mode.t -> some_output_method 81 | -------------------------------------------------------------------------------- /sexp_app/pattern/parser.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | module Pos = struct 4 | type t = 5 | { linenum : int 6 | ; charnum : int 7 | ; line : string option 8 | } 9 | [@@deriving sexp] 10 | 11 | let create code lexbuf = 12 | let pos = Lexing.lexeme_start_p lexbuf in 13 | let lines = String.split code ~on:'\n' in 14 | { linenum = pos.Lexing.pos_lnum 15 | ; charnum = pos.Lexing.pos_cnum - pos.Lexing.pos_bol 16 | ; line = List.nth lines (pos.Lexing.pos_lnum - 1) 17 | } 18 | ;; 19 | 20 | let location_str t = sprintf "line %d char %d" t.linenum t.charnum 21 | let line t = Option.value t.line ~default:"" 22 | end 23 | 24 | let parse_exn str = 25 | let lexbuf = Lexing.from_string str in 26 | let query = 27 | try Internal_parser.parse Lexer.next_token lexbuf with 28 | | Parsing.Parse_error -> 29 | let pos = Pos.create str lexbuf in 30 | failwithf 31 | "Parsing match query failed at %s in query %s" 32 | (Pos.location_str pos) 33 | (Pos.line pos) 34 | () 35 | | exn -> 36 | let pos = Pos.create str lexbuf in 37 | Exn.reraisef 38 | exn 39 | "Parsing match query failed at %s in query %s" 40 | (Pos.location_str pos) 41 | (Pos.line pos) 42 | () 43 | in 44 | query 45 | ;; 46 | -------------------------------------------------------------------------------- /sexp_app/pattern/parser.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val parse_exn : string -> Query.t 4 | -------------------------------------------------------------------------------- /sexp_app/pattern/query.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | module Set_kind = struct 4 | type t = 5 | { optional : bool 6 | ; first_only : bool 7 | } 8 | [@@deriving sexp] 9 | end 10 | 11 | type t = 12 | | Capture_unlabeled of t 13 | | Capture_to_number of int * t 14 | | Capture_to_name of string * t 15 | | Any 16 | | Atom of string 17 | | Atom_regex of string 18 | | Sequence of t list 19 | | Star of t 20 | | Star_greedy of t 21 | | Plus of t 22 | | Plus_greedy of t 23 | | Maybe of t 24 | | Maybe_greedy of t 25 | | List of t 26 | | Set of (t * Set_kind.t) list 27 | | Subsearch of t 28 | | And of t list 29 | | Or_shortcircuiting of t list 30 | | Or_all of t list 31 | | First_match_only of t 32 | [@@deriving sexp] 33 | 34 | let rec iter t ~f = 35 | f t; 36 | match t with 37 | | Any | Atom _ | Atom_regex _ -> () 38 | | Capture_unlabeled sub 39 | | Capture_to_number (_, sub) 40 | | Capture_to_name (_, sub) 41 | | Subsearch sub 42 | | First_match_only sub 43 | | Star sub 44 | | Star_greedy sub 45 | | Plus sub 46 | | Plus_greedy sub 47 | | Maybe sub 48 | | Maybe_greedy sub 49 | | List sub -> iter sub ~f 50 | | Sequence subs | And subs | Or_shortcircuiting subs | Or_all subs -> 51 | List.iter subs ~f:(fun sub -> iter sub ~f) 52 | | Set subs -> List.iter subs ~f:(fun (sub, _) -> iter sub ~f) 53 | ;; 54 | 55 | module Capture_count = struct 56 | type t = 57 | { num_number_captures : int 58 | ; num_named_captures : int 59 | ; num_unlabeled_captures : int 60 | } 61 | end 62 | 63 | let count_captures t = 64 | let num_number_captures = ref 0 in 65 | let num_named_captures = ref 0 in 66 | let num_unlabeled_captures = ref 0 in 67 | iter t ~f:(function 68 | | Capture_unlabeled _ -> incr num_unlabeled_captures 69 | | Capture_to_number _ -> incr num_number_captures 70 | | Capture_to_name _ -> incr num_named_captures 71 | | Any 72 | | Atom _ 73 | | Atom_regex _ 74 | | Subsearch _ 75 | | First_match_only _ 76 | | Star _ 77 | | Star_greedy _ 78 | | Plus _ 79 | | Plus_greedy _ 80 | | Maybe _ 81 | | Maybe_greedy _ 82 | | List _ 83 | | Sequence _ 84 | | And _ 85 | | Or_shortcircuiting _ 86 | | Or_all _ 87 | | Set _ -> ()); 88 | { Capture_count.num_number_captures = !num_number_captures 89 | ; num_named_captures = !num_named_captures 90 | ; num_unlabeled_captures = !num_unlabeled_captures 91 | } 92 | ;; 93 | 94 | let validate_all_captures_labeled_or_all_unlabeled_exn t = 95 | let { Capture_count.num_number_captures; num_named_captures; num_unlabeled_captures } = 96 | count_captures t 97 | in 98 | if num_unlabeled_captures > 0 && (num_number_captures > 0 || num_named_captures > 0) 99 | then 100 | failwith 101 | "Cannot mix unlabeled captures with named or numbered captures in the same pattern" 102 | ;; 103 | -------------------------------------------------------------------------------- /sexp_app/pattern/query.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** This module contains the type for a regexp-like query which can be used to match sexps 4 | and extract subsexps. *) 5 | 6 | (** A {e sexp sequence} is a list of sexps. We use the term sequence to differentiate from 7 | a sexp which is a list of sexps. Typically a sexp sequence corresponds to a sublist of 8 | a sexp list. 9 | 10 | A {e query} is a object that when applied to a sexp sequence, may fail to match, or 11 | else may match by successfully consuming zero or more elements from the head of that 12 | sequence. 13 | 14 | Queries may match with multiplicity greater than 1, each match independently consuming 15 | the same or different prefixes of the head of the list. 16 | 17 | Each match may also capture various sequences (which must be a subsequence of the 18 | consumed sexp sequences) returning those captures to the user. 19 | 20 | Captures do {e not} need to be uniquely numbered or named within a given query, since 21 | with the [Or_*] variants, one can legitimately write branched queries where each 22 | branch might capture into the same label, but only one branch at a time is expected to 23 | actually match. 24 | 25 | However, if a given single match of the pattern captures two different expressions 26 | into the same label simultaneously, it is unspecified which of the two expressions 27 | will actually be returned for that label. *) 28 | 29 | module Set_kind : sig 30 | (** Specifies how a term in a [Set] should be matched *) 31 | type t = 32 | { optional : bool 33 | (** If [optional] is true then the whole [Set] query may match a list even if the 34 | optional term doesn't match any elements of the list. But if the term does match 35 | some element then there is no trivial match where the term does not capture 36 | anything. *) 37 | ; first_only : bool 38 | (** If [first_only] is true then the term only returns matches for the first element 39 | of the list that it successfully matches in at least one way. *) 40 | } 41 | [@@deriving sexp] 42 | end 43 | 44 | type t = 45 | | Capture_unlabeled of t 46 | (** [Capture_unlabeled t] is equivalent to [Capture_to_number (i, t)] where [i] starts 47 | at 0 and increments each time an unlabeled capture appears in the query. Syntax: 48 | [%.] *) 49 | | Capture_to_number of int * t 50 | (** [Capture_to_number (i, t)] applies the subquery [t], associating each consumed 51 | sequence to the label [Int.to_string i]. Example syntax: [%0] *) 52 | | Capture_to_name of string * t 53 | (** [Capture_to_name (n, t)] applies the subquery [t], associating each consumed 54 | sequence to the label [n]. Example syntax: [%foo] *) 55 | | Any (** Consume a single sexp or fail to match. Syntax: [.] *) 56 | | Atom of string 57 | (** Consumes an atom if it matches this exact string. Example syntax: [foo] *) 58 | | Atom_regex of string 59 | (** Consumes an atom if it matches this regex. Example syntax: [/foo/] *) 60 | | Sequence of t list 61 | (** Recursively applies each [t] against the remaining tail unconsumed by previous [t]s 62 | in the sequence. Example syntax: [[t1 t2 t3]] *) 63 | | Star of t 64 | (** Iteratively applies all of [[ Sequence []; Sequence [t]; Sequence [t; t]; ... ]], 65 | with a special-case guard against an infinite loop if [t] itself can consume zero 66 | elements. Example syntax: [t*] *) 67 | | Star_greedy of t 68 | (** Same as [Star], but tries possiblities in reverse order. Example syntax: [t*+] *) 69 | | Plus of t 70 | (** Iteratively applies all of [[ Sequence [t]; Sequence [t; t]; ... ]], with a 71 | special-case guard against an infinite loop if [t] itself can consume zero elements. 72 | Example syntax: [t+] *) 73 | | Plus_greedy of t 74 | (** Same as [Plus], but tries possiblities in reverse order. Example syntax: [t++] *) 75 | | Maybe of t 76 | (** Iteratively applies both of [[ Sequence []; Sequence [t] ]], with a special-case 77 | guard to match with multiplicity only 1 if [t] itself can consume zero elements. 78 | Example syntax: [t?] *) 79 | | Maybe_greedy of t 80 | (** Same as [Maybe], but tries possiblities in reverse order. Example syntax: [t?+] *) 81 | | List of t 82 | (** Consumes a sexp if it is a list, applying [t] to the sequence of the list's elements 83 | and requiring any successful matches by [t] to consume the entire sequence. Example 84 | syntax: [(t1 t2 t3)] *) 85 | | Set of (t * Set_kind.t) list 86 | (** Consumes a sexp if it is a list, recursively applying each subquery [t] iteratively 87 | one by one to each element of the sublist where that element is given as a singleton 88 | sequence, requiring that subquery to match that element. Multiple terms of the query 89 | may match the same list element. The cartesian product of all possible matches over 90 | the subqueries will be the result. See also the documentation above for [Set_kind] 91 | for how it may modify this behavior. Example syntax: [{t1 t2??}] *) 92 | | Subsearch of t 93 | (** [Subsearch t] matches some sequence {e S} of sexps if {e S} contains a subsequence 94 | which [t] matches, or if [List (Subsearch t)] matches one of the sexps in {e S}. 95 | Example syntax: [.. t] *) 96 | | And of t list 97 | (** Consumes a sequence of sexps only if every [t] provided has a successful match 98 | consuming exactly that sequence of sexps. Example syntax: [t1 & t2] *) 99 | | Or_shortcircuiting of t list 100 | (** Applies each [t] iteratively, stopping after iterating through all matches of the 101 | first [t] that has any matches. In other words, 102 | [Or_shortcircuiting [t1; t2; ...; ti; ...]] matches if and only if [ti] matches and 103 | none of queries before [ti] match. Example syntax: none *) 104 | | Or_all of t list 105 | (** Applies each [t] iteratively, matching according to the union of their matches. In 106 | other words, [Or_all [t1; ...; tn]] matches if and only if at least one of the [ti] 107 | matches. If more than one of the [ti] matches, it is undefined which of the matching 108 | terms will give the captures. Example syntax: [t1 | t2] *) 109 | | First_match_only of t 110 | (** Applies [t] and stops after the first time it matches. Example syntax: [!t] *) 111 | [@@deriving sexp] 112 | 113 | val iter : t -> f:(t -> unit) -> unit 114 | 115 | module Capture_count : sig 116 | type t = 117 | { num_number_captures : int 118 | ; num_named_captures : int 119 | ; num_unlabeled_captures : int 120 | } 121 | end 122 | 123 | val count_captures : t -> Capture_count.t 124 | val validate_all_captures_labeled_or_all_unlabeled_exn : t -> unit 125 | -------------------------------------------------------------------------------- /sexp_app/pattern/sexp_app_pattern.ml: -------------------------------------------------------------------------------- 1 | module Compiled_query = Compiled_query 2 | module Engine = Engine 3 | module Help = Help 4 | module Internal_parser = Internal_parser 5 | module Lexer = Lexer 6 | module Output_method = Output_method 7 | module Parser = Parser 8 | module Query = Query 9 | -------------------------------------------------------------------------------- /sexp_app/pattern_test/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name sexp_app_pattern_test) 3 | (libraries core expect_test_helpers_core sexp_app_pattern sexplib) 4 | (preprocess 5 | (pps ppx_jane))) 6 | -------------------------------------------------------------------------------- /sexp_app/pattern_test/sexp_app_pattern_test.ml: -------------------------------------------------------------------------------- 1 | module Test = Test 2 | -------------------------------------------------------------------------------- /sexp_app/pattern_test/test.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /sexp_app/src/csv_file.ml: -------------------------------------------------------------------------------- 1 | (** CSV files *) 2 | 3 | open Core 4 | 5 | type t = Csv_record.t Lazy_list.t 6 | 7 | let test_length ~found ~expected = 8 | if found <> expected 9 | then failwithf "length mismatch: expected %i but found %i" expected found () 10 | ;; 11 | 12 | let read lexbuf = 13 | Lazy_list.build ~seed:None ~f:(fun len_opt -> 14 | match Csv_record.read lexbuf with 15 | | None -> None 16 | | Some record -> 17 | let found = List.length record in 18 | (match len_opt with 19 | | None -> Some (record, Some found) 20 | | Some expected -> 21 | test_length ~found ~expected; 22 | Some (record, len_opt))) 23 | ;; 24 | 25 | let write ?(sep = ',') out t = 26 | let length = ref None in 27 | Lazy_list.iter t ~f:(fun record -> 28 | let found = List.length record in 29 | (match !length with 30 | | None -> length := Some found 31 | | Some expected -> test_length ~found ~expected); 32 | Csv_record.write out record ~sep) 33 | ;; 34 | -------------------------------------------------------------------------------- /sexp_app/src/csv_file.mli: -------------------------------------------------------------------------------- 1 | (** CSV files *) 2 | 3 | open Core 4 | 5 | type t = Csv_record.t Lazy_list.t 6 | 7 | (** (lazily) enforces that all records are the same length *) 8 | val read : Lexing.lexbuf -> t 9 | 10 | (** enforces that all records are the same length *) 11 | val write : ?sep:char -> Out_channel.t -> t -> unit 12 | -------------------------------------------------------------------------------- /sexp_app/src/csv_lexeme.mli: -------------------------------------------------------------------------------- 1 | (** This module encapsulates the lexical structure of the CSV format In particular, it 2 | does everything necessary to handle double-quotes. *) 3 | 4 | module T : sig 5 | type t = 6 | | Field of string 7 | | Comma 8 | | Newline 9 | end 10 | 11 | type t = T.t 12 | 13 | val read : Lexing.lexbuf -> t option 14 | val write : ?sep:char -> out_channel -> t -> unit 15 | -------------------------------------------------------------------------------- /sexp_app/src/csv_lexeme.mll: -------------------------------------------------------------------------------- 1 | { 2 | (* See http://tools.ietf.org/html/rfc4180 for more information. *) 3 | 4 | open Core 5 | module Pad = String_pad 6 | 7 | module T = struct 8 | type t = Field of string | Comma | Newline 9 | end 10 | 11 | include T 12 | 13 | } 14 | 15 | let newline = [ '\r' '\n' ] 16 | let other = [^ '\r' '\n' '"' ','] 17 | 18 | rule read = parse 19 | | newline+ { Some Newline } 20 | | ',' { Some Comma } 21 | | '"' { escaped Pad.empty lexbuf } 22 | | other* as field { Some (Field field) } 23 | | eof { None } 24 | 25 | and escaped pad = parse 26 | | '"' '"' { escaped (Pad.add_char pad '"') lexbuf } 27 | | '"' { Some (Field (Pad.dump pad)) } 28 | | _ as c { escaped (Pad.add_char pad c) lexbuf } 29 | | eof { failwith "unterminated \"-delimited field" } 30 | 31 | { 32 | 33 | let lf = '\n' 34 | 35 | let meta_character c ~sep = 36 | Char.equal c sep || Char.equal c '"' || Char.equal c lf 37 | 38 | let output_field out x ~sep = 39 | if not (String.exists x ~f:(meta_character ~sep)) then 40 | Out_channel.output_string out x 41 | else begin 42 | let putc c = Out_channel.output_char out c in 43 | putc '"'; 44 | String.iter x ~f:(fun c -> 45 | if Char.equal c '"' then 46 | (putc c; putc c) 47 | else 48 | putc c 49 | ); 50 | putc '"'; 51 | end 52 | 53 | let write ?(sep=',') out = function 54 | | Field field -> output_field out field ~sep 55 | | Comma -> Out_channel.output_char out sep 56 | | Newline -> Out_channel.output_char out lf 57 | 58 | } 59 | -------------------------------------------------------------------------------- /sexp_app/src/csv_record.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Csv_lexeme.T 3 | 4 | type t = string list 5 | 6 | let read lexbuf = 7 | let fields = Manifest.create () in 8 | let return () = Some (Manifest.to_list fields) in 9 | let rec loop prev = 10 | match Csv_lexeme.read lexbuf with 11 | | None -> 12 | (match prev with 13 | | Newline -> None 14 | | _ -> return ()) 15 | | Some this -> 16 | (match prev, this with 17 | | Newline, Field x | Comma, Field x -> 18 | Manifest.add fields x; 19 | loop this 20 | | Field x, Field y -> failwithf "adjacent fields (%s) and (%s)" x y () 21 | | Field _, Comma -> loop this 22 | | Field _, Newline -> return () 23 | | Newline, Newline -> loop this 24 | | Newline, Comma | Comma, Comma -> 25 | Manifest.add fields ""; 26 | loop Comma 27 | | Comma, Newline -> 28 | Manifest.add fields ""; 29 | return ()) 30 | in 31 | loop Newline 32 | ;; 33 | 34 | let write ?(sep = ',') out t = 35 | let lexemes = 36 | List.fold_right t ~init:[ Newline ] ~f:(fun field lexemes -> 37 | match lexemes with 38 | | [ Newline ] -> Field field :: lexemes 39 | | lexemes -> Field field :: Comma :: lexemes) 40 | in 41 | List.iter lexemes ~f:(fun lexeme -> Csv_lexeme.write out lexeme ~sep) 42 | ;; 43 | -------------------------------------------------------------------------------- /sexp_app/src/csv_record.mli: -------------------------------------------------------------------------------- 1 | (** CSV records *) 2 | 3 | type t = string list 4 | 5 | val read : Lexing.lexbuf -> t option 6 | val write : ?sep:char -> out_channel -> t -> unit 7 | -------------------------------------------------------------------------------- /sexp_app/src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name sexp_app) 3 | (public_name sexp.sexp_app) 4 | (libraries core lazy_list jsonaf re2 sexplib str) 5 | (preprocess 6 | (pps ppx_jane))) 7 | 8 | (ocamllex csv_lexeme) 9 | -------------------------------------------------------------------------------- /sexp_app/src/manifest.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | type 'a t = 'a list ref 4 | 5 | let create () = ref [] 6 | let add t x = t := x :: !t 7 | let to_list t = List.rev !t 8 | -------------------------------------------------------------------------------- /sexp_app/src/manifest.mli: -------------------------------------------------------------------------------- 1 | (** mutable lists with addition to the end *) 2 | 3 | type 'a t 4 | 5 | val create : unit -> 'a t 6 | val add : 'a t -> 'a -> unit 7 | val to_list : 'a t -> 'a list 8 | -------------------------------------------------------------------------------- /sexp_app/src/parse_everything.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | (* Given a function to get the next char of the input, returns a function to get the next 4 | block of transformed input. Strings returned via `Ok always have nonzero length. *) 5 | let read_of_next_char 6 | : next_char:(unit -> char option) -> (unit -> [ `Ok of string | `Eof ]) Staged.t 7 | = 8 | fun ~next_char -> 9 | (* These are string that could trigger comment-mode in the sexp lexer OR lead to parse 10 | errors - we make sure they get quoted so that they get interpreted as atoms instead *) 11 | let should_be_quoted c = 12 | match c with 13 | | ';' | '|' | '#' | ')' -> true 14 | | _ -> false 15 | in 16 | (* The transformation necessary to turn a raw atom that didn't appear with double quotes 17 | into a string that will parse to the same character sequence once it does get 18 | double-quoted *) 19 | let escape = unstage (String.Escaping.escape ~escapeworthy:[ '"' ] ~escape_char:'\\') in 20 | let maybe_quote_not_inside_string_atom s = 21 | if String.exists s ~f:should_be_quoted then "\"" ^ escape s ^ "\"" else s 22 | in 23 | (* These are characters that signal the end of the current atom when not inside a string *) 24 | let terminates_atom c ~paren_depth = 25 | match c with 26 | | '(' | '"' | ' ' | '\t' | '\012' | '\n' | '\r' -> true 27 | | ')' when Int.( > ) !paren_depth 0 -> true 28 | | _ -> false 29 | in 30 | (* State variables *) 31 | let paren_depth = ref 0 in 32 | let inside_string = ref false in 33 | let follows_escape_in_string = ref false in 34 | let atom_so_far = Buffer.create 32 in 35 | let all_done = ref false in 36 | (* Read from the in_channel and either return `Eof or return `Ok s where s is a chunk 37 | of input, possibly zero-length *) 38 | let read () = 39 | if !all_done 40 | then `Eof 41 | else ( 42 | match next_char () with 43 | | Some c -> 44 | if (* Inside string *) 45 | !inside_string 46 | then ( 47 | (* If we followed an escape character, we always take the next char verbatim *) 48 | let followed_escape_in_string = !follows_escape_in_string in 49 | follows_escape_in_string := false; 50 | if followed_escape_in_string 51 | then ( 52 | Buffer.add_char atom_so_far c; 53 | `Ok "" (* Else... *)) 54 | else ( 55 | match c with 56 | (* A quote terminates the string and we return it *) 57 | | '"' -> 58 | Buffer.add_char atom_so_far c; 59 | let s = Buffer.contents atom_so_far in 60 | Buffer.clear atom_so_far; 61 | inside_string := false; 62 | `Ok s 63 | (* Any other character gets added to the string, and if it's an escape 64 | character, we remember this *) 65 | | c -> 66 | if Char.equal c '\\' then follows_escape_in_string := true; 67 | Buffer.add_char atom_so_far c; 68 | `Ok "" (* Not inside string *))) 69 | else if (* Chars that don't terminate the atom just get appended and we continue *) 70 | not (terminates_atom c ~paren_depth) 71 | then ( 72 | Buffer.add_char atom_so_far c; 73 | `Ok "" (* Else... *)) 74 | else ( 75 | (* We have a naked atom that didn't appear as a string in the sexp - quote 76 | if it needed *) 77 | let ret = Buffer.contents atom_so_far in 78 | Buffer.clear atom_so_far; 79 | let ret = maybe_quote_not_inside_string_atom ret in 80 | (* Then handle the character that terminated the atom *) 81 | match c with 82 | (* Parens change the depth and then get output *) 83 | | '(' -> 84 | incr paren_depth; 85 | `Ok (ret ^ String.of_char c) 86 | | ')' -> 87 | decr paren_depth; 88 | `Ok (ret ^ String.of_char c) 89 | (* Whitespace simply gets output *) 90 | | ' ' | '\t' | '\012' | '\n' | '\r' -> `Ok (ret ^ String.of_char c) 91 | (* Quotes send us into string mode *) 92 | | '"' -> 93 | inside_string := true; 94 | Buffer.add_char atom_so_far c; 95 | `Ok ret 96 | | _ -> assert false) 97 | (* End of in-channel input *) 98 | | None -> 99 | let ret = 100 | (* If inside a string, then to prevent parse errors, finish up the string *) 101 | if !inside_string 102 | then ( 103 | (* If there was an escape char without anything after it, complete that too *) 104 | if !follows_escape_in_string then Buffer.add_char atom_so_far '\\'; 105 | Buffer.add_char atom_so_far '"'; 106 | let ret = Buffer.contents atom_so_far in 107 | Buffer.clear atom_so_far; 108 | ret 109 | (* Else if not inside a string, finish up any naked atom and quote as needed *)) 110 | else ( 111 | let ret = Buffer.contents atom_so_far in 112 | Buffer.clear atom_so_far; 113 | maybe_quote_not_inside_string_atom ret) 114 | in 115 | (* Then add parens to get our paren depth back to 0 *) 116 | while !paren_depth > 0 do 117 | Buffer.add_char atom_so_far ')'; 118 | decr paren_depth 119 | done; 120 | (* Yay! *) 121 | all_done := true; 122 | `Ok (ret ^ Buffer.contents atom_so_far)) 123 | in 124 | (* Transform the step function so that it never returns Ok "" *) 125 | let rec read_until () = 126 | match read () with 127 | | `Ok "" -> read_until () 128 | | `Ok s -> `Ok s 129 | | `Eof -> `Eof 130 | in 131 | stage read_until 132 | ;; 133 | 134 | let lexbuf_of_channel chan = 135 | let next_char () = In_channel.input_char chan in 136 | let read = unstage (read_of_next_char ~next_char) in 137 | (* Tuple of string, chars used in string *) 138 | let leftover = ref ("", 0) in 139 | (* Read up to n chars into bytes, for lexer *) 140 | let lex_fun bytes n = 141 | let result = 142 | if String.length (fst !leftover) - snd !leftover > 0 143 | then ( 144 | let s = !leftover in 145 | leftover := "", 0; 146 | `Ok s) 147 | else ( 148 | match read () with 149 | | `Eof -> `Eof 150 | | `Ok s -> `Ok (s, 0)) 151 | in 152 | match result with 153 | | `Eof -> 0 154 | | `Ok (s, used) -> 155 | if String.length s - used > n 156 | then ( 157 | Bytes.From_string.blit ~src_pos:used ~dst_pos:0 ~src:s ~dst:bytes ~len:n; 158 | leftover := s, used + n; 159 | n) 160 | else ( 161 | Bytes.From_string.blit 162 | ~src_pos:used 163 | ~dst_pos:0 164 | ~src:s 165 | ~dst:bytes 166 | ~len:(String.length s - used); 167 | String.length s - used) 168 | in 169 | Lexing.from_function lex_fun 170 | ;; 171 | 172 | let transform_string s = 173 | let pos = ref 0 in 174 | let next_char () = 175 | if !pos >= String.length s 176 | then None 177 | else ( 178 | let c = s.[!pos] in 179 | incr pos; 180 | Some c) 181 | in 182 | let read = unstage (read_of_next_char ~next_char) in 183 | let buf = Buffer.create (String.length s) in 184 | let rec loop () = 185 | match read () with 186 | | `Eof -> Buffer.contents buf 187 | | `Ok s -> 188 | Buffer.add_string buf s; 189 | loop () 190 | in 191 | loop () 192 | ;; 193 | 194 | open String.Replace_polymorphic_compare 195 | 196 | let unchanged s = transform_string s = s 197 | let%test _ = unchanged "" 198 | let%test _ = unchanged "abc" 199 | let%test _ = unchanged "()" 200 | let%test _ = unchanged "bf((a)d((c\"eg\")))" 201 | let%test _ = unchanged " d ( ef) \n (\r\t ) \\ \\m x \") \b\r (\"" 202 | let%test _ = unchanged "%!@&*^:'?/,.~`[}]{-+=_-" 203 | let%test _ = unchanged "\"foo\\\"d\"" 204 | let%test "completes unmatched parens" = transform_string "(" = "()" 205 | let%test "completes unmatched parens" = transform_string "(a)(b(()(c" = "(a)(b(()(c)))" 206 | let%test "completes unmatched quotes" = transform_string "\"" = "\"\"" 207 | let%test "completes unmatched quotes" = transform_string "\"\\\"" = "\"\\\"\"" 208 | let%test "completes unmatched quotes" = transform_string "((\"ab" = "((\"ab\"))" 209 | let%test "completes unmatched escape in string" = transform_string "\"\\" = "\"\\\\\"" 210 | let%test "stringifies extra close parens" = transform_string ")" = "\")\"" 211 | 212 | let%test "stringifies extra close parens" = 213 | transform_string ")(())))())" = "\")\"(())\"))\"()\")\"" 214 | ;; 215 | 216 | let%test "turns sexp special chars to strings" = transform_string "#" = "\"#\"" 217 | let%test "turns sexp special chars to strings" = transform_string ";" = "\";\"" 218 | let%test "turns sexp special chars to strings" = transform_string "|" = "\"|\"" 219 | 220 | let%test "turns sexp special chars to strings" = 221 | transform_string "## |#| (#a;) ;a\"bc\"|\n;#)|" 222 | = "\"##\" \"|#|\" (\"#a;\") \";a\"\"bc\"\"|\"\n\";#)|\"" 223 | ;; 224 | -------------------------------------------------------------------------------- /sexp_app/src/parse_everything.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (* The purpose of this is to make it so that 'sexp print' has the option to make a 'best 4 | effort' at parsing anything rather than failing. It's frustrating when working in the 5 | console to have a big dump of data that contains sexps but isn't itself a sexp, and 6 | then attempt to pipe it to 'sexp print' and having sexp print fail because the extra 7 | stuff causes it to fail to parse. *) 8 | (* Turns an in_channel -> lexbuf by fixing up any problems in the input such that the sexp 9 | parser should be able to parse everything in the lexbuf. This also causes any comments 10 | in the sexp to get interpreted directly as part of the sexp rather than omitted. *) 11 | 12 | val lexbuf_of_channel : In_channel.t -> Lexing.lexbuf 13 | 14 | (* Does the same thing directly to a string *) 15 | 16 | val transform_string : string -> string 17 | 18 | (* Incremental version *) 19 | 20 | val read_of_next_char 21 | : next_char:(unit -> char option) 22 | -> (unit -> [ `Ok of string | `Eof ]) Staged.t 23 | -------------------------------------------------------------------------------- /sexp_app/src/parts.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | module Path = struct 4 | include Sexplib.Path 5 | 6 | module X : sig 7 | val compare_el : el -> el -> int 8 | end = struct 9 | type nonrec el = el = 10 | | Pos of int 11 | | Match of string * int 12 | | Rec of string 13 | [@@deriving compare] 14 | end 15 | 16 | include X 17 | 18 | let to_string l = 19 | match l with 20 | | [] -> "." 21 | | l -> 22 | String.concat 23 | ~sep:"" 24 | (List.map l ~f:(function 25 | | Pos p -> sprintf ".[%i]" p 26 | | Rec n -> sprintf ".%s" n 27 | | Match (n, p) -> sprintf ".%s[%i]" n p)) 28 | ;; 29 | end 30 | 31 | open Path 32 | open Sexplib.Type 33 | 34 | type t = (Path.t * Sexp.t) list 35 | 36 | let identifier = Or_error.ok_exn (Re2.create "^[a-zA-Z_][_a-zA-Z0-9]+$") 37 | 38 | let is_record l = 39 | List.for_all l ~f:(function 40 | | List [ Atom n; _ ] -> Re2.matches identifier n 41 | | _ -> false) 42 | && Option.is_none 43 | (List.find_a_dup 44 | (List.rev_map l ~f:(function 45 | | List [ Atom n; _ ] -> n 46 | | _ -> assert false)) 47 | ~compare:Poly.compare) 48 | ;; 49 | 50 | let rec flatten path t = 51 | match t with 52 | | Atom s -> [ List.rev path, Atom s ] 53 | | List [] -> [ List.rev path, List [] ] 54 | | List l -> 55 | if is_record l 56 | then 57 | List.concat 58 | (List.map l ~f:(function 59 | | List [ Atom n; v ] -> flatten (Rec n :: path) v 60 | | _ -> assert false)) 61 | else List.concat (List.mapi l ~f:(fun p e -> flatten (Pos p :: path) e)) 62 | ;; 63 | 64 | let flatten t = flatten [] t 65 | 66 | let rec assemble (l : (Path.t * Sexp.t) list) = 67 | let group l = 68 | List.group l ~break:(fun (p1, _) (p2, _) -> 69 | not ([%compare.equal: Path.el option] (List.hd p1) (List.hd p2))) 70 | in 71 | let one_deeper l = 72 | List.map l ~f:(fun (p, v) -> 73 | match p with 74 | | Pos _ :: p -> p, v 75 | | Rec _ :: p -> p, v 76 | | _ -> assert false) 77 | in 78 | match group l with 79 | | [ [ (p, v) ] ] -> assemble1 p v 80 | | [] -> assert false 81 | | groups -> 82 | List 83 | (List.map groups ~f:(fun l -> 84 | match List.hd_exn l with 85 | | Pos _ :: _, _ -> assemble (one_deeper l) 86 | | Rec n :: _, _ -> List [ Atom n; assemble (one_deeper l) ] 87 | | _ -> assert false)) 88 | 89 | and assemble1 p v = 90 | match p with 91 | | [] -> v 92 | | Pos _ :: p -> List [ assemble1 p v ] 93 | | Rec n :: p -> List [ List [ Atom n; assemble1 p v ] ] 94 | | Match _ :: _ -> assert false 95 | ;; 96 | 97 | let output t out = 98 | List.iter t ~f:(fun (p, v) -> 99 | Printf.fprintf out "%s\t%s\n" (Path.to_string p) (Sexp.to_string_mach v)); 100 | Out_channel.output_string out "\n" 101 | ;; 102 | 103 | let input inch = 104 | let rec loop res = 105 | match In_channel.input_line ~fix_win_eol:true inch with 106 | | None | Some "" -> List.rev res 107 | | Some l -> 108 | let a, b = String.lsplit2_exn l ~on:'\t' in 109 | let path = Sexplib.Path.parse a in 110 | loop ((path, Sexp.of_string b) :: res) 111 | in 112 | loop [] 113 | ;; 114 | -------------------------------------------------------------------------------- /sexp_app/src/parts.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | module Path : sig 4 | type t = Sexplib.Path.t 5 | 6 | val to_string : t -> string 7 | end 8 | 9 | type t = (Sexplib.Path.t * Sexp.t) list 10 | 11 | val flatten : Sexp.t -> t 12 | val assemble : t -> Sexp.t 13 | val input : In_channel.t -> t 14 | val output : t -> Out_channel.t -> unit 15 | -------------------------------------------------------------------------------- /sexp_app/src/semantics.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Syntax 3 | 4 | type 'sexp query_meaning = 'sexp -> Sexp.t Lazy_list.t 5 | 6 | val query : Query.t -> Sexp.t query_meaning 7 | val query' : Query.t -> Sexp_ext.t query_meaning 8 | val change : Change.t -> Sexp.t -> Sexp.t option 9 | -------------------------------------------------------------------------------- /sexp_app/src/sexp_app.ml: -------------------------------------------------------------------------------- 1 | module Csv_file = Csv_file 2 | module Csv_lexeme = Csv_lexeme 3 | module Csv_record = Csv_record 4 | module Manifest = Manifest 5 | module Parse_everything = Parse_everything 6 | module Parts = Parts 7 | module Semantics = Semantics 8 | module Sexp_ext = Sexp_ext 9 | module Sexps = Sexps 10 | module String_pad = String_pad 11 | module Syntax = Syntax 12 | module Test = Test 13 | module To_csv = To_csv 14 | module To_json = To_json 15 | module Utils = Utils 16 | -------------------------------------------------------------------------------- /sexp_app/src/sexp_ext.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | type t = 4 | | Atom of string 5 | | List of t Lazy_list.t 6 | 7 | let rec t_of_sexp = function 8 | | Sexp.Atom x -> Atom x 9 | | Sexp.List xs -> List (Lazy_list.map ~f:t_of_sexp (Lazy_list.of_list xs)) 10 | ;; 11 | 12 | let rec cps_map xs ~f k = 13 | match xs with 14 | | [] -> k [] 15 | | x :: xs -> f x (fun y -> cps_map xs ~f (fun ys -> k (y :: ys))) 16 | ;; 17 | 18 | let rec sexp_of_t t k = 19 | match t with 20 | | Atom x -> k (Sexp.Atom x) 21 | | List xs -> cps_map ~f:sexp_of_t (Lazy_list.to_list xs) (fun xs -> k (Sexp.List xs)) 22 | ;; 23 | 24 | let sexp_of_t t = sexp_of_t t (fun x -> x) 25 | 26 | let equal = 27 | let rec equal_loop = function 28 | | [] -> true 29 | | xy :: pending -> 30 | (match xy with 31 | | Sexp.Atom x, Atom y -> String.equal x y && equal_loop pending 32 | | Sexp.List xs, List ys -> combine_loop xs ys pending 33 | | _ -> false) 34 | and combine_loop xs ys pending = 35 | match xs, Lazy_list.decons ys with 36 | | [], None -> equal_loop pending 37 | | x :: xs, Some (y, ys) -> combine_loop xs ys ((x, y) :: pending) 38 | | _ -> false 39 | in 40 | fun x y -> equal_loop [ x, y ] 41 | ;; 42 | 43 | let rec lowercase = function 44 | | Atom x -> Atom (String.lowercase x) 45 | | List xs -> List (Lazy_list.map ~f:lowercase xs) 46 | ;; 47 | 48 | let rec sub_expressions x = 49 | Lazy_list.cons 50 | x 51 | (match x with 52 | | Atom _ -> Lazy_list.empty () 53 | | List xs -> Lazy_list.bind xs ~f:sub_expressions) 54 | ;; 55 | -------------------------------------------------------------------------------- /sexp_app/src/sexp_ext.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | type t = 4 | | Atom of string 5 | | List of t Lazy_list.t 6 | 7 | include Sexpable with type t := t 8 | 9 | val equal : Sexp.t -> t -> bool 10 | val lowercase : t -> t 11 | val sub_expressions : t -> t Lazy_list.t 12 | -------------------------------------------------------------------------------- /sexp_app/src/sexps.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | include Hash_set.Make (struct 4 | type t = Sexp.t = 5 | | Atom of string 6 | | List of t list 7 | [@@deriving compare] 8 | 9 | let sexp_of_t = Fn.id 10 | let t_of_sexp = Fn.id 11 | 12 | let rec hash = function 13 | | Atom s -> String.hash s 14 | | List ts -> List.fold ts ~init:(List.length ts) ~f:(fun n t -> n lxor hash t) 15 | ;; 16 | end) 17 | 18 | (** hide optional args *) 19 | let create () = create () 20 | 21 | let of_list sexps = of_list sexps 22 | -------------------------------------------------------------------------------- /sexp_app/src/sexps.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | type t = Sexp.t Hash_set.t 4 | 5 | val create : unit -> t 6 | val of_list : Sexp.t list -> t 7 | 8 | include Sexpable with type t := t 9 | -------------------------------------------------------------------------------- /sexp_app/src/string_pad.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | (* 4 | Creating a pad and adding a bunch of strings to it has asymtotic time 5 | and space complexity O(n) where n is the total number of characters. 6 | Every once and a while, there will be a very expensive [add] operation. 7 | *) 8 | 9 | (** INVARIANT: strings are in order of decreasing length from right to left *) 10 | type t = 11 | | Empty 12 | | Snoc of t * string 13 | 14 | let empty = Empty 15 | 16 | let rec add t x = 17 | match t with 18 | | Empty -> Snoc (Empty, x) 19 | | Snoc (rest, y) -> 20 | if String.length y > String.length x then Snoc (t, x) else add rest (y ^ x) 21 | ;; 22 | 23 | let singleton x = add empty x 24 | let add_char t c = add t (String.of_char c) 25 | 26 | let rec dump = function 27 | | Empty -> "" 28 | | Snoc (Empty, x) -> x 29 | | Snoc (Snoc (t, x), y) -> dump (Snoc (t, x ^ y)) 30 | ;; 31 | -------------------------------------------------------------------------------- /sexp_app/src/string_pad.mli: -------------------------------------------------------------------------------- 1 | (* an asymtotically good incremental string concatenater *) 2 | 3 | type t 4 | 5 | val empty : t 6 | val add : t -> string -> t 7 | val singleton : string -> t 8 | val add_char : t -> char -> t 9 | val dump : t -> string 10 | -------------------------------------------------------------------------------- /sexp_app/src/syntax.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | type regex = Re2.t [@@deriving sexp] 4 | 5 | module Template : sig 6 | type 'a t = 7 | | Hole of 'a 8 | | Atom of string 9 | | List of 'a t list 10 | 11 | val t_of_sexp : (Sexp.t -> 'a) -> Sexp.t -> 'a t 12 | val map : 'a t -> f:('a -> 'b) -> 'b t 13 | val literal : Sexp.t -> 'a t 14 | end 15 | 16 | module Quotation : sig 17 | (* 18 | a non-regular datatype for quasi-quotation 19 | due to Ross Paterson and Richard Bird 20 | "de Bruijn notation as a nested datatype" 21 | JFP 9(1): 77-91. 1999 22 | *) 23 | 24 | type 'a t = 25 | | Atom of string 26 | | List of 'a t list 27 | | Quote of 'a t t 28 | | Unquote of 'a 29 | | Splice of 'a 30 | [@@deriving sexp] 31 | end 32 | 33 | module Var : sig 34 | include Identifiable.S 35 | 36 | val is_list : t -> bool 37 | end 38 | 39 | module type Pattern_general = sig 40 | type t = Var.t Template.t [@@deriving sexp] 41 | 42 | val pmatch : t -> Sexp.t -> fail:(unit -> 'a) -> succ:(Sexp.t Var.Table.t -> 'a) -> 'a 43 | val instantiate : t -> Sexp.t Var.Table.t -> (Sexp.t -> 'b) -> 'b 44 | end 45 | 46 | module Pattern : Pattern_general 47 | module Pattern_record : Pattern_general 48 | 49 | type 'a anti_quote = 50 | | Unquote of 'a 51 | | Splice of 'a 52 | 53 | val flatten : 'a Quotation.t -> 'a anti_quote Template.t 54 | 55 | module Record_field : sig 56 | type 'change t = 57 | { change : 'change 58 | ; new_name : string option 59 | ; presence : [ `Present | `Optional | `Absent ] 60 | } 61 | end 62 | 63 | type query = 64 | | This 65 | | Pipe of query * query 66 | | Die 67 | | Cat of query * query 68 | | Equals of Sexp.t Hash_set.t 69 | | Regex of regex 70 | | Variant of string * int option 71 | | Field of string 72 | | Index of int 73 | | Each 74 | | Smash 75 | | Atomic 76 | | Wrap of query 77 | | Test of query 78 | | Not of query 79 | | And of query * query 80 | | Or of query * query 81 | | If of query * query * query 82 | | Branch of query * query * query 83 | | Quote of query anti_quote Template.t 84 | | Change of change 85 | | Restructure 86 | 87 | and change = 88 | | Id 89 | | Fail 90 | | Delete 91 | | Alt of change * change 92 | | Seq of change * change 93 | | Children of change 94 | | Record of change Record_field.t String.Map.t 95 | | Rewrite of Pattern.t * Pattern.t 96 | | Rewrite_record of Pattern_record.t * Pattern_record.t 97 | | Topdown of change 98 | | Bottomup of change 99 | | Lowercase 100 | | Concat 101 | | Query of query 102 | 103 | val pipe : query list -> query 104 | val cat : query list -> query 105 | val or_ : query list -> query 106 | val and_ : query list -> query 107 | val quote : query Quotation.t -> query 108 | val equals : Sexp.t -> query 109 | val try_ : change -> change 110 | val alt : change list -> change 111 | val seq : change list -> change 112 | val const : Sexp.t -> change 113 | 114 | (* recursive syntactic sugar *) 115 | module Unroll : sig 116 | val topdown : change -> change 117 | val bottomup : change -> change 118 | end 119 | 120 | module Query : sig 121 | type t = query [@@deriving sexp] 122 | end 123 | 124 | module Change : sig 125 | type t = change [@@deriving sexp] 126 | end 127 | -------------------------------------------------------------------------------- /sexp_app/src/test.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | let%test_unit _ = 4 | let change_tests = 5 | [ (* const *) 6 | "(const 13)", "foo", Some "13" 7 | ; "(const 13)", "()", Some "13" 8 | ; "(const 13)", "(foo bar)", Some "13" 9 | ; (* alt *) 10 | "(alt)", "()", None 11 | ; (* delete *) 12 | "delete", "foo", None 13 | ; "(seq delete fail)", "foo", None 14 | ; "(children delete)", "()", Some "()" 15 | ; "(children delete)", "(foo bar)", Some "()" 16 | ; "(children (alt (rewrite foo bar) delete))", "(foo zzz)", Some "(bar)" 17 | ; (* record *) 18 | "(record)", "foo", None 19 | ; "(record)", "()", Some "()" 20 | ; "(record)", "(foo)", None 21 | ; "(record)", "((foo 13))", Some "((foo 13))" 22 | ; "(record (foo (const 13)))", "((foo foo))", Some "((foo 13))" 23 | ; "(record (foo (const 13)))", "((foo foo) (bar bar))", Some "((foo 13) (bar bar))" 24 | ; ( "(record (foo (const 13)) (_ id))" 25 | , "((foo foo) (bar bar))" 26 | , Some "((foo 13) (bar bar))" ) 27 | ; "(record (_ delete))", "((foo foo) (bar bar))", Some "()" 28 | ; "(record (foo id) (_ delete))", "((foo 13))", Some "((foo 13))" 29 | ; "(record (foo id) (_ delete))", "((foo 13) (bar 14))", Some "((foo 13))" 30 | ; "(record (foo id) (_ fail))", "((foo 13))", Some "((foo 13))" 31 | ; "(record (foo id) (_ fail))", "((foo 13) (bar 14))", None 32 | ; "(record (foo id))", "()", None 33 | ; "(record (foo (present ) id))", "()", None 34 | ; "(record (foo (absent ) id))", "()", Some "((foo ()))" 35 | ; "(record (foo (optional) id))", "()", Some "((foo ()))" 36 | ; "(record (foo ((rename bar)) id))", "((foo 13))", Some "((bar 13))" 37 | ; ( "(record (foo id) (bar (const 13)) (baz (rewrite $X ($X $X))))" 38 | , "((foo 13) (bar 14) (baz 15) (bap 16))" 39 | , Some "((foo 13) (bar 13) (baz (15 15)) (bap 16))" ) 40 | ; "(record (a1 (optional) (const foo)))", "()", Some "((a1 foo))" 41 | ; ( "(record (a1 (optional) (const foo)) (a2 id))" 42 | , "((a2 v2) (a3 v3))" 43 | , Some "((a2 v2) (a3 v3) (a1 foo))" ) 44 | ] 45 | in 46 | let module Bug = struct 47 | type t = 48 | { program : string 49 | ; syntax : Syntax.Change.t 50 | ; input : string 51 | ; output : Sexp.t option 52 | ; expected_output : Sexp.t option 53 | } 54 | [@@deriving sexp_of] 55 | end 56 | in 57 | let bugs = 58 | List.filter_map change_tests ~f:(fun (program, input, expected_output) -> 59 | let syntax = Syntax.Change.t_of_sexp (Sexp.of_string program) in 60 | let output = Semantics.change syntax (Sexp.of_string input) in 61 | let expected_output = Option.map expected_output ~f:Sexp.of_string in 62 | if [%compare.equal: Sexp.t option] output expected_output 63 | then None 64 | else Some { Bug.program; syntax; input; output; expected_output }) 65 | in 66 | if not (List.is_empty bugs) 67 | then failwiths "bugs in Semantics.change" bugs [%sexp_of: Bug.t list] 68 | ;; 69 | 70 | let%expect_test _ = 71 | let run query inputs = 72 | let syntax = Syntax.Query.t_of_sexp (Sexp.of_string query) in 73 | List.iter inputs ~f:(fun input -> 74 | let result = 75 | Sexp.of_string input |> Semantics.query syntax |> Lazy_list.to_list |> Sexp.List 76 | in 77 | print_s result) 78 | in 79 | let examples = 80 | [ {|((id "(a b c)") (value 1))|} 81 | ; {|((id "results: (a b c)") (value 1))|} 82 | ; {|((id "A" ) (value 1))|} 83 | ; {|((id "(" ) (value 1))|} 84 | ; {|((id " " ) (value 1))|} 85 | ] 86 | in 87 | run {|(field id)|} examples; 88 | [%expect 89 | {| 90 | ("(a b c)") 91 | ("results: (a b c)") 92 | (A) 93 | ("(") 94 | (" ") 95 | |}]; 96 | run {|(pipe (field id) restructure)|} examples; 97 | [%expect 98 | {| 99 | ((a b c)) 100 | (results: (a b c)) 101 | (A) 102 | () 103 | () 104 | |}] 105 | ;; 106 | -------------------------------------------------------------------------------- /sexp_app/src/test.mli: -------------------------------------------------------------------------------- 1 | (* Just for testing. Nothing to export. *) 2 | -------------------------------------------------------------------------------- /sexp_app/src/to_csv.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | let contains xs x = 4 | try 5 | ignore (List.find_exn xs ~f:(fun y -> String.equal x y)); 6 | true 7 | with 8 | | _ -> false 9 | ;; 10 | 11 | let rec has_duplicates = function 12 | | [] -> None 13 | | x :: xs -> if contains xs x then Some x else has_duplicates xs 14 | ;; 15 | 16 | module Query : sig 17 | val is_a_record : Sexp.t -> bool 18 | val header : Sexp.t -> string list 19 | 20 | val record 21 | : view_atoms_as_strings:bool 22 | -> warn_on_missing_fields:bool 23 | -> string list 24 | -> Sexp.t 25 | -> Csv_record.t 26 | end = struct 27 | open Syntax 28 | 29 | let is_a_record = 30 | let pair = and_ [ Index 0; Index 1; Not (Index 2) ] in 31 | let q = 32 | and_ [ Not Atomic; pipe [ Each; and_ [ pair; Test (pipe [ Index 0; Atomic ]) ] ] ] 33 | in 34 | fun sexp -> 35 | match Lazy_list.decons (Semantics.query q sexp) with 36 | | None -> false 37 | | Some _ -> true 38 | ;; 39 | 40 | let header = 41 | let q = pipe [ Each; Index 0 ] in 42 | let strip = function 43 | | Sexp.Atom x -> x 44 | | Sexp.List _ -> failwith "failed to ensure that header is called on a record" 45 | in 46 | fun sexp -> 47 | let hs = Lazy_list.to_list (Lazy_list.map ~f:strip (Semantics.query q sexp)) in 48 | match has_duplicates hs with 49 | | Some x -> failwith ("duplicate field name '" ^ x ^ "'") 50 | | None -> hs 51 | ;; 52 | 53 | let record ~view_atoms_as_strings ~warn_on_missing_fields fields = 54 | let q = cat (List.map fields ~f:(fun f -> Wrap (Field f))) in 55 | let coerce (f, results) = 56 | match results with 57 | | Sexp.List [ x ] -> 58 | (match x with 59 | | Sexp.Atom str when view_atoms_as_strings -> str 60 | | _ -> Sexp.to_string x) 61 | | Sexp.List (x :: _) -> 62 | eprintf "multiple values for field %s. Arbitrarily picking the first one.\n" f; 63 | Sexp.to_string x 64 | | _ -> 65 | if warn_on_missing_fields then eprintf "missing value for field %s\n" f; 66 | "" 67 | in 68 | fun sexp -> 69 | List.map 70 | ~f:coerce 71 | (List.zip_exn fields (Lazy_list.to_list (Semantics.query q sexp))) 72 | ;; 73 | end 74 | 75 | let header ~two_pass_processing sexps = 76 | if two_pass_processing 77 | then ( 78 | let fields = 79 | let q = String.Hash_queue.create () in 80 | (* note: this [Lazy_list.iter] call is why two-pass processing uses more memory. 81 | It's pulling every input sexp in memory. *) 82 | Lazy_list.iter sexps ~f:(fun sexp -> 83 | if Query.is_a_record sexp 84 | then 85 | List.iter (Query.header sexp) ~f:(fun field -> 86 | match Hash_queue.enqueue_back q field () with 87 | | `Ok | `Key_already_present -> ())); 88 | Hash_queue.keys q 89 | in 90 | if List.is_empty fields then None else Some fields) 91 | else ( 92 | match Lazy_list.decons sexps with 93 | | Some (x, _) -> 94 | if not (Query.is_a_record x) then failwith "first element is not a record\n"; 95 | Some (Query.header x) 96 | | _ -> None) 97 | ;; 98 | 99 | let csv_of_sexp ~view_atoms_as_strings ~two_pass_processing sexps = 100 | match header ~two_pass_processing sexps with 101 | | Some header -> 102 | let warn_on_missing_fields = 103 | (* The whole point of two-pass processing is to gather up field names across all 104 | records since presumably the first row isn't good enough to consult for this 105 | purpose since we not all rows will populate the same columns. So no point in 106 | complaining about that very thing. *) 107 | not two_pass_processing 108 | in 109 | let extract = Query.record ~view_atoms_as_strings ~warn_on_missing_fields header in 110 | Lazy_list.cons header (Lazy_list.map ~f:extract sexps) 111 | | None -> Lazy_list.empty () 112 | ;; 113 | -------------------------------------------------------------------------------- /sexp_app/src/to_csv.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val csv_of_sexp 4 | : view_atoms_as_strings:bool 5 | -> two_pass_processing:bool 6 | -> Sexp.t Lazy_list.t 7 | -> Csv_record.t Lazy_list.t 8 | -------------------------------------------------------------------------------- /sexp_app/src/to_json.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** [parse_json_value] attempts to parse a json value from the sexps on the right hand 4 | side of the flatten command. (i.e. the sexps in a Parts.t) We're guaranteed that these 5 | sexps are not nested. *) 6 | 7 | module El = struct 8 | type t = Sexplib.Path.el = 9 | | Pos of int 10 | | Match of string * int 11 | | Rec of string 12 | [@@deriving compare] 13 | end 14 | 15 | (* When converting a sexp to JSON, we want to convert things that look like numbers 16 | to _valid_ JSON, and, critically, we also don't want to lose any information. 17 | (Idempotent sexp -> json -> sexp round-tripping seems like a desirable property.) 18 | It turns out this is rather tricky. 19 | 20 | OCaml and JSON have different grammars for numbers. The set of valid OCaml number 21 | literals is a strict superset of valid JSON number literals. Examples (not guaranteed 22 | to be exhaustive) of features supported in OCaml, but not in JSON: 23 | - Hex, octal, and binary literals, e.g., 0xc0de, 0o1357, or 0b01001010 24 | - Leading '+', e.g., +10 25 | - Multiple leading '0's, e.g., 007 26 | - Underscores for readability, e.g., 8_675_309 27 | 28 | In order to avoid losing information, even if something looks like a number in OCaml, 29 | we shouldn't convert it as a similar looking string that would also parse to the same 30 | number (e.g., 007 -> 7). This is bad because not everything that looks like a number is 31 | a number. Some examples include: 32 | - Any sort of number/account id that has a standard format, e.g. 12 digit bank account 33 | numbers may start with a 0. 34 | - Command line arguments may start with a '+' or '-' for specifying relative ranges 35 | rather than absolute values 36 | 37 | Therefore, in order to make sure we produce valid JSON, we will only convert atoms that 38 | are already valid JSON numbers according to its spec [1], which we'll do by testing the 39 | string against a straightforward regex, and we will serialize those strings without any 40 | modifications... 41 | 42 | ... with one exception: since we very frequently use underscores for readability in 43 | larger numbers, we will convert strings-that-look-like-numbers-with-underscores to 44 | JSON numbers by stripping out the underscores. 45 | 46 | OCaml is very generous in that it accepts any number of underscores anywhere in a 47 | number, e.g., "_-__1_.__2_e__+_3_4" is valid floating point literal. Conceivably, we 48 | could be less accepting than OCaml, and only support single occurrences of an 49 | underscore _between_ numbers, so things like "1_000" and "1234_5678" would still be 50 | converted to numbers, but not things like "_1", "2_", and "3__4". (Only allowing 51 | underscores every only every 3 numbers, something checked by ppx_js_style, is possible 52 | too, but it would make the regex more complicated, and isn't even correct; other 53 | cultures/situations don't always put separators as thousands/milli separators.) There's 54 | also a standard of using a double underscore to signify the "decimal point" in an 55 | integer that really represents a fixed point number. Ultimately there's too much gray 56 | area here, so we'll simply accept underscores anywhere like OCaml does. 57 | 58 | [1]: https://www.json.org/json-en.html 59 | *) 60 | 61 | module Assemble_to_json = struct 62 | type intermediate_result = 63 | | Array of Jsonaf.t list 64 | | Object of (string * Jsonaf.t) list 65 | 66 | let to_json = function 67 | | Array a -> `Array a 68 | | Object a -> `Object a 69 | ;; 70 | 71 | let combine acc item = 72 | match acc, item with 73 | | Array a, Array b -> Array (a @ b) 74 | | Object a, Object b -> Object (a @ b) 75 | | _ -> failwith "bug: shouldn't be dealing with both lists and records at once" 76 | ;; 77 | end 78 | 79 | (* regex based on diagram on https://www.json.org/json-en.html, but also with support 80 | for underscores *) 81 | let json_number_with_underscores_re = 82 | Re2.create_exn 83 | ~options:{ Re2.Options.default with never_capture = true } 84 | ("^" 85 | (* start of string *) 86 | ^ "(_*-)?" 87 | (* negative numbers start with '-' *) 88 | ^ "(_*0_*" 89 | (* integer part is either a 0 *) 90 | ^ "|_*[1-9](\\d|_)*)" 91 | (* or 1-9 followed by any digits *) 92 | ^ "(\\._*\\d(\\d|_)*)?" 93 | (* decimal part, '.' then at least one digit, is optional *) 94 | ^ "(_*[eE]_*[-+]?_*\\d(\\d|_)*)?" 95 | (* exponent part, also optional *) 96 | ^ "$" (* end of string *)) 97 | ;; 98 | 99 | let parse_json_value : Sexp.t -> Jsonaf.t = function 100 | | Sexp.Atom s -> 101 | (match Bool.of_string s with 102 | | true -> `True 103 | | false -> `False 104 | | exception _ -> 105 | if Re2.matches json_number_with_underscores_re s 106 | then `Number (String.filter s ~f:(fun c -> not (Char.equal c '_'))) 107 | else `String s) 108 | | Sexp.List [] -> `Null 109 | | Sexp.List _ -> failwith "bug - every value after flatten should be an atom or nil" 110 | ;; 111 | 112 | let one_deeper l = 113 | List.map l ~f:(fun (p, v) -> 114 | match p with 115 | | Sexplib.Path.Pos _ :: p -> p, v 116 | | Rec _ :: p -> p, v 117 | | _ -> assert false) 118 | ;; 119 | 120 | let group_by_path l = 121 | List.group l ~break:(fun (p1, _) (p2, _) -> 122 | not ([%compare.equal: El.t option] (List.hd p1) (List.hd p2))) 123 | ;; 124 | 125 | let rec assemble_to_json (l : (Sexplib.Path.t * Sexp.t) list) = 126 | match group_by_path l with 127 | | [ [ (p, v) ] ] -> assemble_to_json1 p v 128 | | [] -> assert false 129 | | groups -> 130 | let parts = 131 | List.map groups ~f:(fun l -> 132 | match List.hd_exn l with 133 | | Pos _ :: _, _ -> Assemble_to_json.Array [ assemble_to_json (one_deeper l) ] 134 | | Rec n :: _, _ -> Assemble_to_json.Object [ n, assemble_to_json (one_deeper l) ] 135 | | _ -> assert false) 136 | in 137 | List.reduce_balanced_exn parts ~f:Assemble_to_json.combine |> Assemble_to_json.to_json 138 | 139 | and assemble_to_json1 p v : Jsonaf.t = 140 | match p with 141 | | [] -> parse_json_value v 142 | | Pos _ :: p -> `Array [ assemble_to_json1 p v ] 143 | | Rec n :: p -> `Object [ n, assemble_to_json1 p v ] 144 | | Match _ :: _ -> assert false 145 | ;; 146 | 147 | let json_of_sexp sexp = 148 | let parts = Parts.flatten sexp in 149 | assemble_to_json parts 150 | ;; 151 | -------------------------------------------------------------------------------- /sexp_app/src/to_json.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val json_of_sexp : Sexp.t -> Jsonaf.t 4 | -------------------------------------------------------------------------------- /sexp_app/src/utils.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | let simple_query query sexp = Lazy_list.to_list (Semantics.query query sexp) 4 | 5 | let get_fields sexp field = 6 | simple_query (Syntax.Pipe (Syntax.Smash, Syntax.Field field)) sexp 7 | ;; 8 | 9 | module Non_unique_field = struct 10 | type t = 11 | { field : string 12 | ; sexp : Sexp.t 13 | ; matches : Sexp.t list 14 | } 15 | [@@deriving sexp] 16 | end 17 | 18 | let get_one_field sexp field = 19 | let results = get_fields sexp field in 20 | match results with 21 | | [] | _ :: _ :: _ -> 22 | Or_error.error 23 | "non-unique field" 24 | { Non_unique_field.field; sexp; matches = results } 25 | Non_unique_field.sexp_of_t 26 | | [ result ] -> Ok result 27 | ;; 28 | 29 | let sexp_rewrite_aux sexp ~f:visit = 30 | let rec aux sexp = 31 | match visit sexp with 32 | | `Changed sexp' -> Some sexp' 33 | | `Removed -> None 34 | | `Unchanged -> 35 | (match sexp with 36 | | Sexp.Atom _ -> Some sexp 37 | | Sexp.List sexps -> 38 | let sexps' = List.filter_map ~f:aux sexps in 39 | if List.length sexps = List.length sexps' 40 | && List.for_all2_exn ~f:phys_equal sexps sexps' 41 | then Some sexp 42 | else Some (Sexp.List sexps')) 43 | in 44 | match aux sexp with 45 | | None -> Or_error.error "not a record" sexp Fn.id 46 | | Some sexp -> Ok sexp 47 | ;; 48 | 49 | let sexp_rewrite sexp ~f = sexp_rewrite_aux sexp ~f |> Or_error.ok_exn 50 | 51 | let immediate_fields = function 52 | | Sexp.List children -> 53 | List.fold ~init:(Ok []) children ~f:(fun acc child -> 54 | match acc with 55 | | Error _ -> acc 56 | | Ok by_field -> 57 | (match child with 58 | | Sexp.List [ Sexp.Atom field; value ] -> 59 | (match List.Assoc.find by_field ~equal:String.equal field with 60 | | None -> Ok (List.Assoc.add by_field ~equal:String.equal field value) 61 | | Some _ -> Or_error.error "multiple values for field" field String.sexp_of_t) 62 | | _ -> Or_error.error "not a field" child Fn.id)) 63 | |> fun result -> 64 | (* Restore original order *) 65 | Or_error.map result ~f:List.rev 66 | | Sexp.Atom atom -> Or_error.error "not a record" atom String.sexp_of_t 67 | ;; 68 | 69 | let to_record_sexp by_fields = 70 | Sexp.List 71 | (List.map by_fields ~f:(fun (field, value) -> Sexp.List [ Sexp.Atom field; value ])) 72 | ;; 73 | 74 | let replace_immediate_field ~field ~value sexp = 75 | Or_error.map (immediate_fields sexp) ~f:(fun by_field -> 76 | List.Assoc.remove by_field ~equal:String.equal field 77 | |> (fun by_field -> List.Assoc.add by_field ~equal:String.equal field value) 78 | |> to_record_sexp) 79 | ;; 80 | 81 | let replace_field_recursively ~field ~value sexp = 82 | sexp_rewrite_aux sexp ~f:(function 83 | | Sexp.List [ Sexp.Atom f; _ ] when String.equal field f -> 84 | `Changed (Sexp.List [ Sexp.Atom f; value ]) 85 | | _ -> `Unchanged) 86 | ;; 87 | 88 | let replace_field ~field ~value sexp immediate_or_recursive = 89 | match immediate_or_recursive with 90 | | `Immediate -> replace_immediate_field ~field ~value sexp 91 | | `Recursive -> 92 | let%bind.Or_error result = replace_field_recursively ~field ~value sexp in 93 | if Sexp.( = ) result sexp 94 | then Or_error.error "field not found" field String.sexp_of_t 95 | else Ok result 96 | ;; 97 | 98 | let remove_immediate_field ~field sexp = 99 | Or_error.map (immediate_fields sexp) ~f:(fun by_field -> 100 | List.Assoc.remove by_field ~equal:String.equal field |> to_record_sexp) 101 | ;; 102 | 103 | let remove_field_recursively ~field sexp = 104 | sexp_rewrite_aux sexp ~f:(function 105 | | Sexp.List [ Sexp.Atom f; _ ] when String.equal field f -> `Removed 106 | | _ -> `Unchanged) 107 | ;; 108 | 109 | let remove_field ~field sexp immediate_or_recursive = 110 | match immediate_or_recursive with 111 | | `Immediate -> remove_immediate_field ~field sexp 112 | | `Recursive -> remove_field_recursively ~field sexp 113 | ;; 114 | 115 | module%test Utils = struct 116 | let sexp = 117 | Sexp.of_string "((first (a b c)) (second 123) (third ()) (fourth ((foo a) (boo b))))" 118 | ;; 119 | 120 | let%test _ = 121 | [%compare.equal: Sexp.t Or_error.t] 122 | (get_one_field sexp "second") 123 | (Ok (Sexp.Atom "123")) 124 | ;; 125 | 126 | let%test _ = Result.is_error (get_one_field sexp "zoo") 127 | 128 | let%test _ = 129 | [%compare.equal: Sexp.t Or_error.t] (get_one_field sexp "boo") (Ok (Sexp.Atom "b")) 130 | ;; 131 | 132 | let%test _ = Result.is_error (immediate_fields (Sexp.of_string "zoo")) 133 | let%test _ = Result.is_error (immediate_fields (Sexp.of_string "(zoo)")) 134 | let%test _ = Result.is_error (immediate_fields (Sexp.of_string "(zoo boo)")) 135 | let%test _ = Result.is_error (immediate_fields (Sexp.of_string "((good true)(bad))")) 136 | 137 | let%test _ = 138 | [%equal: Sexp.t] 139 | (List.Assoc.find_exn 140 | (Or_error.ok_exn (immediate_fields sexp)) 141 | ~equal:String.equal 142 | "second") 143 | (Atom "123") 144 | ;; 145 | 146 | let%test _ = 147 | [%equal: Sexp.t] 148 | (List.Assoc.find_exn 149 | (Or_error.ok_exn (immediate_fields sexp)) 150 | ~equal:String.equal 151 | "third") 152 | (List []) 153 | ;; 154 | 155 | let%test _ = 156 | [%equal: Sexp.t] 157 | (List.Assoc.find_exn 158 | (Or_error.ok_exn (immediate_fields sexp)) 159 | ~equal:String.equal 160 | "fourth") 161 | (Sexp.of_string "((foo a) (boo b))") 162 | ;; 163 | 164 | let%test _ = 165 | [%equal: Sexp.t] (to_record_sexp (Or_error.ok_exn (immediate_fields sexp))) sexp 166 | ;; 167 | 168 | let%test _ = 169 | let value = Sexp.Atom "my-new-value" in 170 | let sexp = Or_error.ok_exn (replace_field ~field:"second" ~value sexp `Immediate) in 171 | [%equal: Sexp.t] 172 | (List.Assoc.find_exn 173 | (Or_error.ok_exn (immediate_fields sexp)) 174 | ~equal:String.equal 175 | "second") 176 | value 177 | ;; 178 | 179 | let to_alist_exn sexp = Or_error.ok_exn (immediate_fields sexp) 180 | 181 | let ( -@! ) record_sexp field_name = 182 | List.Assoc.find_exn (to_alist_exn record_sexp) ~equal:String.equal field_name 183 | ;; 184 | 185 | let ( -@? ) record_sexp field_name = 186 | List.Assoc.find (to_alist_exn record_sexp) ~equal:String.equal field_name 187 | ;; 188 | 189 | let%test _ = 190 | let value = Sexp.Atom "my-new-value" in 191 | let sexp = Or_error.ok_exn (replace_field ~field:"foo" ~value sexp `Recursive) in 192 | [%equal: Sexp.t] (sexp -@! "fourth" -@! "foo") value 193 | ;; 194 | 195 | let%test "remove_field immediate" = 196 | let sexp = Or_error.ok_exn (remove_field ~field:"second" sexp `Immediate) in 197 | [%equal: Sexp.t option] (sexp -@? "second") None 198 | ;; 199 | 200 | let%test "remove_field recursive" = 201 | let sexp = Or_error.ok_exn (remove_field ~field:"foo" sexp `Recursive) in 202 | [%equal: Sexp.t option] (sexp -@! "fourth" -@? "foo") None 203 | ;; 204 | 205 | let%test "remove_field error" = 206 | let sexp_or_error = remove_field ~field:"foo" (Sexp.of_string "(foo)") `Immediate in 207 | Or_error.is_error sexp_or_error 208 | ;; 209 | end 210 | -------------------------------------------------------------------------------- /sexp_app/src/utils.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val simple_query : Syntax.Query.t -> Sexp.t -> Sexp.t list 4 | 5 | (** [ get_fields sexp field ] searches for field recursively *) 6 | val get_fields : Sexp.t -> string -> Sexp.t list 7 | 8 | (** [ get_one_field sexp field ] searches for field recursively and returns the value, or 9 | error if there is not exactly one result. *) 10 | val get_one_field : Sexp.t -> string -> Sexp.t Or_error.t 11 | 12 | (** [ get_immediate_fields sexp ] returns an association list of field names to field 13 | values, or error if the sexp does not conform to the normal structure of an ocaml 14 | record. *) 15 | val immediate_fields : Sexp.t -> (string, Sexp.t) List.Assoc.t Or_error.t 16 | 17 | (** [ to_record_sexp by_field ] converts an association list of field names to field 18 | values into a record-like sexp. *) 19 | val to_record_sexp : (string, Sexp.t) List.Assoc.t -> Sexp.t 20 | 21 | (** [ sexp_rewrite sexp ~f ] returns the rewritten sexp where f is applied to sexp and its 22 | descendents. *) 23 | val sexp_rewrite : Sexp.t -> f:(Sexp.t -> [ `Unchanged | `Changed of Sexp.t ]) -> Sexp.t 24 | 25 | (** [ replace_field ~field ~value sexp scope ] replaces either the field with the given 26 | value, or returns an error if no match is found in the designated scope. If `Recursive 27 | is chosen, it will replace as many instances as appear, or error if none are found. *) 28 | val replace_field 29 | : field:string 30 | -> value:Sexp.t 31 | -> Sexp.t 32 | -> [ `Immediate | `Recursive ] 33 | -> Sexp.t Or_error.t 34 | 35 | (** [ remove_field ~field sexp scope ] removes the field if it exists in the designated 36 | scope. If `Recursive is chosen, it will replace as many instances as appear. It will 37 | not error if the field is not in scope *) 38 | val remove_field 39 | : field:string 40 | -> Sexp.t 41 | -> [ `Immediate | `Recursive ] 42 | -> Sexp.t Or_error.t 43 | -------------------------------------------------------------------------------- /sexp_app/test/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name sexp_app_test) 3 | (libraries core expect_test_helpers_core sexp_app sexplib) 4 | (preprocess 5 | (pps ppx_jane))) 6 | -------------------------------------------------------------------------------- /sexp_app/test/import.ml: -------------------------------------------------------------------------------- 1 | include Expect_test_helpers_core 2 | include Sexp_app 3 | -------------------------------------------------------------------------------- /sexp_app/test/sexp_app_test.ml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janestreet/sexp/05d436d6c541fd515919d9908768d98b69af04a1/sexp_app/test/sexp_app_test.ml -------------------------------------------------------------------------------- /sexp_app/test/test_parts.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Import 3 | 4 | let forall_sexps here ~f = 5 | require_does_not_raise ~cr:CR_soon ~here (fun () -> 6 | Quickcheck.test 7 | ~sexp_of:[%sexp_of: Sexp.t] 8 | ~shrinker:[%quickcheck.shrinker: Sexp.t] 9 | [%quickcheck.generator: Sexp.t] 10 | ~f) 11 | ;; 12 | 13 | let%expect_test "assemble(flatten(sexp)) = sexp" = 14 | forall_sexps [%here] ~f:(fun sexp -> 15 | [%test_eq: Sexp.t] sexp (Parts.assemble (Parts.flatten sexp))); 16 | [%expect {| |}] 17 | ;; 18 | 19 | let%expect_test "flatten does the same as Path.get" = 20 | forall_sexps [%here] ~f:(fun sexp -> 21 | let parts = Parts.flatten sexp in 22 | List.iter parts ~f:(fun (path, sexp_part) -> 23 | match Sexplib.Path.get ~path sexp with 24 | | gotten_part -> [%test_eq: Sexp.t] sexp_part gotten_part 25 | | exception exn -> Exn.reraisef exn !"invalid path %{Parts.Path}" path ())); 26 | [%expect {| |}] 27 | ;; 28 | 29 | let%expect_test "Path.get respects assembled sexps" = 30 | forall_sexps [%here] ~f:(fun sexp -> 31 | let parts = Parts.flatten sexp in 32 | let assembled = Parts.assemble parts in 33 | List.iter parts ~f:(fun (path, sexp_part) -> 34 | match Sexplib.Path.get ~path assembled with 35 | | gotten_part -> [%test_eq: Sexp.t] sexp_part gotten_part 36 | | exception exn -> Exn.reraisef exn !"invalid path %{Parts.Path}" path ())); 37 | [%expect {| |}] 38 | ;; 39 | -------------------------------------------------------------------------------- /sexp_app/test/test_parts.mli: -------------------------------------------------------------------------------- 1 | (*_ Intentionally blank *) 2 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name sexp_cmds) 3 | (libraries async core angstrom base.composition_infix 4 | core_unix.filename_unix jsonaf lazy_list parsexp re2 sexp_app 5 | sexp_app_pattern sexp_diff sexp_macro sexp_pretty sexp_select sexplib 6 | shell.string_extended csvfields.xml-light) 7 | (preprocessor_deps) 8 | (preprocess 9 | (pps ppx_jane))) 10 | -------------------------------------------------------------------------------- /src/grammar.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Sexp_app 3 | module S = Syntax 4 | 5 | let atom x = S.Template.Atom x 6 | 7 | let qcmds = 8 | let etc = S.Quote (atom "...") in 9 | let t = S.Quote (atom "xxx") in 10 | let r = Re2.create_exn "R" in 11 | let temp n = atom (sprintf "T[%d]" n) in 12 | let sexp = Sexp.Atom "SEXP" in 13 | let num = 999 in 14 | List.map 15 | ~f:S.Query.sexp_of_t 16 | [ S.Index num 17 | ; S.Field "FIELDNAME" 18 | ; S.Each 19 | ; S.Smash 20 | ; S.pipe [ t; t; etc ] 21 | ; S.cat [ t; t; etc ] 22 | ; S.This 23 | ; S.Die 24 | ; S.Atomic 25 | ; S.Variant ("TAG", None) 26 | ; S.Variant ("TAG", Some num) 27 | ; S.Equals (Sexp_app.Sexps.of_list [ sexp ]) 28 | ; S.Equals (Sexp_app.Sexps.of_list [ sexp; sexp; Sexp.Atom "..." ]) 29 | ; S.Regex r 30 | ; S.Test t 31 | ; S.Not t 32 | ; S.and_ [ t; t; etc ] 33 | ; S.or_ [ t; t; etc ] 34 | ; S.If (t, t, t) 35 | ; S.Branch (t, t, t) 36 | ; S.Wrap t 37 | ; S.Quote (temp 0) 38 | ; S.Change S.Id 39 | ; S.Restructure 40 | ] 41 | ;; 42 | 43 | let ccmds = 44 | let t = S.Query S.This in 45 | let etc = S.Query (S.Quote (atom "...")) in 46 | List.map 47 | ~f:S.Change.sexp_of_t 48 | [ S.Rewrite (atom "P", atom "P") 49 | ; S.seq [ t; t; etc ] 50 | ; S.alt [ t; t; etc ] 51 | ; S.Record String.Map.empty 52 | ; S.Id 53 | ; S.Fail 54 | ; S.Delete 55 | ; S.const (Sexp.Atom "SEXP") 56 | ; S.try_ t 57 | ; S.Children t 58 | ; S.Topdown t 59 | ; S.Bottomup t 60 | ; S.Lowercase 61 | ; S.Concat 62 | ; S.Query (S.Quote (atom "xxx")) 63 | ] 64 | ;; 65 | 66 | let cprgm x = S.Change.t_of_sexp (Sexp.of_string x) 67 | let qprgm x = S.Query.t_of_sexp (Sexp.of_string x) 68 | 69 | let munge = 70 | let change = 71 | cprgm 72 | {| 73 | (seq 74 | (topdown (try lowercase)) 75 | (topdown ( 76 | try ( 77 | alt 78 | (rewrite 999 NUM) 79 | (rewrite t[0] T[0]) 80 | (rewrite p P) 81 | (rewrite r R) 82 | (rewrite (record ()) (record (FIELDNAME [OPTIONS] C) ...)) 83 | (rewrite fieldname FIELDNAME) 84 | (rewrite (quote xxx) Q) 85 | (rewrite (quote ...) ...) 86 | (rewrite (query this) C) 87 | (rewrite (query (quote ...)) ...) 88 | (rewrite (rewrite $X sexp) (const SEXP)) 89 | (rewrite ($cmd ($_ $A $B) $C) ($cmd $A $B $C)) 90 | (rewrite (variant $tag ()) (variant $tag)) 91 | (rewrite (variant $tag ($num)) (variant $tag $num)) 92 | (rewrite (change $_) (change C)) 93 | (rewrite (query $_) (query Q)) 94 | (rewrite (alt $C id) (try $C)))))) 95 | |} 96 | in 97 | fun sexp -> 98 | match Semantics.change change sexp with 99 | | None -> assert false 100 | | Some sexp -> sexp 101 | ;; 102 | 103 | let make_lead x ~print_string = 104 | let flag = ref true in 105 | fun () -> 106 | let str = 107 | if !flag 108 | then ( 109 | flag := false; 110 | " " ^ x ^ " ::= ") 111 | else " | " 112 | in 113 | print_string str 114 | ;; 115 | 116 | let grammar_for_readme () = 117 | let buf = Buffer.create 0 in 118 | let print_string s = Buffer.add_string buf s in 119 | let print_endline s = 120 | Buffer.add_string buf s; 121 | Buffer.add_string buf "\n" 122 | in 123 | print_string "=== Grammar summary for query expressions ===\n"; 124 | print_string "See '-grammar' for a more complete grammar.\n"; 125 | print_string "See 'sexp pat-query' for simpler regular-expression-like language.\n\n"; 126 | let lead = make_lead "Q" ~print_string in 127 | List.iter qcmds ~f:(fun cmd -> 128 | lead (); 129 | print_string (Sexp.to_string_hum (munge cmd)); 130 | print_endline ""); 131 | Buffer.contents buf 132 | ;; 133 | 134 | let print () = 135 | (print_string "\n--- grammar for query expressions ---\n\n"; 136 | let lead = make_lead "Q" ~print_string in 137 | List.iter qcmds ~f:(fun cmd -> 138 | lead (); 139 | Sexp.output_hum stdout (munge cmd); 140 | print_endline "")); 141 | (print_string "\n--- grammar for change expressions ---\n\n"; 142 | let lead = make_lead "C" ~print_string in 143 | List.iter ccmds ~f:(fun cmd -> 144 | lead (); 145 | Sexp.output_hum stdout (munge cmd); 146 | print_endline "")); 147 | print_string "\n--- grammar for patterns ---\n\n"; 148 | print_endline " P ::= ATOM"; 149 | print_endline " | (P ... P)"; 150 | print_endline " | $VAR"; 151 | print_endline " | @VAR"; 152 | print_string "\n--- grammar for templates ---\n\n"; 153 | print_endline " T[0] ::= ATOM"; 154 | print_endline " | (T[0] ... T[0])"; 155 | print_endline " | (quote T[1])"; 156 | print_endline " | (unquote Q)"; 157 | print_endline " | (splice Q)"; 158 | print_endline ""; 159 | print_endline " T[n+1] ::= ATOM"; 160 | print_endline " | (T[n+1] ... T[n+1])"; 161 | print_endline " | (quote T[n+2])"; 162 | print_endline " | (unquote T[n])"; 163 | print_endline " | (splice T[n])"; 164 | print_endline "" 165 | ;; 166 | 167 | let commands () = 168 | let munge = Semantics.query (qprgm "(or (index 0) this)") in 169 | List.map qcmds ~f:(fun cmd -> 170 | match Lazy_list.to_list (munge cmd) with 171 | | [ Sexp.Atom x ] -> x 172 | | _ -> failwith ("error: " ^ Sexp.to_string cmd)) 173 | ;; 174 | -------------------------------------------------------------------------------- /src/grammar.mli: -------------------------------------------------------------------------------- 1 | val commands : unit -> string list 2 | val grammar_for_readme : unit -> string 3 | val print : unit -> unit 4 | -------------------------------------------------------------------------------- /src/key_extractor.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (* This module exposes a [Key_extractor.t] type that can be used to extract 4 | values from a [Sexp.t] using one of a predefined set of common access patterns. 5 | This is used for extracting sort keys for sexp sort. 6 | 7 | The access patterns match the capabilities of the various filtering commands: 8 | - sexp query 9 | - sexp pat-query 10 | - sexp get 11 | - sexp select 12 | 13 | That is, given a string you could pass to one of the above commands, you 14 | can build a [Key_extractor.t] that can then be used to create a function that 15 | will return the part of a [Sexp.t] that the filter command would extract. 16 | 17 | (The field and index extractors are special cases of the common sexp query 18 | commands "(field )" and "(index )".) 19 | 20 | Some of the filter patterns can extract multiple values (e.g., a sexp select 21 | query could return fields with the same name nested at different levels of the 22 | sexp). The expectation is that each input [Sexp.t] has a single key, so an 23 | error will be returned if the extraction results in multiple values (or no 24 | values!). 25 | 26 | Key extractors are designed to be specified from the command line, so we also 27 | take in a flag name that can used to provide errors with a little more context. 28 | 29 | When building the extract function, we also provide a transformation function 30 | to perform additional transformations on just the extracted part of the key. 31 | *) 32 | 33 | type t 34 | 35 | (* An extractor that returns the entire input [Sexp.t]. *) 36 | val identity_extractor : t 37 | 38 | (* Build extractors from command line arguments that are the equivalent to running 39 | sexp query "(field )" on the input [Sexp.t]. *) 40 | val field_param : ?flag:string -> doc:string -> unit -> t list option Command.Param.t 41 | 42 | (* Build extractors from command line arguments that are the equivalent to running 43 | sexp query "(index )" on the input [Sexp.t]. *) 44 | val index_param : ?flag:string -> doc:string -> unit -> t list option Command.Param.t 45 | 46 | (* Build extractors from command line arguments that are the equivalent to running 47 | sexp query on the input [Sexp.t]. *) 48 | val query_param : ?flag:string -> doc:string -> unit -> t list option Command.Param.t 49 | 50 | (* Build extractors from command line arguments that are the equivalent to running 51 | sexp pat-query on the input [Sexp.t]. *) 52 | val pat_query_param : ?flag:string -> doc:string -> unit -> t list option Command.Param.t 53 | 54 | (* Build extractors from command line arguments that are the equivalent to running 55 | sexp get on the input [Sexp.t]. *) 56 | val get_param : ?flag:string -> doc:string -> unit -> t list option Command.Param.t 57 | 58 | (* Build extractors from command line arguments that are the equivalent to running 59 | sexp select on the input [Sexp.t]. *) 60 | val select_param : ?flag:string -> doc:string -> unit -> t list option Command.Param.t 61 | 62 | type _ modifiers_handler = 63 | | Not_supported : t modifiers_handler 64 | | Map : (string list option -> flag_and_arg:string -> 'a) -> (t * 'a) modifiers_handler 65 | 66 | (* Build extractors from command line arguments of the form ":" 67 | where: 68 | - is one of (field|index|query|pat-query|get|select) 69 | - are optional strings whose meaning is defined by the caller. 70 | The and each modifier should be separated by a '/'. 71 | - is the arg you would pass to _extractor. 72 | 73 | If you don't want to support modifiers, pass in [modifiers:Not_supported], and 74 | you will get back a list of extractors. 75 | 76 | If you do want to support modifiers, pass in [modifiers:(Map some_fn)], where 77 | some_fn is passed a [string list option] of modifiers. You will get back a list 78 | of extractors paired with the return value of some_fn. 79 | 80 | For example, building a general extractor from "query/mod_a/mod_b:(field foo)" will 81 | be equivalent to building a query extractor from the string "(field foo)" and it 82 | will return ["mod_a"; "mod_b"] as the modifiers. *) 83 | val general_param 84 | : ?flag:string 85 | -> doc:string 86 | -> modifiers:'a modifiers_handler 87 | -> unit 88 | -> 'a list option Command.Param.t 89 | 90 | module Extraction_error : sig 91 | type t = 92 | | Missing_key 93 | | Multiple_keys 94 | end 95 | 96 | (* Build a function that will extract out part of the input [Sexp.t] that is specified 97 | by the key extractor. It will error if nothing is extracted from the input [Sexp.t], 98 | or if multiple values are extracted. The [string] paired with the [Extraction_error.t] 99 | in the [Error] case can be used in an error message to indicate which flag on the 100 | command line was used when getting this value. *) 101 | val extract_or_error_fn 102 | : t 103 | -> (Sexp.t -> (Sexp.t, Extraction_error.t * string) result) Staged.t 104 | 105 | (* A helper for producing nicer error message; returns a string like 106 | ' specified by -field foo' to clarify which key lookup failed if there are 107 | multiple keys specified. *) 108 | val extractor_source : t -> string 109 | 110 | (* A helper for producing nicer error messages; combines flag name and a command line 111 | argument into a double quoted string. *) 112 | val quoted_flag_and_arg : flag:string -> arg:string -> string 113 | -------------------------------------------------------------------------------- /src/located.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | module X = Lazy_list 3 | 4 | let of_list (xs : 'a list) : 'a X.t = X.of_list xs 5 | 6 | type 'a t = 7 | | Stdin of string option * 'a 8 | | Files of (string * 'a) X.t 9 | 10 | let map t ~f = 11 | match t with 12 | | Stdin (label, x) -> Stdin (label, f x) 13 | | Files fs -> Files (X.map fs ~f:(fun (label, x) -> label, f x)) 14 | ;; 15 | 16 | let iter t ~f = 17 | match t with 18 | | Stdin (label, x) -> 19 | let label = Option.value ~default:"stdin" label in 20 | f label x 21 | | Files fs -> X.iter fs ~f:(fun (label, x) -> f label x) 22 | ;; 23 | 24 | let stdin label_opt = Stdin (label_opt, ()) 25 | let files fs = Files (X.map (of_list fs) ~f:(fun f -> f, ())) 26 | 27 | let channels = function 28 | | Stdin (label, ()) -> Stdin (label, In_channel.stdin) 29 | | Files fs -> 30 | let f (fname, ()) = fname, In_channel.create fname in 31 | Files (X.map fs ~f) 32 | ;; 33 | -------------------------------------------------------------------------------- /src/located.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (* container type where each element is associated with some labeled input source *) 4 | 5 | type 'a t 6 | 7 | (* [stdin label_opt] indicates standard input as a labeled sources *) 8 | 9 | val stdin : string option -> unit t 10 | 11 | (* [files [f1; f2; ... ;fn]] 12 | indicates files [f1] ... [f2] as sources labeled with their own filenames 13 | *) 14 | 15 | val files : string list -> unit t 16 | 17 | (* open all associated input channels *) 18 | 19 | val channels : unit t -> In_channel.t t 20 | 21 | (* map across the container *) 22 | 23 | val map : 'a t -> f:('a -> 'b) -> 'b t 24 | 25 | (* iterate across elements together with their associated source labels *) 26 | 27 | val iter : 'a t -> f:(string -> 'a -> unit) -> unit 28 | -------------------------------------------------------------------------------- /src/main_atom.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Escape = struct 5 | module Kind = struct 6 | type t = 7 | | Exactly_stdin 8 | | Strip_trailing_newlines 9 | | Lines 10 | end 11 | 12 | let command = 13 | Command.async 14 | ~summary:"Convert stdin into a sexp atom" 15 | [%map_open.Command 16 | let kind = 17 | choose_one 18 | ~if_nothing_chosen:(Default_to Kind.Exactly_stdin) 19 | [ flag 20 | "-strip-trailing-newlines" 21 | (no_arg_some Kind.Strip_trailing_newlines) 22 | ~doc:"ignore any trailing newlines." 23 | ; flag 24 | "-lines" 25 | (no_arg_some Kind.Lines) 26 | ~doc: 27 | "generate and output one atom per line of input. This ignores a single \ 28 | trailing newline." 29 | ] 30 | in 31 | fun () -> 32 | let stdin = Lazy.force Reader.stdin in 33 | let singleton = Deferred.map ~f:Pipe.singleton in 34 | let%bind strings = 35 | match kind with 36 | | Exactly_stdin -> Reader.contents stdin |> singleton 37 | | Strip_trailing_newlines -> 38 | Reader.contents stdin 39 | |> Deferred.map ~f:(fun s -> String.rstrip s ~drop:(Char.( = ) '\n')) 40 | |> singleton 41 | | Lines -> Reader.lines stdin |> return 42 | in 43 | Pipe.iter_without_pushback strings ~f:(fun str -> 44 | str |> Sexp.Atom |> Sexp.to_string |> print_endline)] 45 | ~behave_nicely_in_pipeline:false 46 | ;; 47 | end 48 | 49 | module Escape_command = struct 50 | let command = 51 | Command.async 52 | ~summary:"convert an argument list to a sequence of sexp atoms" 53 | ~readme:(fun () -> 54 | "This command is useful when you need to call an OCaml program from bash,\n\ 55 | where OCaml needs a list of strings as a sexp, while the bash program has it\n\ 56 | available in an array.\n\n\ 57 | For example, you can write the argument list to a sexp config file like this:\n\n\ 58 | \ run () {\n\ 59 | \ extra_args=$(sexp atom escape-command -- \"$@\")\n\ 60 | \ echo \"((some_arg 1)(extra_args ($extra_args)))\" >config.sexp\n\ 61 | \ run.exe -config config.sexp\n\ 62 | \ }\n") 63 | [%map_open.Command 64 | let args = 65 | flag "--" ~doc:"args the command line to convert to a sexp list" escape 66 | in 67 | fun () -> 68 | (match args with 69 | | None -> failwith "no args given to sexp unescape-command" 70 | | Some args -> 71 | List.iter args ~f:(fun str -> 72 | str |> Sexp.Atom |> Sexp.to_string |> print_endline)); 73 | return ()] 74 | ~behave_nicely_in_pipeline:false 75 | ;; 76 | end 77 | 78 | module Unescape = struct 79 | let command = 80 | let parse s = Scanf.unescaped s in 81 | let unparse c = String.escaped (String.of_char c) in 82 | Command.async 83 | ~summary:"Print unescaped atoms from stdin" 84 | [%map_open.Command 85 | let () = return () 86 | and sep = 87 | flag_optional_with_default_doc 88 | "-sep" 89 | string 90 | [%sexp_of: string] 91 | ~default:"\n" 92 | ~doc: 93 | [%string 94 | "STR separator/terminator to use instead of line breaks. The syntax is \ 95 | the same as for string literals in OCaml. (e.g. \"%{unparse '\n\ 96 | '}\", \"%{unparse '\000'}\")"] 97 | in 98 | fun () -> 99 | let sep = parse sep in 100 | Lazy.force Reader.stdin 101 | |> Reader.read_sexps 102 | |> Pipe.iter_without_pushback ~f:(function 103 | | Atom atom -> printf "%s%s" atom sep 104 | | List _ as input -> 105 | Core.eprint_s [%message "Non-atom input" (input : Sexp.t)])] 106 | ~behave_nicely_in_pipeline:false 107 | ;; 108 | end 109 | 110 | module Unescape_command = struct 111 | module Input = struct 112 | type t = 113 | | Sexp_lists 114 | | Atoms 115 | 116 | let param = 117 | let open Command.Param in 118 | choose_one 119 | ~if_nothing_chosen:(Default_to Atoms) 120 | [ flag 121 | "-sexp-lists" 122 | (no_arg_some Sexp_lists) 123 | ~doc: 124 | " expect the input to be a sequence of sexp lists, one per command line. \ 125 | The command lines will be printed newline-separated." 126 | ; flag 127 | "-atoms" 128 | (no_arg_some Atoms) 129 | ~doc: 130 | " expect the input to be a sequence of atoms that comprise a single \ 131 | command line. This is the default." 132 | ] 133 | ;; 134 | end 135 | 136 | let command = 137 | Command.async 138 | ~summary:"convert command line(s) specified as sexps to shell syntax" 139 | ~readme:(fun () -> 140 | "This command is useful when you have an argument list written in sexp\n\ 141 | format and you want to use it as a part of a command line in bash.\n\ 142 | This avoids the quoting issues that can otherwise be tricky to deal with.\n\ 143 | For example:\n\n\ 144 | \ args='-arg1\"foo\"-arg2\"bar\"'\n\ 145 | \ cmd=\"command.exe $(printf \"%s\" \"$args\" | sexp atom unescape-command)\"\n\ 146 | \ bash -c \"$cmd\"") 147 | [%map_open.Command 148 | let input = Input.param in 149 | fun () -> 150 | let input_pipe = Lazy.force Reader.stdin |> Reader.read_sexps in 151 | match input with 152 | | Sexp_lists -> 153 | Pipe.iter_without_pushback input_pipe ~f:(function 154 | | Sexp.Atom _ -> 155 | raise_s 156 | [%sexp 157 | "Atom encountered where a command line encoded as a sexp list was \ 158 | expected."] 159 | | List l -> 160 | let atoms = List.map l ~f:[%of_sexp: string] in 161 | print_endline (String.concat ~sep:" " (List.map ~f:Sys.quote atoms))) 162 | | Atoms -> 163 | let first = ref true in 164 | let%map () = 165 | Pipe.iter_without_pushback input_pipe ~f:(fun atom -> 166 | if not !first then print_string " "; 167 | first := false; 168 | print_string (Sys.quote ([%of_sexp: string] atom))) 169 | in 170 | print_endline ""] 171 | ~behave_nicely_in_pipeline:false 172 | ;; 173 | end 174 | 175 | let command = 176 | Command.group 177 | ~summary:"Escape/unscape sexp atoms" 178 | [ "escape", Escape.command 179 | ; "unescape", Unescape.command 180 | ; "escape-command", Escape_command.command 181 | ; "unescape-command", Unescape_command.command 182 | ] 183 | ;; 184 | -------------------------------------------------------------------------------- /src/main_atom.mli: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/main_diff.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | let load file ~expand_macros ~multiple_sexps_in_each_file = 5 | if expand_macros 6 | then Sexp_macro.Blocking.load_sexp file |> return 7 | else if multiple_sexps_in_each_file 8 | then Reader.file_contents file >>| sprintf "( %s )" >>| Sexp.of_string 9 | else Reader.file_contents file >>| Sexp.of_string 10 | ;; 11 | 12 | let expand_macros_flag = 13 | let open Command.Param in 14 | flag "expand-macros" no_arg ~doc:" expand macros in input files" 15 | ;; 16 | 17 | let multiple_sexps_in_each_file_flag = 18 | let open Command.Param in 19 | flag 20 | "multiple-sexps-in-each-file" 21 | no_arg 22 | ~doc:" multiple sexps in each input file (incompatible with -expand-macros)" 23 | ;; 24 | 25 | module Display_function = struct 26 | type t = 27 | { display_options : Sexp_diff.Display.Display_options.t 28 | ; display_as_plain_string : bool 29 | } 30 | 31 | let create ?(display_as_plain_string = false) ?collapse_threshold ?num_shown () = 32 | let display_options = 33 | Sexp_diff.Display.Display_options.create ?collapse_threshold ?num_shown Two_column 34 | in 35 | { display_options; display_as_plain_string } 36 | ;; 37 | 38 | let flags = 39 | let%map_open.Command () = return () 40 | and display_as_plain_string = 41 | flag "plain" no_arg ~doc:" display as plain string instead of ANSI colors" 42 | and collapse_threshold = 43 | flag 44 | "collapse-threshold" 45 | (optional int) 46 | ~doc:"INT set [collapse_threshold] in Display_options (default: 10)" 47 | and num_shown = 48 | flag 49 | "num-shown" 50 | (optional int) 51 | ~doc:"INT set [num_shown] in Display_options (default: 3)" 52 | in 53 | match display_as_plain_string, collapse_threshold, num_shown with 54 | | false, None, None -> None 55 | | _ -> Some (create ~display_as_plain_string ?collapse_threshold ?num_shown ()) 56 | ;; 57 | 58 | let run { display_options; display_as_plain_string } diff = 59 | let display = 60 | if display_as_plain_string 61 | then Sexp_diff.Display.display_as_plain_string 62 | else Sexp_diff.Display.display_with_ansi_colors 63 | in 64 | display display_options diff |> Core.print_endline 65 | ;; 66 | end 67 | 68 | module Diff_mode = struct 69 | type t = 70 | | Emit_patch 71 | | Emit_diff of Display_function.t 72 | | Quiet 73 | 74 | let flags = 75 | let open Command.Param in 76 | choose_one 77 | ~if_nothing_chosen:(Default_to (Emit_diff (Display_function.create ()))) 78 | [ flag 79 | "for-patch" 80 | no_arg 81 | ~doc:" print the sexp representation of the diff for use with \"sexp patch\"" 82 | |> map ~f:(fun x -> Option.some_if x Emit_patch) 83 | ; flag "quiet" no_arg ~doc:" print nothing. run only for the exit code" 84 | |> map ~f:(fun x -> Option.some_if x Quiet) 85 | ; Display_function.flags |> map ~f:(Option.map ~f:(fun x -> Emit_diff x)) 86 | ] 87 | ;; 88 | end 89 | 90 | let diff_command = 91 | Command.async 92 | ~summary:"print the diff of two sexp files (expanding macros)" 93 | ~readme:(fun () -> 94 | {| 95 | Whenever there is a sequence of [collapse_threshold] or more unchanged lines, we collapse 96 | them and only show the first [num_shown] and the last [num_shown] of these lines. 97 | 98 | Like the unix diff utility, this command exits non-zero (with exit code 2) when its inputs 99 | are different, so it can be used as a sexp equality test with the -quiet flag. Passing 100 | the -for-patch flag disables this behavior. 101 | 102 | It also exists non-zero (with exit code 1) if either sexp is malformed. 103 | |}) 104 | (let%map_open.Command file1, file2 = 105 | anon (t2 ("FILE1" %: Filename_unix.arg_type) ("FILE2" %: Filename_unix.arg_type)) 106 | and mode = Diff_mode.flags 107 | and expand_macros = expand_macros_flag 108 | and multiple_sexps_in_each_file = multiple_sexps_in_each_file_flag in 109 | fun () -> 110 | if expand_macros && multiple_sexps_in_each_file 111 | then failwith "Incompatible flags -expand-macros and -multiple-sexps-in-each-file"; 112 | let%bind original = load file1 ~expand_macros ~multiple_sexps_in_each_file in 113 | let%bind updated = load file2 ~expand_macros ~multiple_sexps_in_each_file in 114 | let diff = Sexp_diff.Algo.diff ~original ~updated () in 115 | (* emit output *) 116 | (match mode with 117 | | Quiet -> () 118 | | Emit_patch -> Sexp_diff.Diff.sexp_of_t diff |> print_s 119 | | Emit_diff display_fun -> Display_function.run display_fun diff); 120 | (* similar to "diff", we exit non-zero if the files are different *) 121 | match (diff : Sexp_diff.Diff.t) with 122 | | Same _ -> return () 123 | | Add _ | Delete _ | Replace _ | Enclose _ -> 124 | (match mode with 125 | | Emit_patch -> 126 | (* In this case, we're running for the patch output, not the comparison *) 127 | return () 128 | | Emit_diff _ | Quiet -> exit 2)) 129 | ~behave_nicely_in_pipeline:false 130 | ;; 131 | 132 | let patch_command = 133 | Command.async 134 | ~summary:"apply a diff to a sexp file (expanding macros)" 135 | ~readme:(fun () -> 136 | {| 137 | The resulting sexp is printed to stdout. 138 | 139 | DIFF-FILE should have the same format as that produced by [sexp diff -for-patch]. 140 | |}) 141 | (let%map_open.Command diff_file, file = 142 | anon 143 | (t2 144 | ("DIFF-FILE" %: Filename_unix.arg_type) 145 | (maybe_with_default "/dev/stdin" ("FILE" %: Filename_unix.arg_type))) 146 | and expand_macros = expand_macros_flag in 147 | fun () -> 148 | let%bind diff = 149 | load diff_file ~expand_macros ~multiple_sexps_in_each_file:false 150 | >>| Sexp_diff.Diff.t_of_sexp 151 | in 152 | let%map file = load file ~expand_macros ~multiple_sexps_in_each_file:false in 153 | Sexp_diff.Diff.apply_exn diff file |> print_s) 154 | ~behave_nicely_in_pipeline:false 155 | ;; 156 | -------------------------------------------------------------------------------- /src/main_diff.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val diff_command : Command.t 4 | val patch_command : Command.t 5 | -------------------------------------------------------------------------------- /src/main_fzf.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | 4 | let main ~filename = 5 | let fzf = 6 | let fzf_binary = "fzf" in 7 | let fzf_arguments = 8 | [ "--multi" 9 | ; "--track" 10 | ; "--no-sort" 11 | ; "--reverse" 12 | ; "--exact" 13 | ; "--ansi" 14 | ; "--preview-window down:70%" 15 | ; "--preview-label-pos bottom" 16 | ; "--preview-label 'ctrl+l less | ctrl+alt+c copy | shift+up top | shift+down \ 17 | bottom'" 18 | ; {| --preview "printf '%s' {} | sexp pp -color -i" |} 19 | ; {| --bind "ctrl-l:execute(printf '%s' {} | sexp pp -color -i | less -R)" |} 20 | ; {| --bind "ctrl-alt-c:execute-silent(printf '%s' {} | sexp pp -i | xclip -selection clipboard)" |} 21 | ; {| --bind "shift-up:first" |} 22 | ; {| --bind "shift-down:last" |} 23 | ] 24 | in 25 | [%string {| %{fzf_binary} %{String.concat fzf_arguments ~sep:" "} |}] 26 | in 27 | Sys.command [%string {| cat %{Sys.quote filename} | %{fzf} |}] >>= exit 28 | ;; 29 | 30 | let command = 31 | Command.async_or_error 32 | ~summary:"select one or more inputs sexps with [fzf --multi]" 33 | [%map_open.Command 34 | let filename = 35 | anon (maybe_with_default "-" ("FILENAME" %: Filename_unix.arg_type)) 36 | in 37 | fun () -> main ~filename] 38 | ~behave_nicely_in_pipeline:false 39 | ;; 40 | -------------------------------------------------------------------------------- /src/main_fzf.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/main_get.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | module Sexp_path = Sexplib.Path 4 | 5 | let main ~ignore_errors ~sexp_paths () = 6 | Reader.read_sexps (Lazy.force Reader.stdin) 7 | |> Pipe.iter_without_pushback ~f:(fun sexp -> 8 | let strings = 9 | List.map (String.split sexp_paths ~on:',') ~f:(fun sexp_path -> 10 | let ignore_errors, sexp_path = 11 | if String.(slice sexp_path (-1) 0 = "?") 12 | then true, String.slice sexp_path 0 (-1) 13 | else ignore_errors, sexp_path 14 | in 15 | try 16 | let path = Sexp_path.parse sexp_path in 17 | let res = Sexp_path.get ~path sexp in 18 | match res with 19 | | Sexp.Atom s -> s 20 | | Sexp.List l -> 21 | (try 22 | let res = List.map ~f:Sexp.to_string_mach l in 23 | String.concat ~sep:" " res 24 | with 25 | | _ -> Sexp.to_string_hum res) 26 | with 27 | | exn -> if ignore_errors then "" else raise exn) 28 | in 29 | print_endline (String.concat ~sep:"," strings)) 30 | ;; 31 | 32 | let command = 33 | let readme () = 34 | String.strip 35 | {| 36 | See also the query and select subcommands. 37 | 38 | Path syntax: 39 | 40 | "." -> separates path elements; must be present at start of string. 41 | "[N]" -> specifies the Nth element in a tuple. 42 | "some_tag[N]" -> tries to match [some_tag], then denotes its Nth argument. 43 | "name" -> denotes record field having [name]. 44 | "...?" -> ignore errors for this path only. 45 | 46 | Examples for sexp "((a 23) (b 24) (c (2 3 5)))": 47 | 48 | ".a" -> "23" 49 | ".a,.b" -> "23,24" 50 | ".c" -> "2 3 5" 51 | ".[1]" -> "b 24" 52 | ".[2]" -> "c (2 3 5)" 53 | ".[2].c[0]" -> "2 3 5" 54 | ".c.[1]" -> "3" 55 | |} 56 | in 57 | Command.async 58 | ~summary:"extract parts of an s-expression" 59 | ~readme 60 | (let%map_open.Command ignore_errors = 61 | flag "ignore-errors" no_arg ~doc:" ignore errors and print whatever is found" 62 | and sexp_paths = anon ("\"SEXP-PATH(,SEXP-PATH)*\"" %: string) in 63 | fun () -> main ~ignore_errors ~sexp_paths ()) 64 | ~behave_nicely_in_pipeline:false 65 | ;; 66 | -------------------------------------------------------------------------------- /src/main_get.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/main_group.mli: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/main_json.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Sexplib 3 | open Composition_infix 4 | 5 | let to_json ~in_channel () = 6 | match 7 | let%map.List sexp = Sexp.input_sexps in_channel in 8 | let json = Sexp_app.To_json.json_of_sexp sexp in 9 | print_endline (Jsonaf.to_string_hum json) 10 | with 11 | | (_ : unit list) -> () 12 | | exception End_of_file -> () 13 | ;; 14 | 15 | let of_json ~in_channel ~machine = 16 | let module Conv = Sexplib.Conv in 17 | let rec convert : Jsonaf.t -> Sexp.t = function 18 | | `Null -> Conv.sexp_of_unit () 19 | | `True -> Conv.sexp_of_bool true 20 | | `False -> Conv.sexp_of_bool false 21 | | `Number s -> Conv.sexp_of_string s 22 | | `String s -> Conv.sexp_of_string s 23 | | `Array json_list -> Conv.sexp_of_list convert json_list 24 | | `Object alist -> 25 | let sexp_of_item = Conv.sexp_of_pair Conv.sexp_of_string convert in 26 | Conv.sexp_of_list sexp_of_item alist 27 | in 28 | let parser_state = 29 | (* Print as we go so the incremental work can be gc'd *) 30 | Angstrom.lift (convert >> print_s ?mach:(Option.some_if machine ())) Jsonaf.Parser.t 31 | |> Angstrom.many 32 | |> Angstrom.map ~f:(fun (_ : unit list) -> ()) 33 | |> Angstrom.Buffered.parse 34 | in 35 | let parser_state = 36 | In_channel.fold_lines ~init:parser_state in_channel ~f:(fun parser_state line -> 37 | Angstrom.Buffered.feed parser_state (`String line)) 38 | in 39 | let parser_state = Angstrom.Buffered.feed parser_state `Eof in 40 | match Angstrom.Buffered.state_to_result parser_state with 41 | | Error s -> eprint_s [%message "Got an error parsing the json" s] 42 | | Ok () -> () 43 | ;; 44 | 45 | let to_json_command = 46 | Command.basic 47 | ~summary:"Convert sexps on stdin or in a file to json." 48 | (let%map_open.Command in_channel = Shared_params.channel_stdin_or_anon_file in 49 | fun () -> to_json ~in_channel ()) 50 | ;; 51 | 52 | let of_json_command = 53 | Command.basic 54 | ~summary:"Convert json on stdin or in a file to sexps." 55 | (let%map_open.Command machine = Shared_params.machine 56 | and in_channel = Shared_params.channel_stdin_or_anon_file in 57 | fun () -> of_json ~in_channel ~machine) 58 | ;; 59 | -------------------------------------------------------------------------------- /src/main_json.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val of_json_command : Command.t 4 | val to_json_command : Command.t 5 | -------------------------------------------------------------------------------- /src/main_of_xml.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | module Main = struct 4 | open Xml_light 5 | 6 | type attributes = (string * string) list [@@deriving sexp] 7 | 8 | type xml = Xml.xml = 9 | | Element of (string * attributes * xml list) 10 | | PCData of string 11 | 12 | type sexp = Sexp.t = 13 | | Atom of string 14 | | List of sexp list 15 | 16 | module type S = sig 17 | val sexp_of_xml : xml -> sexp 18 | end 19 | 20 | (* direct style *) 21 | module _ : S = struct 22 | let rec sexp_of_xml = function 23 | | PCData x -> Atom x 24 | | Element (tag, attrs, children) -> 25 | let children = List.map ~f:sexp_of_xml children in 26 | List [ Atom tag; sexp_of_attributes attrs; List children ] 27 | ;; 28 | end 29 | 30 | (* continuation-passing style *) 31 | module _ : S = struct 32 | let rec sexp_of_xml xml k = 33 | match xml with 34 | | PCData x -> k (Atom x) 35 | | Element (tag, attrs, children) -> 36 | sexps_of_xmls children (fun children -> 37 | k (List [ Atom tag; sexp_of_attributes attrs; List children ])) 38 | 39 | and sexps_of_xmls xmls k = 40 | match xmls with 41 | | [] -> k [] 42 | | xml :: xmls -> 43 | sexp_of_xml xml (fun sexp -> sexps_of_xmls xmls (fun sexps -> k (sexp :: sexps))) 44 | ;; 45 | 46 | (** shadows auxilliary *) 47 | let sexp_of_xml xml = sexp_of_xml xml Fn.id 48 | end 49 | 50 | (* defunctionalized continuation-passing style *) 51 | module V3 : S = struct 52 | type cont = 53 | | Top 54 | | Foo of cont2 * xml list 55 | 56 | and cont2 = 57 | | Bar of cont2 * sexp 58 | | Quz of cont * string * sexp 59 | 60 | let rec sexp_of_xml xml k = 61 | match xml with 62 | | PCData x -> apply k (Atom x) 63 | | Element (tag, attrs, children) -> 64 | sexps_of_xmls children (Quz (k, tag, sexp_of_attributes attrs)) 65 | 66 | and sexps_of_xmls xmls k = 67 | match xmls with 68 | | [] -> apply2 k [] 69 | | xml :: xmls -> sexp_of_xml xml (Foo (k, xmls)) 70 | 71 | and apply k sexp = 72 | match k with 73 | | Top -> sexp 74 | | Foo (k, xmls) -> sexps_of_xmls xmls (Bar (k, sexp)) 75 | 76 | and apply2 k sexps = 77 | match k with 78 | | Bar (k, sexp) -> apply2 k (sexp :: sexps) 79 | | Quz (k, tag, attrs) -> apply k (List [ Atom tag; attrs; List sexps ]) 80 | ;; 81 | 82 | let sexp_of_xml xml = sexp_of_xml xml Top 83 | 84 | (* shadows auxilliary *) 85 | end 86 | 87 | let string_of_sexp ~machine = 88 | if machine then Sexp.to_string else fun x -> Sexp.to_string_hum x 89 | ;; 90 | 91 | let main ~machine = 92 | try 93 | print_endline 94 | (string_of_sexp ~machine (V3.sexp_of_xml (Xml.parse_in In_channel.stdin))) 95 | with 96 | | Xml_light.Xml.Error xml_err -> 97 | eprintf "ERROR: %s\n" (Xml_light.Xml.error xml_err); 98 | exit 1 99 | | e -> raise e 100 | ;; 101 | end 102 | 103 | let command = 104 | Command.basic 105 | ~summary:"convert XML from stdin into an s-expression" 106 | (let%map_open.Command machine = Shared_params.machine in 107 | fun () -> Main.main ~machine) 108 | ;; 109 | -------------------------------------------------------------------------------- /src/main_of_xml.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/main_parts.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Sexp_app 3 | 4 | let rec flatten () = 5 | match 6 | try Some (Sexp.input_sexp In_channel.stdin) with 7 | | End_of_file -> None 8 | with 9 | | None -> () 10 | | Some sexp -> 11 | Parts.output (Parts.flatten sexp) stdout; 12 | flatten () 13 | ;; 14 | 15 | let rec assemble () = 16 | let flattened = Parts.input In_channel.stdin in 17 | if List.is_empty flattened 18 | then () 19 | else ( 20 | Sexp.output_mach stdout (Parts.assemble flattened); 21 | print_endline ""; 22 | assemble ()) 23 | ;; 24 | 25 | let flatten_command = 26 | Command.basic 27 | ~summary:"Flatten a list of sexp into its parts. Each part on its own line." 28 | (let%map_open.Command () = return () in 29 | fun () -> flatten ()) 30 | ;; 31 | 32 | let assemble_command = 33 | Command.basic 34 | ~summary: 35 | "Assemble a lists of parts into sexps. Sexp part lists are separated by newlines." 36 | (let%map_open.Command () = return () in 37 | fun () -> assemble ()) 38 | ;; 39 | -------------------------------------------------------------------------------- /src/main_parts.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Async 3 | 4 | val flatten_command : Command.t 5 | val assemble_command : Command.t 6 | -------------------------------------------------------------------------------- /src/main_pattern.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val pat_query_command : Command.t 4 | val pat_change_command : Command.t 5 | -------------------------------------------------------------------------------- /src/main_pp.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (* Options are provided for parameters which are likely to change a lot and are likely to 4 | vary depending on the file that is being processed. 5 | *) 6 | let command ?alias_for () = 7 | Command.basic 8 | ~summary: 9 | (match alias_for with 10 | | None -> 11 | "Pretty print S expressions in a human-friendly way, either from stdin or a \ 12 | file." 13 | | Some other_command -> [%string "Alias for %{other_command}"]) 14 | ~readme:(fun () -> 15 | "Use pre-defined styles or load a custom style from a file." 16 | ^ "\nYou can use -p to print out one of the predefined styles and customize it.") 17 | (let%map_open.Command config_file = 18 | flag 19 | "-c" 20 | (optional Filename_unix.arg_type) 21 | ~doc:"file use custom configuration file" 22 | and color = 23 | flag 24 | "-color" 25 | no_arg 26 | ~doc: 27 | (" enable colors. By default, colors are disabled " 28 | ^ "even if they are set in the configuration file") 29 | and interpret_atom_as_sexp = flag "-i" no_arg ~doc:" try to interpret atoms as sexps" 30 | and drop_comments = flag "-drop-comments" no_arg ~doc:" drop comments" 31 | and new_line_separator = 32 | flag "-s" (optional bool) ~doc:"bool separate sexps with an empty line" 33 | and print_settings = flag "-p" no_arg ~doc:" print the settings in colorless format" 34 | and in_channel = Shared_params.channel_stdin_or_anon_file in 35 | fun () -> 36 | let config = 37 | match config_file with 38 | | Some path -> Sexp.load_sexp_conv_exn path Sexp_pretty.Config.t_of_sexp 39 | | None -> Sexp_pretty.Config.default 40 | in 41 | let config = 42 | let color = color || print_settings in 43 | Sexp_pretty.Config.update 44 | config 45 | ~interpret_atom_as_sexp 46 | ~drop_comments 47 | ~color 48 | ?new_line_separator 49 | in 50 | if print_settings 51 | then ( 52 | let config_for_output = 53 | { config with 54 | atom_coloring = Color_none 55 | ; paren_coloring = false 56 | ; atom_printing = Escaped 57 | } 58 | in 59 | let fmt = Format.formatter_of_out_channel Stdlib.stdout in 60 | let sexp = 61 | Sexp_pretty.sexp_to_sexp_or_comment 62 | config_for_output 63 | (Sexp_pretty.Config.sexp_of_t config) 64 | in 65 | Sexp_pretty.Sexp_with_layout.pp_formatter config_for_output fmt sexp) 66 | else ( 67 | let sparser = Sexp.With_layout.Parser.sexp Sexp.With_layout.Lexer.main in 68 | let lexbuf = Lexing.from_channel in_channel in 69 | let fmt = Format.formatter_of_out_channel stdout in 70 | let next () = 71 | try Some (sparser lexbuf) with 72 | | _ -> None 73 | in 74 | Sexp_pretty.Sexp_with_layout.pp_formatter' ~next config fmt)) 75 | ;; 76 | -------------------------------------------------------------------------------- /src/main_pp.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val command : ?alias_for:string -> unit -> Command.t 4 | -------------------------------------------------------------------------------- /src/main_print.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | let command = 4 | Command.basic 5 | ~summary:"pretty-print an s-expression" 6 | (let%map_open.Command { machine; fail_on_parse_error } = 7 | Shared_params.machine_and_fail_on_parse_error 8 | in 9 | fun () -> 10 | let perform_query sexp_ext ~on_result = 11 | let lazy_results = Sexp_app.Semantics.query' Sexp_app.Syntax.This sexp_ext in 12 | Lazy_list.iter lazy_results ~f:on_result 13 | in 14 | Query.execute 15 | { inputs = Located.stdin None 16 | ; output_mode = Query.Sexp 17 | ; allow_empty_output = true 18 | ; group = false 19 | ; machine 20 | ; labeled = false 21 | ; fail_on_parse_error 22 | ; perform_query 23 | }) 24 | ;; 25 | -------------------------------------------------------------------------------- /src/main_print.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/main_query.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Sexp_app 3 | 4 | type source = 5 | | Anon of Syntax.query 6 | | File of string 7 | | Script of string 8 | 9 | let syntax_error msg e = failwithf "Syntax error: %s\n\t%s" msg (Exn.to_string e) () 10 | 11 | let load' f x ~is_change = 12 | let t_of_sexp = 13 | if is_change 14 | then fun sexp -> Syntax.Change (Syntax.Change.t_of_sexp sexp) 15 | else Syntax.Query.t_of_sexp 16 | in 17 | let sexps = 18 | try f x with 19 | | e -> syntax_error "bad s-expression" e 20 | in 21 | try Syntax.pipe (List.map ~f:t_of_sexp sexps) with 22 | | e -> syntax_error "bad program" e 23 | ;; 24 | 25 | let load = load' ~is_change:false 26 | 27 | let load_file file ~is_change ~skip_first_line = 28 | let handle = In_channel.create file in 29 | if skip_first_line then ignore (In_channel.input_line_exn handle : string); 30 | load' Sexp.input_sexps handle ~is_change 31 | ;; 32 | 33 | let create_perform_query_f ~source ~is_change = 34 | let prgm = 35 | match source with 36 | | Anon prgm -> prgm 37 | | File file -> load_file file ~skip_first_line:false ~is_change 38 | | Script file -> load_file file ~skip_first_line:true ~is_change 39 | in 40 | fun sexp_ext ~on_result -> 41 | let lazy_results = Semantics.query' prgm sexp_ext in 42 | Lazy_list.iter lazy_results ~f:on_result 43 | ;; 44 | 45 | let query_arg = 46 | Command.Arg_type.create (fun prgm -> load Sexp.scan_sexps @@ Lexing.from_string prgm) 47 | ;; 48 | 49 | let query_command = 50 | Command.basic 51 | ~summary:"query an s-expression" 52 | ~readme:(fun () -> Grammar.grammar_for_readme ()) 53 | (let%map_open.Command () = 54 | flag 55 | "examples" 56 | (no_arg_abort ~exit:(fun () -> 57 | print_endline Readme.query_by_example_dot_md; 58 | exit 0)) 59 | ~doc:" Detailed, example-driven guide to using sexp-query" 60 | and () = 61 | flag 62 | "formal-semantics" 63 | (no_arg_abort ~exit:(fun () -> 64 | print_endline Readme.query_semantics_dot_md; 65 | exit 0)) 66 | ~doc:" Show a doc describing sexp-query's formal semantics" 67 | and () = 68 | flag 69 | "grammar" 70 | (no_arg_abort ~exit:(fun () -> 71 | Grammar.print (); 72 | exit 0)) 73 | ~doc:" Show full grammar for sexpquery programs" 74 | and () = 75 | flag 76 | "quine" 77 | (no_arg_abort ~exit:(fun () -> 78 | Quine.show stdout; 79 | exit 0)) 80 | ~doc:" Print a sexp-query quine (outputs itself on any input sexp)" 81 | and source, inputs, labeled_default = 82 | let%map_open.Command file = 83 | flag 84 | "file" 85 | (optional Filename_unix.arg_type) 86 | ~doc:"FILE Read program from file instead of command line" 87 | and script = 88 | flag 89 | "script" 90 | (optional Filename_unix.arg_type) 91 | ~doc:"FILE Read program from file instead of command line (skip #!)" 92 | and query = 93 | anon 94 | (maybe 95 | (t2 ("QUERY" %: query_arg) (sequence ("FILE" %: Filename_unix.arg_type)))) 96 | and stdin_label = 97 | flag 98 | "stdin-label" 99 | (optional string) 100 | ~doc:"LABEL override default label for stdin" 101 | in 102 | (* switch from command line parsing to argument processing *) 103 | let query, files = 104 | match query with 105 | | None -> None, [] 106 | | Some (query, files) -> Some query, files 107 | in 108 | let inputs, labeled_default = 109 | match files with 110 | | [] -> Located.stdin stdin_label, Option.is_some stdin_label 111 | | [ file ] -> Located.files [ file ], false 112 | | files -> Located.files files, true 113 | in 114 | let source = 115 | match file, script, query with 116 | | Some x, None, None -> File x 117 | | None, Some x, None -> Script x 118 | | None, None, Some x -> Anon x 119 | | _ -> failwith "must pass exactly one of QUERY, -file, and -script" 120 | in 121 | source, inputs, labeled_default 122 | and group = 123 | flag 124 | "group" 125 | no_arg 126 | ~doc:" Group incoming sequence of sexps into a single list sexp" 127 | and { machine; fail_on_parse_error } = Shared_params.machine_and_fail_on_parse_error 128 | and { output_mode; allow_empty_output; labeled } = Shared_params.query_args in 129 | fun () -> 130 | let perform_query = create_perform_query_f ~source ~is_change:false in 131 | Query.execute 132 | { inputs 133 | ; output_mode 134 | ; allow_empty_output 135 | ; group 136 | ; machine 137 | ; labeled = Option.value labeled ~default:labeled_default 138 | ; fail_on_parse_error 139 | ; perform_query 140 | }) 141 | ;; 142 | 143 | let change_arg = Command.Param.sexp_conv Syntax.Change.t_of_sexp 144 | 145 | let stdin_label_arg = 146 | let open Command.Param in 147 | flag "stdin-label" (optional string) ~doc:"LABEL override default label for stdin" 148 | ;; 149 | 150 | let create_inputs ~files ~stdin_label = 151 | match files with 152 | | [] -> Located.stdin stdin_label, Option.is_some stdin_label 153 | | [ file ] -> Located.files [ file ], false 154 | | files -> Located.files files, true 155 | ;; 156 | 157 | let change_command_body ~files ~stdin_label ~source ~machine ~fail_on_parse_error = 158 | let inputs, labeled = create_inputs ~files ~stdin_label in 159 | let perform_query = create_perform_query_f ~source ~is_change:true in 160 | Query.execute 161 | { inputs 162 | ; output_mode = Sexp 163 | ; allow_empty_output = false 164 | ; group = false 165 | ; machine 166 | ; labeled 167 | ; fail_on_parse_error 168 | ; perform_query 169 | } 170 | ;; 171 | 172 | let change_command = 173 | Command.basic 174 | ~summary:"transform an s-expression" 175 | (let%map_open.Command () = 176 | flag 177 | "formal-semantics" 178 | (no_arg_abort ~exit:(fun () -> 179 | print_endline Readme.change_semantics_dot_md; 180 | exit 0)) 181 | ~doc:" Documentation dump" 182 | and () = 183 | flag 184 | "examples" 185 | (no_arg_abort ~exit:(fun () -> 186 | print_endline Readme.change_by_example_dot_md; 187 | exit 0)) 188 | ~doc:" Show examples of change expressions" 189 | and () = 190 | flag 191 | "grammar" 192 | (no_arg_abort ~exit:(fun () -> 193 | Grammar.print (); 194 | exit 0)) 195 | ~doc:" Show grammar for change expressions" 196 | and { machine; fail_on_parse_error } = Shared_params.machine_and_fail_on_parse_error 197 | and source, files = 198 | let%map_open.Command x = 199 | anon 200 | (maybe 201 | (t2 ("QUERY" %: change_arg) (sequence ("FILE" %: Filename_unix.arg_type)))) 202 | and file = 203 | flag 204 | "file" 205 | (optional Filename_unix.arg_type) 206 | ~doc:"FILE Read program from file instead of command line" 207 | in 208 | match x, file with 209 | | None, Some x -> File x, [] 210 | | Some (prgm, files), None -> Anon (Syntax.Change prgm), files 211 | | _ -> failwith "must pass exactly one of -file and QUERY" 212 | and stdin_label = stdin_label_arg in 213 | fun () -> 214 | change_command_body ~files ~stdin_label ~source ~machine ~fail_on_parse_error) 215 | ;; 216 | 217 | let pattern_arg = Command.Param.sexp_conv Syntax.Pattern.t_of_sexp 218 | 219 | let rewrite_command = 220 | Command.basic 221 | ~summary:"rewrite patterns within an s-expression" 222 | ~readme:(fun () -> 223 | {| 224 | rewrite patterns within an s-expression 225 | 226 | main.exe rewrite A B [FILE ...] 227 | 228 | is exactly the same as 229 | 230 | sexp change '(topdown (try (rewrite A B)))' 231 | 232 | but easier to remember, find, and use. 233 | 234 | Say we have this sexp: 235 | 236 | $ cat /tmp/sexp 237 | ((laundry true) 238 | (basket 239 | (fruit Banana) 240 | (utensil fork) 241 | (utensil knife) 242 | (also 243 | (fruit Pear) 244 | (fruit Lychee)))) 245 | 246 | We can rewrite the "fruit" fields to "snack" quite easily: 247 | 248 | $ cat /tmp/sexp | sexp rewrite '(fruit $FRUIT)' '(snack $FRUIT)' 249 | ((laundry true) 250 | (basket 251 | (snack Banana) 252 | (utensil fork) 253 | (utensil knife) 254 | (also 255 | (snack Pear) 256 | (snack Lychee)))) 257 | 258 | 259 | See `sexp change -examples` for more information.|}) 260 | (let%map_open.Command { machine; fail_on_parse_error } = 261 | Shared_params.machine_and_fail_on_parse_error 262 | and source_a = anon ("A" %: pattern_arg) 263 | and source_b = anon ("B" %: pattern_arg) 264 | and files = anon (sequence ("FILE" %: Filename_unix.arg_type)) 265 | and stdin_label = stdin_label_arg in 266 | fun () -> 267 | change_command_body 268 | ~files 269 | ~stdin_label 270 | ~machine 271 | ~fail_on_parse_error 272 | ~source:(Anon (Change (Topdown (Syntax.try_ (Rewrite (source_a, source_b))))))) 273 | ;; 274 | -------------------------------------------------------------------------------- /src/main_query.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val query_command : Command.t 4 | val change_command : Command.t 5 | val rewrite_command : Command.t 6 | -------------------------------------------------------------------------------- /src/main_resolve_macros.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | module Sexp = Sexplib.Sexp 3 | 4 | module Mode = struct 5 | type t = 6 | | Print_resolved_sexp 7 | | Print_included_files 8 | 9 | let flags = 10 | let%map_open.Command only_print_included_files = 11 | flag 12 | "only-print-loaded-files" 13 | no_arg 14 | ~doc: 15 | "Instead of printing the resolved macros, only print the names of the files \ 16 | that are loaded as a result of resolving the input file (including the input \ 17 | file itself)." 18 | in 19 | if only_print_included_files then Print_included_files else Print_resolved_sexp 20 | ;; 21 | end 22 | 23 | let main mode infile cout = 24 | match (mode : Mode.t) with 25 | | Print_resolved_sexp -> 26 | List.iter (Sexp_macro.Blocking.load_sexps infile) ~f:(fun sexp -> 27 | Sexp.output_hum cout sexp; 28 | (* a whitespace is necessary to separate adjacent atoms *) 29 | Out_channel.output_char cout '\n') 30 | | Print_included_files -> 31 | Sexp_macro.Blocking.included_files infile |> Out_channel.output_lines cout 32 | ;; 33 | 34 | let readme () = 35 | "Resolve a sexp with macros as understood by [Sexp_macros].\n\ 36 | Reads from INFILE and writes to stdout (or OUTFILE)" 37 | ;; 38 | 39 | let command = 40 | Command.basic 41 | ~summary:"resolve macros in a sexp" 42 | ~readme 43 | (let%map_open.Command mode = Mode.flags 44 | and infile, maybe_cout = 45 | anon 46 | (t2 47 | ("INFILE" %: Filename_unix.arg_type) 48 | (maybe ("OUTFILE" %: Filename_unix.arg_type))) 49 | in 50 | fun () -> 51 | let k cout = main mode infile cout in 52 | match maybe_cout with 53 | | None -> k stdout 54 | | Some outfile -> Out_channel.with_file outfile ~f:k) 55 | ;; 56 | -------------------------------------------------------------------------------- /src/main_resolve_macros.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/main_restructure.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | module Sexp = Sexplib.Sexp 3 | 4 | let rec restructure = function 5 | | Sexp.List xs -> Sexp.List (List.map ~f:restructure xs) 6 | | Sexp.Atom foo as same -> 7 | (try 8 | assert (Sexplib.Pre_sexp.must_escape foo && String.length foo > 80); 9 | let xs = Sexp.scan_sexps (Lexing.from_string foo) in 10 | let xs = 11 | List.map xs ~f:(function 12 | | Sexp.List _ as restructured_list -> restructure restructured_list 13 | | Sexp.Atom restructured_foo as restructured_atom -> 14 | (match [%equal: string] foo restructured_foo with 15 | | true -> restructured_atom 16 | | false -> restructure restructured_atom)) 17 | in 18 | Sexp.List (Sexp.Atom "-RESTRUCTURED-" :: xs) 19 | with 20 | | _ -> same) 21 | ;; 22 | 23 | let main cin cout = Sexp.output_hum cout (restructure (Sexp.input_sexp cin)) 24 | 25 | let readme () = 26 | "Attempt to recover the structure in an s-expression containing atoms\n\ 27 | constructed by some use of Sexp.to_string. For example, this often\n\ 28 | happens with error messages with deep structure. Reads from stdin (or INFILE)\n\ 29 | and writes to stdout (or OUTFILE)" 30 | ;; 31 | 32 | let command = 33 | Command.basic 34 | ~summary:"recover structure of an s-expression" 35 | ~readme 36 | (let%map_open.Command argv = 37 | anon 38 | (maybe 39 | (t2 40 | ("INFILE" %: Filename_unix.arg_type) 41 | (maybe ("OUTFILE" %: Filename_unix.arg_type)))) 42 | in 43 | fun () -> 44 | let cin, cout = In_channel.stdin, Out_channel.stdout in 45 | (* defaults *) 46 | match argv with 47 | | None -> main cin cout 48 | | Some (infile, outfile) -> 49 | In_channel.with_file infile ~f:(fun cin -> 50 | match outfile with 51 | | None -> main cin cout 52 | | Some outfile -> Out_channel.with_file outfile ~f:(fun cout -> main cin cout))) 53 | ;; 54 | -------------------------------------------------------------------------------- /src/main_restructure.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/main_select.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | let example_sexp_string = 5 | {| 6 | ((foo bar) 7 | (baz ( 8 | (sausage banana) 9 | (fred george) 10 | (wizzle ( 11 | (one a) 12 | (two b) 13 | (three c))))) 14 | (wizzle fizzle) 15 | (wizzle ( 16 | (grizzle ( 17 | (one z) 18 | (two y))) 19 | (drizzle chizzle))) 20 | (fred percy)) 21 | |} 22 | ;; 23 | 24 | let example_programs = 25 | [ "foo" 26 | ; "sausage" 27 | ; "fred" 28 | ; "baz fred" 29 | ; "two" 30 | ; "wizzle two" 31 | ; "wizzle > two" 32 | ; "wizzle > ( one two )" 33 | ; "wizzle ( one two )" 34 | ; "wizzle" 35 | ; "> wizzle" 36 | ; "wizzle > *" 37 | ] 38 | ;; 39 | 40 | let readme () = 41 | let example_sexp = Parsexp.Single.parse_string_exn example_sexp_string in 42 | let example_programs_and_output = 43 | Sexp_select.format_program_outputs example_sexp example_programs 44 | in 45 | String.strip 46 | {| 47 | Implementation of a subset of CSS-style selectors for traversing sexp trees. 48 | 49 | See also the get and query subcommands. 50 | 51 | Syntax: 52 | - "foo" finds the value of every pair in your tree having "foo" as the key 53 | - "foo bar" finds the value of every pair in your tree having "foo" as the 54 | key, and then for each of these trees finds the value of every pair having 55 | "bar" as the key. 56 | - "foo > bar" finds the value of every pair in your tree having "foo" as the 57 | key, and then for each of these trees finds the value of every top-level pair 58 | having "bar" as the key. 59 | - "foo > ( bar baz )" finds the value of every pair in your tree having "foo" as the 60 | key, and then for each of these trees finds the value of every top-level pair 61 | having either "bar" or "baz" as the key. 62 | - "*" matches anything 63 | 64 | Examples: 65 | |} 66 | ^ sprintf "\n%s\n\n%s" example_sexp_string example_programs_and_output 67 | ;; 68 | 69 | let readme_flag () = 70 | let open Command.Param in 71 | flag 72 | "readme" 73 | (no_arg_abort ~exit:(fun () -> 74 | Core.print_endline (readme ()); 75 | Core.exit 0)) 76 | ~doc:" Show the readme" 77 | ;; 78 | 79 | let remove_duplicates_flag = 80 | let open Command.Param in 81 | flag 82 | ~doc:" remove duplicate outputs from each PROGRAM" 83 | "remove-dupes" 84 | (map_flag no_arg ~f:(fun arg -> 85 | Option.some_if arg (List.stable_dedup ~compare:Sexp.compare))) 86 | ;; 87 | 88 | let drop_flag = 89 | let open Command.Param in 90 | flag "drop" no_arg ~doc:" output the original input sexps with matching fields removed" 91 | ;; 92 | 93 | let mach_flag = 94 | let open Command.Param in 95 | map Shared_params.machine ~f:(fun mach -> 96 | if mach then Sexp.to_string_mach else fun sexp -> Sexp.to_string_hum sexp) 97 | ;; 98 | 99 | let command = 100 | Command.async 101 | ~summary:"Use CSS-style selectors to traverse sexp trees" 102 | (let%map_open.Command () = readme_flag () 103 | and program = anon ("program" %: string) 104 | and sexp_to_string = mach_flag 105 | and maybe_sexp_string = anon (maybe ("sexp" %: string)) 106 | and remove_duplicate_outputs = remove_duplicates_flag 107 | and drop = drop_flag in 108 | if drop && Option.is_some remove_duplicate_outputs 109 | then failwith "Cannot specify -remove-dupes with -drop"; 110 | fun () -> 111 | let maybe_remove_duplicate_outputs = 112 | Option.value remove_duplicate_outputs ~default:Fn.id 113 | in 114 | let sexp_pipe = 115 | match maybe_sexp_string with 116 | | None -> Reader.read_sexps (Lazy.force Reader.stdin) 117 | | Some x -> Pipe.singleton (Sexp.of_string x) 118 | in 119 | let process_fn = 120 | if drop 121 | then `drop (unstage (Sexp_select.deselect_staged program)) 122 | else `select (unstage (Sexp_select.select_staged program)) 123 | in 124 | Pipe.iter_without_pushback sexp_pipe ~f:(fun sexp -> 125 | match process_fn with 126 | | `select select_fn -> 127 | List.iter 128 | (maybe_remove_duplicate_outputs (select_fn sexp)) 129 | ~f:(fun answer -> printf "%s\n%!" (sexp_to_string answer)) 130 | | `drop drop_fn -> 131 | (match drop_fn sexp with 132 | | None -> () 133 | | Some sexp -> printf "%s\n%!" (sexp_to_string sexp)))) 134 | ;; 135 | 136 | let multi_command = 137 | Command.async 138 | ~summary: 139 | "like [sexp select], but allowing multiple programs to be passed, and grouping \ 140 | together output from each input sexp" 141 | (let%map_open.Command () = readme_flag () 142 | and labeled = 143 | flag "labeled" no_arg ~doc:" label each match with the PROGRAM that matched it" 144 | and sexp_to_string = mach_flag 145 | and remove_duplicate_outputs = remove_duplicates_flag 146 | and drop = drop_flag 147 | and programs = 148 | map 149 | ~f:(fun (x, xs) -> x :: xs) 150 | (anon (non_empty_sequence_as_pair ("program" %: string))) 151 | in 152 | if drop && Option.is_some remove_duplicate_outputs 153 | then failwith "Cannot specify -remove-dupes with -drop"; 154 | if drop && labeled then failwith "Cannot specify -labeled with -drop"; 155 | fun () -> 156 | let process_fn = 157 | if drop 158 | then ( 159 | let programs = 160 | List.map programs ~f:(fun program -> 161 | unstage (Sexp_select.deselect_staged program)) 162 | in 163 | `drop 164 | (fun sexp -> 165 | List.fold_until 166 | programs 167 | ~init:sexp 168 | ~f:(fun sexp f -> 169 | match f sexp with 170 | | None -> Stop None 171 | | Some sexp -> Continue sexp) 172 | ~finish:(fun sexp -> Some sexp))) 173 | else ( 174 | let maybe_remove_duplicate_outputs = 175 | Option.value remove_duplicate_outputs ~default:Fn.id 176 | in 177 | let programs = 178 | List.map programs ~f:(fun program -> 179 | program, unstage (Sexp_select.select_staged program)) 180 | in 181 | `select 182 | (fun sexp -> 183 | List.concat_map programs ~f:(fun (program, select_fn) -> 184 | List.map 185 | (maybe_remove_duplicate_outputs (select_fn sexp)) 186 | ~f:(fun answer -> 187 | if labeled 188 | then [%sexp_of: string * Sexp.t] (program, answer) 189 | else answer)))) 190 | in 191 | Reader.read_sexps (Lazy.force Reader.stdin) 192 | |> Pipe.iter_without_pushback ~f:(fun sexp -> 193 | match process_fn with 194 | | `select select_fn -> 195 | (match select_fn sexp with 196 | | [] -> () 197 | | sexps -> printf "%s\n%!" (sexp_to_string ([%sexp_of: Sexp.t list] sexps))) 198 | | `drop drop_fn -> 199 | (match drop_fn sexp with 200 | | None -> () 201 | | Some sexp -> printf "%s\n%!" (sexp_to_string sexp)))) 202 | ;; 203 | -------------------------------------------------------------------------------- /src/main_select.mli: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | val command : Command.t 4 | val multi_command : Command.t 5 | -------------------------------------------------------------------------------- /src/main_sexpify.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | let main () = 4 | let buf_size = 1024 in 5 | let buf = Bytes.create buf_size in 6 | let idx = ref 0 in 7 | let n = ref 0 in 8 | let next_char_no_check () = 9 | let c = Bytes.get buf !idx in 10 | incr idx; 11 | Some c 12 | in 13 | let next_char () = 14 | if !idx >= !n 15 | then ( 16 | idx := 0; 17 | n := In_channel.input In_channel.stdin ~buf ~pos:0 ~len:buf_size; 18 | if !n <= 0 then None else next_char_no_check ()) 19 | else next_char_no_check () 20 | in 21 | let read = unstage (Sexp_app.Parse_everything.read_of_next_char ~next_char) in 22 | let rec loop () = 23 | match read () with 24 | | `Eof -> () 25 | | `Ok s -> 26 | print_string s; 27 | loop () 28 | in 29 | loop () 30 | ;; 31 | 32 | let command = 33 | let readme () = 34 | String.strip 35 | {| 36 | Make a best effort to transform a string into something that will parse as a 37 | sexp, preserving the sexp structure of any parts of the string that already look like a 38 | sexp. Transforms things that would be comments into actual data in the sexp as well. 39 | |} 40 | in 41 | Command.basic 42 | ~summary:"Sexpify an arbitrary string received via stdin." 43 | ~readme 44 | (let%map_open.Command () = return () in 45 | fun () -> main ()) 46 | ;; 47 | -------------------------------------------------------------------------------- /src/main_sexpify.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/main_sort.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/main_to_csv.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Sexp_app 3 | 4 | let myprotect f x = 5 | try Some (f x) with 6 | | _ -> None 7 | ;; 8 | 9 | let main ~in_channel ~two_pass_processing ~view_atoms_as_strings ~delimiter:sep = 10 | let sexps = 11 | let channel = Lexing.from_channel in_channel in 12 | Lazy_list.build ~seed:() ~f:(fun () -> 13 | match myprotect Sexp.scan_sexp channel with 14 | | None -> None 15 | | Some sexp -> Some (sexp, ())) 16 | in 17 | Csv_file.write 18 | Out_channel.stdout 19 | (To_csv.csv_of_sexp ~two_pass_processing ~view_atoms_as_strings sexps) 20 | ~sep 21 | ;; 22 | 23 | let command = 24 | Command.basic 25 | ~summary: 26 | "Converts a list of record s-expressions from stdin or a file into CSV format." 27 | ~readme:(fun () -> 28 | {| 29 | Example 30 | 31 | COMMAND 32 | 33 | sexp to-csv OUTPUT 34 | 35 | INPUT 36 | 37 | ((foo a) (bar 2) (baz 8)) 38 | ((foo b) (bar 3) (baz 88)) 39 | ((foo c) (bar 4) (baz 888)) 40 | 41 | OUTPUT 42 | 43 | foo,bar,baz 44 | a,2,8 45 | b,3,88 46 | c,4,888 47 | |}) 48 | (let%map_open.Command view_atoms_as_sexps = 49 | flag 50 | "atoms-as-sexps" 51 | no_arg 52 | ~doc: 53 | " when the extracted CSV value is an atom, dump it as a string rather than as \ 54 | an s-expression (Note: this causes atoms with embedded whitespace to show up \ 55 | triple-quoted)" 56 | and two_pass_processing = 57 | flag 58 | "two-pass-processing" 59 | no_arg 60 | ~doc: 61 | " Uses one pass the gather all columns names for the header and a second to \ 62 | generate the rows. This ensures that no data will be discarded from any \ 63 | record, but requires more memory. Without this option, the header is \ 64 | generated solely by the first record. Any field not found in the first \ 65 | record---but found in later records---will be dropped." 66 | and delimiter = 67 | flag 68 | "delimiter" 69 | (optional_with_default ',' char) 70 | ~doc:(sprintf "CHAR use this delimiter instead of ','") 71 | and in_channel = Shared_params.channel_stdin_or_anon_file in 72 | fun () -> 73 | main 74 | ~in_channel 75 | ~delimiter 76 | ~two_pass_processing 77 | ~view_atoms_as_strings:(not view_atoms_as_sexps)) 78 | ;; 79 | -------------------------------------------------------------------------------- /src/main_to_csv.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/main_validate.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | let execute query = 4 | try Query.execute query with 5 | | _ -> exit 1 6 | ;; 7 | 8 | (* preserve failure, but silently *) 9 | 10 | let command = 11 | Command.basic 12 | ~summary:"validate s-expressions from stdin or one or more files" 13 | (let%map_open.Command files = anon (sequence ("FILE" %: Filename_unix.arg_type)) in 14 | fun () -> 15 | let perform_query sexp_ext ~on_result = 16 | let lazy_results = Sexp_app.Semantics.query' Sexp_app.Syntax.This sexp_ext in 17 | Lazy_list.iter lazy_results ~f:on_result 18 | in 19 | execute 20 | { inputs = 21 | (match files with 22 | | [] -> Located.stdin None 23 | | files -> Located.files files) 24 | ; output_mode = Query.Silent 25 | ; allow_empty_output = true 26 | ; group = false 27 | ; machine = true 28 | ; labeled = false 29 | ; fail_on_parse_error = true 30 | ; perform_query 31 | }) 32 | ;; 33 | -------------------------------------------------------------------------------- /src/main_validate.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | val command : Command.t 4 | -------------------------------------------------------------------------------- /src/pat_query.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (* We want to support sorting by multiple captured values, e.g., with -pat-query "{ 4 | (field1 %0) (field2 %1) }", so we want to use the List output method. If there's only 5 | one capture, we'll use Single_capture. (This matches the behavior of 6 | default_output_method, which uses List when there are any numbered captures, or 7 | multiple unlabeled captures, and Single_capture when there's just a single unlabeled 8 | capture.) Using Single_capture when possible will avoid allocating an extra Sexp.List 9 | for every capture. 10 | 11 | For wrap_mode, the pat-query documentation explains the different options by 12 | showing what happens when "(a %[.*])" matches "(a)", "(a b)", and "(a b c)": 13 | 14 | Unwrap_always: 15 | (a) -> (* *) 16 | (a b) -> (* b *) 17 | (a b c) -> (* b c *) 18 | 19 | Wrap_non_singletons: 20 | (a) -> (* () *) 21 | (a b) -> (* b *) 22 | (a b c) -> (* (b c) *) 23 | 24 | Wrap_always: 25 | (a) -> (* () *) 26 | (a b) -> (* (b) *) 27 | (a b c) -> (* (b c) *) 28 | 29 | We definitely don't want Wrap_non_singletons, becuase that would cause "(a b)" 30 | (extracted key = "b") to be sorted before "(a)" (extracted key = "()"), but I'm not 31 | sure if there's a difference between Unwrap_always and Wrap_always. 32 | 33 | Using Unwrap_always can cause different inputs to produce the same keys in a way that 34 | is probably not what the user intended: 35 | 36 | $ echo "((a 1)(b 5 3 4)) ((a 1 2)(b 3 4))" > wrap.sexp 37 | $ cat wrap.sexp | sexp pat-query "{ (a %0=[.*]) (b %1=[.*]) }" # Wrap_non_singletons 38 | (1 (5 3 4)) 39 | ((1 2) (3 4)) 40 | $ cat wrap.sexp | sexp pat-query "{ (a %0=[.*]) (b %1=[.*]) }" -wrap # Wrap_always 41 | ((1) (5 3 4)) 42 | ((1 2) (3 4)) 43 | $ cat wrap.sexp | sexp pat-query "{ (a %0=[.*]) (b %1=[.*]) }" -unwrap # Unwrap_always 44 | (1 5 3 4) 45 | (1 2 3 4) 46 | 47 | If we were to sort the input sexps by the output of the pat-query, we would get 48 | different results depending on if we used Wrap_always or Unwrap_always. Looking at 49 | the pattern, very likely we want to sort by the tuple "(, 50 | )", which corresponds to the behavior of Wrap_always. 51 | *) 52 | 53 | let single_capture_output_method : Sexp.t Sexp_app_pattern.Output_method.t = 54 | Single_capture Wrap_always 55 | ;; 56 | 57 | let multi_or_numbered_capture_output_method : Sexp.t Sexp_app_pattern.Output_method.t = 58 | List Wrap_always 59 | ;; 60 | 61 | let run (query : Sexp_app_pattern.Query.t) : (Sexp.t -> Sexp.t list) Staged.t = 62 | let ({ num_number_captures; num_unlabeled_captures; _ } 63 | : Sexp_app_pattern.Query.Capture_count.t) 64 | = 65 | Sexp_app_pattern.Query.count_captures query 66 | in 67 | let output_method = 68 | if num_number_captures = 0 && num_unlabeled_captures = 1 69 | then single_capture_output_method 70 | else multi_or_numbered_capture_output_method 71 | in 72 | stage (fun sexp -> 73 | let keys = ref [] in 74 | Sexp_app_pattern.Engine.iter_matches ~query ~output_method sexp ~f:(fun sexp -> 75 | keys := sexp :: !keys); 76 | List.rev !keys) 77 | ;; 78 | -------------------------------------------------------------------------------- /src/pat_query.mli: -------------------------------------------------------------------------------- 1 | (** using [sexp pat-query] style queries to extract keys for sorting *) 2 | 3 | open! Core 4 | 5 | val run : Sexp_app_pattern.Query.t -> (Sexp.t -> Sexp.t list) Staged.t 6 | -------------------------------------------------------------------------------- /src/query.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Sexp_app 3 | 4 | type output_mode = 5 | | Sexp 6 | | Count 7 | | Silent 8 | 9 | type t = 10 | { inputs : unit Located.t 11 | ; output_mode : output_mode 12 | ; allow_empty_output : bool 13 | ; labeled : bool 14 | ; group : bool 15 | ; machine : bool 16 | ; fail_on_parse_error : bool 17 | ; perform_query : Sexp_ext.t -> on_result:(Sexp.t -> unit) -> unit 18 | } 19 | 20 | type parameters = t 21 | 22 | let singleton x = Lazy_list.cons x (Lazy_list.empty ()) 23 | 24 | let scan lexbuf ~fail_on_parse_error = 25 | Lazy_list.build ~seed:() ~f:(fun () -> 26 | try 27 | match Sexp.scan_sexp_opt lexbuf with 28 | | None -> None 29 | | Some sexp -> Some (Sexp_ext.t_of_sexp sexp, ()) 30 | with 31 | | _ignored_exn when not fail_on_parse_error -> None) 32 | ;; 33 | 34 | module Transform : sig 35 | type t 36 | 37 | val make : parameters -> t 38 | val initialize_source : t -> string -> Sexp.t -> unit 39 | val finalize_source : t -> string -> unit 40 | val finalize_all : t -> unit 41 | val any_output : t -> bool 42 | end = struct 43 | type t = 44 | { initialize_source : string -> Sexp.t -> unit 45 | ; process_sexp : Sexp.t -> unit 46 | ; finalize_source : string -> unit 47 | ; finalize_all : unit -> unit 48 | ; any_output : bool ref 49 | } 50 | 51 | let initialize_source t label = t.initialize_source label 52 | let finalize_source t label = t.finalize_source label 53 | let finalize_all t = t.finalize_all () 54 | let any_output t = t.any_output.contents 55 | let with_label label sexp = Sexp.List [ Sexp.Atom label; sexp ] 56 | 57 | let make_count t ~f = 58 | let count = ref 0 in 59 | let process_sexp _sexp = incr count in 60 | if t.labeled 61 | then ( 62 | let finalize_source label = 63 | let sexp = with_label label (Int.sexp_of_t !count) in 64 | count := 0; 65 | f sexp 66 | in 67 | let finalize_all = ignore in 68 | process_sexp, finalize_source, finalize_all) 69 | else ( 70 | let finalize_source = ignore in 71 | let finalize_all () = 72 | let count = Int.sexp_of_t !count in 73 | f count 74 | in 75 | process_sexp, finalize_source, finalize_all) 76 | ;; 77 | 78 | let make t = 79 | let any_output = ref t.allow_empty_output in 80 | let sexp_output = if t.machine then Sexp.output else Sexp.output_hum in 81 | let process_sexp sexp = 82 | sexp_output stdout sexp; 83 | print_endline "" 84 | in 85 | let process_output ~f = 86 | if t.allow_empty_output 87 | then f 88 | else 89 | fun sexp -> 90 | f sexp; 91 | any_output := true 92 | in 93 | let process_sexp, finalize_source, finalize_all = 94 | match t.output_mode with 95 | | Sexp -> process_output ~f:process_sexp, ignore, ignore 96 | | Count -> make_count t ~f:(process_output ~f:process_sexp) 97 | | Silent -> process_output ~f:ignore, ignore, ignore 98 | in 99 | let initialize_source = 100 | if t.labeled 101 | then (fun label -> 102 | (); 103 | fun sexp -> process_sexp (with_label label sexp)) 104 | else fun _ -> process_sexp 105 | in 106 | { initialize_source; process_sexp; finalize_source; finalize_all; any_output } 107 | ;; 108 | end 109 | 110 | let execute t = 111 | let transform = Transform.make t in 112 | let channels = Located.channels t.inputs in 113 | let input = 114 | Located.map channels ~f:(fun chan -> 115 | let sexps = 116 | scan (Lexing.from_channel chan) ~fail_on_parse_error:t.fail_on_parse_error 117 | in 118 | if t.group then singleton (Sexp_ext.List sexps) else sexps) 119 | in 120 | let iter_source label sexps = 121 | let process_sexp = Transform.initialize_source transform label in 122 | Lazy_list.iter sexps ~f:(fun sexp -> t.perform_query sexp ~on_result:process_sexp); 123 | Transform.finalize_source transform label 124 | in 125 | Located.iter input ~f:iter_source; 126 | Transform.finalize_all transform; 127 | exit (if Transform.any_output transform then 0 else 1) 128 | ;; 129 | -------------------------------------------------------------------------------- /src/query.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Sexp_app 3 | 4 | type output_mode = 5 | | Sexp 6 | | Count 7 | | Silent 8 | 9 | type t = 10 | { inputs : unit Located.t 11 | ; output_mode : output_mode 12 | ; allow_empty_output : bool 13 | ; labeled : bool 14 | ; group : bool 15 | ; machine : bool 16 | ; fail_on_parse_error : bool 17 | ; perform_query : Sexp_ext.t -> on_result:(Sexp.t -> unit) -> unit 18 | } 19 | 20 | val execute : t -> unit 21 | -------------------------------------------------------------------------------- /src/quine.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Sexp_app 3 | 4 | let prgm = 5 | String.strip 6 | {| 7 | (pipe 8 | (quote ( 9 | quote (pipe (unquote (wrap (cat (quote quote) this))) (unquote this)))) 10 | (quote (pipe (unquote (wrap (cat (quote quote) this))) (unquote this)))) 11 | |} 12 | ;; 13 | 14 | let prgm_sexp = Sexp.of_string prgm 15 | let prgm = Syntax.Query.t_of_sexp prgm_sexp 16 | 17 | let show chan = 18 | Sexp.output_hum chan prgm_sexp; 19 | print_endline "" 20 | ;; 21 | -------------------------------------------------------------------------------- /src/quine.mli: -------------------------------------------------------------------------------- 1 | open Sexp_app 2 | 3 | val prgm : Syntax.query 4 | val show : out_channel -> unit 5 | -------------------------------------------------------------------------------- /src/readme.mli: -------------------------------------------------------------------------------- 1 | val change_by_example_dot_md : string 2 | val change_semantics_dot_md : string 3 | val query_by_example_dot_md : string 4 | val query_semantics_dot_md : string 5 | -------------------------------------------------------------------------------- /src/sexp_cmds.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | module Main_group = Main_group 3 | 4 | let summary = "the s-expression toolkit" 5 | 6 | let command = 7 | Command.group 8 | ~summary 9 | ([ "assemble", Main_parts.assemble_command 10 | ; "atom", Main_atom.command 11 | ; "change", Main_query.change_command 12 | ; "rewrite", Main_query.rewrite_command 13 | ; "diff", Main_diff.diff_command 14 | ; "flatten", Main_parts.flatten_command 15 | ; "fzf", Main_fzf.command 16 | ; "get", Main_get.command 17 | ; "multi-select", Main_select.multi_command 18 | ; "of-json", Main_json.of_json_command 19 | ; "of-xml", Main_of_xml.command 20 | ; "pat-change", Main_pattern.pat_change_command 21 | ; "pat-query", Main_pattern.pat_query_command 22 | ; "patch", Main_diff.patch_command 23 | ; "pp", Main_pp.command ~alias_for:"[sexp pretty]" () 24 | ; "pretty", Main_pp.command () 25 | ; "print", Main_print.command 26 | ; "query", Main_query.query_command 27 | ; "resolve-macros", Main_resolve_macros.command 28 | ; "restructure", Main_restructure.command 29 | ; "select", Main_select.command 30 | ; "sexpify", Main_sexpify.command 31 | ; "sort", Main_sort.command 32 | ; "to-csv", Main_to_csv.command 33 | ; "to-json", Main_json.to_json_command 34 | ; "validate", Main_validate.command 35 | ] 36 | @ if am_running_test then [ "group", Main_group.command ] else []) 37 | ;; 38 | -------------------------------------------------------------------------------- /src/sexp_cmds.mli: -------------------------------------------------------------------------------- 1 | open Async 2 | module Main_group = Main_group 3 | 4 | val command : Command.t 5 | -------------------------------------------------------------------------------- /src/shared_params.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | module Query_args = struct 4 | type t = 5 | { output_mode : Query.output_mode 6 | ; allow_empty_output : bool 7 | ; labeled : bool option 8 | } 9 | end 10 | 11 | let query_args = 12 | let%map_open.Command output_mode = 13 | let%map_open.Command quiet = 14 | flag 15 | "quiet" 16 | no_arg 17 | ~doc:" Produce no output (useful when running for exit status alone)" 18 | and count = flag "count" no_arg ~doc:" Produce only a count of returned sexps" in 19 | match quiet, count with 20 | | true, false -> Query.Silent 21 | | false, true -> Query.Count 22 | | false, false -> Query.Sexp 23 | | true, true -> failwith "can't pass both -quiet and -count" 24 | and allow_empty_output = 25 | flag "allow-empty-output" no_arg ~doc:" Do not fail even if no match is found" 26 | and labeled = 27 | let%map_open.Command label = 28 | flag "label" no_arg ~doc:" pair with filenames (override default behavior)" 29 | and no_label = 30 | flag 31 | "no-label" 32 | no_arg 33 | ~doc:" do not pair with filenames (override default behavior)" 34 | in 35 | match label, no_label with 36 | | true, false -> Some true 37 | | false, true -> Some false 38 | | false, false -> None 39 | | true, true -> failwith "can't pass both -label and -no-label flags" 40 | in 41 | { Query_args.output_mode; allow_empty_output; labeled } 42 | ;; 43 | 44 | module Machine_and_fail_on_parse_error = struct 45 | type t = 46 | { machine : bool 47 | ; fail_on_parse_error : bool 48 | } 49 | end 50 | 51 | let machine = 52 | let open Command.Param in 53 | flag "machine" no_arg ~doc:" Use machine style for output (one sexp per line)" 54 | ;; 55 | 56 | let machine_and_fail_on_parse_error = 57 | let%map_open.Command machine 58 | and fail_on_parse_error = 59 | flag 60 | "fail-on-parse-error" 61 | no_arg 62 | ~doc:" raise exception on bad input (override default behavior)" 63 | in 64 | { Machine_and_fail_on_parse_error.machine; fail_on_parse_error } 65 | ;; 66 | 67 | let channel_stdin_or_anon_file = 68 | let open Command.Param in 69 | match%map.Command anon (maybe ("FILE" %: Filename_unix.arg_type)) with 70 | | None -> Stdlib.stdin 71 | | Some file -> In_channel.create file 72 | ;; 73 | -------------------------------------------------------------------------------- /src/shared_params.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | module Query_args : sig 4 | type t = 5 | { output_mode : Query.output_mode 6 | ; allow_empty_output : bool 7 | ; labeled : bool option 8 | } 9 | end 10 | 11 | val query_args : Query_args.t Command.Param.t 12 | 13 | module Machine_and_fail_on_parse_error : sig 14 | type t = 15 | { machine : bool 16 | ; fail_on_parse_error : bool 17 | } 18 | end 19 | 20 | val machine_and_fail_on_parse_error : Machine_and_fail_on_parse_error.t Command.Param.t 21 | val machine : bool Command.Param.t 22 | val channel_stdin_or_anon_file : In_channel.t Command.Param.t 23 | --------------------------------------------------------------------------------