├── .gitignore ├── .ocamlformat ├── CHANGES.md ├── CONTRIBUTING.md ├── LICENSE.md ├── Makefile ├── README.mdx ├── dune ├── dune-project ├── ppx_string.opam ├── runtime ├── dune ├── ppx_string_runtime.ml ├── ppx_string_runtime.mli └── ppx_string_runtime_intf.ml └── src ├── dune ├── ppx_string.ml ├── ppx_string.mli └── ppx_string_intf.ml /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | *.install 3 | *.merlin 4 | _opam 5 | 6 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | profile=janestreet 2 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ## Release v0.17.0 2 | 3 | * Refactor codebase to support more general kinds of interpolation. 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | This repository contains open source software that is developed and 2 | maintained by [Jane Street][js]. 3 | 4 | Contributions to this project are welcome and should be submitted via 5 | GitHub pull requests. 6 | 7 | Signing contributions 8 | --------------------- 9 | 10 | We require that you sign your contributions. Your signature certifies 11 | that you wrote the patch or otherwise have the right to pass it on as 12 | an open-source patch. The rules are pretty simple: if you can certify 13 | the below (from [developercertificate.org][dco]): 14 | 15 | ``` 16 | Developer Certificate of Origin 17 | Version 1.1 18 | 19 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 20 | 1 Letterman Drive 21 | Suite D4700 22 | San Francisco, CA, 94129 23 | 24 | Everyone is permitted to copy and distribute verbatim copies of this 25 | license document, but changing it is not allowed. 26 | 27 | 28 | Developer's Certificate of Origin 1.1 29 | 30 | By making a contribution to this project, I certify that: 31 | 32 | (a) The contribution was created in whole or in part by me and I 33 | have the right to submit it under the open source license 34 | indicated in the file; or 35 | 36 | (b) The contribution is based upon previous work that, to the best 37 | of my knowledge, is covered under an appropriate open source 38 | license and I have the right under that license to submit that 39 | work with modifications, whether created in whole or in part 40 | by me, under the same open source license (unless I am 41 | permitted to submit under a different license), as indicated 42 | in the file; or 43 | 44 | (c) The contribution was provided directly to me by some other 45 | person who certified (a), (b) or (c) and I have not modified 46 | it. 47 | 48 | (d) I understand and agree that this project and the contribution 49 | are public and that a record of the contribution (including all 50 | personal information I submit with it, including my sign-off) is 51 | maintained indefinitely and may be redistributed consistent with 52 | this project or the open source license(s) involved. 53 | ``` 54 | 55 | Then you just add a line to every git commit message: 56 | 57 | ``` 58 | Signed-off-by: Joe Smith 59 | ``` 60 | 61 | Use your real name (sorry, no pseudonyms or anonymous contributions.) 62 | 63 | If you set your `user.name` and `user.email` git configs, you can sign 64 | your commit automatically with git commit -s. 65 | 66 | [dco]: http://developercertificate.org/ 67 | [js]: https://opensource.janestreet.com/ 68 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2020--2025 Jane Street Group, LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | INSTALL_ARGS := $(if $(PREFIX),--prefix $(PREFIX),) 2 | 3 | default: 4 | dune build 5 | 6 | install: 7 | dune install $(INSTALL_ARGS) 8 | 9 | uninstall: 10 | dune uninstall $(INSTALL_ARGS) 11 | 12 | reinstall: uninstall install 13 | 14 | clean: 15 | dune clean 16 | 17 | .PHONY: default install uninstall reinstall clean 18 | -------------------------------------------------------------------------------- /README.mdx: -------------------------------------------------------------------------------- 1 | ppx_string 2 | ========== 3 | 4 | 9 | 10 | This extension provides a syntax for string interpolation. Here is an example of 11 | its features: 12 | 13 | ```ocaml 14 | let script_remotely (user : string) (host : string) (port : int) (script : string) = 15 | [%string "ssh %{user}@%{host} -p %{port#Int} %{Sys.quote script}"] 16 | ;; 17 | ``` 18 | 19 | ```ocaml 20 | # script_remotely "jane-doe" "workstation-1" 22 {|echo "use ppx_string to interpolate"|} 21 | - : string = 22 | "ssh jane-doe@workstation-1 -p 22 'echo \"use ppx_string to interpolate\"'" 23 | ``` 24 | 25 | The above function is equivalent to: 26 | 27 | ```ocaml 28 | let script_remotely (user : string) (host : string) (port : int) (script : string) = 29 | String.concat 30 | [ "ssh " 31 | ; user 32 | ; "@" 33 | ; host 34 | ; " -p " 35 | ; Int.to_string port 36 | ; " " 37 | ; Sys.quote script 38 | ] 39 | ;; 40 | ``` 41 | 42 | `ppx_string` also works with the shorthand string extension syntax: 43 | 44 | ```ocaml 45 | let script_remotely (user : string) (host : string) (port : int) (script : string) = 46 | {%string|ssh %{user}@%{host} -p %{port#Int} %{Sys.quote script}|} 47 | ;; 48 | ``` 49 | 50 | Compared to `Printf.sprintf`: 51 | 52 | ```ocaml 53 | let script_remotely (user : string) (host : string) (port : int) (script : string) = 54 | sprintf "ssh %s@%s -p %d %s" user host port (Sys.quote script) 55 | ;; 56 | ``` 57 | 58 | Having the values inline instead of after the format string can make it easier 59 | to understand the resulting string, and avoids the potential mistake of passing 60 | arguments in the wrong order. This is truer the more format arguments there are. 61 | On the other hand, some things are much easier with printf: pad numbers with 62 | zeroes, pad strings on the right, display floats in a specific formats, etc. 63 | 64 | Compared to manually writing something like `String.concat` version above, 65 | `ppx_string` is shorter and can oftentimes be less error-prone (it's really easy 66 | to forget whitespace after `ssh` or around `-p` in the explicit `String.concat` 67 | version). 68 | 69 | ## Interpolation syntax 70 | 71 | 74 | 75 | | Syntax | Meaning | 76 | | --------------------- | -------------------------------------------------------------------------------- | 77 | | `%{expr}` | Directly insert the `string` expression `expr` | 78 | | `%{expr#Mod}` | Insert the result of converting `expr` to a `string` via `Mod.to_string` | 79 | | `%{expr#Mod:int_expr}`| Left-pad `Mod.to_string expr` to a width of at least `int_expr` | 80 | | `%{expr#:int_expr}` | Left-pad the `string` expression `expr` to a width of at least `int_expr` spaces | 81 | 82 | To emit the literal sequence `%{`, you can escape it as follows: 83 | 84 | ```ocaml 85 | # {%string|%{"%{"}|} 86 | - : string = "%{" 87 | ``` 88 | 89 | To pad strings with spaces on the left, add an integer expression after a colon: 90 | 91 | ```ocaml 92 | # let term_width = 60 in 93 | let items = 94 | [ "jane-doe", "workstation-1", 22, {|echo "use ppx_string to interpolate"|} 95 | ; "root", "workstation-1", 8080, {|echo "it can even pad"|} 96 | ] 97 | in 98 | List.map items ~f:(fun (col1, col2, col3, col4) -> 99 | {%string|%{col1#:term_width / 6}%{col2#:term_width/4}%{col3#Int:8} %{col4}|}) 100 | - : string list = 101 | [" jane-doe workstation-1 22 echo \"use ppx_string to interpolate\""; 102 | " root workstation-1 8080 echo \"it can even pad\""] 103 | ``` 104 | is equivalent to: 105 | 106 | ```ocaml 107 | # let pad str len = 108 | let pad_len = max 0 (len - String.length str) in 109 | let padding = String.make pad_len ' ' in 110 | padding ^ str 111 | in 112 | let term_width = 60 in 113 | let items = 114 | [ "jane-doe", "workstation-1", 22, {|echo "use ppx_string to interpolate"|} 115 | ; "root", "box-42", 8080, {|echo "it can even pad"|} 116 | ] 117 | in 118 | List.map items ~f:(fun (col1, col2, col3, col4) -> 119 | String.concat 120 | [ pad col1 (term_width / 6) 121 | ; pad col2 (term_width / 4) 122 | ; pad (Int.to_string col3) 8 123 | ; " " 124 | ; col4 125 | ]) 126 | - : string list = 127 | [" jane-doe workstation-1 22 echo \"use ppx_string to interpolate\""; 128 | " root box-42 8080 echo \"it can even pad\""] 129 | ``` 130 | (note that the pad length can be dynamic, as with the format string `"%*s"`) 131 | 132 | ## Interacting with and producing local strings 133 | 134 | `ppx_string` can consume `local` expressions in interpolated components: 135 | 136 | ```ocaml 137 | # let module Local_string = struct 138 | type t = { box : string } 139 | 140 | let to_string ({ box } @ local) = box 141 | end 142 | in 143 | let f (s : string @@ local) (ls : Local_string.t @@ local) = 144 | {%string|concatenated %{s} and boxed %{ls#Local_string}|} 145 | in 146 | f "a" { box = "b" } 147 | - : string = "concatenated a and boxed b" 148 | ``` 149 | 150 | The resulting concatenation is still allocated on the heap and available 151 | `@ global`: 152 | 153 | ```ocaml 154 | # let assert_global : 'a @ global -> 'a @ global = fun x -> x in 155 | let local_ s = "this input is local" in 156 | assert_global {%string|the result is global, even though %{s}|} 157 | - : string = "the result is global, even though this input is local" 158 | ``` 159 | 160 | It is safe to return the result of the concatenation globally even if 161 | it has local components because we anyways allocate a new string for 162 | the contents of the concatenation, effectively "globalizing" any component. 163 | Globalizing in this way only incurs an additional cost in the case 164 | where the contents of the `[%string]` are a single interpolated 165 | component: in this case we need to globalize the component, when we 166 | otherwise could have returned it directly. 167 | 168 | For example: 169 | 170 | ```ocaml 171 | # let local_ s = "this input is local" in 172 | {%string|%{s}|} 173 | - : string = "this input is local" 174 | ``` 175 | 176 | effectively translates to 177 | 178 | ```ocaml 179 | # let local_ s = "this input is local" in 180 | String.globalize s 181 | - : string = "this input is local" 182 | ``` 183 | 184 | rather than 185 | 186 | ```ocaml 187 | # let local_ s = "this input is local" in 188 | Fn.id s 189 | Line 2, characters 3-10: 190 | Error: This value escapes its region. 191 | ``` 192 | 193 | Before `ppx_string` supported `local` inputs, a singleton interpolated 194 | expression was simply returned without globalizing, as in the latter example. 195 | The legacy behavior --- where inputs to the concatenation are accepted 196 | `@ global` and singleton inputs are returned as-is, without 197 | globalizing --- is still available via the `[%string.global]` extension: 198 | 199 | ```ocaml 200 | # let s = "no globalization occurs (trust me)" in 201 | {%string.global|%{s}|} 202 | - : string = "no globalization occurs (trust me)" 203 | # let local_ s = "this input is local" in 204 | {%string.global|%{s}|} 205 | Line 2, characters 7-8: 206 | Error: This value escapes its region. 207 | ``` 208 | 209 | We also plan eventually to support constructing `local` strings with 210 | `ppx_string`. 211 | -------------------------------------------------------------------------------- /dune: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janestreet/ppx_string/06e1adb7147fd4fd0d4b1a863026e33cf32bdbc6/dune -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 3.17) 2 | -------------------------------------------------------------------------------- /ppx_string.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "Jane Street developers" 3 | authors: ["Jane Street Group, LLC"] 4 | homepage: "https://github.com/janestreet/ppx_string" 5 | bug-reports: "https://github.com/janestreet/ppx_string/issues" 6 | dev-repo: "git+https://github.com/janestreet/ppx_string.git" 7 | doc: "https://ocaml.janestreet.com/ocaml-core/latest/doc/ppx_string/index.html" 8 | license: "MIT" 9 | build: [ 10 | ["dune" "build" "-p" name "-j" jobs] 11 | ] 12 | depends: [ 13 | "ocaml" {>= "5.1.0"} 14 | "base" 15 | "ppx_base" 16 | "ppx_template" 17 | "dune" {>= "3.17.0"} 18 | "ppxlib" {>= "0.33.0" & < "0.36.0"} 19 | ] 20 | available: arch != "arm32" & arch != "x86_32" 21 | synopsis: "Ppx extension for string interpolation" 22 | description: " 23 | Part of the Jane Street's PPX rewriters collection. 24 | " 25 | -------------------------------------------------------------------------------- /runtime/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name ppx_string_runtime) 3 | (public_name ppx_string.runtime) 4 | (libraries base) 5 | (preprocess 6 | (pps ppx_template))) 7 | -------------------------------------------------------------------------------- /runtime/ppx_string_runtime.ml: -------------------------------------------------------------------------------- 1 | open! Base 2 | include Ppx_string_runtime_intf.Definitions 3 | 4 | [%%template 5 | (* The behavior of [finish_one] in the global and local versions of [For_string] below 6 | differ in a way that isn't actually compatible with allocation polymorphism. What's 7 | different between them is not {i where} we would like the result to be allocated, but 8 | {i whether} we would like to allocate at all. In other words: in order to ensure that a 9 | value is on the heap, we want to move it to the heap only if we don't know that it's on 10 | the heap already. 11 | 12 | While we might be hesitant to supply a function like [globalize_if_local] for 13 | templating over in client code, we use it here only to implement the runtime modules 14 | for two different ppxs, and they need not actually be mode- or allocation-polymorphic. 15 | So our use of [ppx_template] here is just a trick for convenience. 16 | *) 17 | let globalize_if_local t = t 18 | let[@mode local] globalize_if_local t = String.globalize t 19 | 20 | module For_string = struct 21 | let empty = "" 22 | let of_string t = t 23 | let convert t = t 24 | let concat list = String.concat ~sep:"" list 25 | let finish_one = (globalize_if_local [@mode m]) 26 | 27 | let pad t ~len = 28 | let n = String.length t in 29 | if n >= len then t else String.make (len - n) ' ' ^ t 30 | ;; 31 | end 32 | [@@mode m = (global, local)]] 33 | -------------------------------------------------------------------------------- /runtime/ppx_string_runtime.mli: -------------------------------------------------------------------------------- 1 | include Ppx_string_runtime_intf.Ppx_string_runtime 2 | -------------------------------------------------------------------------------- /runtime/ppx_string_runtime_intf.ml: -------------------------------------------------------------------------------- 1 | open! Base 2 | 3 | module Definitions = struct 4 | (** Signature for runtime implementations of Ppx_string's backend. 5 | 6 | May be used for derived ppxes using different types or modified behavior. Types [t], 7 | [conversion], and [length] should be erased using destructive substitution, i.e. 8 | [:=]. Otherwise they introduce new aliases for the types in question, and error 9 | messages or Merlin may start referring to them. *) 10 | module type%template S = sig 11 | (** Result type of interpolation, and of interpolated [%{values}]. *) 12 | type t 13 | 14 | (** Result type of %[{converted#String}] interpolated values. This will often be 15 | either [string] or [t], depending on what is convenient for the configured ppx. *) 16 | type conversion 17 | 18 | (** Type of length values for %[{padding#:8}]. *) 19 | type length 20 | 21 | (** Empty string. *) 22 | val empty : t 23 | 24 | (** Literal string. *) 25 | val of_string : string -> t 26 | 27 | (** Finish a conversion to [t]. *) 28 | val convert : conversion -> t 29 | 30 | (** Pad to some minimum length. *) 31 | val pad : t -> len:length -> t 32 | 33 | (** Combine multiple values in order. *) 34 | val concat : t list -> t 35 | 36 | (** Called in place of [concat] when there is just one component, an interpolated 37 | part, in the interpolation. This function has two purposes: 38 | 39 | 1. Ensure an argument has type [t] in expanded code without calling [concat] and 40 | without needing the type [t] to be exported explicitly for a type annotation. 41 | See note above about destructive substitution. 42 | 2. Globalize [t] when using the version of the interface designed for extensions 43 | that consume [local]s but produce [global] concatenation results. *) 44 | val finish_one : t -> t 45 | end 46 | [@@mode m = (global, local)] 47 | end 48 | 49 | module type%template Ppx_string_runtime = sig 50 | include module type of struct 51 | include Definitions 52 | end 53 | 54 | module For_string : 55 | S [@mode m] with type t := string and type length := int and type conversion := string 56 | [@@mode m = (global, local)] 57 | end 58 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name ppx_string) 3 | (public_name ppx_string) 4 | (kind ppx_rewriter) 5 | (libraries base compiler-libs.common ppxlib) 6 | (ppx_runtime_libraries ppx_string.runtime) 7 | (preprocess 8 | (pps ppx_base ppxlib.metaquot ppxlib.traverse))) 9 | -------------------------------------------------------------------------------- /src/ppx_string.ml: -------------------------------------------------------------------------------- 1 | open Base 2 | open Ppxlib 3 | open Ast_builder.Default 4 | include Ppx_string_intf.Definitions 5 | 6 | module Where = struct 7 | type t = 8 | | Imprecise of Location.t 9 | | Precise of { mutable position : position } 10 | 11 | let is_precise = function 12 | | Imprecise _ -> false 13 | | Precise _ -> true 14 | ;; 15 | 16 | let advance position char = 17 | let pos_cnum = position.pos_cnum + 1 in 18 | match char with 19 | | '\n' -> 20 | { position with pos_lnum = position.pos_lnum + 1; pos_bol = pos_cnum; pos_cnum } 21 | | _ -> { position with pos_cnum } 22 | ;; 23 | 24 | let skip t string = 25 | match t with 26 | | Imprecise _ -> () 27 | | Precise at -> 28 | for pos = 0 to String.length string - 1 do 29 | at.position <- advance at.position string.[pos] 30 | done 31 | ;; 32 | 33 | let loc_start = function 34 | | Imprecise loc -> loc.loc_start 35 | | Precise { position } -> position 36 | ;; 37 | 38 | let loc_end = function 39 | | Imprecise loc -> loc.loc_end 40 | | Precise { position } -> position 41 | ;; 42 | 43 | let skip_with_loc t string = 44 | let loc_start = loc_start t in 45 | skip t string; 46 | let loc_end = loc_end t in 47 | { loc_ghost = true; loc_start; loc_end } 48 | ;; 49 | 50 | let has_escapes ~loc ~string ~delimiter = 51 | match delimiter with 52 | | Some _ -> false 53 | | None -> 54 | let unescaped_len = 1 + String.length string + 1 in 55 | let actual_len = loc.loc_end.pos_cnum - loc.loc_start.pos_cnum in 56 | unescaped_len <> actual_len 57 | ;; 58 | 59 | let literal_prefix ~delimiter = 60 | match delimiter with 61 | | None -> "\"" 62 | | Some id -> Printf.sprintf "{%s|" id 63 | ;; 64 | 65 | let create ~loc ~string ~delimiter ~preprocess_before_parsing = 66 | if Option.is_some preprocess_before_parsing || has_escapes ~loc ~string ~delimiter 67 | then Imprecise { loc with loc_ghost = true } 68 | else ( 69 | let t = Precise { position = loc.loc_start } in 70 | skip t (literal_prefix ~delimiter); 71 | t) 72 | ;; 73 | end 74 | 75 | let if_stack_allocating ~(config : Config.t) f expr = 76 | if config.assert_list_is_stack_allocated then f expr else expr 77 | ;; 78 | 79 | let dot id name = pexp_ident ~loc:id.loc { id with txt = Ldot (id.txt, name) } 80 | 81 | let config_expr ~(config : Config.t) ~loc name = 82 | dot { loc; txt = config.fully_qualified_runtime_module } name 83 | ;; 84 | 85 | let interpret 86 | ~(config : Config.t) 87 | ({ loc_start; value; module_path; pad_length; loc_end; interpreted_string = _ } : 88 | Part.Interpreted.t) 89 | = 90 | let loc = { loc_ghost = true; loc_start; loc_end } in 91 | let unpadded = 92 | match module_path with 93 | | None -> value 94 | | Some fn -> 95 | [%expr 96 | [%e config_expr ~config ~loc "convert"] 97 | ([%e dot fn config.conversion_function_name] [%e value])] 98 | in 99 | match pad_length with 100 | | None -> unpadded 101 | | Some len -> [%expr [%e config_expr ~config ~loc "pad"] [%e unpadded] ~len:[%e len]] 102 | ;; 103 | 104 | let parse_literal string ~where ~start ~until ~acc = 105 | if start >= until 106 | then acc 107 | else ( 108 | let literal = String.sub string ~pos:start ~len:(until - start) in 109 | let loc = Where.skip_with_loc where literal in 110 | Part.Literal { txt = literal; loc } :: acc) 111 | ;; 112 | 113 | let set_locs loc = 114 | object 115 | inherit Ast_traverse.map 116 | method! location _ = loc 117 | end 118 | ;; 119 | 120 | let parse_error ~loc ~name string = 121 | Location.raise_errorf ~loc "invalid %s: %S" name string 122 | ;; 123 | 124 | let parse_expression ~where ~loc ~name string = 125 | let lexbuf = Lexing.from_string string in 126 | lexbuf.lex_abs_pos <- loc.loc_start.pos_cnum; 127 | lexbuf.lex_curr_p <- loc.loc_start; 128 | match Parse.expression lexbuf with 129 | | exception _ -> parse_error ~loc ~name string 130 | | expr -> if Where.is_precise where then expr else (set_locs loc)#expression expr 131 | ;; 132 | 133 | let parse_ident ~where ~loc ~name module_path = 134 | match parse_expression ~where ~loc ~name module_path with 135 | | { pexp_desc = Pexp_construct (ident, None); _ } -> ident 136 | | _ -> parse_error ~loc ~name module_path 137 | ;; 138 | 139 | let parse_body ~where string = 140 | let loc = Where.skip_with_loc where string in 141 | parse_expression ~where ~loc ~name:"%{...} expression" string 142 | ;; 143 | 144 | let parse_module_path ~where string = 145 | let loc = Where.skip_with_loc where string in 146 | parse_ident ~where ~loc ~name:"%{...} module path" string 147 | ;; 148 | 149 | let parse_pad_length ~where string = 150 | let loc = Where.skip_with_loc where string in 151 | parse_expression ~where ~loc ~name:"%{...} pad length" string 152 | ;; 153 | 154 | let parse_interpreted string ~where ~start ~until ~acc = 155 | Where.skip where "%{"; 156 | let loc_start = Where.loc_start where in 157 | let string = String.sub string ~pos:start ~len:(until - start) in 158 | let value, module_path, pad_length = 159 | match String.rsplit2 string ~on:'#' with 160 | | None -> 161 | let value = parse_body ~where string in 162 | value, None, None 163 | | Some (body, formatting) -> 164 | let body = parse_body ~where body in 165 | Where.skip where "#"; 166 | let module_path, pad_length = 167 | match String.rsplit2 formatting ~on:':' with 168 | | None -> 169 | let fn = parse_module_path ~where formatting in 170 | Some fn, None 171 | | Some (module_path, pad_length) -> 172 | let fn = 173 | if String.is_empty module_path 174 | then None 175 | else Some (parse_module_path ~where module_path) 176 | in 177 | Where.skip where ":"; 178 | let len = parse_pad_length ~where pad_length in 179 | fn, Some len 180 | in 181 | body, module_path, pad_length 182 | in 183 | let loc_end = Where.loc_end where in 184 | Where.skip where "}"; 185 | Part.Interpreted 186 | { loc_start; value; module_path; pad_length; loc_end; interpreted_string = string } 187 | :: acc 188 | ;; 189 | 190 | type interpreted = 191 | { percent : int 192 | ; lbrace : int 193 | ; rbrace : int 194 | } 195 | 196 | let find_interpreted string ~where ~pos = 197 | String.substr_index string ~pos ~pattern:"%{" 198 | |> Option.map ~f:(fun percent -> 199 | let lbrace = percent + 1 in 200 | match String.substr_index string ~pos:(lbrace + 1) ~pattern:"}" with 201 | | None -> 202 | Where.skip where (String.sub string ~pos ~len:(percent - pos)); 203 | let loc = Where.skip_with_loc where "%{" in 204 | Location.raise_errorf ~loc "unterminated %%{" 205 | | Some rbrace -> { percent; lbrace; rbrace }) 206 | ;; 207 | 208 | let rec parse_from string ~where ~pos ~acc = 209 | match find_interpreted string ~where ~pos with 210 | | None -> 211 | let len = String.length string in 212 | let acc = parse_literal string ~where ~start:pos ~until:len ~acc in 213 | List.rev acc 214 | | Some { percent; lbrace; rbrace } -> 215 | let acc = parse_literal string ~where ~start:pos ~until:percent ~acc in 216 | let acc = parse_interpreted string ~where ~start:(lbrace + 1) ~until:rbrace ~acc in 217 | parse_from string ~where ~pos:(rbrace + 1) ~acc 218 | ;; 219 | 220 | let parse ~(config : Config.t) ~string_loc ~delimiter string = 221 | let preprocess_before_parsing = config.preprocess_before_parsing in 222 | let string = 223 | match preprocess_before_parsing with 224 | | None -> string 225 | | Some preprocess -> preprocess string 226 | in 227 | let where = 228 | Where.create ~loc:string_loc ~delimiter ~string ~preprocess_before_parsing 229 | in 230 | let parts = parse_from string ~where ~pos:0 ~acc:[] in 231 | let locations_are_precise = Where.is_precise where in 232 | ({ parts; locations_are_precise } : Parse_result.t) 233 | ;; 234 | 235 | let expand_part_to_expression ~config part = 236 | match (part : Part.t) with 237 | | Literal { txt; loc } -> 238 | [%expr [%e config_expr ~config ~loc "of_string"] [%e estring txt ~loc]] 239 | | Interpreted interpreted -> interpret ~config interpreted 240 | ;; 241 | 242 | let concatenate ~config ~loc (parts_and_exprs : (Part.t * expression) list) = 243 | let nontail_if_stack_allocating expr = 244 | if_stack_allocating ~config (fun e -> [%expr [%e e] [@nontail]]) expr 245 | in 246 | match parts_and_exprs with 247 | | [] -> [%expr [%e config_expr ~config ~loc "empty"]] 248 | | [ (part, expr) ] -> 249 | (match part with 250 | | Literal _ -> expr 251 | | Interpreted _ -> 252 | [%expr [%e config_expr ~config ~loc "finish_one"] [%e expr]] 253 | |> nontail_if_stack_allocating) 254 | | multiple -> 255 | [%expr 256 | [%e config_expr ~config ~loc "concat"] 257 | [%e 258 | elist ~loc (List.map ~f:snd multiple) 259 | |> if_stack_allocating ~config (fun e -> [%expr [%e e]])]] 260 | |> nontail_if_stack_allocating 261 | ;; 262 | 263 | let expand ~config ~expr_loc ~string_loc ~string ~delimiter = 264 | (parse ~config ~string_loc ~delimiter string).parts 265 | |> List.map ~f:(fun part -> part, expand_part_to_expression ~config part) 266 | |> concatenate ~config ~loc:expr_loc 267 | ;; 268 | 269 | let extension ~name ~(config : Config.t) = 270 | Extension.declare 271 | name 272 | Extension.Context.expression 273 | Ast_pattern.(pstr (pstr_eval (pexp_constant (pconst_string __' __ __)) nil ^:: nil)) 274 | (fun ~loc:expr_loc ~path:_ { loc = string_loc; txt = string } _ delimiter -> 275 | Merlin_helpers.hide_expression 276 | (expand ~config ~expr_loc ~string_loc ~string ~delimiter)) 277 | ;; 278 | 279 | let config_for_string ~local : Config.t = 280 | let suffix = if local then "__local" else "" in 281 | { fully_qualified_runtime_module = 282 | Ldot (Lident "Ppx_string_runtime", "For_string" ^ suffix) 283 | ; conversion_function_name = "to_string" 284 | ; preprocess_before_parsing = None 285 | ; assert_list_is_stack_allocated = local 286 | } 287 | ;; 288 | 289 | let () = 290 | Ppxlib.Driver.register_transformation 291 | "ppx_string" 292 | ~extensions: 293 | [ extension ~name:"ppx_string.string" ~config:(config_for_string ~local:true) 294 | ; extension 295 | ~name:"ppx_string.@string.global" 296 | ~config:(config_for_string ~local:false) 297 | ] 298 | ;; 299 | -------------------------------------------------------------------------------- /src/ppx_string.mli: -------------------------------------------------------------------------------- 1 | include Ppx_string_intf.Ppx_string 2 | -------------------------------------------------------------------------------- /src/ppx_string_intf.ml: -------------------------------------------------------------------------------- 1 | open! Base 2 | open Ppxlib 3 | 4 | module Definitions = struct 5 | (** Used to configure different instances of this ppx. May be used, for example, to add 6 | preprocessing, or to interpolate a different string-like type. *) 7 | module Config = struct 8 | type t = 9 | { fully_qualified_runtime_module : Longident.t 10 | (** Where to find an implementation of [Ppx_string_runtime.S]. The implementation of 11 | [[%string]] is at [Ldot (Lident "Ppx_string_runtime", "For_string")] *) 12 | ; conversion_function_name : string 13 | (** Conversion function implied by ["%{expr#Module}"], e.g. ["to_string"]. *) 14 | ; preprocess_before_parsing : (string -> string) option 15 | (** Preprocessing to apply before parsing the string for interpolation. If [None], 16 | source locations can be computed precisely based on the result of parsing. *) 17 | ; assert_list_is_stack_allocated : bool 18 | (** Whether to wrap the list passed to [concat] with [stack_] and add the related 19 | necessary [[@nontail]] annotations. *) 20 | } 21 | end 22 | 23 | module Part = struct 24 | module Interpreted = struct 25 | type t = 26 | { loc_start : position 27 | ; value : expression 28 | ; module_path : longident_loc option 29 | ; pad_length : expression option 30 | ; loc_end : position 31 | ; interpreted_string : string 32 | (** [interpreted_string] is the string of the interpreted part. (e.g. in the 33 | example %[{foo#Foo}], the string is "foo#Foo") *) 34 | } 35 | end 36 | 37 | type t = 38 | | Literal of string loc 39 | | Interpreted of Interpreted.t 40 | end 41 | 42 | module Parse_result = struct 43 | type t = 44 | { parts : Part.t list 45 | ; locations_are_precise : bool 46 | } 47 | end 48 | end 49 | 50 | module type Ppx_string = sig 51 | include module type of struct 52 | include Definitions 53 | end 54 | 55 | (** Parse a string to find interpolated substrings. *) 56 | val parse 57 | : config:Config.t 58 | -> string_loc:location 59 | -> delimiter:string option 60 | -> string 61 | -> Parse_result.t 62 | 63 | (** Interpret an interpolated string as an expression, including [%{conversions#String}] 64 | and [%{padding#:8}]. *) 65 | val interpret : config:Config.t -> Part.Interpreted.t -> expression 66 | 67 | (** Combines [parse], [interpret], and concatenation to expand an interpolated string to 68 | an expression implementing it. *) 69 | val expand 70 | : config:Config.t 71 | -> expr_loc:location 72 | -> string_loc:location 73 | -> string:string 74 | -> delimiter:string option 75 | -> expression 76 | 77 | (** Construct an [Extension.t] implementing the configured interpolation ppx. *) 78 | val extension : name:string -> config:Config.t -> Extension.t 79 | 80 | (** Configuration for [[%string]] family: string type and conversion type are [string], 81 | length type is [int], and no preprocessing. When [~local:true], the configuration 82 | for [[%string]], otherwise the configuration for [[%string.global]] *) 83 | val config_for_string : local:bool -> Config.t 84 | end 85 | --------------------------------------------------------------------------------