├── CHANGES.md ├── .merlin ├── .ocp-indent ├── .gitignore ├── pkg ├── META ├── build.ml ├── config.ml ├── git.ml ├── topkg-ext.ml └── topkg.ml ├── _tags ├── myocamlbuild.ml ├── opam ├── test └── test.ml ├── src └── ppx_utf8_lit.ml └── README.md /CHANGES.md: -------------------------------------------------------------------------------- 1 | # v0.0.0 YYYY-MM-DD Location 2 | -------------------------------------------------------------------------------- /.merlin: -------------------------------------------------------------------------------- 1 | PKG compiler-libs.common uutf uunf 2 | -------------------------------------------------------------------------------- /.ocp-indent: -------------------------------------------------------------------------------- 1 | strict_with=always,match_clause=4,strict_else=never -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | tmp 3 | *~ 4 | \#*# 5 | CLOCK.org 6 | *.byte 7 | *.native 8 | -------------------------------------------------------------------------------- /pkg/META: -------------------------------------------------------------------------------- 1 | description = "UTF-8 string literals and patterns for OCaml" 2 | version = "%%VERSION%%" 3 | ppx = "./ppx_utf8_lit" -------------------------------------------------------------------------------- /_tags: -------------------------------------------------------------------------------- 1 | true: bin_annot, debug 2 | : include 3 | : package(compiler-libs.common), \ 4 | package(uutf), package(uunf) 5 | 6 | : include 7 | : ppx_utf8_lit -------------------------------------------------------------------------------- /myocamlbuild.ml: -------------------------------------------------------------------------------- 1 | open Ocamlbuild_plugin 2 | 3 | let () = 4 | dispatch begin function 5 | | After_rules -> 6 | flag ["ocaml"; "compile"; "ppx_utf8_lit"] & 7 | S [A "-ppx"; A "src/ppx_utf8_lit.native"]; 8 | | _ -> () 9 | end 10 | -------------------------------------------------------------------------------- /pkg/build.ml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ocaml 2 | #directory "pkg";; 3 | #use "topkg.ml";; 4 | 5 | let () = 6 | Pkg.describe "ppx_utf8_lit" ~builder:`OCamlbuild [ 7 | Pkg.lib "pkg/META"; 8 | Pkg.libexec ~auto:true "src/ppx_utf8_lit"; 9 | Pkg.doc "README.md"; 10 | Pkg.doc "CHANGES.md"; ] 11 | -------------------------------------------------------------------------------- /pkg/config.ml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ocaml 2 | #directory "pkg" 3 | #use "topkg-ext.ml" 4 | 5 | module Config = struct 6 | include Config_default 7 | let vars = 8 | [ "NAME", "ppx_utf8_lit"; 9 | "VERSION", Git.describe ~chop_v:true "master"; 10 | "MAINTAINER", "Daniel Bünzli " ] 11 | end 12 | -------------------------------------------------------------------------------- /pkg/git.ml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ocaml 2 | #directory "pkg" 3 | #use "config.ml" 4 | 5 | (* This is only for git checkout builds, it can be ignored 6 | for distribution builds. *) 7 | 8 | let () = 9 | if Dir.exists ".git" then begin 10 | Vars.subst ~skip:Config.subst_skip ~vars:Config.vars ~dir:"." 11 | >>& fun () -> Cmd.exec_hook Config.git_hook 12 | >>& fun () -> () 13 | end 14 | 15 | -------------------------------------------------------------------------------- /opam: -------------------------------------------------------------------------------- 1 | opam-version: "1.2" 2 | maintainer: "Daniel Bünzli " 3 | authors: ["Daniel Bünzli "] 4 | homepage: "http://erratique.ch/software/ppx_utf8_lit" 5 | doc: "http://erratique.ch/software/ppx_utf8_lit" 6 | dev-repo: "http://erratique.ch/repos/ppx_utf8_lit.git" 7 | bug-reports: "https://github.com/dbuenzli/ppx_utf8_lit/issues" 8 | tags: [ "ppx" "syntax" "unicode" "text" "normalization" "org:erratique" ] 9 | license: "BSD3" 10 | available: [ ocaml-version >= "4.02.0"] 11 | depends: [ "ocamlfind" "uutf" "uunf" ] 12 | build: 13 | [ 14 | [ "ocaml" "pkg/git.ml" ] 15 | [ "ocaml" "pkg/build.ml" "native=%{ocaml-native}%" 16 | "native-dynlink=%{ocaml-native-dynlink}%" ] 17 | ] 18 | -------------------------------------------------------------------------------- /test/test.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 Daniel C. Bünzli. All rights reserved. 3 | Distributed under the BSD3 license, see license at the end of the file. 4 | %%NAME%% release %%VERSION%% 5 | ---------------------------------------------------------------------------*) 6 | 7 | let assert_lits () = 8 | assert ("Révolte"[@u] = "R\xC3\xA9volte"); 9 | assert ("Révolte"[@u.nfc] = "R\xC3\xA9volte"); 10 | assert ("Révolte"[@u.nfd] = "Re\xCC\x81volte"); 11 | assert ("fi"[@u.nfd] = "\xEF\xAC\x81"); 12 | assert ("fi"[@u.nfc] = "\xEF\xAC\x81"); 13 | assert ("fi"[@u.nfkd] = "fi"); 14 | assert ("fi"[@u.nfkc] = "fi"); 15 | () 16 | 17 | let assert_pats () = 18 | assert (match "Révolte"[@u] with "Révolte"[@u] -> true | _ -> false); 19 | assert (match "Révolte"[@u.nfc] with "Révolte"[@u.nfc] -> true | _ -> false); 20 | assert (match "Révolte"[@u.nfc] with "Révolte"[@u.nfd] -> false | _ -> true); 21 | assert (match "Révolte"[@u.nfd] with "Révolte"[@u.nfd] -> true | _ -> false); 22 | assert (match "Révolte"[@u.nfd] with "Révolte"[@u.nfc] -> false | _ -> true); 23 | assert (match "fi"[@u.nfc] with "fi"[@u.nfc] -> true | _ -> false ); 24 | assert (match "fi"[@u.nfc] with "fi"[@u.nfd] -> true | _ -> false ); 25 | assert (match "fi"[@u.nfc] with "fi"[@u.nfkd] -> false | _ -> true ); 26 | assert (match "fi"[@u.nfc] with "fi"[@u.nfkc] -> false | _ -> true ); 27 | assert (match "fi"[@u.nfd] with "fi"[@u.nfd] -> true | _ -> false ); 28 | assert (match "fi"[@u.nfkd] with "fi" -> true | _ -> false ); 29 | assert (match "fi"[@u.nfkc] with "fi" -> true | _ -> false ); 30 | () 31 | 32 | let () = 33 | assert_lits (); 34 | assert_pats (); 35 | () 36 | 37 | (*--------------------------------------------------------------------------- 38 | Copyright (c) 2015 Daniel C. Bünzli. 39 | All rights reserved. 40 | 41 | Redistribution and use in source and binary forms, with or without 42 | modification, are permitted provided that the following conditions 43 | are met: 44 | 45 | 1. Redistributions of source code must retain the above copyright 46 | notice, this list of conditions and the following disclaimer. 47 | 48 | 2. Redistributions in binary form must reproduce the above 49 | copyright notice, this list of conditions and the following 50 | disclaimer in the documentation and/or other materials provided 51 | with the distribution. 52 | 53 | 3. Neither the name of Daniel C. Bünzli nor the names of 54 | contributors may be used to endorse or promote products derived 55 | from this software without specific prior written permission. 56 | 57 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 58 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 59 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 60 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 61 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 62 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 63 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 64 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 65 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 66 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 67 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 68 | ---------------------------------------------------------------------------*) 69 | -------------------------------------------------------------------------------- /src/ppx_utf8_lit.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 Daniel C. Bünzli. All rights reserved. 3 | Distributed under the BSD3 license, see license at the end of the file. 4 | %%NAME%% release %%VERSION%% 5 | ---------------------------------------------------------------------------*) 6 | 7 | open Ast_mapper 8 | open Ast_helper 9 | open Asttypes 10 | open Parsetree 11 | open Longident 12 | 13 | (* Errors *) 14 | 15 | let strf = Printf.sprintf 16 | let err_utf8 ~loc bytes = 17 | let err = strf "illegal bytes (%S) found in UTF-8 encoded string" bytes in 18 | raise (Location.Error (Location.error ~loc err)) 19 | 20 | (* UTF-8 validity check and normalization *) 21 | 22 | let utf8_normalize ~loc nf s = 23 | let b = Buffer.create (String.length s * 3) in 24 | let n = Uunf.create nf in 25 | let rec add v = match Uunf.add n v with 26 | | `Uchar u -> Uutf.Buffer.add_utf_8 b u; add `Await 27 | | `Await | `End -> () 28 | in 29 | let add_uchar _ _ = function 30 | | `Malformed bytes -> err_utf8 ~loc bytes 31 | | `Uchar _ as u -> add u 32 | in 33 | Uutf.String.fold_utf_8 add_uchar () s; add `End; Buffer.contents b 34 | 35 | (* AST mapper *) 36 | 37 | let uattr_to_nf = function 38 | | "u" | "u.nfc" -> `NFC 39 | | "u.nfd" -> `NFD 40 | | "u.nfkd" -> `NFKD 41 | | "u.nfkc" -> `NFKC 42 | | _ -> assert false 43 | 44 | let find_uattr attrs = 45 | let attrs = List.rev attrs (* last u[.*] attribute takes over *) in 46 | let rec loop = function 47 | | ({ txt = ("u"|"u.nfd"|"u.nfc"|"u.nfkd"|"u.nfkc" as a); _}, _) :: _ -> Some a 48 | | _ :: atts -> loop atts 49 | | [] -> None 50 | in 51 | loop attrs 52 | 53 | let expr m = function 54 | | { pexp_loc = loc; 55 | pexp_desc = Pexp_constant (Const_string (str, v)); 56 | pexp_attributes; } as e -> 57 | begin match find_uattr pexp_attributes with 58 | | None -> default_mapper.expr m e 59 | | Some uattr -> 60 | let ustr = utf8_normalize ~loc (uattr_to_nf uattr) str in 61 | let pexp_desc = Pexp_constant (Const_string (ustr, v)) in 62 | default_mapper.expr m { e with pexp_desc } 63 | end 64 | | e -> default_mapper.expr m e 65 | 66 | let pat m = function 67 | | { ppat_loc = loc; 68 | ppat_desc = Ppat_constant (Const_string (str, v)); 69 | ppat_attributes; } as p -> 70 | begin match find_uattr ppat_attributes with 71 | | None -> default_mapper.pat m p 72 | | Some uattr -> 73 | let ustr = utf8_normalize ~loc (uattr_to_nf uattr) str in 74 | let ppat_desc = Ppat_constant (Const_string (ustr, v)) in 75 | default_mapper.pat m { p with ppat_desc } 76 | end 77 | | p -> default_mapper.pat m p 78 | 79 | let utf8_lit_mapper argv = { default_mapper with expr; pat } 80 | 81 | let () = register "utf8_lit" utf8_lit_mapper 82 | 83 | (*--------------------------------------------------------------------------- 84 | Copyright (c) 2015 Daniel C. Bünzli. 85 | All rights reserved. 86 | 87 | Redistribution and use in source and binary forms, with or without 88 | modification, are permitted provided that the following conditions 89 | are met: 90 | 91 | 1. Redistributions of source code must retain the above copyright 92 | notice, this list of conditions and the following disclaimer. 93 | 94 | 2. Redistributions in binary form must reproduce the above 95 | copyright notice, this list of conditions and the following 96 | disclaimer in the documentation and/or other materials provided 97 | with the distribution. 98 | 99 | 3. Neither the name of Daniel C. Bünzli nor the names of 100 | contributors may be used to endorse or promote products derived 101 | from this software without specific prior written permission. 102 | 103 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 104 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 105 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 106 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 107 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 108 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 109 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 110 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 111 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 112 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 113 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 114 | ---------------------------------------------------------------------------*) 115 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ppx_utf8_lit — UTF-8 string literals and patterns for OCaml 2 | ------------------------------------------------------------ 3 | Experiment 4 | 5 | This ppx explores *one* design direction to improve the Unicode 6 | support provided by the OCaml compiler. See the 7 | [rationale](#rationale). 8 | 9 | `ppx_utf8_lit` depends on [Uutf][1] and [Uunf][2] 10 | 11 | [1]: http://erratique.ch/software/uutf 12 | [2]: http://erratique.ch/software/uunf 13 | 14 | # Installation and usage 15 | 16 | Note, `ppx_utf8_lit` is not formally released, it is an experiment. 17 | 18 | ``` 19 | # With opam 1.2 or later 20 | opam pin add ppx_utf8_lit http://erratique.ch/repos/ppx_utf8_lit.git 21 | ``` 22 | 23 | The `ppx` understands the following attributes on string literals 24 | and patterns: 25 | 26 | * `"Révolte"[@u]` checks for UTF-8 validity and puts the string in NFC. 27 | * `"Révolte"[@u.nfc]` checks for UTF-8 validity and puts the string in NFC. 28 | * `"Révolte"[@u.nfd]` checks for UTF-8 validity and puts the string in NFD. 29 | * `"Révolte"[@u.nfkd]` checks for UTF-8 validity and puts the string in NFKD. 30 | * `"Révolte"[@u.nfkc]` checks for UTF-8 validity and puts the string in NFKC. 31 | 32 | To compile a source with the `ppx`: 33 | 34 | ``` 35 | ocamfind ocamlc -package ppx_utf8_lit src.ml 36 | ``` 37 | 38 | # Rationale 39 | 40 | Rather than introducing a new Unicode string datastructure and 41 | associated new literal and pattern notations `ppx_utf8_lit` tries to 42 | improve the situation for libraries and programs that adopted the idea 43 | of interpreting current OCaml strings (which are fundamentally 44 | sequences of bytes) as UTF-8 encoded text. It does so by using the 45 | attribute mechanism introduced in OCaml 4.02. 46 | 47 | The advantage of interpreting current OCaml strings as UTF-8 encoded 48 | text are: 49 | 50 | 1. No new notation or primitive type is introduced. This means that 51 | the surface syntax of the language doesn't change and that other 52 | subsystems remain untouched (e.g. format specifiers). Besides nothing 53 | needs to change in the compiler itself except at the parsing phase. 54 | 55 | 2. It plays exceptionally well with the current OCaml system as it 56 | exists (for example with `Printf`, `Format` and IO primitives). 57 | 58 | 3. Since `latin1` identifiers in source code have been deprecated in 59 | OCaml 4.01, if a source is only using `US-ASCII` identifiers it can be 60 | UTF-8 encoded which allows to directly write UTF-8 string literals and 61 | patterns. 62 | 63 | However there are two problems with these UTF-8 literals and patterns: 64 | 65 | 1. The compiler sees them as sequences of bytes, hence they cannot be 66 | trusted as being valid UTF-8 in your program (e.g. if your editor 67 | has bugs in its UTF-8 encoder). The only way to make sure the 68 | encoding will be correct is to escape the UTF-8 encoding which is 69 | not particularly readable (e.g. `"R\xC3\xA9volte"` vs `"Révolte"`) . 70 | 71 | 2. You don't get any guarantee on the Unicode normal form (if any) in 72 | which the literals and patterns occur. They are subject to what 73 | your editor decided to choose. Which is problematic for testing 74 | equality (see 75 | [here](http://erratique.ch/software/uucp/doc/Uucp#equivalence) for 76 | a quick recall on why Unicode normalization is essential for 77 | testing equality). This means that you have to convert to a normal 78 | form manually and explicitely escape the UTF-8 which is neither 79 | convenient nor readable. (e.g. `"Révolte"` in NFD would be 80 | `"Re\xCC\x81volte"`) 81 | 82 | In order to alleviate this, we introduce 5 annotations on string 83 | literals and patterns. Any string sporting such an annotation will be 84 | checked for UTF-8 validity with compilation failing if that is not the 85 | case. Besides each of the annotation will guarantee the string is 86 | converted to one of the four Unicode normal form. 87 | 88 | * `"Révolte"[@u]` checks for UTF-8 validity and puts the string in NFC. 89 | * `"Révolte"[@u.nfc]` checks for UTF-8 validity and puts the string in NFC. 90 | * `"Révolte"[@u.nfd]` checks for UTF-8 validity and puts the string in NFD. 91 | * `"Révolte"[@u.nfkd]` checks for UTF-8 validity and puts the string in NFKD. 92 | * `"Révolte"[@u.nfkc]` checks for UTF-8 validity and puts the string in NFKC. 93 | 94 | The reason for using NFC for the `[@u]` notation is that this is the 95 | normalization recommended by the w3c for the 96 | [web](http://www.w3.org/TR/charmod-norm/#h4_choice-of-normalization-form). I 97 | have no strong opinion about that though (thought about a filename 98 | friendly normalization form but according to 99 | [this](https://github.com/whitequark/ocaml-m17n#interaction-with-filesystem) 100 | there's no cross-platform consensus – and it some sense it should be 101 | the task of the FS APIs to normalize whatever we feed them with). 102 | 103 | This means that now, if I you make sure that the strings you input are 104 | in a given normal form (using 105 | e.g. [Uutf](http://erratique.ch/software/uutf) and 106 | [Uunf](http://erratique.ch/software/uunf)) you can safely pattern 107 | match on them. For example: 108 | 109 | ```ocaml 110 | let is_fr_revolt s = match s (* assuming [s] is in NFC form *) with 111 | | "Révolte"[@u] -> true 112 | | _ -> false 113 | 114 | let () = 115 | assert (is_fr_revolt ("Révolte"[@u])); 116 | assert (not (is_fr_revolt ("Révolte"[@u.nfd])); 117 | () 118 | ``` 119 | -------------------------------------------------------------------------------- /pkg/topkg-ext.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2014 Daniel C. Bünzli. All rights reserved. 3 | Distributed under the BSD3 license, see license at the end of the file. 4 | %%NAME%% release %%VERSION%% 5 | ---------------------------------------------------------------------------*) 6 | 7 | let ( >>= ) v f = match v with `Ok v -> f v | `Error _ as e -> e 8 | let ( >>& ) v f = match v with 9 | | `Ok v -> f v | `Error e -> Printf.eprintf "%s: %s\n%!" Sys.argv.(0) e; exit 1 10 | 11 | type 'a result = [ `Ok of 'a | `Error of string ] 12 | 13 | (** Working with files *) 14 | module File : sig 15 | val exists : string -> bool 16 | (** [exists file] is [true] if [file] exists. *) 17 | 18 | val read : string -> string result 19 | (** [read file] is [file]'s contents. *) 20 | 21 | val write : string -> string -> unit result 22 | (** [write file content] writes [contents] to [file]. *) 23 | 24 | val write_subst : string -> (string * string) list -> string -> unit result 25 | (** [write_subst file vars content] writes [contents] to [file] 26 | substituting variables of the form [%%ID%%] by their definition. 27 | The [ID]'s are [List.map fst vars] and their definition content 28 | is found with [List.assoc]. *) 29 | 30 | val delete : ?maybe:bool -> string -> unit result 31 | (** [delete maybe file] deletes file [file]. If [maybe] is [true] (defaults 32 | to false) no error is reported if the file doesn't exist. *) 33 | 34 | val temp : unit -> string result 35 | (** [temp ()] creates a temporary file and returns its name. The file 36 | is destroyed at the end of program execution. *) 37 | end = struct 38 | let exists = Sys.file_exists 39 | let read file = try 40 | let ic = open_in file in 41 | let len = in_channel_length ic in 42 | let s = String.create len in 43 | really_input ic s 0 len; close_in ic; `Ok s 44 | with Sys_error e -> `Error e 45 | 46 | let write f s = try 47 | let oc = open_out f in 48 | output_string oc s; close_out oc; `Ok () 49 | with Sys_error e -> `Error e 50 | 51 | let write_subst f vars s = try 52 | let oc = open_out f in 53 | let start = ref 0 in 54 | let last = ref 0 in 55 | let len = String.length s in 56 | while (!last < len - 4) do 57 | if not (s.[!last] = '%' && s.[!last + 1] = '%') then incr last else 58 | begin 59 | let start_subst = !last in 60 | let last_id = ref (!last + 2) in 61 | let stop = ref false in 62 | while (!last_id < len - 1 && not !stop) do 63 | if not (s.[!last_id] = '%' && s.[!last_id + 1] = '%') then begin 64 | if s.[!last_id] <> ' ' then (incr last_id) else 65 | (stop := true; last := !last_id) 66 | end else begin 67 | let id_start = start_subst + 2 in 68 | let id = String.sub s (id_start) (!last_id - id_start) in 69 | try 70 | let subst = List.assoc id vars in 71 | output oc s !start (start_subst - !start); 72 | output_string oc subst; 73 | stop := true; 74 | start := !last_id + 2; 75 | last := !last_id + 2; 76 | with Not_found -> 77 | stop := true; 78 | last := !last_id 79 | end 80 | done 81 | end 82 | done; 83 | output oc s !start (len - !start); close_out oc; `Ok () 84 | with Sys_error e -> `Error e 85 | 86 | let delete ?(maybe = false) file = try 87 | if maybe && not (exists file) then `Ok () else 88 | `Ok (Sys.remove file) 89 | with Sys_error e -> `Error e 90 | 91 | let temp () = try 92 | let f = Filename.temp_file (Filename.basename Sys.argv.(0)) "topkg" in 93 | at_exit (fun () -> ignore (delete f)); `Ok f 94 | with Sys_error e -> `Error e 95 | end 96 | 97 | (** Working with directories. *) 98 | module Dir : sig 99 | val exists : string -> bool 100 | (** [exists dir] is [true] if directory [dir] exists. *) 101 | 102 | val change_cwd : string -> unit result 103 | (** [change_cwd dir] changes the current working directory to [dir]. *) 104 | 105 | val fold_files_rec : ?skip:string list -> (string -> 'a -> 'a result) -> 106 | 'a -> string list -> 'a result 107 | (** [fold_files_rec skip f acc paths] folds [f] over the files 108 | found in [paths]. Files and directories whose suffix matches an 109 | element of [skip] are skipped. *) 110 | end = struct 111 | let exists dir = Sys.file_exists dir && Sys.is_directory dir 112 | let change_cwd dir = try `Ok (Sys.chdir dir) with Sys_error e -> `Error e 113 | let fold_files_rec ?(skip = []) f acc paths = 114 | let is_dir d = try Sys.is_directory d with Sys_error _ -> false in 115 | let readdir d = try Array.to_list (Sys.readdir d) with Sys_error _ -> [] in 116 | let keep p = not (List.exists (fun s -> Filename.check_suffix p s) skip) in 117 | let process acc file = match acc with 118 | | `Error _ as e -> e 119 | | `Ok acc -> f file acc 120 | in 121 | let rec aux f acc = function 122 | | (d :: ds) :: up -> 123 | let paths = List.rev_map (Filename.concat d) (readdir d) in 124 | let paths = List.find_all keep paths in 125 | let dirs, files = List.partition is_dir paths in 126 | begin match List.fold_left process acc files with 127 | | `Error _ as e -> e 128 | | `Ok _ as acc -> aux f acc (dirs :: ds :: up) 129 | end 130 | | [] :: [] -> acc 131 | | [] :: up -> aux f acc up 132 | | _ -> assert false 133 | in 134 | let paths = List.find_all keep paths in 135 | let dirs, files = List.partition is_dir paths in 136 | let acc = List.fold_left process (`Ok acc) files in 137 | aux f acc (dirs :: []) 138 | end 139 | 140 | (** Command invocation. *) 141 | module Cmd : sig 142 | val exec : string -> unit result 143 | (** [exec cmd] executes [cmd]. *) 144 | 145 | val exec_hook : string option -> unit result 146 | (** [exec_hook args] is [exec ("ocaml " ^ "args")] if [args] is some. *) 147 | 148 | val read : string -> string result 149 | (** [read cmd] executes [cmd] and returns the contents of its stdout. *) 150 | end = struct 151 | let exec cmd = 152 | let code = Sys.command cmd in 153 | if code = 0 then `Ok () else 154 | `Error (Printf.sprintf "invocation `%s' exited with %d" cmd code) 155 | 156 | let exec_hook args = match args with 157 | | None -> `Ok () 158 | | Some args -> exec (Printf.sprintf "ocaml %s" args) 159 | 160 | let read cmd = 161 | File.temp () >>= fun file -> 162 | exec (Printf.sprintf "%s > %s" cmd file) >>= fun () -> 163 | File.read file >>= fun v -> 164 | `Ok v 165 | end 166 | 167 | (** Variable substitution. *) 168 | module Vars : sig 169 | val subst : skip:string list -> vars:(string * string) list -> 170 | dir:string -> unit result 171 | (** [subst skip vars dir] substitutes [vars] in all files 172 | in [dir] except those that are [skip]ped (see {!Dir.fold_files_rec}). *) 173 | 174 | val get : string -> (string * string) list -> string result 175 | (** [get v] lookup variable [v] in [vars]. Returns an error if [v] is 176 | absent or if it is the empty string. *) 177 | 178 | end = struct 179 | let subst ~skip ~vars ~dir = 180 | let subst f () = 181 | File.read f >>= fun contents -> 182 | File.write_subst f vars contents >>= fun () -> `Ok () 183 | in 184 | Dir.fold_files_rec ~skip subst () [dir] 185 | 186 | let get v vars = 187 | let v = try List.assoc v vars with Not_found -> "" in 188 | if v <> "" then `Ok v else 189 | `Error (Printf.sprintf "empty or undefined variable %s in Config.vars" v) 190 | end 191 | 192 | (** Git invocations. *) 193 | module Git : sig 194 | val describe : ?chop_v:bool -> string -> string 195 | (** [describe chop_v branch] invokes [git describe branch]. If [chop_v] 196 | is [true] (defaults to [false]) an initial ['v'] in the result 197 | is chopped. *) 198 | end = struct 199 | let describe ?(chop_v = false) branch = 200 | if not (Dir.exists ".git") then "not-a-git-checkout" else 201 | Cmd.read (Printf.sprintf "git describe %s" branch) >>& fun d -> 202 | let len = String.length d in 203 | if chop_v && len > 0 && d.[0] = 'v' then String.sub d 1 (len - 2) else 204 | String.sub d 0 (len - 1) (* remove \n *) 205 | end 206 | 207 | (** Default configuration. *) 208 | module Config_default : sig 209 | val subst_skip : string list 210 | (** [subst_skip] is a list of suffixes that are automatically 211 | skipped during variable substitution. *) 212 | 213 | val vars : (string * string) list 214 | (** [vars] is the list of variables to substitute, empty. *) 215 | 216 | val git_hook : string option 217 | (** [git_start_hook] is an ocaml script to invoke before a git package 218 | build, after variable substitution occured. *) 219 | 220 | val distrib_remove : string list 221 | (** [distrib_remove] is a list of files to remove before making 222 | the distributino tarball. *) 223 | 224 | val distrib_hook : string option 225 | (** [distrib_hook] is an ocaml script to invoke before trying 226 | to build the distribution. *) 227 | 228 | val www_demos : string list 229 | (** [www_demos] is a list of build targets that represent single page 230 | js_of_ocaml demo. *) 231 | end = struct 232 | let subst_skip = [".git"; ".png"; ".jpeg"; ".otf"; ".ttf"; ".pdf" ] 233 | let vars = [] 234 | let git_hook = None 235 | let distrib_remove = [".git"; ".gitignore"; "build"] 236 | let distrib_hook = None 237 | let www_demos = [] 238 | end 239 | 240 | 241 | (*--------------------------------------------------------------------------- 242 | Copyright (c) 2014 Daniel C. Bünzli. 243 | All rights reserved. 244 | 245 | Redistribution and use in source and binary forms, with or without 246 | modification, are permitted provided that the following conditions 247 | are met: 248 | 249 | 1. Redistributions of source code must retain the above copyright 250 | notice, this list of conditions and the following disclaimer. 251 | 252 | 2. Redistributions in binary form must reproduce the above 253 | copyright notice, this list of conditions and the following 254 | disclaimer in the documentation and/or other materials provided 255 | with the distribution. 256 | 257 | 3. Neither the name of Daniel C. Bünzli nor the names of 258 | contributors may be used to endorse or promote products derived 259 | from this software without specific prior written permission. 260 | 261 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 262 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 263 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 264 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 265 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 266 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 267 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 268 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 269 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 270 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 271 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 272 | ---------------------------------------------------------------------------*) 273 | -------------------------------------------------------------------------------- /pkg/topkg.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2014 Daniel C. Bünzli. All rights reserved. 3 | Distributed under the BSD3 license, see license at the end of the file. 4 | %%NAME%% release %%VERSION%% 5 | ---------------------------------------------------------------------------*) 6 | 7 | (* Public api *) 8 | 9 | (** Build environment access *) 10 | module type Env = sig 11 | val bool : string -> bool 12 | (** [bool key] declares [key] as being a boolean key in the environment. 13 | Specifing key=(true|false) on the command line becomes mandatory. *) 14 | 15 | val native : bool 16 | (** [native] is [bool "native"]. *) 17 | 18 | val native_dynlink : bool 19 | (** [native_dylink] is [bool "native-dynlink"] *) 20 | end 21 | 22 | (** Exts defines sets of file extensions. *) 23 | module type Exts = sig 24 | val interface : string list 25 | (** [interface] is [[".mli"; ".cmi"; ".cmti"]] *) 26 | 27 | val interface_opt : string list 28 | (** [interface_opt] is [".cmx" :: interface] *) 29 | 30 | val c_library : string list 31 | (** [c_library] is the extension for C libraries, [".a"] for unices 32 | and [".lib"] for win32 *) 33 | 34 | val c_dll_library : string list 35 | (** [c_dll_library] is the extension for C dynamic libraries [".so"] 36 | for unices and [".dll"] for win32 *) 37 | 38 | val library : string list 39 | (** [library] is [[".cma"; ".cmxa"; ".cmxs"] @ c_library] *) 40 | 41 | val module_library : string list 42 | (** [module_library] is [(interface_opt @ library)]. *) 43 | end 44 | 45 | (** Package description. *) 46 | module type Pkg = sig 47 | type builder = [ `OCamlbuild | `Other of string * string ] 48 | (** The type for build tools. Either [`OCamlbuild] or an 49 | [`Other (tool, bdir)] tool [tool] that generates its build artefacts 50 | in [bdir]. *) 51 | 52 | type moves 53 | (** The type for install moves. *) 54 | 55 | type field = ?cond:bool -> ?exts:string list -> ?dst:string -> string -> moves 56 | (** The type for field install functions. A call 57 | [field cond exts dst path] generates install moves as follows: 58 | {ul 59 | {- If [cond] is [false] (defaults to [true]), no move is generated.} 60 | {- If [exts] is present, generates a move for each path in 61 | the list [List.map (fun e -> path ^ e) exts].} 62 | {- If [dst] is present this path is used as the move destination 63 | (allows to install in subdirectories). If absent [dst] is 64 | [Filename.basename path].} *) 65 | 66 | val lib : field 67 | val bin : ?auto:bool -> field 68 | (** If [auto] is true (defaults to false) generates 69 | [path ^ ".native"] if {!Env.native} is [true] and 70 | [path ^ ".byte"] if {!Env.native} is [false]. *) 71 | val sbin : ?auto:bool -> field (** See {!bin}. *) 72 | val libexec : ?auto:bool -> field (** See {!bin}. *) 73 | val toplevel : field 74 | val share : field 75 | val share_root : field 76 | val etc : field 77 | val doc : field 78 | val misc : field 79 | val stublibs : field 80 | val man : field 81 | val describe : string -> builder:builder -> moves list -> unit 82 | (** [describe name builder moves] describes a package named [name] with 83 | builder [builder] and install moves [moves]. *) 84 | end 85 | 86 | (* Implementation *) 87 | 88 | module Topkg : sig 89 | val cmd : [`Build | `Explain | `Help ] 90 | val env : (string * bool) list 91 | val err_parse : string -> 'a 92 | val err_mdef : string -> 'a 93 | val err_miss : string -> 'a 94 | val err_file : string -> string -> 'a 95 | val warn_unused : string -> unit 96 | end = struct 97 | 98 | (* Parses the command line. The actual cmd execution occurs in the call 99 | to Pkg.describe. *) 100 | 101 | let err fmt = 102 | let k _ = exit 1 in 103 | Format.kfprintf k Format.err_formatter ("%s: " ^^ fmt ^^ "@.") Sys.argv.(0) 104 | 105 | let err_parse a = err "argument `%s' is not of the form key=(true|false)" a 106 | let err_mdef a = err "bool `%s' is defined more than once" a 107 | let err_miss a = err "argument `%s=(true|false)' is missing" a 108 | let err_file f e = err "%s: %s" f e 109 | let warn_unused k = 110 | Format.eprintf "%s: warning: environment key `%s` unused.@." Sys.argv.(0) k 111 | 112 | let cmd, env = 113 | let rec parse_env acc = function (* not t.r. *) 114 | | arg :: args -> 115 | begin try 116 | (* String.cut ... *) 117 | let len = String.length arg in 118 | let eq = String.index arg '=' in 119 | let bool = bool_of_string (String.sub arg (eq + 1) (len - eq - 1)) in 120 | let key = String.sub arg 0 eq in 121 | if key = "" then raise Exit else 122 | try ignore (List.assoc key acc); err_mdef key with 123 | | Not_found -> parse_env ((key, bool) :: acc) args 124 | with 125 | | Invalid_argument _ | Not_found | Exit -> err_parse arg 126 | end 127 | | [] -> acc 128 | in 129 | match List.tl (Array.to_list Sys.argv) with 130 | | "explain" :: args -> `Explain, parse_env [] args 131 | | ("help" | "-h" | "--help" | "-help") :: args -> `Help, parse_env [] args 132 | | args -> `Build, parse_env [] args 133 | end 134 | 135 | module Env : sig 136 | include Env 137 | val get : unit -> (string * bool) list 138 | end = struct 139 | let env = ref [] 140 | let get () = !env 141 | let add_bool key b = env := (key, b) :: !env 142 | let bool key = 143 | let b = try List.assoc key Topkg.env with 144 | | Not_found -> if Topkg.cmd = `Build then Topkg.err_miss key else true 145 | in 146 | add_bool key b; b 147 | 148 | let native = bool "native" 149 | let native_dynlink = bool "native-dynlink" 150 | end 151 | 152 | module Exts : Exts = struct 153 | let interface = [".mli"; ".cmi"; ".cmti"] 154 | let interface_opt = ".cmx" :: interface 155 | let c_library = if Sys.win32 then [".lib"] else [".a"] 156 | let c_dll_library = if Sys.win32 then [".dll"] else [".so"] 157 | let library = [".cma"; ".cmxa"; ".cmxs"] @ c_library 158 | let module_library = (interface_opt @ library) 159 | end 160 | 161 | module Pkg : Pkg = struct 162 | type builder = [ `OCamlbuild | `Other of string * string ] 163 | type moves = (string * (string * string)) list 164 | type field = ?cond:bool -> ?exts:string list -> ?dst:string -> string -> moves 165 | 166 | let str = Printf.sprintf 167 | let warn_unused () = 168 | let keys = List.map fst Topkg.env in 169 | let keys_used = List.map fst (Env.get ()) in 170 | let unused = List.find_all (fun k -> not (List.mem k keys_used)) keys in 171 | List.iter Topkg.warn_unused unused 172 | 173 | let has_suffix = Filename.check_suffix 174 | let build_strings ?(exec_sep = " ") btool bdir mvs = 175 | let no_build = [ ".cmti"; ".cmt" ] in 176 | let install = Buffer.create 1871 in 177 | let exec = Buffer.create 1871 in 178 | let rec add_mvs current = function 179 | | (field, (src, dst)) :: mvs when field = current -> 180 | if List.exists (has_suffix src) no_build then 181 | Buffer.add_string install (str "\n \"?%s/%s\" {\"%s\"}" bdir src dst) 182 | else begin 183 | Buffer.add_string exec (str "%s%s" exec_sep src); 184 | Buffer.add_string install (str "\n \"%s/%s\" {\"%s\"}" bdir src dst); 185 | end; 186 | add_mvs current mvs 187 | | (((field, _) :: _) as mvs) -> 188 | if current <> "" (* first *) then Buffer.add_string install " ]\n"; 189 | Buffer.add_string install (str "%s: [" field); 190 | add_mvs field mvs 191 | | [] -> () 192 | in 193 | Buffer.add_string exec btool; 194 | add_mvs "" mvs; 195 | Buffer.add_string install " ]\n"; 196 | Buffer.contents install, Buffer.contents exec 197 | 198 | let pr = Format.printf 199 | let pr_explanation btool bdir pkg mvs = 200 | let env = Env.get () in 201 | let install, exec = build_strings ~exec_sep:" \\\n " btool bdir mvs in 202 | pr "@["; 203 | pr "Package name: %s@," pkg; 204 | pr "Build tool: %s@," btool; 205 | pr "Build directory: %s@," bdir; 206 | pr "Environment:@, "; 207 | List.iter (fun (k,v) -> pr "%s=%b@, " k v) (List.sort compare env); 208 | pr "@,Build invocation:@,"; 209 | pr " %s@,@," exec; 210 | pr "Install file:@,"; 211 | pr "%s@," install; 212 | pr "@]"; 213 | () 214 | 215 | let pr_help () = 216 | pr "Usage example:@\n %s" Sys.argv.(0); 217 | List.iter (fun (k,v) -> pr " %s=%b" k v) (List.sort compare (Env.get ())); 218 | pr "@." 219 | 220 | let build btool bdir pkg mvs = 221 | let install, exec = build_strings btool bdir mvs in 222 | let e = Sys.command exec in 223 | if e <> 0 then exit e else 224 | let install_file = pkg ^ ".install" in 225 | try 226 | let oc = open_out install_file in 227 | output_string oc install; flush oc; close_out oc 228 | with Sys_error e -> Topkg.err_file install_file e 229 | 230 | let mvs ?(drop_exts = []) field ?(cond = true) ?(exts = []) ?dst src = 231 | if not cond then [] else 232 | let mv src dst = (field, (src, dst)) in 233 | let expand exts s d = List.map (fun e -> mv (s ^ e) (d ^ e)) exts in 234 | let dst = match dst with None -> Filename.basename src | Some dst -> dst in 235 | let files = if exts = [] then [mv src dst] else expand exts src dst in 236 | let keep (_, (src, _)) = not (List.exists (has_suffix src) drop_exts) in 237 | List.find_all keep files 238 | 239 | let lib = 240 | let drop_exts = 241 | if Env.native && not Env.native_dynlink then [ ".cmxs" ] else 242 | if not Env.native then Exts.c_library @ [".cmx"; ".cmxa"; ".cmxs" ] 243 | else [] 244 | in 245 | mvs ~drop_exts "lib" 246 | 247 | let share = mvs "share" 248 | let share_root = mvs "share_root" 249 | let etc = mvs "etc" 250 | let toplevel = mvs "toplevel" 251 | let doc = mvs "doc" 252 | let misc = mvs "misc" 253 | let stublibs = mvs "stublibs" 254 | let man = mvs "man" 255 | 256 | let bin_drops = if not Env.native then [ ".native" ] else [] 257 | let bin_mvs field ?(auto = false) ?cond ?exts ?dst src = 258 | let src, dst = 259 | if not auto then src, dst else 260 | let dst = match dst with 261 | | None -> Some (Filename.basename src) 262 | | Some _ as dst -> dst 263 | in 264 | let src = if Env.native then src ^ ".native" else src ^ ".byte" in 265 | src, dst 266 | in 267 | mvs ~drop_exts:bin_drops field ?cond ?dst src 268 | 269 | let bin = bin_mvs "bin" 270 | let sbin = bin_mvs "sbin" 271 | let libexec = bin_mvs "libexec" 272 | 273 | let describe pkg ~builder mvs = 274 | let mvs = List.sort compare (List.flatten mvs) in 275 | let btool, bdir = match builder with 276 | | `OCamlbuild -> "ocamlbuild -use-ocamlfind -classic-display", "_build" 277 | | `Other (btool, bdir) -> btool, bdir 278 | in 279 | match Topkg.cmd with 280 | | `Explain -> pr_explanation btool bdir pkg mvs 281 | | `Help -> pr_help () 282 | | `Build -> warn_unused (); build btool bdir pkg mvs 283 | end 284 | 285 | (*--------------------------------------------------------------------------- 286 | Copyright (c) 2014 Daniel C. Bünzli. 287 | All rights reserved. 288 | 289 | Redistribution and use in source and binary forms, with or without 290 | modification, are permitted provided that the following conditions 291 | are met: 292 | 293 | 1. Redistributions of source code must retain the above copyright 294 | notice, this list of conditions and the following disclaimer. 295 | 296 | 2. Redistributions in binary form must reproduce the above 297 | copyright notice, this list of conditions and the following 298 | disclaimer in the documentation and/or other materials provided 299 | with the distribution. 300 | 301 | 3. Neither the name of Daniel C. Bünzli nor the names of 302 | contributors may be used to endorse or promote products derived 303 | from this software without specific prior written permission. 304 | 305 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 306 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 307 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 308 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 309 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 310 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 311 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 312 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 313 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 314 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 315 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 316 | ---------------------------------------------------------------------------*) 317 | --------------------------------------------------------------------------------