├── .gitignore ├── .ocamlformat ├── CHANGES.md ├── CONTRIBUTING.md ├── LICENSE.md ├── Makefile ├── README.md ├── doc └── design.notes ├── dune ├── dune-project ├── expander ├── dune ├── ppx_hash_expander.ml └── ppx_hash_expander.mli ├── hash_types ├── README.org ├── src │ ├── base_internalhash_types.ml │ ├── dune │ ├── internalhash.h │ ├── internalhash_stubs.c │ ├── runtime.js │ └── runtime.wat └── test │ ├── base_internalhash_types_test.ml │ ├── dune │ ├── import.ml │ ├── test_immediate.ml │ └── test_immediate.mli ├── ppx_hash.opam ├── runtime ├── siphash │ ├── dune │ ├── siphash.c │ ├── siphash.c.txt │ ├── siphash.ml │ ├── siphash.mli │ └── siphash_lib.ml └── src │ ├── dune │ ├── hash.ml │ ├── hash.mli │ ├── hash_intf.ml │ ├── hash_stubs.c │ └── ppx_hash_lib.ml └── src ├── dune ├── ppx_hash.ml └── ppx_hash.mli /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | *.install 3 | *.merlin 4 | _opam 5 | 6 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | profile=janestreet 2 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ## v0.10 2 | 3 | - Fixed `[@@deriving hash]` on nested tuples 4 | 5 | - Added `@compare.ignore` record-field attribute; `ppx_compare` and `ppx_hash` 6 | skip record fields annotated with `@compare.ignore`. 7 | 8 | - Changed `[@@deriving hash]` and `[%hash]` on atomic types to use `hash` rather 9 | than `hash_fold`. E.g. `[%hash: M.t]` now expands to `M.hash`. 10 | 11 | - Renamed `@hash.no_hashing` to `@hash.ignore`. 12 | 13 | - Depend on ppxlib instead of (now deprecated) ppx\_core, ppx\_driver and 14 | ppx\_metaquot. 15 | 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | This repository contains open source software that is developed and 2 | maintained by [Jane Street][js]. 3 | 4 | Contributions to this project are welcome and should be submitted via 5 | GitHub pull requests. 6 | 7 | Signing contributions 8 | --------------------- 9 | 10 | We require that you sign your contributions. Your signature certifies 11 | that you wrote the patch or otherwise have the right to pass it on as 12 | an open-source patch. The rules are pretty simple: if you can certify 13 | the below (from [developercertificate.org][dco]): 14 | 15 | ``` 16 | Developer Certificate of Origin 17 | Version 1.1 18 | 19 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 20 | 1 Letterman Drive 21 | Suite D4700 22 | San Francisco, CA, 94129 23 | 24 | Everyone is permitted to copy and distribute verbatim copies of this 25 | license document, but changing it is not allowed. 26 | 27 | 28 | Developer's Certificate of Origin 1.1 29 | 30 | By making a contribution to this project, I certify that: 31 | 32 | (a) The contribution was created in whole or in part by me and I 33 | have the right to submit it under the open source license 34 | indicated in the file; or 35 | 36 | (b) The contribution is based upon previous work that, to the best 37 | of my knowledge, is covered under an appropriate open source 38 | license and I have the right under that license to submit that 39 | work with modifications, whether created in whole or in part 40 | by me, under the same open source license (unless I am 41 | permitted to submit under a different license), as indicated 42 | in the file; or 43 | 44 | (c) The contribution was provided directly to me by some other 45 | person who certified (a), (b) or (c) and I have not modified 46 | it. 47 | 48 | (d) I understand and agree that this project and the contribution 49 | are public and that a record of the contribution (including all 50 | personal information I submit with it, including my sign-off) is 51 | maintained indefinitely and may be redistributed consistent with 52 | this project or the open source license(s) involved. 53 | ``` 54 | 55 | Then you just add a line to every git commit message: 56 | 57 | ``` 58 | Signed-off-by: Joe Smith 59 | ``` 60 | 61 | Use your real name (sorry, no pseudonyms or anonymous contributions.) 62 | 63 | If you set your `user.name` and `user.email` git configs, you can sign 64 | your commit automatically with git commit -s. 65 | 66 | [dco]: http://developercertificate.org/ 67 | [js]: https://opensource.janestreet.com/ 68 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2015--2025 Jane Street Group, LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | INSTALL_ARGS := $(if $(PREFIX),--prefix $(PREFIX),) 2 | 3 | default: 4 | dune build 5 | 6 | install: 7 | dune install $(INSTALL_ARGS) 8 | 9 | uninstall: 10 | dune uninstall $(INSTALL_ARGS) 11 | 12 | reinstall: uninstall install 13 | 14 | clean: 15 | dune clean 16 | 17 | .PHONY: default install uninstall reinstall clean 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ppx_hash 2 | ======== 3 | 4 | A ppx writer that generates hash functions from type expressions and definitions. 5 | 6 | Syntax 7 | ------ 8 | 9 | Type definitions: `[@@deriving hash]` 10 | Expressions: `[%hash_fold: TYPE]` and `[%hash: TYPE]` 11 | Types, record fields: `[@hash.ignore]` 12 | 13 | Basic usage 14 | ----------- 15 | 16 | ```ocaml 17 | type t = { 18 | s : string; 19 | x : (int * bool) list; 20 | } 21 | [@@deriving hash] 22 | ``` 23 | 24 | This will generate a function `hash_fold_t : Hash.state -> t -> Hash.state`. 25 | 26 | Where `Hash` is `Ppx_hash_lib.Std.Hash`. 27 | 28 | The generated function follows the structure of the type; allowing user overrides at every 29 | level. This is in contrast to ocaml's builtin polymorphic hashing `Hashtbl.hash` which 30 | ignores user overrides. 31 | 32 | Also generated is a direct hash-function `hash : t -> 33 | Hash.hash_value`. This function will be named `hash_` when != "t". 34 | 35 | The direct hash function is the one suitable for `Hashable.Make`. 36 | 37 | Signature 38 | --------- 39 | 40 | In a module signature, `type t [@@deriving hash]` will add both `hash_fold_t` and `hash` 41 | (or `hash_`) to the signature. 42 | 43 | Hashing without a type definition 44 | --------------------------------- 45 | 46 | A folding hash function is accessed/created as `[%hash_fold: TYPE]`. 47 | A direct hash function is accessed/created as `[%hash: TYPE]`. 48 | 49 | Ignoring part of types 50 | ---------------------- 51 | 52 | Types can be annotated with `[@hash.ignore]` so that they are not 53 | incorporated into the computed hash value. 54 | 55 | ```ocaml 56 | type second_only = (string [@hash.ignore]) * int [@@deriving hash] 57 | ``` 58 | 59 | Mutable records fields must have such an annotation. 60 | 61 | ```ocaml 62 | type t = { 63 | mutable s : string [@hash.ignore]; 64 | x : (int * bool) list; 65 | } 66 | [@@deriving hash] 67 | ``` 68 | 69 | Special support for `ppx_compare` 70 | --------------------------------- 71 | 72 | The annotation `[@compare.ignore]` (and `[@ignore]`) implies 73 | `[@hash.ignore]`, in order to preserve the invariant that `compare x y 74 | = 0` implies `hash x = hash y`. 75 | 76 | Adapting code to `ppx_hash` 77 | --------------------------- 78 | 79 | So long as all types in support hashing, the following common pattern: 80 | 81 | ```ocaml 82 | module T = struct 83 | type t = [@@deriving compare, sexp] 84 | let hash = Hashtbl.hash 85 | end 86 | include T 87 | include Hashable.Make (T) 88 | ``` 89 | 90 | Can this be written as: 91 | 92 | ```ocaml 93 | module T = struct 94 | type t = [@@deriving compare, hash, sexp] 95 | end 96 | include T 97 | include Hashable.Make (T) 98 | ``` 99 | 100 | More information 101 | ---------------- 102 | 103 | ppx/ppx_hash/doc/design.notes 104 | ppx/ppx_hash/runtime-lib/make_std.ml 105 | -------------------------------------------------------------------------------- /doc/design.notes: -------------------------------------------------------------------------------- 1 | 2 | Design for [ppx_hash] syntax extension: 3 | 4 | (1) [@@deriving hash] 5 | 6 | From: type t = ... [@@deriving hash] 7 | 8 | Generate a folding-style function. 9 | [hash_fold_t] : [Hash.state -> t -> Hash.state] 10 | 11 | And generate a direct-style function. 12 | [hash] : [t -> Hash.hash_value] (named [hash_] when != "t") 13 | 14 | where [Hash] is [Ppx_hash_lib.Std.Hash]. 15 | 16 | The folding-style function [hash_fold_] function is compositional, following the 17 | structure of the type; allowing user overrides at every level. This is in contrast to 18 | ocaml's builtin polymorphic hashing [Hashtbl.hash] which ignores user overrides. 19 | 20 | The direct-style function is a wrapper around the folding-style function, and is not used 21 | in a compositional way. 22 | 23 | [hash_fold_t state x] is supposed to disturb the [state] by mixing in all the 24 | information present in [x]. It should not discard [state]. 25 | 26 | To have collision resistance, it should expand to different sequences of 27 | built-in mixing functions for different values of [x]. No such sequence is 28 | allowed to be a prefix of another. 29 | 30 | We also support the inline syntax extensions [%hash_fold: TYPE] and [%hash: TYPE] 31 | 32 | (2) [Hash.state], [Hash.hash_value], [Hash.seed] 33 | 34 | The [ppx_hash] extension is not tied to any specific hash-function, with the generated 35 | code making no assumptions over the detail of these types. 36 | 37 | These types are defined by the specific hash-function selected as [Ppx_hash_lib.Std.Hash] 38 | We have conservatively selected to use the builtin hash-function defined internally in 39 | ocaml - which we refer to as "internalhash" - (but used in a compositional way). 40 | 41 | This hash-function has: [type seed = int] [type hash_value = int] 42 | [Hash.state] is abstract, but is an immediate value, so avoiding allocation issues. 43 | 44 | (3) User interface: 45 | 46 | Normally hashing is performed with the generated direct-style hash function: 47 | hash x 48 | 49 | Alternatively, the generated folding-style function can be run with [Hash.run]: 50 | (In addition, this allows a non-default seed to be passed.) 51 | Hash.run ?seed hash_fold_t x 52 | 53 | Or we can use the syntax extensions: 54 | 55 | [%hash: T] x 56 | Hash.run ?seed [%hash_fold: T] x 57 | 58 | (4) Code generation: 59 | 60 | The generated code follows the structure of the type. 61 | 62 | For leaf types, (in these examples [a], [b] and [c]), the generated function expects the 63 | corresponding hash_fold function ([hash_fold_a], [hash_fold_b] and [hash_fold_c]) to be in 64 | scope, accompanying the types in scope. 65 | 66 | Tuples: 67 | 68 | type t1 = a * b * c [@@deriving hash] 69 | 70 | let hash_fold_t1 : Hash.t -> t1 -> Hash.t = 71 | fun hsv -> 72 | fun arg -> 73 | let (e0,e1,e2) = arg in 74 | hash_fold_c (hash_fold_b (hash_fold_a hsv e0) e1) e2 75 | 76 | Records: 77 | 78 | type t2 = {a: a; b: b; c: c;} [@@deriving hash] 79 | 80 | let hash_fold_t2 : Hash.t -> t2 -> Hash.t = 81 | fun hsv -> 82 | fun arg -> 83 | hash_fold_c (hash_fold_b (hash_fold_a hsv arg.a) arg.b) arg.c 84 | 85 | 86 | For variants, we also take account of (the position of) the variant tag: 87 | 88 | type t3 = Foo | Bar of a | Qaz of b * c [@@deriving hash] 89 | 90 | let hash_fold_t3 : Hash.t -> t3 -> Hash.t = 91 | fun hsv -> 92 | fun arg -> 93 | match arg with 94 | | Foo -> Hash.fold_int hsv 0 95 | | Bar a0 -> hash_fold_a (Hash.fold_int hsv 1) a0 96 | | Qaz (a0,a1) -> hash_fold_c (hash_fold_b (Hash.fold_int hsv 2) a0) a1 97 | 98 | 99 | For polymorphic-variants, we use the ocaml hash value of the polymorphic-variant tag, 100 | returned by [Btype.hash_variant]: 101 | 102 | type t4 = [ `Foo of a | `Bar ] [@@deriving hash] 103 | 104 | let hash_fold_t4 : Hash.t -> t4 -> Hash.t = 105 | fun hsv -> 106 | fun arg -> 107 | match arg with 108 | | `Foo _v -> hash_fold_a (Hash.fold_int hsv 3505894) _v 109 | | `Bar -> Hash.fold_int hsv 3303859 110 | 111 | 112 | For parametrised types we generate a hash function parametrised over the hash function 113 | for the element type (nothing new here). 114 | 115 | type 'a t5 = ('a * 'a) list [@@deriving hash] 116 | 117 | let hash_fold_t5 : 'a . (Hash.t -> 'a -> Hash.t) -> Hash.t -> 'a t5 -> Hash.t = 118 | fun _hash_fold_a -> 119 | fun hsv -> 120 | fun arg -> 121 | hash_fold_list 122 | (fun hsv -> 123 | fun arg -> 124 | let (e0,e1) = arg in _hash_fold_a (_hash_fold_a hsv e0) e1) 125 | hsv arg 126 | 127 | 128 | (5) Special support for record fields: 129 | 130 | Record fields can be annotated with [@hash.ignore] so that they are not 131 | incorporated into the computed hash value. In the case of mutable fields, there 132 | must be such an annotation. 133 | 134 | type t = { 135 | mutable s : string; [@hash.ignore] 136 | i : int; 137 | } [@@deriving hash] 138 | 139 | let hash_fold_t : Hash.t -> t -> Hash.t = 140 | fun hsv -> fun arg -> hash_fold_int hsv arg.i 141 | 142 | 143 | (6) Support for builtins: 144 | 145 | We do nothing special for built-in types such as [int] or [float], or build-in type 146 | constructors such as [list] and [option]. We just expect the corresponding [hash_fold_] 147 | functions to be in scope. 148 | 149 | This is the same approach as taken by sexp-conv, but different from ppx_compare, which 150 | does treat built-in types & constructors specially, leading to buggy behaviour when those 151 | names are redefined. 152 | 153 | A runtime library defines the hash functions for the built-in types & constructors, and is 154 | put in scope by [open Core] as is done for built-in sexp converters. 155 | 156 | type 'a folder = Hash.t -> 'a -> Hash.t 157 | 158 | val hash_fold_nativeint : nativeint folder 159 | val hash_fold_int64 : int64 folder 160 | val hash_fold_int32 : int32 folder 161 | val hash_fold_char : char folder 162 | val hash_fold_int : int folder 163 | val hash_fold_bool : bool folder 164 | val hash_fold_string : string folder 165 | val hash_fold_float : float folder 166 | val hash_fold_unit : unit folder 167 | 168 | val hash_fold_option : 'a folder -> 'a option folder 169 | val hash_fold_list : 'a folder -> 'a list folder 170 | val hash_fold_lazy_t : 'a folder -> 'a lazy_t folder 171 | 172 | 173 | (7) Array/Ref: 174 | 175 | Hash support for [array] and [ref] is not provided directly, because of the danger 176 | when hashing mutable values: the computed hash changes when the value mutates. 177 | 178 | Instead we provide [.._frozen] type aliases, with the corresponding [hash_fold_] function. 179 | 180 | type 'a ref_frozen = 'a ref 181 | type 'a array_frozen = 'a array 182 | 183 | val hash_fold_ref_frozen : 'a folder -> 'a ref folder 184 | val hash_fold_array_frozen : 'a folder -> 'a array folder 185 | 186 | These are not safe if the ref/array value hashed is mutated. 187 | 188 | 189 | (8) [lazy_t]: 190 | 191 | We avoid the bug in ocaml's internal function on [lazy_t] values, by defining: 192 | 193 | let hash_fold_lazy_t hash_fold_elem s x = 194 | hash_fold_elem s (Lazy.force x) 195 | 196 | (9) GADTs: 197 | 198 | GADTs are not explicitly supported. Some examples will be fine, but examples with 199 | existential types wont work and will generate ill-typed code. We make no attempt to 200 | distinguish the cases. 201 | -------------------------------------------------------------------------------- /dune: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janestreet/ppx_hash/11117ed12915db9cabc97a07460567c298cb05bc/dune -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 3.17) 2 | -------------------------------------------------------------------------------- /expander/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name ppx_hash_expander) 3 | (public_name ppx_hash.expander) 4 | (ppx_runtime_libraries ppx_hash.runtime-lib) 5 | (libraries ppx_compare.expander ppxlib.stdppx ppxlib ppxlib_jane 6 | ocaml-compiler-libs.common compiler-libs.common) 7 | (preprocess 8 | (pps ppxlib.metaquot))) 9 | -------------------------------------------------------------------------------- /expander/ppx_hash_expander.ml: -------------------------------------------------------------------------------- 1 | open Stdppx 2 | open Ppxlib 3 | open Ast_builder.Default 4 | 5 | module Attrs = struct 6 | let ignore_label_declaration = 7 | Attribute.declare 8 | "hash.ignore" 9 | Attribute.Context.label_declaration 10 | Ast_pattern.(pstr nil) 11 | () 12 | ;; 13 | 14 | let ignore_core_type = 15 | Attribute.declare "hash.ignore" Attribute.Context.core_type Ast_pattern.(pstr nil) () 16 | ;; 17 | 18 | let no_hashing_label_declaration = 19 | Attribute.declare 20 | "hash.no_hashing" 21 | Attribute.Context.label_declaration 22 | Ast_pattern.(pstr nil) 23 | () 24 | ;; 25 | end 26 | 27 | let str_attributes = 28 | [ Attribute.T Attrs.ignore_core_type 29 | ; Attribute.T Attrs.ignore_label_declaration 30 | ; Attribute.T Attrs.no_hashing_label_declaration 31 | ] 32 | ;; 33 | 34 | let is_ignored_gen attrs t = 35 | List.exists attrs ~f:(fun attr -> Option.is_some (Attribute.get attr t)) 36 | ;; 37 | 38 | let core_type_is_ignored ct = 39 | is_ignored_gen 40 | [ Attrs.ignore_core_type; Ppx_compare_expander.Compare.Attrs.ignore_core_type ] 41 | ct 42 | ;; 43 | 44 | let should_ignore_label_declaration ld = 45 | let warning = "[@hash.no_hashing] is deprecated. Use [@hash.ignore]." in 46 | let is_ignored = 47 | is_ignored_gen 48 | [ Attrs.ignore_label_declaration 49 | ; Ppx_compare_expander.Compare.Attrs.ignore_label_declaration 50 | ] 51 | ld 52 | (* Avoid confusing errors with [ { mutable field : (value[@ignore]) } ] 53 | vs [ { mutable field : value [@ignore] } ] by treating them the same. *) 54 | || core_type_is_ignored ld.pld_type 55 | in 56 | match Attribute.get Attrs.no_hashing_label_declaration ld with 57 | | None -> (if is_ignored then `ignore else `incorporate), None 58 | | Some () -> `ignore, Some (attribute_of_warning ld.pld_loc warning) 59 | ;; 60 | 61 | (* Generate code to compute hash values of type [t] in folding style, following the structure of 62 | the type. Incorporate all structure when computing hash values, to maximise hash 63 | quality. Don't attempt to detect/avoid cycles - just loop. *) 64 | 65 | let hash_state_t ~loc = [%type: Ppx_hash_lib.Std.Hash.state] 66 | 67 | let hash_fold_type ~loc ty = 68 | let loc = { loc with loc_ghost = true } in 69 | [%type: [%t hash_state_t ~loc] -> [%t ty] -> [%t hash_state_t ~loc]] 70 | ;; 71 | 72 | let hash_type ~loc ty = 73 | let loc = { loc with loc_ghost = true } in 74 | [%type: [%t ty] -> Ppx_hash_lib.Std.Hash.hash_value] 75 | ;; 76 | 77 | (* [expr] is an expression that doesn't use the [hsv] variable. 78 | Currently it's there only for documentation value, but conceptually it can be thought 79 | of as an abstract type *) 80 | type expr = expression 81 | 82 | (* Represents an expression that produces a hash value and uses the variable [hsv] in 83 | a linear way (mixes it in exactly once). 84 | You can think of it as a body of a function of type [Hash.state -> Hash.state] *) 85 | module Hsv_expr : sig 86 | type t 87 | 88 | val identity : loc:location -> t 89 | val invoke_hash_fold_t : loc:location -> hash_fold_t:expr -> t:expr -> t 90 | val compose : loc:location -> t -> t -> t 91 | val compile_error : loc:location -> string -> t 92 | 93 | (** the [_unchecked] functions all break abstraction in some way *) 94 | val of_expression_unchecked : expr -> t 95 | 96 | (** the returned [expression] uses the binding [hsv] bound by [pattern] *) 97 | val to_expression : loc:location -> t -> pattern * expression 98 | 99 | (* [case] is binding a variable that's not [hsv] and uses [hsv] on the rhs 100 | exactly once *) 101 | type case 102 | 103 | val compile_error_case : loc:location -> string -> case 104 | val pexp_match : loc:location -> expr -> case list -> t 105 | 106 | (* [lhs] should not bind [hsv] *) 107 | val case : lhs:pattern -> guard:expr option -> rhs:t -> case 108 | 109 | (* [value_binding]s should not bind or use [hsv] *) 110 | val pexp_let : loc:location -> rec_flag -> value_binding list -> t -> t 111 | val with_attributes : f:(attribute list -> attribute list) -> t -> t 112 | end = struct 113 | type t = expression 114 | type nonrec case = case 115 | 116 | let invoke_hash_fold_t ~loc ~hash_fold_t ~t = eapply ~loc hash_fold_t [ [%expr hsv]; t ] 117 | let identity ~loc = [%expr hsv] 118 | 119 | let compose ~loc a b = 120 | [%expr 121 | let hsv = [%e a] in 122 | [%e b]] 123 | ;; 124 | 125 | let to_expression ~loc x = [%pat? hsv], x 126 | let of_expression_unchecked x = x 127 | let pexp_match = pexp_match 128 | let case = case 129 | let pexp_let = pexp_let 130 | let with_attributes ~f x = { x with pexp_attributes = f x.pexp_attributes } 131 | 132 | let compile_error ~loc s = 133 | pexp_extension ~loc (Location.Error.to_extension (Location.Error.createf ~loc "%s" s)) 134 | ;; 135 | 136 | let compile_error_case ~loc s = 137 | case ~lhs:(ppat_any ~loc) ~guard:None ~rhs:(compile_error ~loc s) 138 | ;; 139 | end 140 | 141 | let hash_fold_int_expr ~loc i_expr : Hsv_expr.t = 142 | Hsv_expr.invoke_hash_fold_t 143 | ~loc 144 | ~hash_fold_t:[%expr Ppx_hash_lib.Std.Hash.fold_int] 145 | ~t:i_expr 146 | ;; 147 | 148 | let hash_fold_int ~loc i : Hsv_expr.t = hash_fold_int_expr ~loc (eint ~loc i) 149 | 150 | let special_case_types_named_t = function 151 | | `hash_fold -> false 152 | | `hash -> true 153 | ;; 154 | 155 | let hash_fold_ tn = 156 | match tn with 157 | | "t" when special_case_types_named_t `hash_fold -> "hash_fold" 158 | | _ -> "hash_fold_" ^ tn 159 | ;; 160 | 161 | let hash_ tn = 162 | match tn with 163 | | "t" when special_case_types_named_t `hash -> "hash" 164 | | _ -> "hash_" ^ tn 165 | ;; 166 | 167 | (** renames [x] avoiding collision with [type_name] *) 168 | let rigid_type_var ~type_name x = 169 | let prefix = "rigid_" in 170 | if String.equal x type_name || String.is_prefix x ~prefix 171 | then prefix ^ x ^ "_of_type_" ^ type_name 172 | else x 173 | ;; 174 | 175 | let make_type_rigid ~type_name = 176 | let map = 177 | object 178 | inherit Ast_traverse.map as super 179 | 180 | method! core_type ty = 181 | let ptyp_desc = 182 | let () = 183 | (* making sure [type_name] is the only free type variable *) 184 | match ty.ptyp_desc with 185 | | Ptyp_constr (name, _args) -> 186 | (match name.txt with 187 | | Ldot _ | Lapply _ -> () 188 | | Lident name -> 189 | if not (String.equal name type_name) 190 | then 191 | Location.raise_errorf 192 | ~loc:ty.ptyp_loc 193 | "ppx_hash: make_type_rigid: unexpected type %S. expected to only \ 194 | find %S" 195 | (string_of_core_type ty) 196 | type_name; 197 | ()) 198 | | _ -> () 199 | in 200 | match Ppxlib_jane.Shim.Core_type_desc.of_parsetree ty.ptyp_desc with 201 | | Ptyp_var (s, _) -> 202 | Ptyp_constr (Located.lident ~loc:ty.ptyp_loc (rigid_type_var ~type_name s), []) 203 | | _ -> super#core_type_desc ty.ptyp_desc 204 | in 205 | { ty with ptyp_desc } 206 | end 207 | in 208 | map#core_type 209 | ;; 210 | 211 | (* The only names we assume to be in scope are [hash_fold_] 212 | So we are sure [tp_name] (which start with an [_]) will not capture them. *) 213 | let tp_name n = Printf.sprintf "_hash_fold_%s" n 214 | 215 | let with_tuple loc (value : expr) xs (f : (expr * core_type) list -> Hsv_expr.t) 216 | : Hsv_expr.t 217 | = 218 | let names = List.mapi ~f:(fun i lt -> Printf.sprintf "e%d" i, lt) xs in 219 | let pattern = 220 | let l = List.map ~f:(fun (n, (lbl, _)) -> lbl, pvar ~loc n) names in 221 | Ppxlib_jane.Ast_builder.Default.ppat_tuple ~loc l Closed 222 | in 223 | let e = f (List.map ~f:(fun (n, (_lbl, t)) -> evar ~loc n, t) names) in 224 | let binding = value_binding ~loc ~pat:pattern ~expr:value in 225 | Hsv_expr.pexp_let ~loc Nonrecursive [ binding ] e 226 | ;; 227 | 228 | let hash_ignore ~loc value = 229 | Hsv_expr.pexp_let 230 | ~loc 231 | Nonrecursive 232 | [ value_binding ~loc ~pat:[%pat? _] ~expr:value ] 233 | (Hsv_expr.identity ~loc) 234 | ;; 235 | 236 | let ghostify_located (t : 'a loc) : 'a loc = 237 | { t with loc = { t.loc with loc_ghost = true } } 238 | ;; 239 | 240 | let rec hash_applied ty value = 241 | let loc = { ty.ptyp_loc with loc_ghost = true } in 242 | match ty.ptyp_desc with 243 | | Ptyp_constr (name, ta) -> 244 | let args = List.map ta ~f:(hash_fold_of_ty_fun ~type_constraint:false) in 245 | Hsv_expr.invoke_hash_fold_t 246 | ~loc 247 | ~hash_fold_t:(type_constr_conv ~loc name ~f:hash_fold_ args) 248 | ~t:value 249 | | _ -> assert false 250 | 251 | and hash_fold_of_tuple ~loc tys value = 252 | with_tuple loc value tys (fun elems1 -> 253 | List.fold_right 254 | elems1 255 | ~init:(Hsv_expr.identity ~loc) 256 | ~f:(fun (v, t) (result : Hsv_expr.t) -> 257 | Hsv_expr.compose ~loc (hash_fold_of_ty t v) result)) 258 | 259 | and hash_variant ~loc row_fields value = 260 | let map row = 261 | match row.prf_desc with 262 | | Rtag ({ txt = cnstr; _ }, true, _) | Rtag ({ txt = cnstr; _ }, _, []) -> 263 | Hsv_expr.case 264 | ~guard:None 265 | ~lhs:(ppat_variant ~loc cnstr None) 266 | ~rhs:(hash_fold_int ~loc (Ocaml_common.Btype.hash_variant cnstr)) 267 | | Rtag ({ txt = cnstr; _ }, false, tp :: _) -> 268 | let v = "_v" in 269 | let body = 270 | Hsv_expr.compose 271 | ~loc 272 | (hash_fold_int ~loc (Ocaml_common.Btype.hash_variant cnstr)) 273 | (hash_fold_of_ty tp (evar ~loc v)) 274 | in 275 | Hsv_expr.case 276 | ~guard:None 277 | ~lhs:(ppat_variant ~loc cnstr (Some (pvar ~loc v))) 278 | ~rhs:body 279 | | Rinherit ({ ptyp_desc = Ptyp_constr (id, _); _ } as ty) -> 280 | (* Generated code from.. 281 | type 'a id = 'a [@@deriving hash] 282 | type t = [ `a | [ `b ] id ] [@@deriving hash] 283 | doesn't compile: Also see the "sadly" note in: ppx_compare_expander.ml *) 284 | let v = "_v" in 285 | Hsv_expr.case 286 | ~guard:None 287 | ~lhs:(ppat_alias ~loc (ppat_type ~loc (ghostify_located id)) (Located.mk ~loc v)) 288 | ~rhs:(hash_applied ty (evar ~loc v)) 289 | | Rinherit ty -> 290 | let s = string_of_core_type ty in 291 | Hsv_expr.compile_error_case 292 | ~loc 293 | (Printf.sprintf "ppx_hash: impossible variant case: %s" s) 294 | in 295 | Hsv_expr.pexp_match ~loc value (List.map ~f:map row_fields) 296 | 297 | and branch_of_sum hsv ~loc cd = 298 | match cd.pcd_args with 299 | | Pcstr_tuple [] -> 300 | let pcnstr = pconstruct cd None in 301 | Hsv_expr.case ~guard:None ~lhs:pcnstr ~rhs:hsv 302 | | Pcstr_tuple args -> 303 | let ids_ty = 304 | List.mapi args ~f:(fun i arg -> 305 | let ty = Ppxlib_jane.Shim.Pcstr_tuple_arg.to_core_type arg in 306 | Printf.sprintf "_a%d" i, ty) 307 | in 308 | let lpatt = List.map ids_ty ~f:(fun (l, _ty) -> pvar ~loc l) |> ppat_tuple ~loc 309 | and body = 310 | List.fold_left ids_ty ~init:(Hsv_expr.identity ~loc) ~f:(fun expr (l, ty) -> 311 | Hsv_expr.compose ~loc expr (hash_fold_of_ty ty (evar ~loc l))) 312 | in 313 | Hsv_expr.case 314 | ~guard:None 315 | ~lhs:(pconstruct cd (Some lpatt)) 316 | ~rhs:(Hsv_expr.compose ~loc hsv body) 317 | | Pcstr_record lds -> 318 | let arg = "_ir" in 319 | let pat = pvar ~loc arg in 320 | let v = evar ~loc arg in 321 | let body = hash_fold_of_record ~loc lds v in 322 | Hsv_expr.case 323 | ~guard:None 324 | ~lhs:(pconstruct cd (Some pat)) 325 | ~rhs:(Hsv_expr.compose ~loc hsv body) 326 | 327 | and branches_of_sum = function 328 | | [ cd ] -> 329 | (* this is an optimization: we don't need to mix in the constructor tag if the type 330 | only has one constructor *) 331 | let loc = cd.pcd_loc in 332 | [ branch_of_sum (Hsv_expr.identity ~loc) ~loc cd ] 333 | | cds -> 334 | List.mapi cds ~f:(fun i cd -> 335 | let loc = cd.pcd_loc in 336 | let hsv = hash_fold_int ~loc i in 337 | branch_of_sum hsv ~loc cd) 338 | 339 | and hash_sum_special_case_for_enums ~loc cds value = 340 | let cd_has_payload (cd : constructor_declaration) = 341 | match cd.pcd_args with 342 | | Pcstr_tuple [] -> false 343 | | _ -> true 344 | in 345 | match cds with 346 | | [ _ ] -> 347 | (* Don't need to write any tag at all for the single-constructor case. 348 | The general [branches_of_sum] function can take care of that *) 349 | None 350 | | _ -> 351 | (match List.exists cds ~f:(fun cd -> cd_has_payload cd) with 352 | | true -> None 353 | | false -> 354 | Some 355 | (let tag_expr = 356 | pexp_match 357 | ~loc 358 | value 359 | (List.mapi cds ~f:(fun i cd -> 360 | let loc = cd.pcd_loc in 361 | let tag = eint ~loc i in 362 | let pcnstr = pconstruct cd None in 363 | case ~guard:None ~lhs:pcnstr ~rhs:tag)) 364 | in 365 | hash_fold_int_expr ~loc tag_expr)) 366 | 367 | and hash_sum ~loc cds value = 368 | match hash_sum_special_case_for_enums ~loc cds value with 369 | | Some v -> v 370 | | None -> Hsv_expr.pexp_match ~loc value (branches_of_sum cds) 371 | 372 | and hash_fold_of_ty ty value = 373 | let loc = { ty.ptyp_loc with loc_ghost = true } in 374 | if core_type_is_ignored ty 375 | then hash_ignore ~loc value 376 | else ( 377 | match Ppxlib_jane.Shim.Core_type_desc.of_parsetree ty.ptyp_desc with 378 | | Ptyp_constr _ -> hash_applied ty value 379 | | Ptyp_tuple tys -> hash_fold_of_tuple ~loc tys value 380 | | Ptyp_var (name, _) -> 381 | Hsv_expr.invoke_hash_fold_t ~loc ~hash_fold_t:(evar ~loc (tp_name name)) ~t:value 382 | | Ptyp_arrow _ -> Hsv_expr.compile_error ~loc "ppx_hash: functions can not be hashed." 383 | | Ptyp_variant (row_fields, Closed, _) -> hash_variant ~loc row_fields value 384 | | _ -> 385 | let s = string_of_core_type ty in 386 | Hsv_expr.compile_error ~loc (Printf.sprintf "ppx_hash: unsupported type: %s" s)) 387 | 388 | and hash_fold_of_ty_fun ~type_constraint ty = 389 | let loc = { ty.ptyp_loc with loc_ghost = true } in 390 | let arg = "arg" in 391 | let maybe_constrained_arg = 392 | if type_constraint then ppat_constraint ~loc (pvar ~loc arg) ty else pvar ~loc arg 393 | in 394 | let hsv_pat, hsv_expr = 395 | Hsv_expr.to_expression ~loc (hash_fold_of_ty ty (evar ~loc arg)) 396 | in 397 | eta_reduce_if_possible 398 | [%expr fun [%p hsv_pat] [%p maybe_constrained_arg] -> [%e hsv_expr]] 399 | 400 | and hash_fold_of_record ~loc lds value = 401 | let is_evar = function 402 | | { pexp_desc = Pexp_ident _; _ } -> true 403 | | _ -> false 404 | in 405 | assert (is_evar value); 406 | List.fold_left lds ~init:(Hsv_expr.identity ~loc) ~f:(fun hsv ld -> 407 | Hsv_expr.compose 408 | ~loc 409 | hsv 410 | (let loc = ld.pld_loc in 411 | let label = Located.map lident ld.pld_name in 412 | let should_ignore, should_warn = should_ignore_label_declaration ld in 413 | let field_handling = 414 | match ld.pld_mutable, should_ignore with 415 | | Mutable, `incorporate -> 416 | `error "require [@hash.ignore] or [@compare.ignore] on mutable record field" 417 | | (Mutable | Immutable), `ignore -> `ignore 418 | | Immutable, `incorporate -> `incorporate 419 | in 420 | let hsv = 421 | match field_handling with 422 | | `error s -> Hsv_expr.compile_error ~loc (Printf.sprintf "ppx_hash: %s" s) 423 | | `incorporate -> hash_fold_of_ty ld.pld_type (pexp_field ~loc value label) 424 | | `ignore -> Hsv_expr.identity ~loc 425 | in 426 | match should_warn with 427 | | None -> hsv 428 | | Some attribute -> 429 | Hsv_expr.with_attributes ~f:(fun attributes -> attribute :: attributes) hsv)) 430 | ;; 431 | 432 | let hash_fold_of_abstract ~loc type_name value = 433 | let str = 434 | Printf.sprintf 435 | "hash called on the type %s, which is abstract in an implementation." 436 | type_name 437 | in 438 | Hsv_expr.of_expression_unchecked 439 | [%expr 440 | let _ = hsv in 441 | let _ = [%e value] in 442 | failwith [%e estring ~loc str]] 443 | ;; 444 | 445 | (** this does not change behavior (keeps the expression side-effect if any), but it can 446 | make the compiler happy when the expression occurs on the rhs of an [let rec] binding. *) 447 | let eta_expand ~loc f = 448 | [%expr 449 | let func = [%e f] in 450 | fun x -> func x] 451 | ;; 452 | 453 | let recognize_simple_type ty = 454 | match ty.ptyp_desc with 455 | | Ptyp_constr (lident, []) -> Some lident 456 | | _ -> None 457 | ;; 458 | 459 | let hash_of_ty_fun ~special_case_simple_types ~type_constraint ty = 460 | let loc = { ty.ptyp_loc with loc_ghost = true } in 461 | let arg = "arg" in 462 | let maybe_constrained_arg = 463 | if type_constraint then ppat_constraint ~loc (pvar ~loc arg) ty else pvar ~loc arg 464 | in 465 | match recognize_simple_type ty with 466 | | Some lident when special_case_simple_types -> 467 | unapplied_type_constr_conv ~loc lident ~f:hash_ 468 | | _ -> 469 | let hsv_pat, hsv_expr = 470 | Hsv_expr.to_expression ~loc (hash_fold_of_ty ty (evar ~loc arg)) 471 | in 472 | [%expr 473 | fun [%p maybe_constrained_arg] -> 474 | Ppx_hash_lib.Std.Hash.get_hash_value 475 | (let [%p hsv_pat] = Ppx_hash_lib.Std.Hash.create () in 476 | [%e hsv_expr])] 477 | ;; 478 | 479 | let hash_structure_item_of_td td ~portable = 480 | let loc = td.ptype_loc in 481 | match td.ptype_params with 482 | | _ :: _ -> [] 483 | | [] -> 484 | [ (let bnd = pvar ~loc (hash_ td.ptype_name.txt) in 485 | let typ = combinator_type_of_type_declaration td ~f:hash_type in 486 | let pat = ppat_constraint ~loc bnd typ in 487 | let expected_scope, expr = 488 | let is_simple_type ty = 489 | match recognize_simple_type ty with 490 | | Some _ -> true 491 | | None -> false 492 | in 493 | match td.ptype_kind, td.ptype_manifest with 494 | | Ptype_abstract, Some ty when is_simple_type ty -> 495 | ( `uses_rhs 496 | , hash_of_ty_fun ~special_case_simple_types:true ~type_constraint:false ty ) 497 | | _ -> 498 | ( `uses_hash_fold_t_being_defined 499 | , hash_of_ty_fun 500 | ~special_case_simple_types:false 501 | ~type_constraint:false 502 | { ptyp_loc = loc 503 | ; ptyp_loc_stack = [] 504 | ; ptyp_attributes = [] 505 | ; ptyp_desc = Ptyp_constr ({ loc; txt = Lident td.ptype_name.txt }, []) 506 | } ) 507 | in 508 | ( expected_scope 509 | , Ppxlib_jane.Ast_builder.Default.value_binding 510 | ~loc 511 | ~pat 512 | ~expr:(eta_expand ~loc expr) 513 | ~modes:(if portable then [ { loc; txt = Mode "portable" } ] else []) )) 514 | ] 515 | ;; 516 | 517 | let hash_fold_structure_item_of_td td ~rec_flag ~portable = 518 | let loc = { td.ptype_loc with loc_ghost = true } in 519 | let arg = "arg" in 520 | let body = 521 | let v = evar ~loc arg in 522 | match Ppxlib_jane.Shim.Type_kind.of_parsetree td.ptype_kind with 523 | | Ptype_variant cds -> hash_sum ~loc cds v 524 | | Ptype_record lds -> hash_fold_of_record ~loc lds v 525 | | Ptype_record_unboxed_product _ -> 526 | Hsv_expr.compile_error ~loc "ppx_hash: unboxed record types are not supported" 527 | | Ptype_open -> Hsv_expr.compile_error ~loc "ppx_hash: open types are not supported" 528 | | Ptype_abstract -> 529 | (match td.ptype_manifest with 530 | | None -> hash_fold_of_abstract ~loc td.ptype_name.txt v 531 | | Some ty -> 532 | (match ty.ptyp_desc with 533 | | Ptyp_variant (_, Open, _) | Ptyp_variant (_, Closed, Some (_ :: _)) -> 534 | Hsv_expr.compile_error 535 | ~loc:ty.ptyp_loc 536 | "ppx_hash: cannot hash open polymorphic variant types" 537 | | Ptyp_variant (row_fields, _, _) -> hash_variant ~loc row_fields v 538 | | _ -> hash_fold_of_ty ty v)) 539 | in 540 | let vars = List.map td.ptype_params ~f:(fun p -> get_type_param_name p) in 541 | let extra_names = List.map vars ~f:(fun x -> tp_name x.txt) in 542 | let hsv_pat, hsv_expr = Hsv_expr.to_expression ~loc body in 543 | let patts = List.map extra_names ~f:(pvar ~loc) @ [ hsv_pat; pvar ~loc arg ] in 544 | let bnd = pvar ~loc (hash_fold_ td.ptype_name.txt) in 545 | let scheme = combinator_type_of_type_declaration td ~f:hash_fold_type in 546 | let pat = ppat_constraint ~loc bnd (ptyp_poly ~loc vars scheme) in 547 | let expr = 548 | eta_reduce_if_possible_and_nonrec ~rec_flag (eabstract ~loc patts hsv_expr) 549 | in 550 | let use_rigid_variables = 551 | match td.ptype_kind with 552 | | Ptype_variant _ -> true 553 | | _ -> false 554 | in 555 | let expr = 556 | if use_rigid_variables 557 | then ( 558 | let type_name = td.ptype_name.txt in 559 | List.fold_right 560 | vars 561 | ~f:(fun s -> 562 | pexp_newtype ~loc { txt = rigid_type_var ~type_name s.txt; loc = s.loc }) 563 | ~init:(pexp_constraint ~loc expr (make_type_rigid ~type_name scheme))) 564 | else expr 565 | in 566 | Ppxlib_jane.Ast_builder.Default.value_binding 567 | ~loc 568 | ~pat 569 | ~expr 570 | ~modes:(if portable then [ { txt = Mode "portable"; loc } ] else []) 571 | ;; 572 | 573 | let pstr_value ~loc rec_flag bindings = 574 | match bindings with 575 | | [] -> [] 576 | | nonempty_bindings -> 577 | (* [pstr_value] with zero bindings is invalid *) 578 | [ pstr_value ~loc rec_flag nonempty_bindings ] 579 | ;; 580 | 581 | let str_type_decl ~loc ~path:_ (rec_flag, tds) ~portable = 582 | let tds = List.map tds ~f:name_type_params_in_td in 583 | let rec_flag = 584 | (object 585 | inherit type_is_recursive rec_flag tds as super 586 | 587 | method! label_declaration ld = 588 | match fst (should_ignore_label_declaration ld) with 589 | | `ignore -> () 590 | | `incorporate -> super#label_declaration ld 591 | 592 | method! core_type ty = if core_type_is_ignored ty then () else super#core_type ty 593 | end) 594 | #go 595 | () 596 | in 597 | let hash_fold_bindings = 598 | List.map ~f:(hash_fold_structure_item_of_td ~rec_flag ~portable) tds 599 | in 600 | let hash_bindings = 601 | List.concat (List.map ~f:(hash_structure_item_of_td ~portable) tds) 602 | in 603 | match rec_flag with 604 | | Recursive -> 605 | (* if we wanted to maximize the scope hygiene here this would be, in this order: 606 | - recursive group of [hash_fold] 607 | - nonrecursive group of [hash] that are [`uses_hash_fold_t_being_defined] 608 | - recursive group of [hash] that are [`uses_rhs] 609 | but fighting the "unused rec flag" warning is just way too hard *) 610 | pstr_value ~loc Recursive (hash_fold_bindings @ List.map ~f:snd hash_bindings) 611 | | Nonrecursive -> 612 | let rely_on_hash_fold_t, use_rhs = 613 | List.partition_map 614 | (function 615 | | `uses_hash_fold_t_being_defined, binding -> Left binding 616 | | `uses_rhs, binding -> Right binding) 617 | hash_bindings 618 | in 619 | pstr_value ~loc Nonrecursive (hash_fold_bindings @ use_rhs) 620 | @ pstr_value ~loc Nonrecursive rely_on_hash_fold_t 621 | ;; 622 | 623 | let mk_sig ~loc:_ ~path:_ (_rec_flag, tds) ~portable = 624 | List.concat 625 | (List.map tds ~f:(fun td -> 626 | let monomorphic = List.is_empty td.ptype_params in 627 | let definition ~f_type ~f_name = 628 | let type_ = combinator_type_of_type_declaration td ~f:f_type in 629 | let name = 630 | let tn = td.ptype_name.txt in 631 | f_name tn 632 | in 633 | let loc = td.ptype_loc in 634 | psig_value 635 | ~loc 636 | (Ppxlib_jane.Ast_builder.Default.value_description 637 | ~loc 638 | ~name:{ td.ptype_name with txt = name } 639 | ~type_ 640 | ~modalities:(if portable then [ Ppxlib_jane.Modality "portable" ] else []) 641 | ~prim:[]) 642 | in 643 | List.concat 644 | [ [ definition ~f_type:hash_fold_type ~f_name:hash_fold_ ] 645 | ; (if monomorphic then [ definition ~f_type:hash_type ~f_name:hash_ ] else []) 646 | ])) 647 | ;; 648 | 649 | let sig_type_decl ~loc ~path (rec_flag, tds) ~portable = 650 | match 651 | mk_named_sig 652 | ~loc 653 | ~sg_name:"Ppx_hash_lib.Hashable.S" 654 | ~handle_polymorphic_variant:true 655 | tds 656 | with 657 | | Some include_info -> 658 | [ Ppxlib_jane.Ast_builder.Default.psig_include 659 | ~loc 660 | ~modalities: 661 | (if portable then [ Loc.make ~loc (Ppxlib_jane.Modality "portable") ] else []) 662 | include_info 663 | ] 664 | | None -> mk_sig ~loc ~path (rec_flag, tds) ~portable 665 | ;; 666 | 667 | let hash_fold_core_type ty = hash_fold_of_ty_fun ~type_constraint:true ty 668 | 669 | let hash_core_type ty = 670 | hash_of_ty_fun ~special_case_simple_types:true ~type_constraint:true ty 671 | ;; 672 | -------------------------------------------------------------------------------- /expander/ppx_hash_expander.mli: -------------------------------------------------------------------------------- 1 | open Ppxlib 2 | 3 | val hash_fold_type : loc:Location.t -> core_type -> core_type 4 | val hash_fold_core_type : core_type -> expression 5 | 6 | (** [hash_core_type ty] is an expression of type [Hash.state -> ty -> Hash.state] *) 7 | val hash_type : loc:Location.t -> core_type -> core_type 8 | 9 | val hash_core_type : core_type -> expression 10 | val str_attributes : Attribute.packed list 11 | 12 | val str_type_decl 13 | : loc:Location.t 14 | -> path:string 15 | -> rec_flag * type_declaration list 16 | -> portable:bool 17 | -> structure 18 | 19 | val sig_type_decl 20 | : loc:Location.t 21 | -> path:string 22 | -> rec_flag * type_declaration list 23 | -> portable:bool 24 | -> signature_item list 25 | -------------------------------------------------------------------------------- /hash_types/README.org: -------------------------------------------------------------------------------- 1 | #+TITLE: Base_internalhash_types 2 | 3 | This micro-library allows hash states, seeds, and values to be type-equal 4 | between ~Base~ and ~Base_boot~. 5 | -------------------------------------------------------------------------------- /hash_types/src/base_internalhash_types.ml: -------------------------------------------------------------------------------- 1 | type seed = int 2 | type hash_value = int 3 | 4 | (* The main non-64-bit systems we are planning to support are JavaScript and WebAssembly. 5 | js_of_ocaml is treated as a 32-bit platform with 32-bit native integers. All of the 6 | external [caml_hash*] implementations are all written assuming these 32-bit native ints 7 | and so using Int32 on the [Non_immediate] path allows for this change to be a runtime 8 | noop for native and JavaScript. If we were to make this something like [Int63], which 9 | intuitively feels like a more consistent choice, we would have to change the 10 | implementations of all of these pretty foundational hash functions in both js and wasm. *) 11 | include Sys.Immediate64.Make (Int) (Int32) 12 | 13 | type state = t 14 | 15 | let compare_state (x : state) (y : state) : hash_value = 16 | match repr with 17 | | Immediate -> Int.compare x y 18 | | Non_immediate -> Int32.compare x y 19 | ;; 20 | 21 | let state_to_string (x : state) : string = 22 | match repr with 23 | | Immediate -> Int.to_string x 24 | | Non_immediate -> Int32.to_string x 25 | ;; 26 | 27 | let create_seeded (x : int) : state = 28 | match repr with 29 | | Immediate -> x 30 | | Non_immediate -> Int32.of_int x 31 | ;; 32 | 33 | external fold_int64 34 | : state 35 | -> (int64[@unboxed]) 36 | -> state 37 | = "Base_internalhash_fold_int64" "Base_internalhash_fold_int64_unboxed" 38 | [@@noalloc] 39 | 40 | external fold_int : state -> int -> state = "Base_internalhash_fold_int" [@@noalloc] 41 | 42 | external fold_float 43 | : state 44 | -> (float[@unboxed]) 45 | -> state 46 | = "Base_internalhash_fold_float" "Base_internalhash_fold_float_unboxed" 47 | [@@noalloc] 48 | 49 | external fold_string : state -> string -> state = "Base_internalhash_fold_string" 50 | [@@noalloc] 51 | 52 | external get_hash_value : state -> hash_value = "Base_internalhash_get_hash_value" 53 | [@@noalloc] 54 | -------------------------------------------------------------------------------- /hash_types/src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (foreign_stubs 3 | (language c) 4 | (names internalhash_stubs)) 5 | (name base_internalhash_types) 6 | (public_name ppx_hash.base_internalhash_types) 7 | (libraries) 8 | (preprocess no_preprocessing) 9 | (js_of_ocaml 10 | (javascript_files runtime.js)) 11 | (install_c_headers internalhash) 12 | (wasm_of_ocaml 13 | (javascript_files runtime.js) 14 | (wasm_files runtime.wat))) 15 | -------------------------------------------------------------------------------- /hash_types/src/internalhash.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | CAMLexport uint32_t Base_internalhash_fold_blob(uint32_t h, mlsize_t len, uint8_t *s); 4 | -------------------------------------------------------------------------------- /hash_types/src/internalhash_stubs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "internalhash.h" 5 | 6 | /* This pretends that the state of the OCaml internal hash function, which is an 7 | int32, is actually stored in an OCaml int. */ 8 | 9 | CAMLprim value Base_internalhash_fold_int32(value st, value i) { 10 | return Val_long(caml_hash_mix_uint32(Long_val(st), Int32_val(i))); 11 | } 12 | 13 | CAMLprim value Base_internalhash_fold_nativeint(value st, value i) { 14 | return Val_long(caml_hash_mix_intnat(Long_val(st), Nativeint_val(i))); 15 | } 16 | 17 | CAMLprim value Base_internalhash_fold_int64(value st, value i) { 18 | return Val_long(caml_hash_mix_int64(Long_val(st), Int64_val(i))); 19 | } 20 | 21 | CAMLprim value Base_internalhash_fold_int64_unboxed(value st, int64_t i) { 22 | return Val_long(caml_hash_mix_int64(Long_val(st), i)); 23 | } 24 | 25 | CAMLprim value Base_internalhash_fold_int(value st, value i) { 26 | return Val_long(caml_hash_mix_intnat(Long_val(st), Long_val(i))); 27 | } 28 | 29 | CAMLprim value Base_internalhash_fold_float(value st, value i) { 30 | return Val_long(caml_hash_mix_double(Long_val(st), Double_val(i))); 31 | } 32 | 33 | CAMLprim value Base_internalhash_fold_float_unboxed(value st, double i) { 34 | return Val_long(caml_hash_mix_double(Long_val(st), i)); 35 | } 36 | 37 | /* This code mimics what hashtbl.hash does in OCaml's hash.c */ 38 | #define FINAL_MIX(h) \ 39 | h ^= h >> 16; \ 40 | h *= 0x85ebca6b; \ 41 | h ^= h >> 13; \ 42 | h *= 0xc2b2ae35; \ 43 | h ^= h >> 16; 44 | 45 | CAMLprim value Base_internalhash_get_hash_value(value st) { 46 | uint32_t h = Int_val(st); 47 | FINAL_MIX(h); 48 | return Val_int(h & 0x3FFFFFFFU); /*30 bits*/ 49 | } 50 | 51 | /* Macros copied from hash.c in ocaml distribution */ 52 | #define ROTL32(x, n) ((x) << n | (x) >> (32 - n)) 53 | 54 | #define MIX(h, d) \ 55 | d *= 0xcc9e2d51; \ 56 | d = ROTL32(d, 15); \ 57 | d *= 0x1b873593; \ 58 | h ^= d; \ 59 | h = ROTL32(h, 13); \ 60 | h = h * 5 + 0xe6546b64; 61 | 62 | /* Version of [caml_hash_mix_string] from hash.c - adapted for arbitrary char arrays */ 63 | CAMLexport uint32_t Base_internalhash_fold_blob(uint32_t h, mlsize_t len, uint8_t *s) { 64 | mlsize_t i; 65 | uint32_t w; 66 | 67 | /* Mix by 32-bit blocks (little-endian) */ 68 | for (i = 0; i + 4 <= len; i += 4) { 69 | #ifdef ARCH_BIG_ENDIAN 70 | w = s[i] | (s[i + 1] << 8) | (s[i + 2] << 16) | (s[i + 3] << 24); 71 | #else 72 | w = *((uint32_t *)&(s[i])); 73 | #endif 74 | MIX(h, w); 75 | } 76 | /* Finish with up to 3 bytes */ 77 | w = 0; 78 | switch (len & 3) { 79 | case 3: 80 | w = s[i + 2] << 16; /* fallthrough */ 81 | case 2: 82 | w |= s[i + 1] << 8; /* fallthrough */ 83 | case 1: 84 | w |= s[i]; 85 | MIX(h, w); 86 | default: /*skip*/; /* len & 3 == 0, no extra bytes, do nothing */ 87 | } 88 | /* Finally, mix in the length. Ignore the upper 32 bits, generally 0. */ 89 | h ^= (uint32_t)len; 90 | return h; 91 | } 92 | 93 | CAMLprim value Base_internalhash_fold_string(value st, value v_str) { 94 | uint32_t h = Long_val(st); 95 | mlsize_t len = caml_string_length(v_str); 96 | uint8_t *s = (uint8_t *)String_val(v_str); 97 | 98 | h = Base_internalhash_fold_blob(h, len, s); 99 | 100 | return Val_long(h); 101 | } 102 | -------------------------------------------------------------------------------- /hash_types/src/runtime.js: -------------------------------------------------------------------------------- 1 | //Provides: Base_internalhash_fold_int64 2 | //Requires: caml_hash_mix_int64 3 | var Base_internalhash_fold_int64 = caml_hash_mix_int64; 4 | //Provides: Base_internalhash_fold_int 5 | //Requires: caml_hash_mix_int 6 | var Base_internalhash_fold_int = caml_hash_mix_int; 7 | //Provides: Base_internalhash_fold_float 8 | //Requires: caml_hash_mix_float 9 | var Base_internalhash_fold_float = caml_hash_mix_float; 10 | //Provides: Base_internalhash_fold_string 11 | //Requires: caml_hash_mix_string 12 | var Base_internalhash_fold_string = caml_hash_mix_string; 13 | //Provides: Base_internalhash_get_hash_value 14 | //Requires: caml_hash_mix_final 15 | function Base_internalhash_get_hash_value(seed) { 16 | var h = caml_hash_mix_final(seed); 17 | return h & 0x3FFFFFFF; 18 | } 19 | -------------------------------------------------------------------------------- /hash_types/src/runtime.wat: -------------------------------------------------------------------------------- 1 | (module 2 | (import "env" "Int32_val" (func $Int32_val (param (ref eq)) (result i32))) 3 | (import "env" "Int64_val" (func $Int64_val (param (ref eq)) (result i64))) 4 | (import "env" "Double_val" 5 | (func $Double_val (param (ref eq)) (result f64))) 6 | (import "env" "caml_copy_int32" 7 | (func $caml_copy_int32 (param $i i32) (result (ref eq)))) 8 | (import "env" "caml_hash_mix_int" 9 | (func $caml_hash_mix_int (param i32) (param i32) (result i32))) 10 | (import "env" "caml_hash_mix_int64" 11 | (func $caml_hash_mix_int64 (param i32) (param i64) (result i32))) 12 | (import "env" "caml_hash_mix_double" 13 | (func $caml_hash_mix_double (param i32) (param f64) (result i32))) 14 | (import "env" "caml_hash_mix_string" 15 | (func $caml_hash_mix_string 16 | (param i32) (param (ref $string)) (result i32))) 17 | (import "env" "caml_hash_mix_final" 18 | (func $caml_hash_mix_final (param i32) (result i32))) 19 | 20 | (type $string (array (mut i8))) 21 | (type $compare 22 | (func (param (ref eq)) (param (ref eq)) (param i32) (result i32))) 23 | (type $hash 24 | (func (param (ref eq)) (result i32))) 25 | (type $fixed_length (struct (field $bsize_32 i32) (field $bsize_64 i32))) 26 | (type $serialize 27 | (func (param (ref eq)) (param (ref eq)) (result i32) (result i32))) 28 | (type $deserialize (func (param (ref eq)) (result (ref eq)) (result i32))) 29 | (type $dup (func (param (ref eq)) (result (ref eq)))) 30 | (type $custom_operations 31 | (struct 32 | (field $id (ref $string)) 33 | (field $compare (ref null $compare)) 34 | (field $compare_ext (ref null $compare)) 35 | (field $hash (ref null $hash)) 36 | (field $fixed_length (ref null $fixed_length)) 37 | (field $serialize (ref null $serialize)) 38 | (field $deserialize (ref null $deserialize)) 39 | (field $dup (ref null $dup)))) 40 | (type $custom (sub (struct (field (ref $custom_operations))))) 41 | (type $int32 42 | (sub final $custom (struct (field (ref $custom_operations)) (field i32)))) 43 | 44 | (func (export "Base_internalhash_fold_int64") 45 | (param $st (ref eq)) (param $i (ref eq)) (result (ref eq)) 46 | (call $caml_copy_int32 47 | (call $caml_hash_mix_int64 48 | (call $Int32_val (local.get $st)) 49 | (call $Int64_val (local.get $i))))) 50 | 51 | (func (export "Base_internalhash_fold_int") 52 | (param $st (ref eq)) (param $i (ref eq)) (result (ref eq)) 53 | (call $caml_copy_int32 54 | (call $caml_hash_mix_int 55 | (call $Int32_val (local.get $st)) 56 | (i31.get_s (ref.cast (ref i31) (local.get $i)))))) 57 | 58 | (func (export "Base_internalhash_fold_float") 59 | (param $st (ref eq)) (param $f (ref eq)) (result (ref eq)) 60 | (call $caml_copy_int32 61 | (call $caml_hash_mix_double 62 | (call $Int32_val (local.get $st)) 63 | (call $Double_val (local.get $f))))) 64 | 65 | (func (export "Base_internalhash_fold_string") 66 | (param $st (ref eq)) (param $s (ref eq)) (result (ref eq)) 67 | (call $caml_copy_int32 68 | (call $caml_hash_mix_string 69 | (call $Int32_val (local.get $st)) 70 | (ref.cast (ref $string) (local.get $s))))) 71 | 72 | (func (export "Base_internalhash_get_hash_value") 73 | (param $st (ref eq)) (result (ref eq)) 74 | (ref.i31 75 | (i32.and 76 | (call $caml_hash_mix_final (call $Int32_val (local.get $st))) 77 | (i32.const 0x3FFFFFFF)))) 78 | ) 79 | -------------------------------------------------------------------------------- /hash_types/test/base_internalhash_types_test.ml: -------------------------------------------------------------------------------- 1 | module Import = Import 2 | module Test_immediate = Test_immediate 3 | -------------------------------------------------------------------------------- /hash_types/test/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name base_internalhash_types_test) 3 | (libraries base expect_test_helpers_core stdio) 4 | (preprocess 5 | (pps ppx_jane))) 6 | -------------------------------------------------------------------------------- /hash_types/test/import.ml: -------------------------------------------------------------------------------- 1 | include Stdio 2 | include Expect_test_helpers_core 3 | -------------------------------------------------------------------------------- /hash_types/test/test_immediate.ml: -------------------------------------------------------------------------------- 1 | open! Base 2 | open! Import 3 | 4 | let%expect_test "[Base.Hash.state] is still immediate" = 5 | require_no_allocation (fun () -> ignore (Sys.opaque_identity (Base.Hash.create ()))); 6 | [%expect {| |}] 7 | ;; 8 | 9 | let%expect_test _ = 10 | print_s 11 | [%sexp (Stdlib.Obj.is_int (Stdlib.Obj.repr (Base.Hash.create ~seed:1 ())) : bool)]; 12 | [%expect {| true |}] 13 | ;; 14 | -------------------------------------------------------------------------------- /hash_types/test/test_immediate.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /ppx_hash.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "Jane Street developers" 3 | authors: ["Jane Street Group, LLC"] 4 | homepage: "https://github.com/janestreet/ppx_hash" 5 | bug-reports: "https://github.com/janestreet/ppx_hash/issues" 6 | dev-repo: "git+https://github.com/janestreet/ppx_hash.git" 7 | doc: "https://ocaml.janestreet.com/ocaml-core/latest/doc/ppx_hash/index.html" 8 | license: "MIT" 9 | build: [ 10 | ["dune" "build" "-p" name "-j" jobs] 11 | ] 12 | depends: [ 13 | "ocaml" {>= "5.1.0"} 14 | "ppx_compare" 15 | "ppx_sexp_conv" 16 | "ppxlib_jane" 17 | "dune" {>= "3.17.0"} 18 | "ocaml-compiler-libs" {>= "v0.11.0"} 19 | "ppxlib" {>= "0.33.0" & < "0.36.0"} 20 | ] 21 | available: arch != "arm32" & arch != "x86_32" 22 | synopsis: "A ppx rewriter that generates hash functions from type expressions and definitions" 23 | description: " 24 | Part of the Jane Street's PPX rewriters collection. 25 | " 26 | -------------------------------------------------------------------------------- /runtime/siphash/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (foreign_stubs 3 | (language c) 4 | (names siphash)) 5 | (name siphash_lib) 6 | (libraries base) 7 | (preprocess no_preprocessing)) 8 | -------------------------------------------------------------------------------- /runtime/siphash/siphash.c: -------------------------------------------------------------------------------- 1 | /* This file has been modified to be used as one of our hash-folding algorithm. 2 | The reference implementation of siphash is kept in siphash.c.txt */ 3 | /* 4 | SipHash reference C implementation 5 | 6 | Copyright (c) 2012-2014 Jean-Philippe Aumasson 7 | 8 | Copyright (c) 2012-2014 Daniel J. Bernstein 9 | 10 | To the extent possible under law, the author(s) have dedicated all copyright 11 | and related and neighboring rights to this software to the public domain 12 | worldwide. This software is distributed without any warranty. 13 | 14 | You should have received a copy of the CC0 Public Domain Dedication along 15 | with 16 | this software. If not, see 17 | . 18 | 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | /* default: SipHash-2-4 */ 26 | #define cROUNDS 1 27 | #define dROUNDS 3 28 | 29 | #define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) 30 | 31 | #define U32TO8_LE(p, v) \ 32 | (p)[0] = (uint8_t)((v)); \ 33 | (p)[1] = (uint8_t)((v) >> 8); \ 34 | (p)[2] = (uint8_t)((v) >> 16); \ 35 | (p)[3] = (uint8_t)((v) >> 24); 36 | 37 | #define U64TO8_LE(p, v) \ 38 | U32TO8_LE((p), (uint32_t)((v))); \ 39 | U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); 40 | 41 | #define U8TO64_LE(p) \ 42 | (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ 43 | ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ 44 | ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ 45 | ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) 46 | 47 | #define SIPROUND(h) \ 48 | do { \ 49 | h->v0 += h->v1; \ 50 | h->v1 = ROTL(h->v1, 13); \ 51 | h->v1 ^= h->v0; \ 52 | h->v0 = ROTL(h->v0, 32); \ 53 | h->v2 += h->v3; \ 54 | h->v3 = ROTL(h->v3, 16); \ 55 | h->v3 ^= h->v2; \ 56 | h->v0 += h->v3; \ 57 | h->v3 = ROTL(h->v3, 21); \ 58 | h->v3 ^= h->v0; \ 59 | h->v2 += h->v1; \ 60 | h->v1 = ROTL(h->v1, 17); \ 61 | h->v1 ^= h->v2; \ 62 | h->v2 = ROTL(h->v2, 32); \ 63 | } while (0) 64 | 65 | #define TRACE(h) \ 66 | do { \ 67 | printf(" h->v0 %08x %08x\n", (uint32_t)(h->v0 >> 32), \ 68 | (uint32_t)h->v0); \ 69 | printf(" h->v1 %08x %08x\n", (uint32_t)(h->v1 >> 32), \ 70 | (uint32_t)h->v1); \ 71 | printf(" h->v2 %08x %08x\n", (uint32_t)(h->v2 >> 32), \ 72 | (uint32_t)h->v2); \ 73 | printf(" h->v3 %08x %08x\n", (uint32_t)(h->v3 >> 32), \ 74 | (uint32_t)h->v3); \ 75 | } while (0) 76 | 77 | /* The code above this line is mostly a copy and paste from siphash reference 78 | implementation. The code below has been substantially modified. */ 79 | 80 | struct hash_state 81 | { 82 | uint64_t v0, v1, v2, v3; 83 | }; 84 | 85 | /* internal */ 86 | void siphash_fold_uint64(value state, uint64_t i) 87 | { 88 | struct hash_state * h = (struct hash_state *) state; 89 | unsigned round; 90 | h->v3 ^= i; 91 | for (round = 0; round < cROUNDS; ++round) 92 | SIPROUND(h); 93 | h->v0 ^= i; 94 | } 95 | 96 | CAMLprim value siphash_fold_int64(value st, value i) 97 | { 98 | siphash_fold_uint64(st, Int64_val(i)); 99 | return st; 100 | } 101 | 102 | CAMLprim value siphash_fold_int(value st, value i) 103 | { 104 | siphash_fold_uint64(st, Long_val(i)); 105 | return st; 106 | } 107 | 108 | /* The code has been 'borrowed' from byterun/hash.c in ocaml */ 109 | CAMLexport uint64_t caml_hash_normalize_double_to_int64(double d) 110 | { 111 | union { 112 | double d; 113 | uint64_t i64; 114 | #if defined(ARCH_BIG_ENDIAN) || (defined(__arm__) && !defined(__ARM_EABI__)) 115 | struct { uint32_t h; uint32_t l; } i; 116 | #else 117 | struct { uint32_t l; uint32_t h; } i; 118 | #endif 119 | } u; 120 | uint32_t h, l; 121 | /* Convert to two 32-bit halves */ 122 | u.d = d; 123 | h = u.i.h; l = u.i.l; 124 | /* Normalize NaNs */ 125 | if ((h & 0x7FF00000) == 0x7FF00000 && (l | (h & 0xFFFFF)) != 0) { 126 | h = 0x7FF00000; 127 | l = 0x00000001; 128 | } 129 | /* Normalize -0 into +0 */ 130 | else if (h == 0x80000000 && l == 0) { 131 | h = 0; 132 | } 133 | u.i.h = h; 134 | u.i.l = l; 135 | return u.i64; 136 | } 137 | 138 | CAMLprim value siphash_fold_float(value st, value i) 139 | { 140 | uint64_t x = caml_hash_normalize_double_to_int64(Double_val(i)); 141 | siphash_fold_uint64(st, x); 142 | return st; 143 | } 144 | 145 | CAMLprim value siphash_fold_string(value st, value s) 146 | { 147 | const mlsize_t len = caml_string_length(s); 148 | const int left = len & 7; 149 | unsigned char * in; 150 | mlsize_t i; 151 | uint64_t w; 152 | /* The length must be mixed in before the elements to avoid a violation of the rule described by Perfect_hash. */ 153 | siphash_fold_uint64(st, ((uint64_t)len)); 154 | /* Mix by 64-bit blocks (little-endian) */ 155 | for (i = 0; i + 8 <= len; i += 8) { 156 | w = U8TO64_LE(Bp_val(s)+i); 157 | siphash_fold_uint64(st, w); 158 | } 159 | in = (uint8_t*)Bp_val(s) + i; 160 | w = ((uint64_t)len) << 56; 161 | switch (left) { 162 | case 7: 163 | w |= ((uint64_t)in[6]) << 48; 164 | /* fall through */ 165 | case 6: 166 | w |= ((uint64_t)in[5]) << 40; 167 | /* fall through */ 168 | case 5: 169 | w |= ((uint64_t)in[4]) << 32; 170 | /* fall through */ 171 | case 4: 172 | w |= ((uint64_t)in[3]) << 24; 173 | /* fall through */ 174 | case 3: 175 | w |= ((uint64_t)in[2]) << 16; 176 | /* fall through */ 177 | case 2: 178 | w |= ((uint64_t)in[1]) << 8; 179 | /* fall through */ 180 | case 1: 181 | w |= ((uint64_t)in[0]); 182 | break; 183 | case 0: 184 | break; 185 | } 186 | siphash_fold_uint64(st,w); 187 | return st; 188 | } 189 | 190 | CAMLprim value siphash_alloc () 191 | { 192 | return caml_alloc_small(sizeof(struct hash_state) / sizeof(value), Abstract_tag); 193 | } 194 | 195 | CAMLprim value siphash_reset (value st, value key) 196 | { 197 | struct hash_state * h = (struct hash_state *) (Op_val(st)); 198 | char buffer[16]; 199 | uint64_t k0, k1; 200 | unsigned i; 201 | unsigned key_len = caml_string_length(key); 202 | /* initialize the buffer */ 203 | memset(buffer, 0, 16); 204 | /* copy the first 16 chars of the key to the buffer */ 205 | for (i = 0; i < (key_len > 16 ? 16 : key_len); i++) 206 | buffer[i] = Byte_u(key,i); 207 | /* initialize k0 and k1 */ 208 | k0 = U8TO64_LE(buffer); 209 | k1 = U8TO64_LE(buffer + 8); 210 | /* Those are verbatim from siphash reference implementation.*/ 211 | /* "somepseudorandomlygeneratedbytes" */ 212 | h->v0 = 0x736f6d6570736575ULL ^ k0; 213 | h->v1 = 0x646f72616e646f6dULL ^ k1; 214 | h->v2 = 0x6c7967656e657261ULL ^ k0; 215 | h->v3 = 0x7465646279746573ULL ^ k1; 216 | /* If we switch to DOUBLE, we need to bring back the rest of siphash init 217 | code. */ 218 | return st; 219 | } 220 | 221 | /* This function destroy (that is, mixes) the content of the hash state. That 222 | means that it is not possible to stop in the middle of a hash, and ask for 223 | the current hash value, and continue to hash the rest of a structure. */ 224 | CAMLprim value siphash_get_hash_value(value st) 225 | { 226 | struct hash_state * h = (struct hash_state *) st; 227 | uint64_t b; 228 | unsigned i; 229 | /* This is the final mix step of the reference implementation of siphash, in 230 | the "non-DOUBLE" case (that is, where we get a single int64 out of the hash 231 | state). */ 232 | h->v2 ^= 0xff; 233 | for (i = 0; i < dROUNDS; ++i) 234 | SIPROUND(h); 235 | b = h->v0 ^ h->v1 ^ h->v2 ^ h->v3; 236 | /* We lose one bit of precision here. */ 237 | return Val_long(b); 238 | } 239 | 240 | CAMLprim value siphash_blit_hash_to_bytes(value st, value bytes) 241 | { 242 | struct hash_state * h = (struct hash_state *) st; 243 | uint8_t *str = (uint8_t *) (Bp_val(bytes)); 244 | U64TO8_LE(str + 0, h -> v0); 245 | U64TO8_LE(str + 8, h -> v1); 246 | U64TO8_LE(str + 16, h -> v2); 247 | U64TO8_LE(str + 24, h -> v3); 248 | return Val_long(0); 249 | } 250 | -------------------------------------------------------------------------------- /runtime/siphash/siphash.c.txt: -------------------------------------------------------------------------------- 1 | /* 2 | SipHash reference C implementation 3 | Copyright (c) 2012-2014 Jean-Philippe Aumasson 4 | 5 | Copyright (c) 2012-2014 Daniel J. Bernstein 6 | To the extent possible under law, the author(s) have dedicated all copyright 7 | and related and neighboring rights to this software to the public domain 8 | worldwide. This software is distributed without any warranty. 9 | You should have received a copy of the CC0 Public Domain Dedication along 10 | with 11 | this software. If not, see 12 | . 13 | */ 14 | #include 15 | #include 16 | #include 17 | 18 | /* default: SipHash-2-4 */ 19 | #define cROUNDS 2 20 | #define dROUNDS 4 21 | 22 | #define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) 23 | 24 | #define U32TO8_LE(p, v) \ 25 | (p)[0] = (uint8_t)((v)); \ 26 | (p)[1] = (uint8_t)((v) >> 8); \ 27 | (p)[2] = (uint8_t)((v) >> 16); \ 28 | (p)[3] = (uint8_t)((v) >> 24); 29 | 30 | #define U64TO8_LE(p, v) \ 31 | U32TO8_LE((p), (uint32_t)((v))); \ 32 | U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); 33 | 34 | #define U8TO64_LE(p) \ 35 | (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ 36 | ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ 37 | ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ 38 | ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) 39 | 40 | #define SIPROUND \ 41 | do { \ 42 | v0 += v1; \ 43 | v1 = ROTL(v1, 13); \ 44 | v1 ^= v0; \ 45 | v0 = ROTL(v0, 32); \ 46 | v2 += v3; \ 47 | v3 = ROTL(v3, 16); \ 48 | v3 ^= v2; \ 49 | v0 += v3; \ 50 | v3 = ROTL(v3, 21); \ 51 | v3 ^= v0; \ 52 | v2 += v1; \ 53 | v1 = ROTL(v1, 17); \ 54 | v1 ^= v2; \ 55 | v2 = ROTL(v2, 32); \ 56 | } while (0) 57 | 58 | #ifdef DEBUG 59 | #define TRACE \ 60 | do { \ 61 | printf("(%3d) v0 %08x %08x\n", (int)inlen, (uint32_t)(v0 >> 32), \ 62 | (uint32_t)v0); \ 63 | printf("(%3d) v1 %08x %08x\n", (int)inlen, (uint32_t)(v1 >> 32), \ 64 | (uint32_t)v1); \ 65 | printf("(%3d) v2 %08x %08x\n", (int)inlen, (uint32_t)(v2 >> 32), \ 66 | (uint32_t)v2); \ 67 | printf("(%3d) v3 %08x %08x\n", (int)inlen, (uint32_t)(v3 >> 32), \ 68 | (uint32_t)v3); \ 69 | } while (0) 70 | #else 71 | #define TRACE 72 | #endif 73 | 74 | int siphash(uint8_t *out, const uint8_t *in, uint64_t inlen, const uint8_t *k) { 75 | /* "somepseudorandomlygeneratedbytes" */ 76 | uint64_t v0 = 0x736f6d6570736575ULL; 77 | uint64_t v1 = 0x646f72616e646f6dULL; 78 | uint64_t v2 = 0x6c7967656e657261ULL; 79 | uint64_t v3 = 0x7465646279746573ULL; 80 | uint64_t b; 81 | uint64_t k0 = U8TO64_LE(k); 82 | uint64_t k1 = U8TO64_LE(k + 8); 83 | uint64_t m; 84 | int i; 85 | const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); 86 | const int left = inlen & 7; 87 | b = ((uint64_t)inlen) << 56; 88 | v3 ^= k1; 89 | v2 ^= k0; 90 | v1 ^= k1; 91 | v0 ^= k0; 92 | 93 | #ifdef DOUBLE 94 | v1 ^= 0xee; 95 | #endif 96 | 97 | for (; in != end; in += 8) { 98 | m = U8TO64_LE(in); 99 | v3 ^= m; 100 | 101 | TRACE; 102 | for (i = 0; i < cROUNDS; ++i) 103 | SIPROUND; 104 | 105 | v0 ^= m; 106 | } 107 | 108 | switch (left) { 109 | case 7: 110 | b |= ((uint64_t)in[6]) << 48; 111 | case 6: 112 | b |= ((uint64_t)in[5]) << 40; 113 | case 5: 114 | b |= ((uint64_t)in[4]) << 32; 115 | case 4: 116 | b |= ((uint64_t)in[3]) << 24; 117 | case 3: 118 | b |= ((uint64_t)in[2]) << 16; 119 | case 2: 120 | b |= ((uint64_t)in[1]) << 8; 121 | case 1: 122 | b |= ((uint64_t)in[0]); 123 | break; 124 | case 0: 125 | break; 126 | } 127 | 128 | v3 ^= b; 129 | 130 | TRACE; 131 | for (i = 0; i < cROUNDS; ++i) 132 | SIPROUND; 133 | 134 | v0 ^= b; 135 | 136 | #ifndef DOUBLE 137 | v2 ^= 0xff; 138 | #else 139 | v2 ^= 0xee; 140 | #endif 141 | 142 | TRACE; 143 | for (i = 0; i < dROUNDS; ++i) 144 | SIPROUND; 145 | 146 | b = v0 ^ v1 ^ v2 ^ v3; 147 | U64TO8_LE(out, b); 148 | 149 | #ifdef DOUBLE 150 | v1 ^= 0xdd; 151 | 152 | TRACE; 153 | for (i = 0; i < dROUNDS; ++i) 154 | SIPROUND; 155 | 156 | b = v0 ^ v1 ^ v2 ^ v3; 157 | U64TO8_LE(out + 8, b); 158 | #endif 159 | 160 | return 0; 161 | } 162 | -------------------------------------------------------------------------------- /runtime/siphash/siphash.ml: -------------------------------------------------------------------------------- 1 | let description = "siphash" 2 | 3 | type state 4 | type hash_value = int 5 | type seed = string 6 | 7 | external alloc : unit -> state = "siphash_alloc" 8 | external reset_to : state -> seed -> state = "siphash_reset" [@@noalloc] 9 | external fold_int64 : state -> int64 -> state = "siphash_fold_int64" [@@noalloc] 10 | external fold_int : state -> int -> state = "siphash_fold_int" [@@noalloc] 11 | external fold_float : state -> float -> state = "siphash_fold_float" [@@noalloc] 12 | external fold_string : state -> string -> state = "siphash_fold_string" [@@noalloc] 13 | external get_hash_value : state -> hash_value = "siphash_get_hash_value" [@@noalloc] 14 | 15 | let default_seed = "the_default_seed" 16 | let reset ?(seed = default_seed) t = reset_to t seed 17 | 18 | module For_tests = struct 19 | external blit_state_to_bytes : state -> bytes -> unit = "siphash_blit_hash_to_bytes" 20 | [@@noalloc] 21 | 22 | let state_to_string state = 23 | let bytes = Bytes.create (8 * 4) in 24 | blit_state_to_bytes state bytes; 25 | Bytes.to_string bytes 26 | ;; 27 | 28 | let compare_state a b = compare (state_to_string a) (state_to_string b) 29 | end 30 | -------------------------------------------------------------------------------- /runtime/siphash/siphash.mli: -------------------------------------------------------------------------------- 1 | include Base.Hash.S with type seed = string and type hash_value = int 2 | 3 | (** [Siphash] uses first 16 chars of the [seed] string to initialize/reset the hash state, 4 | padding it to the right with zero bytes if it's too short. The rest of the string is 5 | discarded. *) 6 | 7 | external alloc : unit -> state = "siphash_alloc" 8 | external reset_to : state -> seed -> state = "siphash_reset" [@@noalloc] 9 | external fold_int64 : state -> int64 -> state = "siphash_fold_int64" [@@noalloc] 10 | external fold_int : state -> int -> state = "siphash_fold_int" [@@noalloc] 11 | external fold_float : state -> float -> state = "siphash_fold_float" [@@noalloc] 12 | external fold_string : state -> string -> state = "siphash_fold_string" [@@noalloc] 13 | external get_hash_value : state -> int = "siphash_get_hash_value" [@@noalloc] 14 | -------------------------------------------------------------------------------- /runtime/siphash/siphash_lib.ml: -------------------------------------------------------------------------------- 1 | module Siphash = Siphash 2 | -------------------------------------------------------------------------------- /runtime/src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (foreign_stubs 3 | (language c) 4 | (names hash_stubs)) 5 | (name ppx_hash_lib) 6 | (public_name ppx_hash.runtime-lib) 7 | (libraries base_internalhash_types) 8 | (preprocess 9 | (pps ppx_compare ppx_sexp_conv))) 10 | -------------------------------------------------------------------------------- /runtime/src/hash.ml: -------------------------------------------------------------------------------- 1 | (* 2 | This is the interface to the runtime support for [ppx_hash]. 3 | 4 | The [ppx_hash] syntax extension supports: [@@deriving hash] and [%hash_fold: TYPE] and 5 | [%hash: TYPE] 6 | 7 | For type [t] a function [hash_fold_t] of type [Hash.state -> t -> Hash.state] is 8 | generated. 9 | 10 | The generated [hash_fold_] function is compositional, following the structure of the 11 | type; allowing user overrides at every level. This is in contrast to ocaml's builtin 12 | polymorphic hashing [Hashtbl.hash] which ignores user overrides. 13 | 14 | The generator also provides a direct hash-function [hash] (named [hash_] when != 15 | "t") of type: [t -> Hash.hash_value]. 16 | 17 | The folding hash function can be accessed as [%hash_fold: TYPE] 18 | The direct hash function can be accessed as [%hash: TYPE] 19 | *) 20 | 21 | include Hash_intf 22 | 23 | (** Builtin folding-style hash functions, abstracted over [Hash_intf.S] *) 24 | module Folding (Hash : Hash_intf.S) : 25 | Hash_intf.Builtin_intf 26 | with type state = Hash.state 27 | and type hash_value = Hash.hash_value = struct 28 | type state = Hash.state 29 | type hash_value = Hash.hash_value 30 | type 'a folder = state -> 'a -> state 31 | 32 | let hash_fold_unit s () = s 33 | let hash_fold_int = Hash.fold_int 34 | let hash_fold_int64 = Hash.fold_int64 35 | let hash_fold_float = Hash.fold_float 36 | let hash_fold_string = Hash.fold_string 37 | let as_int f s x = hash_fold_int s (f x) 38 | 39 | (* This ignores the sign bit on 32-bit architectures, but it's unlikely to lead to 40 | frequent collisions (min_value colliding with 0 is the most likely one). *) 41 | let hash_fold_int32 = as_int Stdlib.Int32.to_int 42 | let hash_fold_char = as_int Char.code 43 | 44 | let hash_fold_bool = 45 | as_int (function 46 | | true -> 1 47 | | false -> 0) 48 | ;; 49 | 50 | let hash_fold_nativeint s x = hash_fold_int64 s (Stdlib.Int64.of_nativeint x) 51 | 52 | let hash_fold_option hash_fold_elem s = function 53 | | None -> hash_fold_int s 0 54 | | Some x -> hash_fold_elem (hash_fold_int s 1) x 55 | ;; 56 | 57 | let rec hash_fold_list_body hash_fold_elem s list = 58 | match list with 59 | | [] -> s 60 | | x :: xs -> hash_fold_list_body hash_fold_elem (hash_fold_elem s x) xs 61 | ;; 62 | 63 | let hash_fold_list hash_fold_elem s list = 64 | (* The [length] of the list must be incorporated into the hash-state so values of 65 | types such as [unit list] - ([], [()], [();()],..) are hashed differently. *) 66 | (* The [length] must come before the elements to avoid a violation of the rule 67 | enforced by Perfect_hash. *) 68 | let s = hash_fold_int s (List.length list) in 69 | let s = hash_fold_list_body hash_fold_elem s list in 70 | s 71 | ;; 72 | 73 | let hash_fold_lazy_t hash_fold_elem s x = hash_fold_elem s (Stdlib.Lazy.force x) 74 | let hash_fold_ref_frozen hash_fold_elem s x = hash_fold_elem s !x 75 | 76 | let rec hash_fold_array_frozen_i hash_fold_elem s array i = 77 | if i = Array.length array 78 | then s 79 | else ( 80 | let e = Array.unsafe_get array i in 81 | hash_fold_array_frozen_i hash_fold_elem (hash_fold_elem s e) array (i + 1)) 82 | ;; 83 | 84 | let hash_fold_array_frozen hash_fold_elem s array = 85 | hash_fold_array_frozen_i 86 | (* [length] must be incorporated for arrays, as it is for lists. See comment above *) 87 | hash_fold_elem 88 | (hash_fold_int s (Array.length array)) 89 | array 90 | 0 91 | ;; 92 | 93 | (* the duplication here is because we think 94 | ocaml can't eliminate indirect function calls otherwise. *) 95 | let hash_nativeint x = 96 | Hash.get_hash_value (hash_fold_nativeint (Hash.reset (Hash.alloc ())) x) 97 | ;; 98 | 99 | let hash_int64 x = Hash.get_hash_value (hash_fold_int64 (Hash.reset (Hash.alloc ())) x) 100 | let hash_int32 x = Hash.get_hash_value (hash_fold_int32 (Hash.reset (Hash.alloc ())) x) 101 | let hash_char x = Hash.get_hash_value (hash_fold_char (Hash.reset (Hash.alloc ())) x) 102 | let hash_int x = Hash.get_hash_value (hash_fold_int (Hash.reset (Hash.alloc ())) x) 103 | let hash_bool x = Hash.get_hash_value (hash_fold_bool (Hash.reset (Hash.alloc ())) x) 104 | 105 | let hash_string x = 106 | Hash.get_hash_value (hash_fold_string (Hash.reset (Hash.alloc ())) x) 107 | ;; 108 | 109 | let hash_float x = Hash.get_hash_value (hash_fold_float (Hash.reset (Hash.alloc ())) x) 110 | let hash_unit x = Hash.get_hash_value (hash_fold_unit (Hash.reset (Hash.alloc ())) x) 111 | end 112 | 113 | module F (Hash : Hash_intf.S) : 114 | Hash_intf.Full 115 | with type hash_value = Hash.hash_value 116 | and type state = Hash.state 117 | and type seed = Hash.seed = struct 118 | include Hash 119 | 120 | type 'a folder = state -> 'a -> state 121 | 122 | let create ?seed () = reset ?seed (alloc ()) 123 | let of_fold hash_fold_t t = get_hash_value (hash_fold_t (create ()) t) 124 | 125 | module Builtin = Folding (Hash) 126 | 127 | let run ?seed folder x = 128 | Hash.get_hash_value (folder (Hash.reset ?seed (Hash.alloc ())) x) 129 | ;; 130 | end 131 | 132 | module Internalhash : sig 133 | include 134 | Hash_intf.S 135 | with type state = Base_internalhash_types.state 136 | (* We give a concrete type for [state], albeit only partially exposed (see 137 | Base_internalhash_types), so that it unifies with the same type in [Base_boot], 138 | and to allow optimizations for the immediate type. *) 139 | and type seed = Base_internalhash_types.seed 140 | and type hash_value = Base_internalhash_types.hash_value 141 | 142 | external fold_int64 143 | : state 144 | -> (int64[@unboxed]) 145 | -> state 146 | = "Base_internalhash_fold_int64" "Base_internalhash_fold_int64_unboxed" 147 | [@@noalloc] 148 | 149 | external fold_int : state -> int -> state = "Base_internalhash_fold_int" [@@noalloc] 150 | 151 | external fold_float 152 | : state 153 | -> (float[@unboxed]) 154 | -> state 155 | = "Base_internalhash_fold_float" "Base_internalhash_fold_float_unboxed" 156 | [@@noalloc] 157 | 158 | external fold_string : state -> string -> state = "Base_internalhash_fold_string" 159 | [@@noalloc] 160 | 161 | external get_hash_value : state -> hash_value = "Base_internalhash_get_hash_value" 162 | [@@noalloc] 163 | end = struct 164 | let description = "internalhash" 165 | 166 | include Base_internalhash_types 167 | 168 | let alloc () = create_seeded 0 169 | let reset ?(seed = 0) _t = create_seeded seed 170 | 171 | module For_tests = struct 172 | let compare_state = Base_internalhash_types.compare_state 173 | let state_to_string = Base_internalhash_types.state_to_string 174 | end 175 | end 176 | 177 | module T = struct 178 | include Internalhash 179 | 180 | type 'a folder = state -> 'a -> state 181 | 182 | let create ?seed () = reset ?seed (alloc ()) 183 | let run ?seed folder x = get_hash_value (folder (reset ?seed (alloc ())) x) 184 | let of_fold hash_fold_t t = get_hash_value (hash_fold_t (create ()) t) 185 | 186 | module Builtin = struct 187 | module Folding = struct 188 | include Folding (Internalhash) 189 | 190 | (* Hack to make zero-alloc see that these functions are zero-alloc, even 191 | in BUILD_PROFILE=dev. dev builds do not inline functor applications 192 | like [Folding (Internalhash)] above. 193 | *) 194 | let hash_fold_string = Internalhash.fold_string 195 | let hash_fold_float = Internalhash.fold_float 196 | let hash_fold_int = Internalhash.fold_int 197 | let hash_fold_int64 = Internalhash.fold_int64 198 | end 199 | 200 | include Folding 201 | 202 | (* [Folding] provides some default implementations for the [hash_*] functions below, 203 | but they are inefficient for some use-cases because of the use of the [hash_fold] 204 | functions. At this point, the [hash_value] type has been fixed to [int], so this 205 | module can provide specialized implementations. *) 206 | 207 | let hash_char = Char.code 208 | 209 | (* This hash was chosen from here: https://gist.github.com/badboy/6267743 210 | 211 | It attempts to fulfill the primary goals of a non-cryptographic hash function: 212 | 213 | - a bit change in the input should change ~1/2 of the output bits 214 | - the output should be uniformly distributed across the output range 215 | - inputs that are close to each other shouldn't lead to outputs that are close to 216 | each other. 217 | - all bits of the input are used in generating the output 218 | 219 | In our case we also want it to be fast, non-allocating, and inlinable. *) 220 | let[@inline always] hash_int (t : int) = 221 | let t = lnot t + (t lsl 21) in 222 | let t = t lxor (t lsr 24) in 223 | let t = t + (t lsl 3) + (t lsl 8) in 224 | let t = t lxor (t lsr 14) in 225 | let t = t + (t lsl 2) + (t lsl 4) in 226 | let t = t lxor (t lsr 28) in 227 | t + (t lsl 31) 228 | ;; 229 | 230 | let hash_bool x = if x then 1 else 0 231 | 232 | external hash_float 233 | : (float[@unboxed]) 234 | -> int 235 | = "Base_hash_double" "Base_hash_double_unboxed" 236 | [@@noalloc] 237 | 238 | let hash_unit () = 0 239 | end 240 | end 241 | 242 | include T 243 | -------------------------------------------------------------------------------- /runtime/src/hash.mli: -------------------------------------------------------------------------------- 1 | include Hash_intf.Hash (** @inline *) 2 | -------------------------------------------------------------------------------- /runtime/src/hash_intf.ml: -------------------------------------------------------------------------------- 1 | (** [Hash_intf.S] is the interface which a hash function must support. 2 | 3 | The functions of [Hash_intf.S] are only allowed to be used in specific sequence: 4 | 5 | [alloc], [reset ?seed], [fold_..*], [get_hash_value], [reset ?seed], [fold_..*], 6 | [get_hash_value], ... 7 | 8 | (The optional [seed]s passed to each reset may differ.) 9 | 10 | The chain of applications from [reset] to [get_hash_value] must be done in a 11 | single-threaded manner (you can't use [fold_*] on a state that's been used before). 12 | More precisely, [alloc ()] creates a new family of states. All functions that take [t] 13 | and produce [t] return a new state from the same family. 14 | 15 | At any point in time, at most one state in the family is "valid". The other states are 16 | "invalid". 17 | 18 | - The state returned by [alloc] is invalid. 19 | - The state returned by [reset] is valid (all of the other states become invalid). 20 | - The [fold_*] family of functions requires a valid state and produces a valid state 21 | (thereby making the input state invalid). 22 | - [get_hash_value] requires a valid state and makes it invalid. 23 | 24 | These requirements are currently formally encoded in the [Check_initialized_correctly] 25 | module in bench/bench.ml. *) 26 | 27 | module type S = sig 28 | (** Name of the hash-function, e.g., "internalhash", "siphash" *) 29 | val description : string 30 | 31 | (** [state] is the internal hash-state used by the hash function. *) 32 | type state 33 | 34 | (** [fold_ state v] incorporates a value [v] of type into the hash-state, 35 | returning a modified hash-state. Implementations of the [fold_] functions may 36 | mutate the [state] argument in place, and return a reference to it. Implementations 37 | of the fold_ functions should not allocate. *) 38 | val fold_int : state -> int -> state 39 | 40 | val fold_int64 : state -> int64 -> state 41 | val fold_float : state -> float -> state 42 | val fold_string : state -> string -> state 43 | 44 | (** [seed] is the type used to seed the initial hash-state. *) 45 | type seed 46 | 47 | (** [alloc ()] returns a fresh uninitialized hash-state. May allocate. *) 48 | val alloc : unit -> state 49 | 50 | (** [reset ?seed state] initializes/resets a hash-state with the given [seed], or else a 51 | default-seed. Argument [state] may be mutated. Should not allocate. *) 52 | val reset : ?seed:seed -> state -> state 53 | 54 | (** [hash_value] The type of hash values, returned by [get_hash_value]. *) 55 | type hash_value 56 | 57 | (** [get_hash_value] extracts a hash-value from the hash-state. *) 58 | val get_hash_value : state -> hash_value 59 | 60 | module For_tests : sig 61 | val compare_state : state -> state -> int 62 | val state_to_string : state -> string 63 | end 64 | end 65 | 66 | module type Builtin_hash_fold_intf = sig 67 | type state 68 | type 'a folder = state -> 'a -> state 69 | 70 | val hash_fold_nativeint : nativeint folder 71 | val hash_fold_int64 : int64 folder 72 | val hash_fold_int32 : int32 folder 73 | val hash_fold_char : char folder 74 | val hash_fold_int : int folder 75 | val hash_fold_bool : bool folder 76 | val hash_fold_string : string folder 77 | val hash_fold_float : float folder 78 | val hash_fold_unit : unit folder 79 | val hash_fold_option : 'a folder -> 'a option folder 80 | val hash_fold_list : 'a folder -> 'a list folder 81 | val hash_fold_lazy_t : 'a folder -> 'a lazy_t folder 82 | 83 | (** Hash support for [array] and [ref] is provided, but is potentially DANGEROUS, since 84 | it incorporates the current contents of the array/ref into the hash value. Because 85 | of this we add a [_frozen] suffix to the function name. 86 | 87 | Hash support for [string] is also potentially DANGEROUS, but strings are mutated 88 | less often, so we don't append [_frozen] to it. 89 | 90 | Also note that we don't support [bytes]. *) 91 | val hash_fold_ref_frozen : 'a folder -> 'a ref folder 92 | 93 | val hash_fold_array_frozen : 'a folder -> 'a array folder 94 | end 95 | 96 | module type Builtin_hash_intf = sig 97 | type hash_value 98 | 99 | val hash_nativeint : nativeint -> hash_value 100 | val hash_int64 : int64 -> hash_value 101 | val hash_int32 : int32 -> hash_value 102 | val hash_char : char -> hash_value 103 | val hash_int : int -> hash_value 104 | val hash_bool : bool -> hash_value 105 | val hash_string : string -> hash_value 106 | val hash_float : float -> hash_value 107 | val hash_unit : unit -> hash_value 108 | end 109 | 110 | module type Builtin_intf = sig 111 | include Builtin_hash_fold_intf 112 | include Builtin_hash_intf 113 | end 114 | 115 | module type Full = sig 116 | include S (** @inline *) 117 | 118 | type 'a folder = state -> 'a -> state 119 | 120 | (** [create ?seed ()] is a convenience. Equivalent to [reset ?seed (alloc ())]. *) 121 | val create : ?seed:seed -> unit -> state 122 | 123 | (** [of_fold fold] constructs a standard hash function from an existing fold function. *) 124 | val of_fold : (state -> 'a -> state) -> 'a -> hash_value 125 | 126 | module Builtin : 127 | Builtin_intf 128 | with type state := state 129 | and type 'a folder := 'a folder 130 | and type hash_value := hash_value 131 | 132 | (** [run ?seed folder x] runs [folder] on [x] in a newly allocated hash-state, 133 | initialized using optional [seed] or a default-seed. 134 | 135 | The following identity exists: [run [%hash_fold: T]] == [[%hash: T]] 136 | 137 | [run] can be used if we wish to run a hash-folder with a non-default seed. *) 138 | val run : ?seed:seed -> 'a folder -> 'a -> hash_value 139 | end 140 | 141 | module type Hash = sig 142 | module type Full = Full 143 | module type S = S 144 | 145 | module F (Hash : S) : 146 | Full 147 | with type hash_value = Hash.hash_value 148 | and type state = Hash.state 149 | and type seed = Hash.seed 150 | 151 | (** The code of [ppx_hash] is agnostic to the choice of hash algorithm that is used. 152 | However, it is not currently possible to mix various choices of hash algorithms in a 153 | given code base. 154 | 155 | We experimented with: 156 | (a) custom hash algorithms implemented in OCaml and 157 | (b) in C; 158 | (c) OCaml's internal hash function (which is a custom version of Murmur3, 159 | implemented in C); 160 | (d) siphash, a modern hash function implemented in C. 161 | 162 | Our findings were as follows: 163 | 164 | - Implementing our own custom hash algorithms in OCaml and C yielded very little 165 | performance improvement over the (c) proposal, without providing the benefit of 166 | being a peer-reviewed, widely used hash function. 167 | 168 | - Siphash (a modern hash function with an internal state of 32 bytes) has a worse 169 | performance profile than (a,b,c) above (hashing takes more time). Since its 170 | internal state is bigger than an OCaml immediate value, one must either manage 171 | allocation of such state explicitly, or paying the cost of allocation each time a 172 | hash is computed. While being a supposedly good hash function (with good hash 173 | quality), this quality was not translated in measurable improvements in our macro 174 | benchmarks. (Also, based on the data available at the time of writing, it's 175 | unclear that other hash algorithms in this class would be more than marginally 176 | faster.) 177 | 178 | - By contrast, using the internal combinators of OCaml hash function means that we 179 | do not allocate (the internal state of this hash function is 32 bit) and have the 180 | same quality and performance as Hashtbl.hash. 181 | 182 | Hence, we are here making the choice of using this Internalhash (that is, Murmur3, 183 | the OCaml hash algorithm as of 4.03) as our hash algorithm. It means that the state 184 | of the hash function does not need to be preallocated, and makes for simpler use in 185 | hash tables and other structures. *) 186 | 187 | (** @open *) 188 | include 189 | Full 190 | with type state = Base_internalhash_types.state 191 | and type seed = Base_internalhash_types.seed 192 | and type hash_value = Base_internalhash_types.hash_value 193 | end 194 | -------------------------------------------------------------------------------- /runtime/src/hash_stubs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* Final mix and return from the hash.c implementation from INRIA */ 6 | #define FINAL_MIX_AND_RETURN(h) \ 7 | h ^= h >> 16; \ 8 | h *= 0x85ebca6b; \ 9 | h ^= h >> 13; \ 10 | h *= 0xc2b2ae35; \ 11 | h ^= h >> 16; \ 12 | return Val_int(h & 0x3FFFFFFFU); 13 | 14 | CAMLprim value Base_hash_string(value string) { 15 | uint32_t h; 16 | h = caml_hash_mix_string(0, string); 17 | FINAL_MIX_AND_RETURN(h) 18 | } 19 | 20 | CAMLprim value Base_hash_double(value d) { 21 | uint32_t h; 22 | h = caml_hash_mix_double(0, Double_val(d)); 23 | FINAL_MIX_AND_RETURN(h); 24 | } 25 | 26 | CAMLprim value Base_hash_double_unboxed(double d) { 27 | uint32_t h; 28 | h = caml_hash_mix_double(0, d); 29 | FINAL_MIX_AND_RETURN(h); 30 | } 31 | -------------------------------------------------------------------------------- /runtime/src/ppx_hash_lib.ml: -------------------------------------------------------------------------------- 1 | module Std = struct 2 | module Hash = Hash 3 | end 4 | 5 | (** The functor is exposed to make it possible to use ppx_hash with alternative hash types 6 | without having to duplicate these definitions. *) 7 | module F (Types : sig 8 | type hash_state 9 | type hash_value 10 | end) = 11 | struct 12 | open Types 13 | 14 | type 'a hash_fold = hash_state -> 'a -> hash_state 15 | 16 | module Hashable = struct 17 | module type S_any = sig 18 | type t 19 | 20 | val hash_fold_t : t hash_fold 21 | val hash : t -> hash_value 22 | end 23 | 24 | module type S = sig 25 | type t 26 | 27 | include S_any with type t := t 28 | end 29 | 30 | module type S1 = sig 31 | type 'a t 32 | 33 | val hash_fold_t : 'a hash_fold -> 'a t hash_fold 34 | end 35 | 36 | module type S2 = sig 37 | type ('a, 'b) t 38 | 39 | val hash_fold_t : 'a hash_fold -> 'b hash_fold -> ('a, 'b) t hash_fold 40 | end 41 | 42 | module type S3 = sig 43 | type ('a, 'b, 'c) t 44 | 45 | val hash_fold_t 46 | : 'a hash_fold 47 | -> 'b hash_fold 48 | -> 'c hash_fold 49 | -> ('a, 'b, 'c) t hash_fold 50 | end 51 | end 52 | end 53 | 54 | include F (struct 55 | type nonrec hash_state = Std.Hash.state 56 | type nonrec hash_value = Std.Hash.hash_value 57 | end) 58 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name ppx_hash) 3 | (public_name ppx_hash) 4 | (kind ppx_deriver) 5 | (libraries ppxlib ppx_hash_expander) 6 | (preprocess no_preprocessing)) 7 | -------------------------------------------------------------------------------- /src/ppx_hash.ml: -------------------------------------------------------------------------------- 1 | open Ppxlib 2 | 3 | let type_extension name f = 4 | Context_free.Rule.extension 5 | (Extension.declare 6 | name 7 | Core_type 8 | Ast_pattern.(ptyp __) 9 | (fun ~loc ~path:_ ty -> f ~loc ty)) 10 | ;; 11 | 12 | let () = 13 | let name = "hash_fold" in 14 | Deriving.ignore 15 | (Deriving.add name ~extension:(fun ~loc:_ ~path:_ ty -> 16 | Ppx_hash_expander.hash_fold_core_type ty)); 17 | Driver.register_transformation 18 | name 19 | ~rules:[ type_extension name Ppx_hash_expander.hash_fold_type ] 20 | ;; 21 | 22 | let () = 23 | let name = "hash" in 24 | Deriving.ignore 25 | (Deriving.add 26 | name 27 | ~str_type_decl: 28 | (Deriving.Generator.make 29 | Deriving.Args.(empty +> flag "portable") 30 | (fun ~loc ~path tds portable -> 31 | Ppx_hash_expander.str_type_decl ~loc ~path tds ~portable) 32 | ~attributes:Ppx_hash_expander.str_attributes) 33 | ~sig_type_decl: 34 | (Deriving.Generator.make 35 | Deriving.Args.(empty +> flag "portable") 36 | (fun ~loc ~path tds portable -> 37 | Ppx_hash_expander.sig_type_decl ~loc ~path tds ~portable)) 38 | ~extension:(fun ~loc:_ ~path:_ ty -> Ppx_hash_expander.hash_core_type ty)); 39 | Driver.register_transformation 40 | name 41 | ~rules:[ type_extension name Ppx_hash_expander.hash_type ] 42 | ;; 43 | -------------------------------------------------------------------------------- /src/ppx_hash.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | --------------------------------------------------------------------------------