├── .gitignore ├── .ocamlinit ├── INSTALL.md ├── LICENSE ├── META ├── Makefile ├── README.md ├── _tags ├── opam └── src ├── lib ├── api.mli ├── functors.ml ├── int_utf8_character.ml ├── int_utf8_character.mli ├── list_of.ml ├── list_of.mli ├── native_bytes.ml ├── native_bytes.mli ├── native_character.ml ├── native_character.mli ├── native_string.ml ├── native_string.mli ├── of_mutable.ml ├── of_mutable.mli ├── sosa.mlpack ├── sosa_pervasives.ml └── sosa_utilities.ml └── test └── main.ml /.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | *.cmp 3 | sosa_tests 4 | doc 5 | -------------------------------------------------------------------------------- /.ocamlinit: -------------------------------------------------------------------------------- 1 | 2 | #directory "_build/src/lib" ;; 3 | #load_rec "sosa.cma" ;; 4 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | BUILD INSTRUCTIONS 2 | ================== 3 | 4 | Build 5 | ----- 6 | 7 | This package does not have dependencies, just do: 8 | 9 | make build 10 | 11 | Install 12 | ------- 13 | 14 | You can install the library with `ocamlfind`: 15 | 16 | make install 17 | 18 | You may want to control the destination with the variable 19 | `OCAMLFIND_DESTDIR`. 20 | 21 | Uninstall 22 | --------- 23 | 24 | You can uninstall the library (also `ocamlfind`): 25 | 26 | make uninstall 27 | 28 | 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, Sebastien Mondet 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. THE 6 | SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS 7 | ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED 8 | WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR 9 | BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR 10 | ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 11 | WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 12 | ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 13 | SOFTWARE. 14 | -------------------------------------------------------------------------------- /META: -------------------------------------------------------------------------------- 1 | version = "0.0.1" 2 | description = "Sane OCaml String API" 3 | archive(byte) = "sosa.cma" 4 | archive(byte,plugin) = "sosa.cma" 5 | archive(native) = "sosa.cmxa" 6 | archive(native,plugin) = "sosa.cmxs" 7 | exists_if = "sosa.cma" 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BISECT_DIR=$(shell ocamlfind query bisect) 2 | 3 | .PHONY: clean build install uninstall default doc 4 | 5 | default: 6 | @echo "available targets:" 7 | @echo " build compile sosa" 8 | @echo " test compile sosa_tests, a test suite" 9 | @echo " coverage compile sosa_test with instrumented bisect_ppx coverage" 10 | @echo " cov_report create a coverage report from the latest coverage run" 11 | @echo " clean remove build directory" 12 | @echo " install install via ocamlfind" 13 | @echo " uninstall unintall via ocamlfind" 14 | @echo " merlinize create .merlin file" 15 | @echo " doc create documentation" 16 | 17 | build: 18 | ocamlbuild -use-ocamlfind -cflag -safe-string -I src/lib sosa.cmx sosa.cma sosa.cmxa sosa.cmxs 19 | 20 | test: 21 | ocamlbuild -use-ocamlfind -package nonstd -package unix -package bigarray -cflag -safe-string -I src/lib -I src/test main.native && \ 22 | rm -f main.native && \ 23 | mv _build/src/test/main.native sosa_tests 24 | 25 | coverage: 26 | ocamlbuild -use-ocamlfind -pkgs nonstd,unix,bigarray,bisect_ppx -cflag -safe-string -I src/lib -I src/test main.native && \ 27 | rm -f main.native && \ 28 | mv _build/src/test/main.native sosa_tests 29 | 30 | clean: 31 | ocamlbuild -clean && \ 32 | rm -f main.native 33 | 34 | install: 35 | ocamlfind install sosa META \ 36 | _build/src/lib/sosa.cmi \ 37 | _build/src/lib/sosa.cmo \ 38 | _build/src/lib/sosa.cmx \ 39 | _build/src/lib/sosa.a \ 40 | _build/src/lib/sosa.o \ 41 | _build/src/lib/sosa.cma \ 42 | _build/src/lib/sosa.cmxa \ 43 | _build/src/lib/sosa.cmxs 44 | 45 | uninstall: 46 | ocamlfind remove sosa 47 | 48 | merlinize: 49 | echo 'S .' > .merlin 50 | echo 'B _build' >> .merlin 51 | echo 'S .' > src/test/.merlin 52 | echo 'B ../../_build' >> src/test/.merlin 53 | echo 'PKG nonstd' >> src/test/.merlin 54 | 55 | doc: 56 | cp src/lib/sosa.mlpack sosa.odocl && \ 57 | ocamlbuild -I src/lib/ -docflags -charset,UTF-8,-keep-code,-colorize-code,-html,-short-functors sosa.docdir/index.html && \ 58 | rm sosa.docdir && \ 59 | ln -s _build/sosa.docdir/ doc && \ 60 | rm sosa.odocl 61 | 62 | cov_report: 63 | cd _build && \ 64 | bisect-ppx-report -html ../report_dir ../$(shell ls -t bisect*.out | head -1) && \ 65 | cd - 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Sane OCaml String API 3 | ===================== 4 | 5 | 6 | This library is a set of APIs defined with module types, and a set of 7 | modules and functors implementing one or more of those interfaces. 8 | 9 | The APIs define what a *character* and a *string of characters* should 10 | be. 11 | 12 | See the [INSTALL](INSTALL.md) file for build instructions and/or the 13 | [documentation](http://www.hammerlab.org/docs/sosa/master/index.html) website. 14 | 15 | The library is “packed” in the `Sosa` toplevel module name. 16 | 17 | Module Types (APIs) 18 | ------------------- 19 | 20 | We have, in the sub-module `Api`: 21 | 22 | - `BASIC_CHARACTER`: characters of any length. 23 | - `NATIVE_CONVERSIONS`: functions to transform from/to native OCaml 24 | strings. 25 | - `BASIC_STRING`: immutable strings of (potentially abstract) 26 | characters: 27 | - includes `NATIVE_CONVERSIONS`, 28 | - contains a functor to provide a thread agnostic `output` function: 29 | `Make_output`: `OUTPUT_MODEL` → `sig val output: ... end`. 30 | - `UNSAFELY_MUTABLE`: mutability of some string implementations 31 | (“unsafe” meaning that they break immutability 32 | invariants/assumptions). 33 | - `MINIMALISTIC_MUTABLE_STRING`: abstract mutable string used as 34 | argument of the `Of_mutable` functor. 35 | 36 | Implementations 37 | --------------- 38 | 39 | ### Native OCaml Characters 40 | 41 | The `Native_character` module implements `BASIC_CHARACTER` with 42 | OCaml's `char` type. 43 | 44 | ### Native OCaml Strings 45 | 46 | The `Native_string` module implements `BASIC_STRING` with OCaml's `string` type 47 | considered immutable (and hence `Native_character`). 48 | 49 | ### Native Mutable OCaml Strings (Bytes) 50 | 51 | The `Native_bytes` module implements `BASIC_STRING` 52 | and `UNSAFELY_MUTABLE` with OCaml's `bytes` type. 53 | 54 | ### Lists Of Arbitrary Characters 55 | 56 | `List_of` is a functor: `BASIC_CHARACTER` → `BASIC_STRING`, i.e., it creates a 57 | string datastructure made of a list of characters. 58 | 59 | ### Build From Basic Mutable Data-structures 60 | 61 | The functor `Of_mutable` uses an implementation of 62 | `MINIMALISTIC_MUTABLE_STRING` to build a `BASIC_STRING`. 63 | 64 | ### Integer UTF-8 Characters 65 | 66 | The `Int_utf8_character` module implements `BASIC_CHARACTER` with 67 | OCaml integers (`int`) representing Utf8 characters (we force the 68 | handling of not more than 31 bits, even if [RFC 3629][RFC3629] 69 | restricts them to end at U+10FFFF, c.f. also 70 | [wikipedia][wikipedia:UTF-8]). Note that the function `is_whitespace` considers 71 | only ASCII whitespace (useful while writing parsers for example). 72 | 73 | Examples, Tests, and Benchmarks 74 | ------------------------------- 75 | 76 | See the file [`test/main.ml`](src/test/main.ml) for usage examples, the 77 | library is tested with: 78 | 79 | - native strings and characters, 80 | - lists of native characters (`List_of(Native_character)`), 81 | - lists of integers representing UTF-8 characters (`List_of(utf8-int array)`), 82 | - arrays of integers representing UTF-8 characters (`Of_mutable(utf8-int)`), 83 | - bigarrays of 8-bit integers (`Of_mutable(int8 Bigarray1.t)`). 84 | 85 | The tests depend on the [Nonstd](https://bitbucket.org/smondet/nonstd), 86 | `unix`, and `bigarray` libraries: 87 | 88 | make test 89 | ./sosa_tests 90 | 91 | and you may add the basic benchmarks to the process with: 92 | 93 | ./sosa_tests bench 94 | 95 | [wikipedia:UTF-8]: http://en.wikipedia.org/wiki/UTF-8 96 | [RFC3629]: http://tools.ietf.org/html/rfc3629 97 | 98 | -------------------------------------------------------------------------------- /_tags: -------------------------------------------------------------------------------- 1 | true: annot, bin_annot, principal 2 | and not : for-pack(Sosa) 3 | -------------------------------------------------------------------------------- /opam: -------------------------------------------------------------------------------- 1 | opam-version: "1.2" 2 | maintainer: "seb@mondet.org" 3 | homepage: "http://seb.mondet.org/software/sosa/index.html" 4 | bug-reports: "https://github.com/hammerlab/sosa/issues" 5 | dev-repo: "https://github.com/hammerlab/sosa.git" 6 | authors: [ 7 | "Sebastien Mondet " 8 | "Leonid Rozenberg " 9 | "Isaac Hodes " 10 | "Jeff Hammerbacher " 11 | ] 12 | available: [ ocaml-version >= "4.02.0" ] 13 | install: [ 14 | [ make "build" ] 15 | [ make "install" ] 16 | ] 17 | remove: [ 18 | [ make "uninstall"] 19 | ] 20 | depends: [ 21 | "ocamlfind" {build} 22 | "ocamlbuild" {build} 23 | ] 24 | -------------------------------------------------------------------------------- /src/lib/api.mli: -------------------------------------------------------------------------------- 1 | (** The module types that define Sosa's API. *) 2 | 3 | (* Document generation nuisances: 4 | - Unfortunately, some of the internal references ie, things like {!val:foo} 5 | turn out to look horrible because ocamldoc generates the full path 6 | 'Api.BASIC_CHARACTER.Foo' which distracts from the readability; so I've tried 7 | to avoid using them, and replaced them with specific links: {{!val:foo}foo}. 8 | *) 9 | 10 | type ('a, 'b) result = [ 11 | | `Ok of 'a 12 | | `Error of 'b 13 | ] 14 | (** The type [result] is a reusable version of the classical [Result.t] 15 | type. *) 16 | 17 | (** A monadic thread model (like [Lwt], [Async]) and an [output] 18 | function. *) 19 | module type OUTPUT_MODEL = sig 20 | 21 | type ('a, 'b, 'c) thread 22 | (** The type of the threads, the type parameters are there in case 23 | the user needs up to 3 of them. 24 | 25 | For instance, if implement with [Lwt], we will have [type ('a, 'b, 'c) 26 | thread = 'a Lwt.t], but with [Pvem.DEFERRED_RESULT]: [type ('a, 'b, 'c) 27 | thread = ('a, 'b) Deferred_result.t]. *) 28 | 29 | type ('a, 'b, 'c) channel 30 | (** The channel type, channels can also have up to 3 type-parameters. *) 31 | 32 | val return: 'a -> ('a, 'b, 'c) thread 33 | (** The monadic [return]. *) 34 | 35 | val bind: ('a, 'b, 'c) thread -> ('a -> ('d, 'b, 'c) thread) -> ('d, 'b, 'c) thread 36 | (** The monadic [bind]. *) 37 | 38 | val output: ('a, 'b, 'c) channel -> String.t -> (unit, 'e, 'f) thread 39 | (** The function to output a given native string to a channel. *) 40 | 41 | end (* OUTPUT_MODEL *) 42 | 43 | (** The minimal API implemented by characters. *) 44 | module type BASIC_CHARACTER = sig 45 | 46 | type t 47 | (** The type representing the character. *) 48 | 49 | val of_native_char: char -> t option 50 | (** Import a native [char], returns [None] if the character is not 51 | representable. *) 52 | 53 | val of_int: int -> t option 54 | (** Import an integer, returns [None] if there is no character for 55 | that value. *) 56 | 57 | val to_int: t -> int 58 | (** Returns the integer representation of the character. *) 59 | 60 | val size: t -> int 61 | (** Get the size of the character, the exact semantics are 62 | implementation-specific (c.f. {!write_to_native_bytes}) *) 63 | 64 | val write_to_native_bytes: t -> buf:Bytes.t -> index:int -> (int, [> `out_of_bounds]) result 65 | (** [write_to_native_bytes c ~buf ~index] serializes 66 | the character [c] at position [index] in the native bytes 67 | [buf] (writing [size c] units). Note, as with {!size} that the 68 | meaning of [index] is implementation dependent (can be the {i 69 | index-th} byte, the {i index-th} bit, etc.). *) 70 | 71 | val to_native_string: t -> String.t 72 | (** [to_native_string c] creates a string containing the 73 | serialization of the character [c] (if [size c] is not a 74 | multiple of 8, the end-padding is undefined). *) 75 | 76 | val read_from_native_string: buf:String.t -> index:int -> (t * int) option 77 | (** Read a character at a given [index] in a native string, returns 78 | [Some (c, s)], the character [c] and the number of units read [s], 79 | or [None] if there is no representable/valid character at that 80 | index. *) 81 | 82 | val to_string_hum: t -> String.t 83 | (** Convert the character to a human-readable native string (in the 84 | spirit of [sprintf "%s"]). *) 85 | 86 | val compare: t -> t -> int 87 | (** Comparison function (as expected by most common functors in the 88 | ecosystem). *) 89 | 90 | val is_whitespace: t -> bool 91 | (** Tell whether a character is considered whitespace. *) 92 | 93 | end (* BASIC_CHARACTER *) 94 | 95 | (** API definition of conversions from native OCaml strings to a 96 | given string type or vice-versa. *) 97 | module type NATIVE_CONVERSIONS = sig 98 | 99 | type t 100 | (** The string type. *) 101 | 102 | val of_native_string: string -> (t, [> `wrong_char_at of int ]) result 103 | (** Convert a native string to the current representation. 104 | [of_native_string] returns [`Error (`wrong_char_at index)] 105 | when the native string contains a character not representable 106 | with the type [character] at [index]. *) 107 | 108 | val of_native_substring: string -> offset:int -> length:int -> 109 | (t, [> `wrong_char_at of int | `out_of_bounds ]) result 110 | (** Convert a native string like [of_native_string] but take a 111 | subset of the string. *) 112 | 113 | val to_native_string: t -> string 114 | (** Serialize the string to a native string. *) 115 | 116 | end (* NATIVE_CONVERSIONS *) 117 | 118 | (** The minimal API implemented by string modules. *) 119 | module type BASIC_STRING = sig 120 | 121 | type character 122 | (** A string is composed of characters. *) 123 | 124 | type t 125 | (** The type of the string. *) 126 | 127 | val max_string_length : int option 128 | (** If the representation of strings is bounded 129 | (by a constant other than the process memory), the maximum 130 | length of a string is assumed to be this 131 | {i number of [character]s}. *) 132 | 133 | val empty: t 134 | (** A string of zero length. *) 135 | 136 | val is_empty: t -> bool 137 | (** Test whether a string is empty. *) 138 | 139 | val make: int -> character -> t 140 | (** [make size char] builds a new string of the passed [length] where the 141 | character at every position is [char], like [String.make]. 142 | 143 | The behavior of [make size] is undefined when 144 | [size] is [< 0] or [> {max_string_length}] (if it is [Some _]). 145 | Depending on the backend implementing the API, the function 146 | may raise an exception. *) 147 | 148 | val length: t -> int 149 | (** Get the length of the string (i.e. the number of characters). *) 150 | 151 | val of_character: character -> t 152 | (** Make a string with one character. *) 153 | 154 | val of_character_list: character list -> t 155 | (** Make a string out of a list of characters. This function may 156 | also raise an exception when the required length is larger than 157 | {!max_string_length} (depends on the backend implementation). *) 158 | 159 | val to_character_list: t -> character list 160 | (** Explode a string into a list of characters. *) 161 | 162 | val get: t -> index:int -> character option 163 | (** Get the n-th char, indexes are not necessarily bytes, they can 164 | be bits. [get] returns [None] when [index] is out of bounds. *) 165 | 166 | val set: t -> index:int -> v:character -> t option 167 | (** [set str ~index ~v] creates a new string equal to [t] with 168 | character [v] at position [index]. [set] returns [None] when 169 | [index] is out of bounds. *) 170 | 171 | val get_exn: t -> index:int -> character 172 | (** Like {{!val:get}get} but fail with an exception. 173 | 174 | @raise Invalid_argument when [index] is not in [\[0,length)] 175 | *) 176 | 177 | val set_exn: t -> index:int -> v:character -> t 178 | (** Like {{!val:set} set} but fail with an exception. 179 | 180 | @raise Invalid_argument when [index] is not in [\[0,length)] 181 | *) 182 | 183 | val concat: ?sep:t -> t list -> t 184 | (** The classical [concat] function. 185 | 186 | The function is subject to same limitations as 187 | {!of_character_list} regarding {!max_string_length}. *) 188 | 189 | (** By including {{!modtype:Api.NATIVE_CONVERSIONS} NATIVE_CONVERSIONS}, a 190 | basic string provides 191 | {{!val:Api.NATIVE_CONVERSIONS.of_native_string} of_native_string}, 192 | {{!val:Api.NATIVE_CONVERSIONS.of_native_substring} of_native_substring}, 193 | and {{!val:Api.NATIVE_CONVERSIONS.to_native_string} to_native_string}. 194 | *) 195 | include NATIVE_CONVERSIONS with type t := t 196 | 197 | val to_string_hum: t -> string 198 | (** Convert the string to a human-readable native string (à la 199 | [sprintf "%S"]). 200 | 201 | Returning an OCaml native [string], the function may raise an 202 | exception when the resulting [string] exceeds 203 | [Sys.max_string_length]. *) 204 | 205 | val fold: t -> init:'a -> f:('a -> character -> 'a) -> 'a 206 | (** The standard [fold] function, see [List.fold_left] for example. *) 207 | 208 | val foldi: t -> init:'a -> f:(int -> 'a -> character -> 'a) -> 'a 209 | (** Pass an accumulator over the string's characters and their ordinals, 210 | starting with the first; left most. *) 211 | 212 | val fold2_exn: t -> t -> init:'a -> f:('a -> character -> character -> 'a) -> 'a 213 | (** The standard [fold2] function, see [List.fold_left2] for example. Fails on 214 | [t]s of different length. *) 215 | 216 | val compare: t -> t -> int 217 | (** Comparison function (as expected by most common functors in the 218 | ecosystem). *) 219 | 220 | val sub: t -> index:int -> length:int -> t option 221 | (** Get the sub-string of size [length] at position [index]. If 222 | [length] is 0, [sub] returns [Some empty] regardless of the other 223 | parameters. *) 224 | 225 | val sub_exn: t -> index:int -> length:int -> t 226 | (** Like {{!val:sub}sub} but throw an exception instead of returning [None]. 227 | 228 | @raise Invalid_argument when [index] and [length] do not represent a 229 | valid substring. 230 | *) 231 | 232 | val slice: ?start:int -> ?finish:int -> t -> t option 233 | (** Create a sub-string from [start] to just before [finish] if all of the 234 | indices are in bounds. 235 | 236 | @param start defaults to [0], must be within [\[0,length)] 237 | @param finish default to [length], must be within [\[0,length)] 238 | *) 239 | 240 | val slice_exn: ?start:int -> ?finish:int -> t -> t 241 | (** Like {{!val:slice}slice} but throw an exception instead of returning [None] 242 | if the indices are out of bounds. 243 | 244 | @raise Invalid_argument when [start] or [finish] are not in their 245 | respective bounds. See [slice]. 246 | *) 247 | 248 | val is_prefix: t -> prefix:t -> bool 249 | (** Does [t] start with [prefix]? *) 250 | 251 | val is_suffix: t -> suffix:t -> bool 252 | (** Does [t] end with [suffix]? *) 253 | 254 | val chop_prefix_exn: t -> prefix:t -> t 255 | (** Return a copy of [t] with [prefix] removed from the beginning. 256 | 257 | @raise Invalid_argument if [t] does not start with [prefix]. 258 | *) 259 | 260 | val chop_prefix: t -> prefix:t -> t option 261 | (** Like {{!val:chop_prefix_exn}chop_prefix_exn} but return [None] instead of 262 | throwing an [Invalid_argument]. *) 263 | 264 | val chop_suffix_exn: t -> suffix:t -> t 265 | (** Return a copy of [t] with [suffix] removed from the end. 266 | 267 | @raise Invalid_argument if [t] does not end with [suffix]. 268 | *) 269 | 270 | val chop_suffix: t -> suffix:t -> t option 271 | (** Like {{!val:chop_suffix_exn}chop_suffix_exn} but return [None] instead of 272 | throwing an exception. *) 273 | 274 | val split_at: t -> index:int -> t * t 275 | (** Return a tuple where the first string is a prefix of the specified length 276 | and the second is the rest. 277 | 278 | If index is [=< 0] then the first element is empty and the string is 279 | returned in the second element, similarly if the index is [>= length t] 280 | then the first element is [t] and the second is [empty]. *) 281 | 282 | val take: t -> index:int -> t 283 | (** Just the first part of [split_at]. *) 284 | 285 | val drop: t -> index:int -> t 286 | (** Just the second part of [split_at]. *) 287 | 288 | val compare_substring: t * int * int -> t * int * int -> int 289 | (** Comparison function for substrings: use as [compare_substring 290 | (s1, index1, length1) (s2, index2, length2)]. 291 | 292 | Note that out-of-bounds accesses will {b not} be reported: for 293 | performance reasons, if the result can be decided with the 294 | smallest sub-string then [compare_substring] won't look 295 | further. 296 | 297 | However, if {{!val:compare_substring_strict}compare_substring_strict} 298 | returns [Some c] then [compare_substring] {i must} return [d] such as 299 | [c] = [d] or [c] × [d] > 0 (i.e. strictly same sign). 300 | 301 | In other words, if [sub a ~index:ia ~length:la] returns [Some suba] and 302 | [sub b ~index:ib ~length:lb] returns [Some subb], then 303 | [compare_substring (a, ia, la) (b, ib, lb)] will behave like 304 | [compare suba subb] (again, with the same sign). 305 | *) 306 | 307 | val compare_substring_strict: t * int * int -> t * int * int -> int option 308 | (** Like {{!val:compare_substring}compare_substring} but return [Some _] only 309 | when it is well defined (same validity criteria as {{!val:sub}sub}: if 310 | [length] is [0], [index] is irrelevant). 311 | 312 | Depending on the backend implementation, this function might be 313 | significantly slower than [compare_substring] (for example when 314 | calls to [length] are not {i O(1)}). *) 315 | 316 | val iter: t -> f:(character -> unit) -> unit 317 | (** Apply [f] on every character successively. *) 318 | 319 | val iteri: t -> f:(int -> character -> unit) -> unit 320 | (** Apply [f] on every character and its index. *) 321 | 322 | val iter_reverse: t -> f:(character -> unit) -> unit 323 | (** Apply [f] on every character successively in reverse order. *) 324 | 325 | val rev: t -> t 326 | (** Reverse the string. **) 327 | 328 | val map: t -> f:(character -> character) -> t 329 | (** Make a new string by applying [f] to all characters of the 330 | input. *) 331 | 332 | val mapi: t -> f:(int -> character -> character) -> t 333 | (** Make a new string by applying [f] to all characters and their indices. *) 334 | 335 | val map2_exn: t -> t -> f:(character -> character -> character) -> t 336 | (** Make a new string by applying [f] to all pairs of characters of the 337 | inputs. Fail if strings are not the same length. *) 338 | 339 | val for_all: t -> f:(character -> bool) -> bool 340 | (** Return [true] if-and-only-if [f] returns [true] on all characters. *) 341 | 342 | val exists: t -> f:(character -> bool) -> bool 343 | (** Return [true] if-and-only-if [f] returns [true] on at least one 344 | character. *) 345 | 346 | val take_while: t -> f:(character -> bool) -> t 347 | (** Take a prefix of the string until [f] returns [false]. *) 348 | 349 | val take_while_with_index: t -> f:(int -> character -> bool) -> t 350 | (** Like {{!val:take_while}take_while} but the function also takes the 351 | current index. *) 352 | 353 | val index_of_character: t -> ?from:int -> character -> int option 354 | (** Find the first occurrence of a character in the string (starting 355 | at position [from]). 356 | 357 | @param from default value is [0]. 358 | If [from] is negative, [0] will be used. 359 | If [from >= length t], [None] will be returned. 360 | *) 361 | 362 | val index_of_character_reverse: t -> ?from:int -> character -> int option 363 | (** Like {{!val:index_of_character}index_of_character} but start from the 364 | end of the string. 365 | 366 | @param from defaults to [length t - 1] (end of the string). 367 | If [from] is negative, [None] will be returned. 368 | If [from >= length t], [length t - 1] will be used. 369 | *) 370 | 371 | val index_of_string: ?from:int -> ?sub_index:int -> ?sub_length:int -> t -> 372 | sub:t -> int option 373 | (** Find the first occurrence of the substring [(sub, sub_index, 374 | sub_length)] in a given string, starting at [from]. 375 | 376 | @param from behaves like [from] in 377 | {{!val:index_of_character}index_of_character}. 378 | 379 | @param sub_index is constrained to [\[0, length sub)] 380 | @param sub_length is constrained to [\[0, length sub - sub_index)]. 381 | 382 | For example, if called with [~sub:"abc" ~sub_index:(-1) ~sub_length:4] 383 | then [sub_index] and [sub_length] will be constrained to [0] and [3], 384 | respectively. 385 | 386 | If called with [~sub:"abc" ~sub_index:1 ~sub_length:3] then [sub_index] 387 | and [sub_length] will be constrained to [1] and [2], respectively. 388 | 389 | Searching for an empty string (if [sub] is empty or it is constrained via 390 | [sub_index] or [sub_length]) from a valid position always succeeds at 391 | that position ({i ie} [from]). 392 | *) 393 | 394 | val index_of_string_reverse: ?from:int -> ?sub_index:int -> ?sub_length:int -> 395 | t -> sub:t -> int option 396 | (** Like {{!val:index_of_string}index_of_string} but start from the end of the 397 | string. 398 | 399 | @param from behaves like [from] in 400 | {{!val:index_of_character_reverse}index_of_character_reverse}. 401 | @param sub_index is constrained like 402 | {{!val:index_of_string}index_of_string}. 403 | @param sub_length is constrained like 404 | {{!val:index_of_string}index_of_string}. 405 | *) 406 | 407 | val find: ?from:int -> ?length:int -> t -> f:(character -> bool) -> int option 408 | (** Find the index of the first character [c] for which [f c] is [true]. 409 | 410 | One can restrict to the sub-string [(from, length)] (the default is to use 411 | the whole string, “out-of-bound” values are restricted to the bounds of 412 | the string). *) 413 | 414 | val find_reverse: ?from:int -> ?length:int -> t -> f:(character -> bool) -> 415 | int option 416 | (** Find the index of the last character [c] for which [f c] is [true]. 417 | 418 | One can restrict to the reverse sub-string [(from, length)] (the 419 | default is to use the whole string, “out-of-bound” values are restricted 420 | to the bounds of the string). *) 421 | 422 | val filter_map: ?from:int -> ?length:int -> t -> 423 | f:(character -> character option) -> t 424 | (** Create a new string with the characters for which [f c] returned [Some c]. 425 | 426 | One can restrict to the sub-string [(from, length)] (the default is to use 427 | the whole string, “out-of-bound” values are restricted to the bounds of 428 | the string). *) 429 | 430 | val filter: ?from:int -> ?length:int -> t -> f:(character -> bool) -> t 431 | (** Create a new string with the characters for which [f c] is true. 432 | 433 | One can restrict to the sub-string [(from, length)] (the default is to use 434 | the whole string, “out-of-bound” values are restricted 435 | to the bounds of the string). *) 436 | 437 | val split: t -> on:[ `Character of character | `String of t ] -> t list 438 | (** Split the string using [on] as separator. 439 | 440 | Splitting the empty string returns [\[\]]. 441 | 442 | Splitting with [~on:(`String empty)] explodes the [t] into a list of 443 | one-character strings. *) 444 | 445 | val strip: ?on:[`Both | `Left | `Right] -> ?whitespace:(character -> bool) -> 446 | t -> t 447 | (** Remove any whitespace characters at the beginning and/or the end of the 448 | string 449 | 450 | @param on defaults to [`Both]. 451 | @param whitespace defaults to calling 452 | {{!val:Api.BASIC_CHARACTER.is_whitespace}is_whitespace} of the 453 | implemented character. 454 | *) 455 | 456 | module Make_output (Model : OUTPUT_MODEL) : sig 457 | 458 | val output: ('a, 'b, 'c) Model.channel -> t -> (unit, 'e, 'f) Model.thread 459 | (** Output a string to a channel. *) 460 | 461 | end 462 | (** [Make_output(Asynchronous_output_model)] provides a function 463 | {{!val:Api.BASIC_STRING.Make_output.output}output} 464 | given an {{!modtype:Api.OUTPUT_MODEL}OUTPUT_MODEL}. *) 465 | 466 | end (* BASIC_STRING *) 467 | 468 | (** This interface defines functions that may be implemented by 469 | particular string types that are actually mutable. 470 | 471 | They are considered “unsafe” because they break the immutability 472 | invariants assumed by the rest of this library; you'd better 473 | know what you're doing. *) 474 | module type UNSAFELY_MUTABLE = sig 475 | 476 | type t 477 | type character 478 | 479 | val mutate: t -> index:int -> character -> (unit, [> `out_of_bounds ]) result 480 | (** Set the [index]-th character of the string. *) 481 | 482 | val mutate_exn: t -> index:int -> character -> unit 483 | (** Set the [index]-th character of the string, but fail with a 484 | non-specified exception. *) 485 | 486 | val blit: 487 | src:t -> src_index:int -> dst:t -> dst_index:int -> length:int -> 488 | (unit, [> `out_of_bounds ]) result 489 | (** Copy [length] characters from [src] (starting at [src_index]) to 490 | [dst] (starting at [dst_index]). *) 491 | 492 | val blit_exn: 493 | src:t -> src_index:int -> dst:t -> dst_index:int -> length:int -> unit 494 | (** Like {!blit} but fail with a non-specified exception. *) 495 | 496 | end (* UNSAFELY_MUTABLE *) 497 | 498 | (** Native {i OCaml} character. *) 499 | module type NATIVE_CHARACTER = sig 500 | type t = char 501 | include BASIC_CHARACTER with type t := char 502 | end 503 | 504 | (** Native {i OCaml} string. *) 505 | module type NATIVE_STRING = sig 506 | 507 | type character = char 508 | type t = string 509 | 510 | include BASIC_STRING 511 | with type t := string 512 | with type character := char 513 | 514 | end (* NATIVE_STRING *) 515 | 516 | (** Native {i OCaml} byte. *) 517 | module type NATIVE_BYTES = sig 518 | 519 | type character = char 520 | type t = bytes 521 | 522 | include BASIC_STRING 523 | with type t := bytes 524 | with type character := char 525 | 526 | include UNSAFELY_MUTABLE 527 | with type t := bytes 528 | with type character := char 529 | 530 | end (* NATIVE_BYTES *) 531 | 532 | (** Minimal mutable string used as argument to the {!module:Of_mutable} functor. *) 533 | module type MINIMALISTIC_MUTABLE_STRING = sig 534 | 535 | type character 536 | (** A string is composed of character.*) 537 | 538 | type t 539 | (** The type of the string. *) 540 | 541 | val empty: t 542 | (** A string of zero length *) 543 | 544 | val max_string_length : int option 545 | (** If the representation of strings is bounded, 546 | the maximum length of a string. *) 547 | 548 | val make: int -> character -> t 549 | (** [make size char] builds a new string of the passed [length] where the 550 | character at every position is [char], like [String.make]. *) 551 | 552 | val length: t -> int 553 | (** Get the length of the string (i.e. the number of characters). *) 554 | 555 | val compare: t -> t -> int 556 | (** Comparison function for strings. *) 557 | 558 | val compare_char: character -> character -> int 559 | (** Comparison function for characters. *) 560 | 561 | val get: t -> int -> character 562 | (** Get the n-th char. *) 563 | 564 | val set: t -> int -> character -> unit 565 | (** Set the n-th char. *) 566 | 567 | val blit: src:t -> src_pos:int -> dst:t -> dst_pos:int -> len:int -> unit 568 | (** [blit src src_pos dst dst_pos len] copies [len] characters starting at 569 | [src_pos] of [src] into [dst] starting from [dst_post]. *) 570 | 571 | val is_whitespace: character -> bool 572 | (** Tell whether a character is considered whitespace. *) 573 | 574 | (** {{!modtype:MINIMALISTIC_MUTABLE_STRING}MINIMALISTIC_MUTABLE_STRING} requires 575 | {{!val:Api.NATIVE_CONVERSIONS.of_native_string} of_native_string}, 576 | {{!val:Api.NATIVE_CONVERSIONS.of_native_substring} of_native_substring}, 577 | and {{!val:Api.NATIVE_CONVERSIONS.to_native_string} to_native_string}. *) 578 | include NATIVE_CONVERSIONS with type t := t 579 | end 580 | -------------------------------------------------------------------------------- /src/lib/functors.ml: -------------------------------------------------------------------------------- 1 | (** Components for implementing common logic throughout Sosa's implementation.*) 2 | 3 | open Sosa_pervasives 4 | open Printf 5 | 6 | (* These modules types are inputs to these not-exposed functors! *) 7 | module type T_LENGTH_AND_COMPSUB = sig 8 | type t 9 | val length: t -> int 10 | val compare_substring: t * int * int -> t * int * int -> int 11 | end (* T_LENGTH_AND_COMPSUB *) 12 | 13 | (* This module type is a subset of `BASIC_STRING` for strings with a `length` 14 | function, a `sub_exn` function, and the `index_of_*` functions *) 15 | module type T_LENGTH_SUB_AND_SEARCH = sig 16 | type t 17 | type character 18 | val length: t -> int 19 | val sub_exn: t -> index:int -> length:int -> t 20 | val index_of_character: t -> ?from:int -> character -> int option 21 | val index_of_string: ?from:int -> 22 | ?sub_index:int -> ?sub_length:int -> t -> sub:t -> int option 23 | end (* T_LENGTH_SUB_AND_SEARCH *) 24 | 25 | module type T_LENGTH_SUB_AND_SEARCH_REV = sig 26 | type t 27 | type character 28 | val empty: t 29 | val length: t -> int 30 | val sub_exn: t -> index:int -> length:int -> t 31 | val index_of_character_reverse: t -> ?from:int -> character -> int option 32 | val index_of_string_reverse: ?from:int -> 33 | ?sub_index:int -> ?sub_length:int -> t -> sub:t -> int option 34 | 35 | end (* T_LENGTH_SUB_AND_SEARCH_REV *) 36 | 37 | (* This functor builds a `compare_substring_strict` function out of a 38 | `compare_substring` function. 39 | 40 | It may not be the optimal algorithm (it may call `length` on both 41 | strings.) 42 | *) 43 | module Compare_substring_strict_of_loose (S: T_LENGTH_AND_COMPSUB) = struct 44 | open S 45 | let compare_substring_strict (a, idxa, lena) (b, idxb, lenb) = 46 | let check_a = lazy (idxa >= 0 && lena >= 0 && idxa + lena <= (length a)) in 47 | let check_b = lazy (idxb >= 0 && lenb >= 0 && idxb + lenb <= (length b)) in 48 | if lena = 0 && lenb = 0 then Some 0 49 | else 50 | (if lena = 0 then (if Lazy.force check_b then Some (-1) else None) 51 | else 52 | (if lenb = 0 then (if Lazy.force check_a then Some (1) else None) 53 | else 54 | (if not (Lazy.force check_a) || not (Lazy.force check_b) then None 55 | else 56 | Some (compare_substring (a, idxa, lena) (b, idxb, lenb))))) 57 | end 58 | 59 | module Make_index_of_string (S: T_LENGTH_AND_COMPSUB) = struct 60 | open S 61 | let index_of_string ?(from=0) ?(sub_index=0) ?sub_length t ~sub = 62 | let module With_exn = struct 63 | exception Found of int 64 | 65 | let f () = 66 | (* Readjust the arguments: *) 67 | let length_of_t = length t in 68 | let from = 69 | if from <= 0 then 0 else min length_of_t from in 70 | let total_length_of_sub = length sub in 71 | let sub_index = 72 | if sub_index <= 0 then 0 else sub_index in 73 | let sub_length = 74 | let default = max 0 (total_length_of_sub - sub_index) in 75 | match sub_length with 76 | | None -> default 77 | | Some s when s >= default -> default 78 | | Some s when s < 0 -> 0 79 | | Some s -> s 80 | in 81 | (* dbg "from: %d, length: %d sub_index: %d sub_length: %d" *) 82 | (* from length_of_t sub_index sub_length; *) 83 | if from >= length_of_t then None 84 | else if length_of_t = 0 then None 85 | else if sub_length <= 0 then Some from 86 | else 87 | begin try 88 | for i = 0 to length_of_t - from do 89 | if compare_substring 90 | (t, i + from, sub_length) 91 | (sub, sub_index, sub_length) = 0 92 | then raise (Found (i + from)) 93 | done; 94 | None 95 | with Found f -> Some f 96 | end 97 | end in 98 | With_exn.f () 99 | 100 | let index_of_string_reverse ?from ?(sub_index=0) ?sub_length t ~sub = 101 | let module With_exn = struct 102 | exception Found of int 103 | 104 | let f () = 105 | let length_of_t = length t in 106 | let last = length_of_t - 1 in 107 | let from = 108 | match from with 109 | | None -> last 110 | | Some f when f >= last -> last 111 | | Some f -> f in 112 | let total_length_of_sub = length sub in 113 | let sub_index = 114 | if sub_index <= 0 then 0 else sub_index in 115 | let sub_length = 116 | let default = max 0 (total_length_of_sub - sub_index) in 117 | match sub_length with 118 | | None -> default 119 | | Some s when s >= default -> default 120 | | Some s when s < 0 -> 0 121 | | Some s -> s 122 | in 123 | (* dbg "from: %d, length: %d sub_index: %d sub_length: %d" *) 124 | (* from length_of_t sub_index sub_length; *) 125 | if from < 0 then None 126 | else if length_of_t = 0 then None 127 | else if sub_length <= 0 then Some from 128 | else 129 | begin try 130 | for i = from downto 0 do 131 | if compare_substring 132 | (t, i, sub_length) 133 | (sub, sub_index, sub_length) = 0 134 | then raise (Found (i)) 135 | done; 136 | None 137 | with Found f -> Some f 138 | end 139 | end in 140 | With_exn.f () 141 | 142 | end 143 | 144 | (* This functor implements the `BASIC_STRING.split` function out of a 145 | `T_LENGTH_SUB_AND_SEARCH` *) 146 | module Make_split_function (S: T_LENGTH_SUB_AND_SEARCH) = struct 147 | 148 | let split t ~on = 149 | let length_of_t = S.length t in 150 | begin match on with 151 | | `Character c -> 152 | let rec loop acc from = 153 | match S.index_of_character t ~from c with 154 | | Some index -> 155 | loop (S.sub_exn t ~index:from ~length:(index - from) :: acc) 156 | (index + 1) 157 | | None -> 158 | (S.sub_exn t ~index:from ~length:(length_of_t - from) :: acc) 159 | in 160 | List.rev (loop [] 0) 161 | | `String s -> 162 | let length_of_s = S.length s in 163 | let rec loop acc from = 164 | match S.index_of_string t ~from ~sub:s with 165 | | Some index -> 166 | loop (S.sub_exn t ~index:from ~length:(index - from) :: acc) 167 | (index + length_of_s) 168 | | None -> 169 | (S.sub_exn t ~index:from ~length:(length_of_t - from) :: acc) 170 | in 171 | if length_of_s > 0 172 | then List.rev (loop [] 0) 173 | else if length_of_t = 0 174 | then [ t ] 175 | else begin 176 | let res = ref [] in 177 | for index = length_of_t - 1 downto 0 do 178 | res := S.sub_exn t ~index ~length:1 :: !res 179 | done; 180 | !res 181 | end 182 | end 183 | 184 | end 185 | 186 | (* This functor implements the `BASIC_STRING.split` function out of a 187 | `T_LENGTH_SUB_AND_SEARCH_REV` by looping through the string backwards, 188 | on some representations it is the more effective option. *) 189 | module Make_split_rev_function (S: T_LENGTH_SUB_AND_SEARCH_REV) = struct 190 | 191 | let split t ~on = 192 | let length_of_t = S.length t in 193 | begin match on with 194 | | `Character c -> 195 | let rec loop acc from = 196 | match S.index_of_character_reverse t ~from c with 197 | | Some index when index = length_of_t - 1 -> 198 | loop (S.empty :: acc) (index - 1) 199 | | Some index -> 200 | loop (S.sub_exn t ~index:(index + 1) ~length:(from - index) :: acc) 201 | (index - 1) 202 | | None -> 203 | S.sub_exn t ~index:0 ~length:(from + 1) :: acc 204 | in 205 | loop [] (length_of_t - 1) 206 | | `String s -> 207 | let length_of_s = S.length s in 208 | let rec loop acc from = 209 | match S.index_of_string_reverse t ~from ~sub:s with 210 | | Some index when index = length_of_t - length_of_s -> 211 | loop (S.empty :: acc) (index - 1) 212 | | Some index -> 213 | let offset = index + length_of_s in 214 | let length = from - offset + 1 in 215 | loop (S.sub_exn t ~index:offset ~length :: acc) (index - 1) 216 | | None -> 217 | S.sub_exn t ~index:0 ~length:(from + 1) :: acc 218 | in 219 | if length_of_s > 0 220 | then loop [] (length_of_t - 1) 221 | else if length_of_t = 0 222 | then [ t ] 223 | else begin 224 | let res = ref [] in 225 | for index = length_of_t - 1 downto 0 do 226 | res := S.sub_exn t ~index ~length:1 :: !res 227 | done; 228 | !res 229 | end 230 | end 231 | 232 | end (* Make_split_function *) 233 | 234 | module Make_strip_function (S: 235 | sig 236 | type t 237 | type character 238 | val empty : t 239 | val is_whitespace: character -> bool 240 | val length: t -> int 241 | val find: 242 | ?from:int -> ?length:int -> t -> f:(character -> bool) -> int option 243 | val find_reverse: 244 | ?from:int -> ?length:int -> t -> f:(character -> bool) -> int option 245 | val sub_exn: t -> index:int -> length:int -> t 246 | end) = struct 247 | 248 | let strip ?(on=`Both) ?(whitespace=S.is_whitespace) t = 249 | let open S in 250 | let first_non () = 251 | match find t ~f:(fun c -> not (whitespace c)) with 252 | | None -> raise Not_found | Some s -> s in 253 | let last_non () = 254 | match find_reverse t ~f:(fun c -> not (whitespace c)) with 255 | | None -> raise Not_found | Some s -> s in 256 | try 257 | match on with 258 | | `Both -> 259 | let index = first_non () in 260 | let last = last_non () in 261 | sub_exn t ~index ~length:(last - index + 1) 262 | | `Left -> 263 | let index = first_non () in 264 | sub_exn t ~index ~length:(length t - index) 265 | | `Right -> 266 | let last = last_non () in 267 | sub_exn t ~index:0 ~length:(last + 1) 268 | with 269 | | Not_found -> empty 270 | end (* Make_strip_function *) 271 | 272 | module Make_prefix_suffix_array (A: 273 | sig 274 | type t 275 | type character 276 | val get : t -> int -> character 277 | val length: t -> int 278 | val sub_exn: t -> index:int -> length:int -> t 279 | end) = struct 280 | 281 | let rec sub_same_tl t ~comp ~len ~off = 282 | let rec loop i = 283 | i = len || (A.get t (off + i) = A.get comp i) && loop (i + 1) 284 | in 285 | (A.length t >= len) && loop 0 286 | 287 | let is_prefix t ~prefix = 288 | let len = A.length prefix in 289 | sub_same_tl t ~comp:prefix ~len ~off:0 290 | 291 | let is_suffix t ~suffix = 292 | let len = A.length suffix and lt = A.length t in 293 | sub_same_tl t ~comp:suffix ~len ~off:(lt - len) 294 | 295 | let chop_prefix_exn t ~prefix = 296 | let len = A.length prefix and lt = A.length t in 297 | if sub_same_tl t ~comp:prefix ~len ~off:0 298 | then A.sub_exn t ~index:len ~length:(lt - len) 299 | else raise (Invalid_argument "not a prefix") 300 | 301 | let chop_prefix t ~prefix = 302 | try Some (chop_prefix_exn t prefix) 303 | with _ -> None 304 | 305 | let chop_suffix_exn t ~suffix = 306 | let len = A.length suffix and lt = A.length t in 307 | if sub_same_tl t ~comp:suffix ~len ~off:(lt - len) 308 | then A.sub_exn t ~index:0 ~length:(lt - len) 309 | else raise (Invalid_argument "not a suffix") 310 | 311 | let chop_suffix t ~suffix = 312 | try Some (chop_suffix_exn t suffix) 313 | with _ -> None 314 | 315 | end (* Make_prefix_suffix_array *) 316 | 317 | module Make_split_at_index_functions (A: 318 | sig 319 | type t 320 | type character 321 | val empty : t 322 | val length : t -> int 323 | val sub_exn : t -> index:int -> length:int -> t 324 | end) = struct 325 | 326 | let split_at t ~index = 327 | let l = A.length t in 328 | if index < 0 then (A.empty, t) 329 | else if index >= l then (t, A.empty) 330 | else (A.sub_exn t ~index:0 ~length:index), 331 | (A.sub_exn t ~index:index ~length:(l - index)) 332 | 333 | let take t ~index = 334 | let l = A.length t in 335 | if index < 0 then A.empty 336 | else if index >= l then t 337 | else A.sub_exn t ~index:0 ~length:index 338 | 339 | let drop t ~index = 340 | let l = A.length t in 341 | if index < 0 then t 342 | else if index >= l then A.empty 343 | else (A.sub_exn t ~index:index ~length:(l - index)) 344 | 345 | 346 | end (* Make_split_at_index_functions *) 347 | 348 | module Make_native (B : 349 | sig 350 | type t 351 | val empty : t 352 | val length : t -> int 353 | val get : t -> int -> char 354 | val make : int -> char -> t 355 | val init : int -> f:(int -> char) -> t 356 | val compare : t -> t -> int 357 | val concat : sep:t -> t list -> t 358 | val iter : f:(char -> unit) -> t -> unit 359 | val iteri : f:(int -> char -> unit) -> t-> unit 360 | val map : f:(char -> char) -> t -> t 361 | val mapi : f:(int -> char -> char) -> t -> t 362 | val index_from : t -> int -> char -> int 363 | val rindex_from : t -> int -> char -> int 364 | val sub : t -> pos:int -> len:int -> t 365 | val of_buffer : Buffer.t -> t 366 | val string_for_output : t -> string 367 | end) = struct 368 | 369 | type character = char 370 | type t = B.t 371 | 372 | let max_string_length = Some Sys.max_string_length 373 | let empty = B.empty 374 | let compare = B.compare 375 | let is_empty t = (compare B.empty t = 0) 376 | 377 | let make = B.make 378 | let length = B.length 379 | 380 | let of_character = B.make 1 381 | 382 | let of_character_list cl = 383 | let r = ref cl in 384 | B.init (List.length cl) ~f:(fun _ -> 385 | let c = List.hd !r in 386 | r := List.tl !r; 387 | c) 388 | 389 | let to_character_list s = 390 | let res = ref [] in 391 | for i = length s - 1 downto 0 do 392 | res := (B.get s i) :: !res 393 | done; 394 | !res 395 | 396 | let get s ~index = 397 | try Some (B.get s index) 398 | with _ -> None 399 | 400 | (* Since our set always returns a copy! *) 401 | let set s ~index ~v = 402 | if index > length s - 1 403 | then None 404 | else Some (B.mapi (fun i c -> if i = index then v else c) s) 405 | 406 | let get_exn s ~index = B.get s index 407 | 408 | let set_exn s ~index ~v = 409 | match set s ~index ~v with None -> invalid_arg "set_exn" | Some s -> s 410 | 411 | let compare = B.compare 412 | 413 | let compare_substring (a, idxa, lena) (b, idxb, lenb) = 414 | let module With_exns = struct 415 | exception Return of int 416 | exception Left_out of int 417 | exception Right_out of int 418 | let f () = 419 | try 420 | let shortest = min lena lenb in 421 | for i = 0 to shortest - 1 do 422 | let ca = try B.get a (idxa + i) with _ -> raise (Left_out i) in 423 | let cb = try B.get b (idxb + i) with _ -> raise (Right_out i) in 424 | let c = Char.compare ca cb in 425 | if c <> 0 426 | then raise (Return c) 427 | else () 428 | done; 429 | (Pervasives.compare (lena : int) lenb) 430 | with 431 | | Return c -> c 432 | | Left_out c -> (* a went out of bounds at 'c + idxa' *) -1 433 | | Right_out _ -> (* b went out of bounds at 'c + idxb' *) 434 | (* so, a is “longer” *) 1 435 | end in 436 | With_exns.f () 437 | 438 | type s = t 439 | module T_length_and_compsub = struct 440 | type t = s 441 | let length = length 442 | let compare_substring = compare_substring 443 | end 444 | 445 | include Compare_substring_strict_of_loose(T_length_and_compsub) 446 | include Make_index_of_string(T_length_and_compsub) 447 | 448 | let concat ?(sep=B.empty) sl = B.concat ~sep sl 449 | 450 | let fold t ~init ~f = 451 | let res = ref init in 452 | for i = 0 to length t - 1 do 453 | res := f !res (B.get t i); 454 | done; 455 | !res 456 | 457 | let foldi t ~init ~f = 458 | let res = ref init in 459 | for i = 0 to length t - 1 do 460 | res := f i !res (B. get t i); 461 | done; 462 | !res 463 | 464 | let fold2_exn t1 t2 ~init ~f = 465 | let lgth1 = (length t1) in 466 | let lgth2 = (length t2) in 467 | match lgth1, lgth2 with 468 | | 0, 0 -> init 469 | | _, _ when lgth1 <> lgth2 -> invalid_arg "fold2_exn" 470 | | lgth1, lgth2 -> 471 | let res = ref init in 472 | for i = 0 to lgth1 - 1 do 473 | res := f !res (B.get t1 i) (B.get t2 i); 474 | done; 475 | !res 476 | 477 | let sub_exn t ~index ~length = 478 | if length = 0 then empty else B.sub t ~pos:index ~len:length 479 | 480 | let sub t ~index ~length = 481 | if length = 0 then Some empty else 482 | try Some (B.sub t ~pos:index ~len:length) 483 | with e -> None 484 | 485 | let slice_exn ?(start=0) ?finish t = 486 | let length_of_t = length t in 487 | let bound_check strict m x = 488 | let out_of_ub = if strict then x > length_of_t else x >= length_of_t in 489 | if x < 0 || (not (is_empty t) && out_of_ub) then 490 | Printf.ksprintf invalid_arg "slice_exn: invalid %s %d" m x 491 | else x 492 | in 493 | let _ = bound_check false "start" start 494 | and finish = 495 | match finish with 496 | | None -> length_of_t 497 | | Some f -> bound_check true "finish" f 498 | in 499 | sub_exn t ~index:start ~length:(finish - start) 500 | 501 | let slice ?start ?finish t = 502 | try Some (slice_exn ?start ?finish t) 503 | with _ -> None 504 | 505 | let iter t ~f = B.iter t ~f 506 | let iteri t ~f = B.iteri t ~f 507 | let iter_reverse t ~f = 508 | for i = length t -1 downto 0 do 509 | f (get_exn t i) 510 | done 511 | 512 | let rev t = 513 | let lgth = length t in 514 | match lgth with 515 | | 0 -> empty 516 | | lgth -> 517 | let o = lgth - 1 in 518 | B.mapi ~f:(fun i _ -> B.get t (o - i)) t 519 | 520 | let map t ~f = B.map t ~f 521 | 522 | let map2_exn t1 t2 ~f = 523 | let lgth1 = (length t1) in 524 | let lgth2 = (length t2) in 525 | match lgth1, lgth2 with 526 | | 0, 0 -> empty 527 | | _, _ when lgth1 <> lgth2 -> invalid_arg "map2_exn" 528 | | lgth1, lgth2 -> 529 | B.mapi ~f:(fun i c -> f c (B.get t2 i)) t1 530 | 531 | let mapi t ~f = B.mapi t ~f 532 | 533 | let for_all t ~f = 534 | try (iter t (fun x -> if not (f x) then raise Not_found else ()); true) 535 | with Not_found -> false 536 | 537 | let exists t ~f = 538 | try (iter t (fun x -> if f x then raise Not_found else ()); false) 539 | with Not_found -> true 540 | 541 | let index_of_character t ?(from=0) c = 542 | let from = if from <= 0 then 0 else min (length t) from in 543 | try Some (B.index_from t from c) 544 | with _ -> None 545 | 546 | let index_of_character_reverse t ?from c = 547 | let from = 548 | let length_of_t = length t in 549 | match from with 550 | | None -> length_of_t - 1 551 | | Some s when s < 0 -> -1 552 | | Some s when s > length_of_t - 1 -> length_of_t - 1 553 | | Some s -> s 554 | in 555 | try Some (B.rindex_from t from c) 556 | with _ -> None 557 | 558 | let resize_from_length ~from ?length ~length_of_s = 559 | let from = if from <= 0 then 0 else min length_of_s from in 560 | let length = 561 | match length with 562 | | None -> length_of_s - from 563 | | Some lg when lg <= 0 -> 0 564 | | Some lg -> min (length_of_s - from) lg 565 | in 566 | (from, length) 567 | 568 | let find ?(from=0) ?length s ~f = 569 | let length_of_s = B.length s in 570 | let from, length = resize_from_length ~from ?length ~length_of_s in 571 | let found = ref None in 572 | let i = ref 0 in 573 | while !found = None && !i < length do 574 | if f (get_exn s (!i + from)) 575 | then found := Some (!i + from) 576 | else incr i 577 | done; 578 | !found 579 | 580 | let find_reverse ?from ?length s ~f = 581 | let length_of_s = B.length s in 582 | if length_of_s = 0 then None 583 | else begin 584 | let from = 585 | match from with 586 | | None -> length_of_s - 1 587 | | Some s when s < 0 -> -1 588 | | Some s when s >= length_of_s - 1 -> length_of_s - 1 589 | | Some s -> s 590 | in 591 | let length = 592 | match length with 593 | | None -> from + 1 594 | | Some l when l <= 0 -> 0 595 | | Some l when l >= from + 1 -> from + 1 596 | | Some l -> l 597 | in 598 | let found = ref None in 599 | let i = ref from in 600 | while !found = None && !i >= from - length + 1 do 601 | (* dbg "i: %d from: %d length: %d" !i from length; *) 602 | if f (get_exn s !i) 603 | then found := Some (!i) 604 | else decr i 605 | done; 606 | !found 607 | end 608 | 609 | let filter_map ?(from=0) ?length s ~f = 610 | let length_of_s = B.length s in 611 | let from, length = resize_from_length ~from ?length ~length_of_s in 612 | if length = 0 then empty 613 | else begin 614 | let b = Buffer.create length in 615 | for i = 0 to length - 1 do 616 | match f (get_exn s (i + from)) with 617 | | Some c -> Buffer.add_char b c 618 | | None -> () 619 | done; 620 | B.of_buffer b 621 | end 622 | 623 | let filter ?from ?length s ~f = 624 | filter_map ?from ?length s ~f:(fun c -> if f c then Some c else None) 625 | 626 | include Make_strip_function (struct 627 | type t = s 628 | type character = char 629 | let empty = empty 630 | let length = length 631 | let sub_exn = sub_exn 632 | let find = find 633 | let find_reverse = find_reverse 634 | let is_whitespace = Native_character.is_whitespace 635 | end) 636 | 637 | include Make_split_rev_function(struct 638 | type t = s 639 | type character = char 640 | let empty = empty 641 | let length = length 642 | let sub_exn = sub_exn 643 | let index_of_string_reverse = index_of_string_reverse 644 | let index_of_character_reverse = index_of_character_reverse 645 | end) 646 | 647 | include Make_prefix_suffix_array (struct 648 | type t = s 649 | type character = char 650 | let length = length 651 | let get = B.get 652 | let sub_exn = sub_exn 653 | end) 654 | 655 | include Make_split_at_index_functions(struct 656 | type t = s 657 | type character = char 658 | let empty = empty 659 | let length = length 660 | let sub_exn = sub_exn 661 | end) 662 | 663 | module Make_output (Model: Api.OUTPUT_MODEL) = struct 664 | 665 | let output chan t = Model.output chan (B.string_for_output t) 666 | 667 | end 668 | 669 | let take_while_with_index t ~f = 670 | let buf = Buffer.create (length t) in 671 | let rec loop idx = 672 | match get t idx with 673 | | Some c when f idx c -> Buffer.add_char buf c; loop (idx + 1) 674 | | _ -> () 675 | in 676 | loop 0; 677 | B.of_buffer buf 678 | 679 | let take_while t ~f = take_while_with_index t ~f:(fun _ c -> f c) 680 | 681 | end (* Make_native *) 682 | -------------------------------------------------------------------------------- /src/lib/int_utf8_character.ml: -------------------------------------------------------------------------------- 1 | 2 | open Sosa_pervasives 3 | open Printf 4 | 5 | type t = int 6 | 7 | let of_native_char x = Some (int_of_char x) 8 | 9 | let compare (i: int) (j : int) = compare i j 10 | let of_int x = 11 | if x land 0x7FFF_FFFF = x then Some x else None 12 | let to_int c = c 13 | let size x = 14 | if x <= 0x7f then 1 else 15 | if x <= 0x7ff then 2 else 16 | if x <= 0xffff then 3 else 17 | if x <= 0x1f_ffff then 4 else 18 | if x <= 0x3ff_ffff then 5 else 19 | if x <= 0x7fff_ffff then 6 else 0 20 | 21 | let is_print t = int_of_char ' ' <= t && t <= int_of_char '~' 22 | 23 | let to_string_hum x = 24 | if is_print x then String.make 1 (char_of_int x) 25 | else sprintf "&#x%X;" x 26 | 27 | let write_to_native_bytes c ~buf ~index = 28 | let sz = size c in 29 | try 30 | let first_byte = 31 | match sz with 32 | | 1 -> ((c lsr 0) land 0b0111_1111) lor 0b0000_0000 33 | | 2 -> ((c lsr 6) land 0b0001_1111) lor 0b1100_0000 34 | | 3 -> ((c lsr 12) land 0b0000_1111) lor 0b1110_0000 35 | | 4 -> ((c lsr 18) land 0b0000_0111) lor 0b1111_0000 36 | | 5 -> ((c lsr 24) land 0b0000_0011) lor 0b1111_1000 37 | | 6 -> ((c lsr 30) land 0b0000_0001) lor 0b1111_1100 38 | | _ -> assert false in 39 | Bytes.set buf index (char_of_int first_byte); 40 | for i = 2 to sz do 41 | let ith_byte = 42 | ((c lsr (6 * (i - 2))) land 0b0011_1111) lor 0b1000_0000 in 43 | Bytes.set buf (index + sz - i + 1) (char_of_int ith_byte); 44 | done; 45 | return sz 46 | with _ -> fail `out_of_bounds 47 | 48 | let read_from_native_string ~buf ~index = 49 | try 50 | let first_char = buf.[index] |> int_of_char in 51 | let size, mask = 52 | if first_char lsr 7 = 0 then (1, 0b0111_1111) 53 | else if first_char lsr 5 = 0b110 then (2, 0b0001_1111) 54 | else if first_char lsr 4 = 0b1110 then (3, 0b0000_1111) 55 | else if first_char lsr 3 = 0b11110 then (4, 0b0000_0111) 56 | else if first_char lsr 2 = 0b111110 then (5, 0b0000_0011) 57 | else if first_char lsr 1 = 0b1111110 then (6, 0b0000_0001) 58 | else raise Not_found 59 | in 60 | let the_int = ref (first_char land mask) in 61 | for i = 1 to size - 1 do 62 | let the_char = buf.[index + i] |> int_of_char in 63 | if (the_char lsr 6) = 0b10 64 | then ( 65 | the_int := (!the_int lsl 6) lor (the_char land 0b0011_1111); 66 | ) else raise Not_found; 67 | done; 68 | Some (!the_int, size) 69 | with _ -> None 70 | 71 | let to_native_string x = 72 | let buf = Bytes.make (size x) 'B' in 73 | begin match write_to_native_bytes x ~buf ~index:0 with 74 | | `Ok _ -> () 75 | | `Error e -> 76 | dbg "buf: %S siz: %d x: %d" (Bytes.to_string buf) (size x) x; 77 | assert false 78 | end; 79 | Bytes.to_string buf 80 | 81 | let is_whitespace c = 82 | try 83 | match char_of_int c with 84 | | ' ' | '\t' | '\r' | '\n' -> true | _ -> false 85 | with _ -> false 86 | 87 | -------------------------------------------------------------------------------- /src/lib/int_utf8_character.mli: -------------------------------------------------------------------------------- 1 | (** {!modtype:Api.BASIC_CHARACTER} with OCaml integers ([int]) representing 2 | Utf8 characters *) 3 | 4 | include (Api.BASIC_CHARACTER with type t = int) 5 | -------------------------------------------------------------------------------- /src/lib/list_of.ml: -------------------------------------------------------------------------------- 1 | 2 | open Sosa_pervasives 3 | open Printf 4 | open Sosa_utilities 5 | 6 | module F = Functors 7 | 8 | module Make (Char: Api.BASIC_CHARACTER) : 9 | Api.BASIC_STRING 10 | with type character = Char.t 11 | with type t = Char.t list = struct 12 | 13 | type character = Char.t 14 | 15 | type t = character list 16 | 17 | let empty = [] 18 | let is_empty = (=) [] 19 | 20 | let max_string_length = None 21 | 22 | let make length c = 23 | let rec loop n acc = 24 | if n >= length then acc else loop (n + 1) (c :: acc) 25 | in 26 | loop 0 [] 27 | 28 | let of_character c = [c] 29 | let of_character_list cl = cl 30 | let to_character_list cl = cl 31 | 32 | let get sl ~index = 33 | try Some (List.nth sl index) with _ -> None 34 | 35 | let set s ~index ~v = 36 | let rec loop n acc = function 37 | | [] -> None 38 | | q :: t when n = index -> 39 | Some (List.rev_append acc (v :: t)) 40 | | q :: t -> 41 | loop (n + 1) (q :: acc) t 42 | in 43 | loop 0 [] s 44 | 45 | let get_exn s ~index = 46 | match get s ~index with None -> invalid_arg "get_exn" | Some s -> s 47 | let set_exn s ~index ~v = 48 | match set s ~index ~v with None -> invalid_arg "set_exn" | Some s -> s 49 | 50 | let iter t ~f = List.iter t ~f 51 | let iteri t ~f = List.iteri t ~f 52 | let iter_reverse t ~f = 53 | List.iter (List.rev t) ~f 54 | 55 | let rev t = List.rev t 56 | 57 | let fold t ~init ~f = List.fold_left t ~init ~f 58 | let foldi t ~init ~f = 59 | snd (List.fold_left t ~init:(0,init) 60 | ~f:(fun (i,a) c -> (i+1,f i a c))) 61 | let fold2_exn t1 t2 ~init ~f = List.fold_left2 t1 t2 ~init ~f 62 | let map = Core_list_map.map 63 | let mapi = Core_list_map.mapi 64 | let map2_exn = Core_list_map.map2_exn 65 | let for_all t ~f = List.for_all t ~f 66 | let exists t ~f = List.exists t ~f 67 | 68 | let compare (a : Char.t list) (b: Char.t list) = compare a b 69 | let of_native_substring s ~offset ~length = 70 | Conversions.of_native_substring 71 | ~empty ~init:(fun () -> ref []) 72 | ~on_new_character:(fun x c -> x := c :: !x) 73 | ~finalize:(fun x -> List.rev !x) 74 | ~read_character_from_native_string:Char.read_from_native_string 75 | s ~offset ~length 76 | 77 | let of_native_string s = 78 | Conversions.of_native_string 79 | of_native_substring s 80 | 81 | let to_native_string l = 82 | Conversions.to_native_string_knowing_size 83 | ~future_size:(fun l -> 84 | List.fold_left l ~init:0 ~f:(fun sum c -> sum + Char.size c)) 85 | ~iter ~write_char_to_native_bytes:Char.write_to_native_bytes 86 | l 87 | |> Bytes.to_string 88 | 89 | let to_string_hum l = sprintf "%S" (to_native_string l) 90 | 91 | let concat ?(sep=[]) ll = 92 | match ll with 93 | | [] -> [] 94 | | hh :: tt -> 95 | let x = ref (List.rev hh) in 96 | List.iter tt ~f:(fun l -> 97 | x := List.rev_append sep !x; 98 | x := List.rev_append l !x; 99 | ); 100 | List.rev !x 101 | 102 | let length = List.length 103 | 104 | let sub t ~index ~length = 105 | let r = ref [] in 106 | let c = ref 0 in 107 | try 108 | List.iteri t ~f:(fun i a -> 109 | if i >= index + length then raise Not_found; 110 | if index <= i then ( 111 | r:= a :: !r; 112 | incr c; 113 | ); 114 | ); 115 | if !c = length then Some (List.rev !r) else None 116 | with 117 | | Not_found -> Some (List.rev !r) 118 | 119 | let sub_exn t ~index ~length = 120 | match sub t ~index ~length with 121 | | Some s -> s 122 | | None -> ksprintf invalid_arg "sub_exn(%d,%d)" index length 123 | 124 | let slice_exn ?(start=0) ?finish t = 125 | let length_of_t = List.length t in 126 | if start < 0 || (not (is_empty t) && start >= length_of_t) then 127 | ksprintf invalid_arg "slice_exn: invalid start %d" start 128 | else 129 | match finish with 130 | | None -> sub_exn t ~index:start ~length:(length_of_t - start) 131 | | Some f -> if f < 0 || f > length_of_t then 132 | ksprintf invalid_arg "slice_exn: invalid finish %d" f 133 | else 134 | sub_exn t ~index:start ~length:(f - start) 135 | 136 | let slice ?start ?finish t = 137 | try Some (slice_exn ?start ?finish t) 138 | with _ -> None 139 | 140 | 141 | let rec comp_loop p lst_pair = 142 | if p 143 | then match lst_pair with 144 | | (i,[]) -> Some i 145 | | ([],j) -> None 146 | | (i::is), (j::js) -> comp_loop (i = j) (is,js) 147 | else None 148 | 149 | let is_prefix t ~prefix = 150 | match comp_loop true (t,prefix) with 151 | | Some _ -> true 152 | | None -> false 153 | 154 | let is_suffix t ~suffix = 155 | is_prefix (List.rev t) ~prefix:(List.rev suffix) 156 | 157 | let chop_prefix_exn t ~prefix = 158 | match comp_loop true (t,prefix) with 159 | | Some r -> r 160 | | None -> raise (Invalid_argument "chop_prefix_exn: not a prefix") 161 | 162 | let chop_prefix t ~prefix = 163 | try Some (chop_prefix_exn t prefix) 164 | with _ -> None 165 | 166 | let chop_suffix_exn t ~suffix = 167 | List.rev (chop_prefix_exn (List.rev t) ~prefix:(List.rev suffix)) 168 | 169 | let chop_suffix t ~suffix = 170 | try Some (chop_suffix_exn t suffix) 171 | with _ -> None 172 | 173 | let unrevSplit t n = 174 | if n < 0 175 | then [],t 176 | else let rec offset i ((l,r) as p) = 177 | if i = n 178 | then p 179 | else match r with 180 | | [] -> p 181 | | h::t -> offset (i + 1) (h::l,t) 182 | in 183 | offset 0 ([],t) 184 | 185 | let split_at t ~index = 186 | let l,r = unrevSplit t index in 187 | List.rev l, r 188 | 189 | let take t ~index = fst (split_at t index) 190 | 191 | let drop t ~index = 192 | let l,r = unrevSplit t index in 193 | r 194 | 195 | let index_of_character t ?(from=0) c = 196 | let rec loop index = function 197 | | [] -> None 198 | | x :: tl -> 199 | if index >= from && x = c then 200 | Some index 201 | else 202 | loop (index + 1) tl 203 | in 204 | loop 0 t 205 | 206 | let index_of_character_reverse t ?from c = 207 | let length_of_t, rev = 208 | let rec loop lgth acc = function 209 | | [] -> (lgth, acc) 210 | | h :: t -> loop (lgth + 1) (h :: acc) t in 211 | loop 0 [] t 212 | in 213 | let from = 214 | match from with 215 | | None -> length_of_t - 1 216 | | Some s when s < 0 -> -1 217 | | Some s when s > length_of_t - 1 -> length_of_t - 1 218 | | Some s -> s 219 | in 220 | match index_of_character rev ~from:(length_of_t - from - 1) c with 221 | | Some c -> Some (length_of_t - c - 1) 222 | | None -> None 223 | 224 | let compare_substring (a, idxa, lena) (b, idxb, lenb) = 225 | let module With_exns = struct 226 | exception Left 227 | exception Right 228 | let rec drop_until ~exn idx l = 229 | match idx, l with 230 | | 0, l -> l 231 | | more, [] -> raise exn 232 | | more, h :: t -> drop_until ~exn (more - 1) t 233 | let f () = 234 | begin try 235 | let rec cmp l1 l2 len1 len2 = 236 | if len1 < 0 then raise Left; 237 | if len2 < 0 then raise Right; 238 | match l1, l2 with 239 | | _, _ when len1 = 0 && len2 = 0 -> 0 240 | | _, _ when len1 = 0 -> -1 241 | | _, _ when len2 = 0 -> 1 242 | | [], [] when len1 = 0 || len2 = 0 -> Pervasives.compare lena lenb 243 | | [], _ when len1 > 0 -> raise Left 244 | | _, [] when len2 > 0 -> raise Right 245 | | h1 :: t1, h2 :: t2 when Char.compare h1 h2 = 0 -> 246 | cmp t1 t2 (len1 - 1) (len2 - 1) 247 | | h1 :: _, h2 :: _ -> Char.compare h1 h2 248 | | _, _ -> assert false (* calming down the warnings.. *) 249 | in 250 | if lena = 0 && lenb = 0 then 0 251 | else ( 252 | let aa = drop_until ~exn:Left idxa a in 253 | let bb = drop_until ~exn:Right idxb b in 254 | (cmp aa bb lena lenb) 255 | ) 256 | with 257 | | Left -> -1 258 | | Right -> 1 259 | | Failure s -> 1 260 | (* dbg "(%d, %d/%d) Vs (%d, %d/%d) %s" idxa lena (length a) idxb lenb (length b) s; *) 261 | end 262 | end in 263 | With_exns.f () 264 | 265 | type s = t 266 | module T_length_and_compsub = struct 267 | type t = s let length = length let compare_substring = compare_substring 268 | end 269 | include F.Compare_substring_strict_of_loose(T_length_and_compsub) 270 | include F.Make_index_of_string(T_length_and_compsub) 271 | 272 | let find ?(from=0) ?length s ~f = 273 | (* index and virtual_length are maybe a bit redundant but I favor 274 | readability of the branches of the match *) 275 | let from = if from <= 0 then 0 else from in 276 | let rec find_from index virtual_length l = 277 | match l, length with 278 | | [], _ -> None 279 | | _, Some lgth when lgth <= virtual_length -> None 280 | | h :: t, _ when index < from -> find_from (index + 1) virtual_length t 281 | | h :: t, _ when index >= from && f h -> Some index 282 | | h :: t, _ -> find_from (index + 1) (virtual_length + 1) t 283 | in 284 | find_from 0 0 s 285 | 286 | let find_reverse ?from ?length s ~f = 287 | let length_of_s = List.length s in 288 | let from = 289 | match from with 290 | | None -> None 291 | | Some s when s < 0 -> Some length_of_s 292 | | Some s when s > length_of_s - 1 -> Some 0 293 | | Some s -> Some (length_of_s - 1 - s) 294 | in 295 | match find ?from ?length (List.rev s) ~f with 296 | | None -> None 297 | | Some i -> Some (length_of_s - 1 - i) 298 | 299 | let filter_map ?(from=0) ?length t ~f = 300 | let rec filter_map_rec acc index virtual_length l = 301 | match l, length with 302 | | [], _ -> List.rev acc 303 | | _, Some lgth when lgth <= virtual_length -> List.rev acc 304 | | h :: t, _ when index < from -> 305 | filter_map_rec acc (index + 1) virtual_length t 306 | | h :: t, _ (* when index >= from *) -> 307 | begin match f h with 308 | | Some o -> filter_map_rec (o :: acc) (index + 1) (virtual_length + 1) t 309 | | None -> filter_map_rec acc (index + 1) (virtual_length + 1) t 310 | end 311 | in 312 | filter_map_rec [] 0 0 t 313 | 314 | let filter ?from ?length t ~f = 315 | filter_map ?from ?length t ~f:(fun c -> if f c then Some c else None) 316 | 317 | include F.Make_strip_function (struct 318 | type t = Char.t list 319 | type character = Char.t 320 | let empty = empty 321 | let length = length 322 | let sub_exn = sub_exn 323 | let find = find 324 | let find_reverse = find_reverse 325 | let is_whitespace = Char.is_whitespace 326 | end) 327 | 328 | include F.Make_split_function(struct 329 | type t = Char.t list 330 | type character = Char.t 331 | let length = length 332 | let sub_exn = sub_exn 333 | let index_of_string = index_of_string 334 | let index_of_character = index_of_character 335 | end) 336 | 337 | module Make_output (Model: Api.OUTPUT_MODEL) = struct 338 | 339 | let (>>=) = Model.bind 340 | 341 | let output chan l = 342 | List.fold_left l ~init:(Model.return ()) ~f:(fun prev_m c -> 343 | prev_m >>= fun () -> 344 | (* TODO: Safe to call Bytes.unsafe_to_string? *) 345 | Model.output chan (Char.to_native_string c)) 346 | 347 | end 348 | 349 | let take_while_with_index t ~f = 350 | let rec loop idx acc = 351 | function 352 | | h :: t when f idx h -> loop (idx + 1) (h :: acc) t 353 | | [] 354 | | _ :: _ -> List.rev acc 355 | in 356 | loop 0 [] t 357 | let take_while t ~f = take_while_with_index t ~f:(fun _ c -> f c) 358 | 359 | end (* Make *) 360 | -------------------------------------------------------------------------------- /src/lib/list_of.mli: -------------------------------------------------------------------------------- 1 | (** A functor to create strings that are lists of characters.*) 2 | 3 | module Make (Char : Api.BASIC_CHARACTER) : 4 | Api.BASIC_STRING with type character = Char.t 5 | -------------------------------------------------------------------------------- /src/lib/native_bytes.ml: -------------------------------------------------------------------------------- 1 | 2 | open Sosa_pervasives 3 | 4 | module N = Functors.Make_native(struct 5 | include BytesLabels 6 | let of_buffer = Buffer.to_bytes 7 | (* Would this be an appropriate place for Bytes.unsafe_to_string?*) 8 | let string_for_output = to_string 9 | end) 10 | 11 | include N 12 | 13 | let mutate_exn t ~index c = BytesLabels.set t index c 14 | 15 | let mutate t ~index c = 16 | try BytesLabels.set t index c; return () with _ -> fail `out_of_bounds 17 | 18 | let blit_exn ~src ~src_index ~dst ~dst_index ~length = 19 | BytesLabels.blit ~src ~src_pos:src_index ~dst ~dst_pos:dst_index ~len:length 20 | 21 | let blit ~src ~src_index ~dst ~dst_index ~length = 22 | try blit_exn ~src ~src_index ~dst ~dst_index ~length; return () 23 | with _ -> fail `out_of_bounds 24 | 25 | let to_native_string x = BytesLabels.to_string x 26 | let of_native_string x = return (BytesLabels.of_string x) 27 | let of_native_substring x ~offset ~length = 28 | if length = 0 then return BytesLabels.empty 29 | else 30 | try return (StringLabels.sub x ~pos:offset ~len:length 31 | |> BytesLabels.of_string) 32 | with e -> fail `out_of_bounds 33 | 34 | let to_string_hum x = Printf.sprintf "%S" (Bytes.to_string x) 35 | -------------------------------------------------------------------------------- /src/lib/native_bytes.mli: -------------------------------------------------------------------------------- 1 | (** Richer interface to the built in [byte] type. *) 2 | 3 | include Api.NATIVE_BYTES 4 | -------------------------------------------------------------------------------- /src/lib/native_character.ml: -------------------------------------------------------------------------------- 1 | 2 | open Sosa_pervasives 3 | open Printf 4 | 5 | type t = char 6 | 7 | let of_native_char x = Some x 8 | let of_int x = 9 | try Some (char_of_int x) with _ -> None 10 | let to_int = int_of_char 11 | let compare = Char.compare 12 | 13 | let size _ = 1 14 | 15 | let is_print t = ' ' <= t && t <= '~' 16 | let to_native_string x = String.make 1 x 17 | let to_string_hum x = 18 | if is_print x then String.make 1 x 19 | else sprintf "0x%2x" (int_of_char x) 20 | 21 | let write_to_native_bytes c ~buf ~index = 22 | try Bytes.set buf index c; return 1 23 | with _ -> fail `out_of_bounds 24 | 25 | let read_from_native_string ~buf ~index = 26 | try Some (String.get buf index, 1) 27 | with _ -> None 28 | 29 | let is_whitespace = 30 | function ' ' | '\t' | '\r' | '\n' -> true | _ -> false 31 | 32 | -------------------------------------------------------------------------------- /src/lib/native_character.mli: -------------------------------------------------------------------------------- 1 | (** Richer interface to the build in [char] type. *) 2 | 3 | include Api.NATIVE_CHARACTER 4 | -------------------------------------------------------------------------------- /src/lib/native_string.ml: -------------------------------------------------------------------------------- 1 | 2 | open Sosa_pervasives 3 | 4 | module N = Functors.Make_native(struct 5 | include StringLabels 6 | let of_buffer = Buffer.contents 7 | let empty = "" 8 | let string_for_output s = s 9 | end) 10 | 11 | include N 12 | 13 | let to_native_string x = x 14 | let of_native_string x = return x 15 | let of_native_substring x ~offset ~length = 16 | if length = 0 then return "" 17 | else 18 | try return (StringLabels.sub x ~pos:offset ~len:length) 19 | with e -> fail `out_of_bounds 20 | 21 | let to_string_hum x = Printf.sprintf "%S" x 22 | -------------------------------------------------------------------------------- /src/lib/native_string.mli: -------------------------------------------------------------------------------- 1 | (** Richer interface to the built in [string] type. *) 2 | 3 | include Api.NATIVE_STRING 4 | -------------------------------------------------------------------------------- /src/lib/of_mutable.ml: -------------------------------------------------------------------------------- 1 | open Sosa_pervasives 2 | open Printf 3 | module F = Functors 4 | 5 | module Make (S: Api.MINIMALISTIC_MUTABLE_STRING) : Api.BASIC_STRING 6 | with type character = S.character 7 | with type t = S.t = struct 8 | 9 | include S 10 | let is_empty s = 11 | try ignore (S.get s 0); false with _ -> true 12 | 13 | let get t ~index = try Some (get t index) with _ -> None 14 | let set t ~index ~v:c = 15 | let lgth = length t in 16 | if index < 0 || lgth <= index then None 17 | else Some ( 18 | let res = make lgth (S.get t 0) in 19 | blit ~dst:res ~dst_pos:0 ~src:t ~src_pos:0 ~len:lgth; 20 | S.set res index c; 21 | res) 22 | 23 | let get_exn s ~index = S.get s index 24 | let set_exn s ~index ~v = 25 | match set s ~index ~v with None -> invalid_arg "set_exn" | Some s -> s 26 | 27 | let of_character c = make 1 c 28 | 29 | let of_character_list cl = 30 | match cl with 31 | | [] -> empty 32 | | one :: more -> 33 | let res = make (List.length cl) one in 34 | List.iteri more ~f:(fun i c -> 35 | S.set res (i + 1) c); 36 | res 37 | 38 | let to_character_list s = 39 | let res = ref [] in 40 | for i = S.length s - 1 downto 0 do 41 | res := S.get s i :: !res 42 | done; 43 | !res 44 | 45 | let rec concat ?(sep=empty) tl = 46 | match tl with 47 | | [] -> empty 48 | | one :: more -> 49 | begin try 50 | let first_char = 51 | try S.get one 0 52 | with _ -> S.get sep 0 53 | in 54 | let sep_length = S.length sep in 55 | let total_length = 56 | List.fold_left ~init:(S.length one) more ~f:(fun prev s -> 57 | prev + sep_length + S.length s) in 58 | let dst = make total_length first_char in 59 | let index = ref 0 in 60 | blit ~dst ~dst_pos:!index ~src:one ~src_pos:0 ~len:(length one); 61 | index := !index + (length one); 62 | List.iter more ~f:(fun s -> 63 | blit ~dst ~dst_pos:!index ~src:sep ~src_pos:0 ~len:sep_length; 64 | index := !index + sep_length; 65 | blit ~dst ~dst_pos:!index ~src:s ~src_pos:0 ~len:(length s); 66 | index := !index + (length s); 67 | ); 68 | dst 69 | with _ -> 70 | concat more ~sep (* both one and sep are empty *) 71 | end 72 | 73 | let iter t ~f = 74 | for i = 0 to length t - 1 do 75 | f (S.get t i) 76 | done 77 | 78 | let iteri t ~f = 79 | for i = 0 to length t - 1 do 80 | f i (S.get t i) 81 | done 82 | 83 | let iter_reverse t ~f = 84 | for i = length t -1 downto 0 do 85 | f (S.get t i) 86 | done 87 | 88 | let fold t ~init ~f = 89 | let x = ref init in 90 | for i = 0 to length t - 1 do 91 | x := f !x (S.get t i) 92 | done; 93 | !x 94 | 95 | let foldi t ~init ~f = 96 | let x = ref init in 97 | for i = 0 to length t - 1 do 98 | x := f i !x (S.get t i) 99 | done; 100 | !x 101 | 102 | let fold2_exn t1 t2 ~init ~f = 103 | let lgth1 = (length t1) in 104 | let lgth2 = (length t2) in 105 | match lgth1, lgth2 with 106 | | 0, 0 -> init 107 | | _, _ when lgth1 <> lgth2 -> invalid_arg "fold2_exn" 108 | | lgth1, lgth2 -> 109 | let res = ref init in 110 | for i = 0 to lgth1 - 1 do 111 | res := f !res (S.get t1 i) (S.get t2 i); 112 | done; 113 | !res 114 | 115 | let rev t = 116 | let lgth = length t in 117 | match lgth with 118 | | 0 -> empty 119 | | lgth -> 120 | let res = make lgth (S.get t 0) in 121 | for i = 0 to lgth - 1 do 122 | S.set res i (S.get t (lgth - 1 - i)) 123 | done; 124 | res 125 | 126 | let map t ~f = 127 | let lgth = (length t) in 128 | if lgth = 0 129 | then empty 130 | else begin 131 | let res = make lgth (S.get t 0) in 132 | for i = 0 to lgth - 1 do 133 | S.set res i (f (S.get t i)) 134 | done; 135 | res 136 | end 137 | 138 | let mapi t ~f = 139 | let lgth = (length t) in 140 | if lgth = 0 141 | then empty 142 | else begin 143 | let res = make lgth (S.get t 0) in 144 | for i = 0 to lgth - 1 do 145 | S.set res i (f i (S.get t i)) 146 | done; 147 | res 148 | end 149 | 150 | let map2_exn t1 t2 ~f = 151 | let lgth1 = (length t1) in 152 | let lgth2 = (length t2) in 153 | match lgth1, lgth2 with 154 | | 0, 0 -> empty 155 | | _, _ when lgth1 <> lgth2 -> invalid_arg "map2_exn" 156 | | lgth1, lgth2 -> 157 | let res = make lgth1 (S.get t1 0) in 158 | for i = 0 to lgth1 - 1 do 159 | S.set res i (f (S.get t1 i) (S.get t2 i)) 160 | done; 161 | res 162 | 163 | let for_all t ~f = 164 | try 165 | iter t (fun c -> if not (f c) then raise Not_found); 166 | true 167 | with _ -> false 168 | 169 | let exists t ~f = 170 | try 171 | iter t (fun c -> if (f c) then raise Not_found); 172 | false 173 | with _ -> true 174 | 175 | let sub t ~index ~length = 176 | if length = 0 then Some empty else 177 | begin 178 | let lgth = S.length t in 179 | if lgth = 0 180 | then None (* `length <> 0` *) 181 | else begin 182 | try 183 | let res = make length (S.get t index) in 184 | for i = 1 to length - 1 do 185 | S.set res i (S.get t (index + i)) 186 | done; 187 | Some res 188 | with _ -> None 189 | end 190 | end 191 | 192 | let sub_exn t ~index ~length = 193 | match sub t ~index ~length with 194 | | Some s -> s 195 | | None -> ksprintf invalid_arg "sub_exn(%d,%d)" index length 196 | 197 | let slice_exn ?(start=0) ?finish t = 198 | let length_of_t = S.length t in 199 | if start < 0 || (not (is_empty t) && start >= length_of_t) then 200 | ksprintf invalid_arg "slice_exn: invalid start %d" start 201 | else 202 | match finish with 203 | | None -> sub_exn t ~index:start ~length:(length_of_t - start) 204 | | Some f -> if f < 0 || f > length_of_t then 205 | ksprintf invalid_arg "slice_exn: invalid finish %d" f 206 | else 207 | sub_exn t ~index:start ~length:(f - start) 208 | 209 | let slice ?start ?finish t = 210 | try Some (slice_exn ?start ?finish t) 211 | with _ -> None 212 | 213 | let to_string_hum t = to_native_string t |> sprintf "%S" 214 | 215 | let index_of_character t ?(from=0) c = 216 | let from = if from <= 0 then 0 else min (length t) from in 217 | let res = ref None in 218 | try 219 | for i = from to length t - 1 do 220 | if S.get t i = c then (res:= Some i; raise Not_found) 221 | done; 222 | None 223 | with _ -> !res 224 | 225 | let index_of_character_reverse t ?from c = 226 | let from = 227 | let length_of_t = length t in 228 | match from with 229 | | None -> length_of_t - 1 230 | | Some s when s < 0 -> -1 231 | | Some s when s > length_of_t - 1 -> length_of_t - 1 232 | | Some s -> s 233 | in 234 | let res = ref None in 235 | try 236 | for i = from downto 0 do 237 | if S.get t i = c then (res:= Some i; raise Not_found) 238 | done; 239 | None 240 | with _ -> !res 241 | 242 | let compare_substring (a, idxa, lena) (b, idxb, lenb) = 243 | let module With_exns = struct 244 | exception Return of int 245 | exception Left_out of int 246 | exception Right_out of int 247 | let f () = 248 | try 249 | let shortest = min lena lenb in 250 | for i = 0 to shortest - 1 do 251 | let ca = try S.get a (idxa + i) with _ -> raise (Left_out i) in 252 | let cb = try S.get b (idxb + i) with _ -> raise (Right_out i) in 253 | let c = S.compare_char ca cb in 254 | if c <> 0 255 | then raise (Return c) 256 | else () 257 | done; 258 | (Pervasives.compare (lena : int) lenb) 259 | with 260 | | Return c -> c 261 | | Left_out c -> (* a went out of bounds at 'c + idxa' *) -1 262 | | Right_out _ -> (* b went out of bounds at 'c + idxb' *) 263 | (* so, a is “longer” *) 1 264 | end in 265 | With_exns.f () 266 | 267 | type s = t 268 | module T_length_and_compsub = struct 269 | type t = s let length = length let compare_substring = compare_substring 270 | end 271 | include F.Compare_substring_strict_of_loose(T_length_and_compsub) 272 | include F.Make_index_of_string(T_length_and_compsub) 273 | 274 | 275 | let resize_from_length ~from ?length ~length_of_s = 276 | let from = if from <= 0 then 0 else min length_of_s from in 277 | let length = 278 | match length with 279 | | None -> length_of_s - from 280 | | Some lg when lg <= 0 -> 0 281 | | Some lg -> min (length_of_s - from) lg 282 | in 283 | (from, length) 284 | 285 | let find ?(from=0) ?length s ~f = 286 | let length_of_s = S.length s in 287 | let from, length = resize_from_length ~from ?length ~length_of_s in 288 | let found = ref None in 289 | let i = ref 0 in 290 | while !found = None && !i < length do 291 | if f (get_exn s (!i + from)) 292 | then found := Some (!i + from) 293 | else incr i 294 | done; 295 | !found 296 | 297 | let find_reverse ?from ?length s ~f = 298 | let length_of_s = S.length s in 299 | if length_of_s = 0 then None 300 | else begin 301 | let from = 302 | match from with 303 | | None -> length_of_s - 1 304 | | Some s when s < 0 -> -1 305 | | Some s when s >= length_of_s - 1 -> length_of_s - 1 306 | | Some s -> s 307 | in 308 | let length = 309 | match length with 310 | | None -> from + 1 311 | | Some l when l <= 0 -> 0 312 | | Some l when l >= from + 1 -> from + 1 313 | | Some l -> l 314 | in 315 | let found = ref None in 316 | let i = ref from in 317 | while !found = None && !i >= from - length + 1 do 318 | (* dbg "i: %d from: %d length: %d" !i from length; *) 319 | if f (get_exn s !i) 320 | then found := Some (!i) 321 | else decr i 322 | done; 323 | !found 324 | end 325 | 326 | let filter_map ?(from=0) ?length s ~f = 327 | let length_of_s = S.length s in 328 | let from, length = resize_from_length ~from ?length ~length_of_s in 329 | if length = 0 then empty 330 | else begin 331 | let res = ref [] in 332 | for i = length - 1 downto 0 do 333 | match f (get_exn s (i + from)) with 334 | | Some c -> res := c :: !res 335 | | None -> () 336 | done; 337 | of_character_list !res 338 | end 339 | 340 | let filter ?from ?length s ~f = 341 | filter_map ?from ?length s ~f:(fun c -> if f c then Some c else None) 342 | 343 | include F.Make_strip_function (struct 344 | type t = S.t 345 | type character = S.character 346 | let empty = empty 347 | let length = length 348 | let sub_exn = sub_exn 349 | let find = find 350 | let find_reverse = find_reverse 351 | let is_whitespace = S.is_whitespace 352 | end) 353 | 354 | include F.Make_split_rev_function(struct 355 | type t = S.t 356 | type character = S.character 357 | let empty = empty 358 | let length = length 359 | let sub_exn = sub_exn 360 | let index_of_string_reverse = index_of_string_reverse 361 | let index_of_character_reverse = index_of_character_reverse 362 | end) 363 | 364 | include F.Make_prefix_suffix_array (struct 365 | type t = S.t 366 | type character = S.character 367 | let length = S.length 368 | let get = S.get 369 | let sub_exn = sub_exn 370 | end) 371 | 372 | 373 | include F.Make_split_at_index_functions(struct 374 | type t = S.t 375 | type character = S.character 376 | let empty = empty 377 | let length = length 378 | let sub_exn t ~index ~length = sub_exn t index length 379 | end) 380 | 381 | module Make_output (Model: Api.OUTPUT_MODEL) = struct 382 | 383 | let (>>=) = Model.bind 384 | 385 | let output chan t = 386 | Model.output chan (to_native_string t) 387 | 388 | end 389 | 390 | let take_while_with_index t ~f = 391 | if length t = 0 then empty 392 | else ( 393 | let buf = make (length t) (S.get t 0) in 394 | let rec loop idx = 395 | match get t idx with 396 | | Some c when f idx c -> S.set buf idx c; loop (idx + 1) 397 | | _ -> idx 398 | in 399 | let new_length = loop 0 in 400 | sub_exn buf ~index:0 ~length:new_length 401 | ) 402 | 403 | let take_while t ~f = take_while_with_index t ~f:(fun _ c -> f c) 404 | 405 | end (* Make *) 406 | -------------------------------------------------------------------------------- /src/lib/of_mutable.mli: -------------------------------------------------------------------------------- 1 | (** A functor that uses an implementation of 2 | {!modtype:Api.MINIMALISTIC_MUTABLE_STRING} to build 3 | {!modtype:Api.BASIC_STRING}. *) 4 | 5 | module Make (S : Api.MINIMALISTIC_MUTABLE_STRING) : 6 | Api.BASIC_STRING with type character = S.character 7 | -------------------------------------------------------------------------------- /src/lib/sosa.mlpack: -------------------------------------------------------------------------------- 1 | Api Functors Native_character Native_string Native_bytes List_of Of_mutable Int_utf8_character 2 | -------------------------------------------------------------------------------- /src/lib/sosa_pervasives.ml: -------------------------------------------------------------------------------- 1 | 2 | open Api 3 | open Printf 4 | 5 | module List = ListLabels 6 | module String = StringLabels 7 | 8 | let (|>) x f = f x 9 | let return x : (_, _) result = `Ok x 10 | let fail x : (_, _) result = `Error x 11 | let bind x f = 12 | match x with 13 | | `Ok o -> f o 14 | | `Error e -> fail e 15 | let (>>=) = bind 16 | let dbg fmt = printf ("DBG: " ^^ fmt ^^ "\n%!") 17 | 18 | (* The function `List.map` adapted from `Core_kernel`'s way of 19 | unrolling the loops. *) 20 | module Core_list_map = struct 21 | 22 | let map_slow l ~f = List.rev (List.rev_map ~f l) 23 | 24 | let rec count_map ~f l ctr = 25 | match l with 26 | | [] -> [] 27 | | [x1] -> 28 | let f1 = f x1 in 29 | [f1] 30 | | [x1; x2] -> 31 | let f1 = f x1 in 32 | let f2 = f x2 in 33 | [f1; f2] 34 | | [x1; x2; x3] -> 35 | let f1 = f x1 in 36 | let f2 = f x2 in 37 | let f3 = f x3 in 38 | [f1; f2; f3] 39 | | [x1; x2; x3; x4] -> 40 | let f1 = f x1 in 41 | let f2 = f x2 in 42 | let f3 = f x3 in 43 | let f4 = f x4 in 44 | [f1; f2; f3; f4] 45 | | x1 :: x2 :: x3 :: x4 :: x5 :: tl -> 46 | let f1 = f x1 in 47 | let f2 = f x2 in 48 | let f3 = f x3 in 49 | let f4 = f x4 in 50 | let f5 = f x5 in 51 | f1 :: f2 :: f3 :: f4 :: f5 :: 52 | (if ctr > 1000 53 | then map_slow ~f tl 54 | else count_map ~f tl (ctr + 1)) 55 | 56 | let map l ~f = count_map ~f l 0 57 | 58 | let mapi_slow l ~f ~i = 59 | let _, r = List.fold_left l 60 | ~f:(fun (i, a) e -> (i + 1, ((f i e)::a))) 61 | ~init:(i,[]) 62 | in 63 | List.rev r 64 | 65 | let rec count_mapi ~f l ctr = 66 | match l with 67 | | [] -> [] 68 | | [x1] -> 69 | let f1 = f ctr x1 in 70 | [f1] 71 | | [x1; x2] -> 72 | let f1 = f ctr x1 in 73 | let f2 = f (ctr + 1) x2 in 74 | [f1; f2] 75 | | [x1; x2; x3] -> 76 | let f1 = f ctr x1 in 77 | let f2 = f (ctr + 1) x2 in 78 | let f3 = f (ctr + 2) x3 in 79 | [f1; f2; f3] 80 | | [x1; x2; x3; x4] -> 81 | let f1 = f ctr x1 in 82 | let f2 = f (ctr + 1) x2 in 83 | let f3 = f (ctr + 2) x3 in 84 | let f4 = f (ctr + 3) x4 in 85 | [f1; f2; f3; f4] 86 | | x1 :: x2 :: x3 :: x4 :: x5 :: tl -> 87 | let f1 = f ctr x1 in 88 | let f2 = f (ctr + 1) x2 in 89 | let f3 = f (ctr + 2) x3 in 90 | let f4 = f (ctr + 3) x4 in 91 | let f5 = f (ctr + 4) x5 in 92 | f1 :: f2 :: f3 :: f4 :: f5 :: 93 | (if ctr > 5000 94 | then mapi_slow ~f ~i:(ctr + 5) tl 95 | else count_mapi ~f tl (ctr + 5)) 96 | 97 | let mapi l ~f = count_mapi ~f l 0 98 | 99 | let map2_slow l1 l2 ~f = List.rev (List.rev_map2 ~f l1 l2) 100 | 101 | let rec count_map2_exn ~f l1 l2 ctr = 102 | match l1, l2 with 103 | | [], [] -> [] 104 | | [x1], [y1] -> 105 | let f1 = f x1 y1 in 106 | [f1] 107 | | [x1; x2], [y1; y2] -> 108 | let f1 = f x1 y1 in 109 | let f2 = f x2 y2 in 110 | [f1; f2] 111 | | [x1; x2; x3], [y1; y2; y3] -> 112 | let f1 = f x1 y1 in 113 | let f2 = f x2 y2 in 114 | let f3 = f x3 y3 in 115 | [f1; f2; f3] 116 | | [x1; x2; x3; x4], [y1; y2; y3; y4] -> 117 | let f1 = f x1 y1 in 118 | let f2 = f x2 y2 in 119 | let f3 = f x3 y3 in 120 | let f4 = f x4 y4 in 121 | [f1; f2; f3; f4] 122 | | x1 :: x2 :: x3 :: x4 :: x5 :: tl1, 123 | y1 :: y2 :: y3 :: y4 :: y5 :: tl2 -> 124 | let f1 = f x1 y1 in 125 | let f2 = f x2 y2 in 126 | let f3 = f x3 y3 in 127 | let f4 = f x4 y4 in 128 | let f5 = f x5 y5 in 129 | f1 :: f2 :: f3 :: f4 :: f5 :: 130 | (if ctr > 1000 131 | then map2_slow ~f tl1 tl2 132 | else count_map2_exn ~f tl1 tl2 (ctr + 1)) 133 | | _, _ -> failwith "count_map2" 134 | 135 | let map2_exn l1 l2 ~f = count_map2_exn ~f l1 l2 0 136 | end 137 | -------------------------------------------------------------------------------- /src/lib/sosa_utilities.ml: -------------------------------------------------------------------------------- 1 | (* Methods useful for constructing Sosa functor inputs. *) 2 | 3 | open Sosa_pervasives 4 | 5 | (** Module to help build `{of,to}_native_[sub]string` functions. 6 | It is most useful while using variable sized characters. *) 7 | module Conversions = struct 8 | 9 | let of_native_substring 10 | ~empty ~init ~on_new_character ~finalize 11 | ~read_character_from_native_string 12 | s ~offset ~length = 13 | if length = 0 then return empty 14 | else 15 | begin 16 | (if offset + length > String.length s 17 | then fail `out_of_bounds 18 | else return ()) 19 | >>= fun () -> 20 | let module With_exn = struct 21 | exception WChar of int 22 | let f buf = 23 | let x = init () in 24 | try 25 | let rec loop index = 26 | if index < offset + length 27 | then 28 | begin match read_character_from_native_string ~buf ~index with 29 | | Some (s, size) when index + size <= offset + length -> 30 | on_new_character x s; 31 | loop (index + size) 32 | | Some (_, _ (* too big size *)) 33 | | None -> raise (WChar index) 34 | end 35 | else () 36 | in 37 | loop offset; 38 | return (finalize x) 39 | with 40 | | WChar c -> fail (`wrong_char_at c) 41 | end in 42 | With_exn.f s 43 | end 44 | 45 | let of_native_string of_native_substring s = 46 | match of_native_substring s ~offset:0 ~length:(String.length s) with 47 | | `Ok o -> return o 48 | | `Error (`wrong_char_at c) -> fail (`wrong_char_at c) 49 | | `Error `out_of_bounds -> (* There is a bug ! *) assert false 50 | 51 | 52 | let to_native_string_knowing_size 53 | ~future_size ~iter ~write_char_to_native_bytes l = 54 | let length = future_size l in 55 | let buf = Bytes.make length 'B' in 56 | let index = ref 0 in 57 | iter l ~f:begin fun c -> 58 | match write_char_to_native_bytes c ~buf ~index:!index with 59 | | `Ok siz -> index := !index + siz 60 | | `Error `out_of_bounds -> 61 | failwith "Bug in Make_native_conversions.to_native_string" 62 | end; 63 | buf 64 | 65 | end (* Conversions. *) 66 | -------------------------------------------------------------------------------- /src/test/main.ml: -------------------------------------------------------------------------------- 1 | (*M 2 | 3 | Tests of the Sosa library 4 | ========================= 5 | 6 | Test Utilities 7 | -------------- 8 | 9 | M*) 10 | 11 | open Nonstd 12 | module String = StringLabels 13 | open Printf 14 | open Sosa 15 | open Sosa_utilities 16 | 17 | let say fmt = printf (fmt ^^ "\n%!") 18 | 19 | let should_do_benchmarks = 20 | try Sys.argv.(1) = "bench" with _ -> false 21 | 22 | let cartesian_product list1 list2 = 23 | if list2 = [] then [] else 24 | let rec loop l1 l2 accum = match l1 with 25 | | [] -> accum 26 | | (hd :: tl) -> 27 | loop tl l2 28 | (List.rev_append 29 | (List.map ~f:(fun x -> (hd,x)) l2) 30 | accum) 31 | in 32 | List.rev (loop list1 list2 []) 33 | 34 | let return_code = ref 0 35 | let should_not_return_zero () = return_code := 5 36 | 37 | let test_assert msg cond = 38 | if not cond then ( 39 | should_not_return_zero (); 40 | say ">> TEST FAILED: [%s]" msg 41 | ) else () 42 | 43 | let test_assertf cond fmt = 44 | ksprintf (fun s -> test_assert s cond) fmt 45 | 46 | let make_string = String.init 47 | 48 | let list_dot_init l f = 49 | Array.init l f |> Array.to_list 50 | 51 | let random_string i = 52 | let length = Random.int i in 53 | make_string length (fun _ -> char_of_int (Random.int 256)) 54 | 55 | let random_ascii_string i = 56 | let length = Random.int i in 57 | make_string length (fun _ -> char_of_int (Random.int 128)) 58 | 59 | let random_utf8_string i = 60 | let length = Random.int i in 61 | list_dot_init length (fun _ -> Random.int 0x10_FFFF) 62 | |> List.map ~f:Int_utf8_character.to_native_string 63 | |> String.concat ~sep:"" 64 | 65 | let test_native_subjects = 66 | "" :: "A" :: "\x00" :: "Invalid UTF-8: \197" 67 | :: "Invalid UTF-8 again: \197\000" 68 | :: "Invalid UTF-8 again: \197\000 " 69 | :: list_dot_init 20 (fun i -> random_string (i * 4 + 1)) 70 | @ list_dot_init 20 (fun i -> random_ascii_string (i * 4 + 1)) 71 | @ list_dot_init 20 (fun i -> random_utf8_string (i * 4 + 1)) 72 | 73 | (*M 74 | 75 | This is a set of common denominator native strings, i.e., string that can be 76 | converted to every other representation. We call them DNA and use only `A`, 77 | `C`, `G`, `T` for future implementations which will be using only 2 or 4 78 | bits per character. 79 | 80 | M*) 81 | let dna_test_subjects = 82 | let random_read _ = 83 | make_string (Random.int 300 + 1) (fun _ -> 84 | begin match Random.int 4 with 85 | | 0 -> 'A' 86 | | 1 -> 'C' 87 | | 2 -> 'G' 88 | | _ -> 'T' 89 | end) in 90 | list_dot_init 200 random_read 91 | 92 | (*M 93 | 94 | Benchmarks 95 | ---------- 96 | 97 | If asked on the command line, each test will run some benchmarks on the 98 | implementation. 99 | 100 | M*) 101 | module Benchmark = struct 102 | 103 | let now () = Unix.gettimeofday () 104 | 105 | let benchmarks_table = ref [] 106 | 107 | let add ~implementation ~experiment ~result = 108 | match List.Assoc.get implementation !benchmarks_table with 109 | | Some exps -> 110 | exps := (experiment, result) :: !exps 111 | | None -> 112 | benchmarks_table := (implementation, ref [experiment, result]) :: !benchmarks_table 113 | 114 | let measure ?(repeats=1000) f = 115 | let start = now () in 116 | for i = 1 to repeats do 117 | f () 118 | done; 119 | let stop = (now ()) in 120 | (1000. *. (stop -. start) /. (float repeats)) 121 | 122 | let declare ?repeats ~implementation ~experiment f = 123 | if should_do_benchmarks then 124 | let time = measure ?repeats f in 125 | let result = sprintf "%.3f ms" time in 126 | add ~implementation ~experiment ~result 127 | else () 128 | 129 | let to_string () = 130 | let experiments = 131 | List.map !benchmarks_table ~f:(fun (_, l) -> 132 | List.map !l ~f:(fun (e, _) -> e)) 133 | |> List.concat |> List.dedup in 134 | let first_row = 135 | "Implementation" :: experiments 136 | in 137 | let row_widths = 138 | List.map first_row (fun s -> ref (String.length s)) in 139 | (* say "row widths: %s" (String.concat ~sep:", " 140 | (List.map row_widths (fun r -> sprintf "%d" !r))); *) 141 | let other_rows = 142 | List.map !benchmarks_table (fun (impl, l) -> 143 | let w = List.nth_exn row_widths 0 in 144 | w := max !w (String.length impl); 145 | impl :: List.mapi experiments (fun i exp -> 146 | let res = List.Assoc.get exp !l 147 | |> Option.value_exn ~msg:"assoc experiments" in 148 | let w = List.nth_exn row_widths (i + 1) in 149 | (* say "w: %d, i: %d lgth: %d" !w i (String.length res); *) 150 | w := max !w (String.length res); 151 | res)) in 152 | let row_to_string row = 153 | row 154 | |> List.mapi ~f:(fun i c -> 155 | (* say "%d %s %d %d" i c !(List.nth_exn row_widths i) (String.length c); *) 156 | sprintf "%s%s" c 157 | (String.make (1 + !(List.nth_exn row_widths i) - String.length c) ' ')) 158 | |> String.concat ~sep:" " 159 | in 160 | sprintf "%s\n%s\n%s\n" 161 | (first_row |> row_to_string) 162 | (List.map row_widths (fun s -> String.make (!s) '-') 163 | |> String.concat ~sep:" ") 164 | (other_rows 165 | |> List.map ~f:row_to_string 166 | |> String.concat ~sep:"\n") 167 | 168 | end 169 | 170 | (*M 171 | 172 | Test with First-Class Modules 173 | ----------------------------- 174 | 175 | The function `do_basic_test` below takes a whole OCaml module implementation as 176 | argument; `TEST_STRING` is the expected signature: 177 | 178 | M*) 179 | 180 | module type TEST_STRING = sig 181 | val test_name: string 182 | val can_have_wrong_char: bool 183 | module Chr: Api.BASIC_CHARACTER 184 | module Str: Api.BASIC_STRING with type character := Chr.t 185 | end 186 | 187 | let do_basic_test (module Test : TEST_STRING) = 188 | let open Test in 189 | say "### Test %S" test_name; 190 | 191 | test_assertf (Str.length Str.empty = 0) "(length empty)"; 192 | test_assertf (Str.is_empty Str.empty) "(is_empty empty)"; 193 | begin match Str.of_native_string "" with 194 | | `Ok o -> test_assertf (Str.is_empty o) "(is_empty \"\")"; 195 | | `Error _ -> test_assertf false "Str.of_native_string %S -> Error" "" 196 | end; 197 | 198 | begin (* test of_/to_ native_string *) 199 | let test_ofto s = 200 | begin match Str.of_native_string s with 201 | | `Ok s2 -> 202 | (* We test that when we can transform from `s2`, the opposite 203 | conversion works: *) 204 | let back = Str.to_native_string s2 in 205 | test_assert (sprintf "test_ofto %S <> %S" s back) (s = back); 206 | 207 | (* We test `Str.fold` against a potentially *very* slow 208 | implementation using `Str.length` and `Str.get`. *) 209 | let folding ~init ~f to_string = 210 | let fold = Str.fold s2 ~init ~f in 211 | let refold = 212 | let r = ref init in 213 | for i = 0 to Str.length s2 - 1 do 214 | r := f !r (Option.value_exn ~msg:"folding" (Str.get s2 ~index:i)); 215 | done; 216 | !r in 217 | test_assertf (fold = refold) "\nfold: %s\nrefold: %s" 218 | (to_string fold) (to_string refold) 219 | in 220 | folding ~init:[] ~f:(fun p c -> c :: p) 221 | (fun c -> String.concat ~sep:", " (List.map c Chr.to_string_hum)); 222 | folding ~init:42 ~f:(fun p c -> Hashtbl.hash (p, c)) (sprintf "%d"); 223 | 224 | (* This a function that displays an Str.t (extracts the 225 | beginning and its size(s)) *) 226 | let str_to_hum s = 227 | sprintf "[%S.%d,%dB]" 228 | (Str.to_native_string s |> sprintf "%s" 229 | |> (fun s -> String.sub s 0 (min (String.length s) 10))) 230 | (Str.length s) 231 | (Str.to_native_string s |> String.length) in 232 | 233 | 234 | (* We test `Str.sub` by comparing it with an implementation 235 | based on `Str.get`. *) 236 | let subbing ~index ~length = 237 | let subopt = Str.sub s2 ~index ~length in 238 | let r = ref [] in 239 | for i = index to index + length - 1 do 240 | r := Str.get s2 ~index:i :: !r 241 | done; 242 | begin match subopt with 243 | | Some sub -> 244 | (* If `Str.sub` returned some then `r` contains all the 245 | characters in reverse order: *) 246 | let bus = 247 | test_assertf (List.for_all !r ((<>) None)) "sub %d %d: r has a None" 248 | index length; 249 | Str.concat ~sep:Str.empty 250 | (List.rev_map (List.filter_opt !r) ~f:(Str.of_character)) in 251 | (* We compare the result `sub` and the re-computed version `bus`: *) 252 | test_assertf (Str.compare sub bus = 0) "sub %s %d %d\n→ Some %s ≠ %s" 253 | (str_to_hum s2) index length (str_to_hum sub) (str_to_hum bus) 254 | | None -> 255 | test_assertf (!r = [] || List.exists !r ((=) None)) 256 | "sub %s %d %d → None" (str_to_hum s2) index length 257 | end 258 | in 259 | subbing ~index:0 ~length:0; 260 | subbing ~index:0 ~length:(Str.length s2); 261 | subbing ~index:2 ~length:(Str.length s2); 262 | subbing ~index:4 ~length:(Str.length s2); 263 | subbing ~index:5 ~length:(Str.length s2); 264 | subbing ~index:0 ~length:1; 265 | subbing ~index:0 ~length:3; 266 | subbing ~index:0 ~length:30; 267 | subbing ~index:1 ~length:30; 268 | subbing ~index:2 ~length:30; 269 | subbing ~index:4 ~length:3; 270 | subbing ~index:40 ~length:3; 271 | subbing ~index:400 ~length:3; 272 | subbing ~index:0 ~length:(Str.length s2 - 0); 273 | subbing ~index:2 ~length:(Str.length s2 - 2); 274 | subbing ~index:4 ~length:(Str.length s2 - 4); 275 | subbing ~index:5 ~length:(Str.length s2 - 5); 276 | subbing ~index:0 ~length:(Str.length s2 - 0 - 1); 277 | subbing ~index:2 ~length:(Str.length s2 - 2 - 1); 278 | subbing ~index:4 ~length:(Str.length s2 - 4 - 1); 279 | subbing ~index:5 ~length:(Str.length s2 - 5 - 1); 280 | 281 | 282 | | `Error (`wrong_char_at i) -> 283 | (* If the conversion fails, we check that the error value points 284 | to an invalid character: *) 285 | test_assert (sprintf "test_ofto %S -> wrong char at index %d" s i) 286 | (Chr.read_from_native_string ~buf:s ~index:i = None) 287 | end; 288 | in 289 | List.iter test_native_subjects test_ofto; 290 | end; 291 | 292 | begin (* test concat and a bit more *) 293 | let tried_separators = ref 0 in 294 | let rec try_separators n = 295 | if n = 0 296 | then 297 | if !tried_separators < 10 then 298 | say "WARNING: %s -> try_separators did not try much (%d separators)" 299 | test_name !tried_separators 300 | else () 301 | else 302 | let sep = random_string n in 303 | begin match Str.of_native_string sep with 304 | | `Ok csep -> 305 | let selection = 306 | List.filter test_native_subjects (fun _ -> Random.bool ()) in 307 | let viable_strings, converted = 308 | let zipped = 309 | List.filter_map selection (fun s -> 310 | match Str.of_native_string s with 311 | | `Ok s2 -> Some (s, s2) 312 | | `Error (`wrong_char_at c) -> None) in 313 | List.map zipped ~f:fst, List.map zipped ~f:snd 314 | in 315 | let concated = String.concat ~sep viable_strings in 316 | let concated2 = Str.concat ~sep:csep converted in 317 | (* say "separators %S" sep; *) 318 | incr tried_separators; 319 | test_assertf (Str.to_native_string concated2 = concated) 320 | "try_separators %d (%dth): %S %s →\n %S Vs\n %s" n !tried_separators sep 321 | (String.concat ~sep:", " (List.map viable_strings (sprintf "%S"))) 322 | concated 323 | (Str.to_string_hum concated2); 324 | try_separators (n - 1) 325 | | `Error _ -> try_separators (n - 1) 326 | end 327 | in 328 | try_separators 800; 329 | end; 330 | 331 | (* This tests `make` against `length` and `get`: *) 332 | for i = 0 to 100 do 333 | let seed = 50 * (i + 1) in 334 | let char = Random.int seed in 335 | let length = Random.int seed in 336 | match Chr.of_int char with 337 | | Some character -> 338 | let s = Str.make length character in 339 | test_assertf (Str.length s = length) "length of make"; 340 | for j = 0 to length - 1 do 341 | test_assertf ((Str.get s j) = Some character) "nth char of make" 342 | done; 343 | | None -> () 344 | done; 345 | 346 | begin (* We test the`Str.Make_output` functor with `Buffer.t` by writing 347 | directly the transformable functions and though Out.output and 348 | comparing the resulting buffer contents. *) 349 | let module Out = Str.Make_output(struct 350 | type ('a, 'b, 'c) thread = 'a 351 | type ('a, 'b, 'c) channel = Buffer.t 352 | let return x = x 353 | let bind x f = f x 354 | let output buf s = 355 | (* say "adding %S" s;; *) 356 | Buffer.add_string buf s 357 | end) 358 | in 359 | let buf_ground = Buffer.create 42 in 360 | let buf_through_str = Buffer.create 42 in 361 | let there_was_an_error = ref None in 362 | List.iter test_native_subjects (fun s -> 363 | match Str.of_native_string s with 364 | | `Ok o -> 365 | Out.output buf_through_str o; 366 | Buffer.add_string buf_ground s; 367 | | `Error (`wrong_char_at i) -> 368 | there_was_an_error := Some i); 369 | test_assertf (Buffer.contents buf_ground = Buffer.contents buf_through_str) 370 | "Str.Make_output test %S, %S" 371 | (Buffer.contents buf_ground) 372 | (Buffer.contents buf_through_str); 373 | end; 374 | 375 | begin (* Some tests of `for_all` and `exists`: *) 376 | List.iter test_native_subjects (fun str -> 377 | match Str.of_native_string str with 378 | | `Ok o -> 379 | test_assertf (Str.for_all o (fun _ -> true) = true) "∀ true = true"; 380 | test_assertf (Str.for_all o (fun _ -> false) = false || Str.is_empty o) 381 | "∀ false in %S = false" str; 382 | test_assertf (Str.exists o (fun _ -> true) = true || Str.is_empty o) 383 | "∃ true => true"; 384 | test_assertf (Str.exists o (fun _ -> false) = false) 385 | "∃ false in %S = false" str; 386 | let i_did_false = ref false in 387 | let comp = Str.for_all o (fun _ -> 388 | if Random.bool () then true else (i_did_false := true; false)) in 389 | test_assertf (comp = not !i_did_false) "random test for_all"; 390 | let i_did_true = ref false in 391 | let comp = Str.exists o (fun _ -> 392 | if Random.bool () then (i_did_true := true; true) else false) in 393 | test_assertf (comp = !i_did_true) "random test exists"; 394 | | `Error (`wrong_char_at i) -> () 395 | ); 396 | end; 397 | 398 | begin (* of_native_substring *) 399 | (* First some basic tests of Str.of_native_substring, then the 400 | bigger test with all the test strings. *) 401 | test_assertf (Str.of_native_substring "" ~offset:0 ~length:0 = `Ok Str.empty) 402 | "sub '' 0 0 = ''"; 403 | test_assertf (Str.of_native_substring "" ~offset:0 ~length:1 = `Error `out_of_bounds) 404 | "sub '' 0 1 → out_of_bounds"; 405 | test_assertf (Str.of_native_substring "" ~offset:1 ~length:0 = `Ok Str.empty) 406 | "sub '' 1 0 → ''"; 407 | test_assertf (Str.of_native_substring "" ~offset:1 ~length:1 = `Error `out_of_bounds) 408 | "sub '' 1 1 → out_of_bounds"; 409 | let i_have_been_to_ok = ref false in 410 | let i_have_been_to_wrong_char = ref false in 411 | let i_have_been_to_out_of_bounds = ref false in 412 | List.iter test_native_subjects begin fun str -> 413 | let offset = Random.int 42 in 414 | let length = Random.int 42 in 415 | let substr = try (String.sub str offset length) with _ -> "" in 416 | begin match Str.of_native_substring str ~offset ~length with 417 | | `Ok _ as o -> 418 | i_have_been_to_ok := true; 419 | test_assertf (o = (Str.of_native_string substr)) 420 | "sub %S %d %d → Ok" str offset length; 421 | | `Error (`wrong_char_at c) -> 422 | i_have_been_to_wrong_char := true; 423 | test_assertf (Str.of_native_string substr = `Error (`wrong_char_at (c - offset))) 424 | "sub %S %d %d → Ok" str offset length; 425 | | `Error `out_of_bounds -> 426 | i_have_been_to_out_of_bounds := true; 427 | test_assertf (substr = "") "sub out_of_bounds" 428 | end; 429 | end; 430 | test_assertf !i_have_been_to_ok "i_have_been_to_ok"; 431 | test_assertf (!i_have_been_to_wrong_char || not can_have_wrong_char) 432 | "i_have_been_to_wrong_char"; 433 | test_assertf !i_have_been_to_out_of_bounds "i_have_been_to_out_of_bounds"; 434 | end; 435 | 436 | let int_list_to_string l = 437 | sprintf "[%s]" 438 | (List.map ~f:Int.to_string l |> String.concat ~sep:",") in 439 | 440 | let str_to_int_list s = 441 | Str.to_character_list s |> List.map ~f:Chr.to_int in 442 | 443 | let int_option_to_string io = 444 | Option.value_map ~default:"None" ~f:(sprintf "Some %d") io in 445 | 446 | begin (* index_of_character{,_reverse} *) 447 | let test ?from ?should_find l c = 448 | let s = List.filter_map l Chr.of_int |> Str.of_character_list in 449 | let ch = 450 | Option.value_exn ~msg:"test index_of_character" (Chr.of_int c) in 451 | let res = Str.index_of_character ?from s ch in 452 | test_assertf (res = should_find) 453 | "index_of_character: %s (from: %s) expects %s but got %s" 454 | (str_to_int_list s |> int_list_to_string) 455 | (int_option_to_string from) 456 | (int_option_to_string should_find) 457 | (int_option_to_string res); 458 | if from = None then ( 459 | let from = Some 0 in 460 | let res = Str.index_of_character ?from s ch in 461 | test_assertf (res = should_find) 462 | "index_of_character: %s (added-from: %s) expects %s but got %s" 463 | (str_to_int_list s |> int_list_to_string) 464 | (int_option_to_string from) 465 | (int_option_to_string should_find) 466 | (int_option_to_string res); 467 | ); 468 | in 469 | test [] 0; 470 | test [1] 0; 471 | test [1;2;3;4] 0; 472 | test [0] 0 ~should_find:0; 473 | test [1;2;0] 0 ~should_find:2; 474 | test ~from:1 [] 0; 475 | test ~from:1 [1] 0; 476 | test ~from:1 [1;2;3;4] 0; 477 | test ~from:1 [0] 0; 478 | test ~from:1 [1;2;0] 0 ~should_find:2; 479 | test ~from:(-1) [] 0; 480 | test ~from:(-1) [1] 0; 481 | test ~from:(-1) [1;2;3;4] 0; 482 | test ~from:(-1) [0] 0 ~should_find:0; 483 | test ~from:(-1) [1;2;0] 0 ~should_find:2; 484 | test ~from:4 [] 0; 485 | test ~from:4 [1] 0; 486 | test ~from:4 [1;2;3;4] 0; 487 | test ~from:4 [0] 0; 488 | test ~from:4 [1;2;0] 0; 489 | 490 | let test ?from ?should_find l c = 491 | let s = List.filter_map l Chr.of_int |> Str.of_character_list in 492 | let ch = 493 | Option.value_exn ~msg:"test index_of_character" (Chr.of_int c) in 494 | let res = Str.index_of_character_reverse ?from s ch in 495 | test_assertf (res = should_find) 496 | "index_of_character_reverse: %s (from: %s) expects %s but got %s" 497 | (str_to_int_list s |> int_list_to_string) 498 | (int_option_to_string from) 499 | (int_option_to_string should_find) 500 | (int_option_to_string res); 501 | if from = None then ( 502 | let from = Some (Str.length s - 1) in 503 | let res = Str.index_of_character_reverse ?from s ch in 504 | test_assertf (res = should_find) 505 | "index_of_character_reverse: %s (added-from: %s) expects %s but got %s" 506 | (str_to_int_list s |> int_list_to_string) 507 | (int_option_to_string from) 508 | (int_option_to_string should_find) 509 | (int_option_to_string res); 510 | ); 511 | in 512 | test [] 0; 513 | test [1] 0; 514 | test [1;2;3;4] 0; 515 | test [0] 0 ~should_find:0; 516 | test [1;2;0] 0 ~should_find:2; 517 | test ~from:1 [] 0; 518 | test ~from:1 [1] 0; 519 | test ~from:1 [1;2;3;4] 0; 520 | test ~from:1 [0] 0 ~should_find:0; 521 | test ~from:1 [1;2;0] 0; 522 | test ~from:1 [1;2;0] 1 ~should_find:0; 523 | test ~from:(-1) [] 0; 524 | test ~from:(-1) [1] 0; 525 | test ~from:(-1) [1;2;3;4] 0; 526 | test ~from:(-1) [0] 0; 527 | test ~from:(-1) [1;2;0] 0; 528 | test ~from:4 [] 0; 529 | test ~from:4 [1] 0; 530 | test ~from:4 [1;2;3;4] 0; 531 | test ~from:4 [0] 0 ~should_find:0; 532 | test ~from:4 [1;2;0] 0 ~should_find:2; 533 | 534 | 535 | (* A test of index_of_character and index_of_character_reverse, we 536 | create a big cartesian product 537 | (nat_string, (from_index, char_to_find)) and we run both searches. *) 538 | let froms = List.init 10 (fun i -> Random.int (i + 1)) in 539 | let chars = List.init 10 (fun i -> Chr.of_int i) |> List.filter_opt in 540 | let to_do = 541 | List.(cartesian_product test_native_subjects (cartesian_product froms chars)) 542 | in 543 | let i_went_to_some_index = ref 0 in 544 | let i_went_to_none_from_length = ref 0 in 545 | let i_went_to_none_from_absent = ref 0 in 546 | let rev_i_went_to_some_index = ref 0 in 547 | let rev_i_went_to_none_from_length = ref 0 in 548 | let rev_i_went_to_none_from_absent = ref 0 in 549 | List.iter to_do (fun (nat, (from, char)) -> 550 | match Str.of_native_string nat with 551 | | `Ok o -> 552 | begin match Str.index_of_character o ~from char with 553 | | Some index -> 554 | incr i_went_to_some_index; 555 | test_assertf (Str.get o index = Some char) "find | get"; 556 | | None -> 557 | if from > Str.length o - 1 then (incr i_went_to_none_from_length) 558 | else begin 559 | for i = from to Str.length o - 1 do 560 | test_assertf (Str.get o i <> Some char) 561 | "not found => can't find, i: %d, from : %d, length: %d" 562 | i from (Str.length o); 563 | done; 564 | incr i_went_to_none_from_absent; 565 | end 566 | end; 567 | begin match Str.index_of_character_reverse o ~from char with 568 | | Some index -> 569 | incr rev_i_went_to_some_index; 570 | test_assertf (Str.get o index = Some char) "rev, find | get"; 571 | | None -> 572 | if from > Str.length o - 1 then (incr rev_i_went_to_none_from_length) 573 | else begin 574 | for i = from downto 0 do 575 | test_assertf (Str.get o i <> Some char) 576 | "rev, not found => can't find, i: %d, from : %d, length: %d" 577 | i from (Str.length o); 578 | done; 579 | incr rev_i_went_to_none_from_absent; 580 | end; 581 | end; 582 | | _ -> ()); 583 | test_assertf (!i_went_to_some_index > 0) ""; 584 | test_assertf (!i_went_to_none_from_length > 0) ""; 585 | test_assertf (!i_went_to_none_from_absent > 0) ""; 586 | test_assertf (!rev_i_went_to_some_index > 0) ""; 587 | test_assertf (!rev_i_went_to_none_from_length > 0) ""; 588 | test_assertf (!rev_i_went_to_none_from_absent > 0) ""; 589 | end; 590 | 591 | begin (* Test compare_substring{_strict} *) 592 | (* A first test of compare_substring{_strict} with special cases, empty strings, 593 | and small strings, containing 'a', 'c', 'g', 't' → they should 594 | be convertible to any backend :) *) 595 | let is_equivalent resopt expected = 596 | (match resopt with 597 | | None -> false 598 | | Some r -> r = expected || r * expected > 0) in 599 | let test_compare_substring (a, idxa, lena) (b, idxb, lenb) expected = 600 | match Str.of_native_string a, Str.of_native_string b with 601 | | `Ok aa, `Ok bb -> 602 | let res = Str.compare_substring (aa, idxa, lena) (bb, idxb, lenb) in 603 | test_assertf (res = expected || res * expected > 0) (* We test for the sign *) 604 | "test_compare_substring (%S, %d, %d) (%S, %d, %d) = %d, × %d < 0" 605 | a idxa lena b idxb lenb res expected; 606 | let resopt = Str.compare_substring_strict (aa, idxa, lena) (bb, idxb, lenb) in 607 | test_assertf (is_equivalent resopt expected) 608 | "test_compare_substring_strict (%S, %d, %d) (%S, %d, %d) = %d, × %d < 0" 609 | a idxa lena b idxb lenb res expected; 610 | (* And now check commutativity: *) 611 | let invres = Str.compare_substring (bb, idxb, lenb) (aa, idxa, lena) in 612 | test_assertf (invres = (~- expected) || invres * expected < 0) (* We test for the sign *) 613 | "test_compare_substring, commutes (%S, %d, %d) (%S, %d, %d) × -1 = %d, × %d < 0" 614 | a idxa lena b idxb lenb res expected; 615 | let resopt = Str.compare_substring_strict (bb, idxb, lenb) (aa, idxa, lena) in 616 | test_assertf (is_equivalent resopt (~- expected)) 617 | "test_compare_substring_strict, commutes (%S, %d, %d) (%S, %d, %d) = %d, × %d < 0" 618 | a idxa lena b idxb lenb res expected; 619 | | _, _ -> test_assertf false "assumption about ACGT is wrong" 620 | in 621 | (* Semantically well-defined tests: *) 622 | test_compare_substring ("", 0, 0) ("", 0, 0) ( 0); 623 | test_compare_substring ("aaa", 0, 0) ("", 0, 0) ( 0); 624 | test_compare_substring ("aaa", 0, 0) ("ggg", 0, 0) ( 0); 625 | test_compare_substring ("aaa", 1, 0) ("ggg", 1, 0) ( 0); 626 | test_compare_substring ("aaa", 1, 0) ("ggg", 1, 0) ( 0); 627 | test_compare_substring ("aaa", 0, 0) ("ggg", 0, 1) (-1); 628 | test_compare_substring ("aaa", 1, 0) ("ggg", 1, 1) (-1); 629 | test_compare_substring ("aaa", 1, 0) ("ggg", 1, 1) (-1); 630 | test_compare_substring ("aaa", 1, 1) ("ggg", 1, 1) (-1); 631 | test_compare_substring ("aga", 1, 1) ("ggc", 1, 1) ( 0); 632 | test_compare_substring ("aga", 1, 1) ("gag", 1, 1) ( 1); 633 | test_compare_substring ("aga", 1, 1) ("gcg", 1, 1) ( 1); 634 | test_compare_substring ("aagg", 2, 2) ("gg", 0, 2) ( 0); 635 | (* A test of the out-of-bounds behavior: *) 636 | let test_compare_substring_strictness (a, idxa, lena) = 637 | match Str.of_native_string a, Str.of_native_string "acgt" with 638 | | `Ok aa, `Ok bb -> 639 | test_assertf (Str.compare_substring_strict (aa, idxa, lena) (Str.empty, 0, 0) = None) 640 | "Str.compare_substring_strict out_of_bounds 1"; 641 | test_assertf (Str.compare_substring_strict (aa, idxa, lena) (bb, 1, 2) = None) 642 | "Str.compare_substring_strict out_of_bounds 2"; 643 | test_assertf (Str.compare_substring_strict (Str.empty, 0, 0) (aa, idxa, lena) = None) 644 | "Str.compare_substring_strict out_of_bounds 3"; 645 | test_assertf (Str.compare_substring_strict (bb, 1, 2) (aa, idxa, lena) = None) 646 | "Str.compare_substring_strict out_of_bounds 4"; 647 | | _ -> test_assertf false "assumption about ACGT is wrong" 648 | in 649 | test_compare_substring_strictness ("", 0, 1); 650 | test_compare_substring_strictness ("a", 1, 1); 651 | test_compare_substring_strictness ("a", -1, 1); 652 | test_compare_substring_strictness ("a", 1, -1); 653 | test_compare_substring_strictness ("a", -1, -1); 654 | test_compare_substring_strictness ("aa", 10, 1); 655 | test_compare_substring_strictness ("aa", 10, -1); 656 | (* Now we run a bigger randomized test of compare_substring{_strict}. *) 657 | let been_to_some_0 = ref 0 in 658 | let been_to_some_m = ref 0 in 659 | List.iter test_native_subjects (fun a -> 660 | List.iter test_native_subjects (fun b -> 661 | match Str.of_native_string a, Str.of_native_string b with 662 | | `Ok aa, `Ok bb -> 663 | let rec test n = 664 | let length_a = Str.length aa in 665 | let length_b = Str.length bb in 666 | let lena = Random.int (length_a + 5) in 667 | let idxa = Random.int (lena + 5) in 668 | let idxb, lenb = 669 | if Random.bool () 670 | then (Random.int (length_b + 5), Random.int (length_b + 5)) 671 | else (idxa, lena) (* half of the times with same params *) 672 | in 673 | let res = 674 | Str.compare_substring (aa, idxa, lena) (bb, idxb, lenb) in 675 | let resopt = 676 | Str.compare_substring_strict (aa, idxa, lena) (bb, idxb, lenb) in 677 | begin match res with 678 | | 0 -> (* EQUAL *) 679 | test_assertf (lena = lenb) 680 | "compare_substring: equal but lengths %d ≠ %d" lena lenb; 681 | for i = 0 to min lena lenb - 1 do 682 | test_assertf ((Str.get aa (i + idxa)) = (Str.get bb (i + idxb))) 683 | "compare_substring: equal but different …" 684 | done; 685 | test_assertf (is_equivalent resopt res) 686 | "resopt Vs res in EQUAL case"; 687 | incr been_to_some_0; 688 | | m -> 689 | let suba = Str.sub aa ~index:idxa ~length:(lena) in 690 | let subb = Str.sub bb ~index:idxb ~length:(lenb) in 691 | begin match suba, subb with 692 | | Some sa, Some sb -> 693 | (* Well defined case: "sub" returns something for both *) 694 | test_assertf (Str.compare sa sb * m > 0) 695 | "%d instead of %d sa: %s sb:%s (lena: %d, lenb: %d)" m (Str.compare sa sb) 696 | (Str.to_string_hum sa) (Str.to_string_hum sb) lena lenb; 697 | test_assertf (is_equivalent resopt res) 698 | "strict: %s instead of %d sa: %s sb:%s (lena: %d, lenb: %d)" 699 | (Option.value_map ~default:"None" resopt ~f:(sprintf "Some %d")) 700 | (Str.compare sa sb) 701 | (Str.to_string_hum sa) (Str.to_string_hum sb) lena lenb; 702 | | _, _ -> () 703 | end; 704 | incr been_to_some_m; 705 | () 706 | end; 707 | if n > 0 then test (n - 1); 708 | in 709 | test 4 710 | | _, _ -> () 711 | ) 712 | ); 713 | test_assertf (!been_to_some_0 > 5) "been_to_some_0: %d" !been_to_some_0; 714 | test_assertf (!been_to_some_m > 5) "been_to_some_m: %d" !been_to_some_m; 715 | end; 716 | 717 | begin (* We test index_of_string and index_of_string_reverse, if we expect 718 | the same result we may give only `~expect` if not, we use 719 | `~expect_rev`. *) 720 | let test_index_of_string ?from ?sub_index ?sub_length ?expect_rev s ~sub ~expect = 721 | match Str.of_native_string s, Str.of_native_string sub with 722 | | `Ok t, `Ok subt -> 723 | let res = Str.index_of_string t ~sub:subt ?from ?sub_index ?sub_length in 724 | let shopt = Option.value_map ~f:(sprintf "Some %d") ~default:"None" in 725 | test_assertf (res = expect) 726 | "Str.index_of_string %s ~sub:%s ?from:%s ?sub_index:%s ?sub_length:%s gave %s not %s" 727 | s sub (shopt from) (shopt sub_index) (shopt sub_length) 728 | (shopt res) (shopt expect); 729 | let erev = match expect_rev with None -> expect | Some opt -> opt in 730 | let res = Str.index_of_string_reverse t ~sub:subt ?from ?sub_index ?sub_length in 731 | test_assertf (res = erev) 732 | "Str.index_of_string_reverse %s ~sub:%s ?from:%s ?sub_index:%s ?sub_length:%s gave %s not %s" 733 | s sub (shopt from) (shopt sub_index) (shopt sub_length) 734 | (shopt res) (shopt erev); 735 | | _, _ -> () 736 | in 737 | test_index_of_string "aaaa" ~sub:"cc" ~expect:None; 738 | test_index_of_string "aaaa" ~sub:"aa" ~expect:(Some 0) ~expect_rev:(Some 2); 739 | test_index_of_string "ccaa" ~sub:"aa" ~expect:(Some 2); 740 | test_index_of_string "cccca" ~sub:"aa" ~expect:(None); 741 | test_index_of_string "aacca" ~from:1 ~sub:"aa" ~expect:(None) ~expect_rev:(Some 0); 742 | test_index_of_string "aaccaa" ~from:1 ~sub:"aa" ~expect:(Some 4) ~expect_rev:(Some 0); 743 | test_index_of_string "aacca" ~from:1 ~sub:"aa" ~sub_index:1 ~expect:(Some 1) ~expect_rev:(Some 1); 744 | test_index_of_string "aacca" ~from:2 ~sub:"aa" ~sub_index:1 ~expect:(Some 4) ~expect_rev:(Some 1); 745 | test_index_of_string "aacca" ~from:2 ~sub:"aa" ~sub_length:1 ~expect:(Some 4) ~expect_rev:(Some 1); 746 | test_index_of_string "aacca" ~from:2 ~sub:"aa" ~sub_length:0 ~expect:(Some 2) ~expect_rev:(Some 2); 747 | test_index_of_string "aacca" ~from:2 ~sub:"" ~expect:(Some 2) ~expect_rev:(Some 2); 748 | test_index_of_string "caaa" ~from:(-1) ~sub:"aa" ~expect:(Some 1) ~expect_rev:None; 749 | test_index_of_string "aaaa" ~from:3 ~sub:"aa" ~expect:None ~expect_rev:(Some 2); 750 | test_index_of_string "aaaa" ~from:4 ~sub:"aa" ~expect:None ~expect_rev:(Some 2); 751 | test_index_of_string "aaaa" ~from:5 ~sub:"aa" ~expect:None ~expect_rev:(Some 2); 752 | test_index_of_string "caaa" ~sub_index:(-1) ~sub:"aa" ~expect:(Some 1) ~expect_rev:(Some 2); 753 | test_index_of_string "aaaa" ~sub_index:2 ~sub:"aa" ~expect:(Some 0) ~expect_rev:(Some 3); 754 | (* ┗▶ This is searching the empty string ! Find everywhere. *) 755 | test_index_of_string "caaa" ~sub_index:3 ~sub:"aa" ~expect:(Some 0) ~expect_rev:(Some 3); 756 | (* ┗▶ This is also searching the empty string ! *) 757 | test_index_of_string "caaa" ~sub_index:1 ~sub_length:3 ~sub:"aa" ~expect:(Some 1) ~expect_rev:(Some 3); 758 | test_index_of_string "caaa" ~sub_index:(-1) ~sub_length:3 ~sub:"aa" ~expect:(Some 1) ~expect_rev:(Some 2); 759 | end; 760 | 761 | begin (* Test `filter_map` *) 762 | let test ?from ?length l ~f ~expect fmt = 763 | let name = ksprintf (fun s -> s) fmt in 764 | (* say "test: %s l : %d" name (List.length l); *) 765 | let s = 766 | List.filter_map l Chr.of_int |> Str.of_character_list in 767 | (* say "test: %s s: %d" name (Str.length s); *) 768 | let filtered = 769 | Str.filter_map ?from ?length s ~f:(fun c -> 770 | (* say "Chr: %d" (Chr.to_int c); *) 771 | match f (Chr.to_int c) with 772 | | Some i -> Chr.of_int i 773 | | None -> None) 774 | in 775 | let res_ints = 776 | Str.to_character_list filtered |> List.map ~f:Chr.to_int in 777 | let before = 778 | Str.to_character_list s |> List.map ~f:Chr.to_int in 779 | test_assertf (expect = res_ints) 780 | "test_filter_map: [%s=%s] → [%s] <> [%s] (%s)" 781 | (List.map l (sprintf "%d") |> String.concat ~sep:",") 782 | (List.map before (sprintf "%d") |> String.concat ~sep:",") 783 | (List.map res_ints (sprintf "%d") |> String.concat ~sep:",") 784 | (List.map expect (sprintf "%d") |> String.concat ~sep:",") 785 | name; 786 | in 787 | let some = fun s -> Some s in 788 | test [] ~f:some ~expect:[] "all empty"; 789 | test [1] ~f:some ~expect:[1] "some 1"; 790 | test [1;2;3] ~f:some ~expect:[1;2;3] "some 123"; 791 | test [1;1;1] ~f:some ~expect:[1;1;1] "some 111"; 792 | test [] ~f:(fun _ -> None) ~expect:[] "none"; 793 | test [1] ~f:(fun _ -> None) ~expect:[] "none"; 794 | test [1;2;3] ~f:(fun _ -> None) ~expect:[] "none"; 795 | let opt_of_cond c = fun x -> if c x then Some x else None in 796 | test [1;2;3] ~f:(opt_of_cond ((<) 1)) ~expect:[2;3] "opt_of_cond _ > 1"; 797 | test [1;2;3] ~f:(opt_of_cond ((<) 2)) ~expect:[3] "opt_of_cond _ > 2"; 798 | test [1;2;3] ~from:1 ~f:(opt_of_cond ((<) 1)) ~expect:[2;3] "opt_of_cond _ > 1 from 1"; 799 | test [1;2;3] ~from:2 ~f:(opt_of_cond ((<) 1)) ~expect:[3] "opt_of_cond _ > 1 from 2"; 800 | test [1;2;3] ~from:3 ~f:(opt_of_cond ((<) 1)) ~expect:[] "opt_of_cond _ > 1 from 3"; 801 | test [1;2;3] ~from:4 ~f:(opt_of_cond ((<) 1)) ~expect:[] "opt_of_cond _ > 1 from 4"; 802 | test [1;2;3] ~length:0 ~f:(opt_of_cond ((<) 1)) ~expect:[] "opt_of_cond _ > 1 length 0"; 803 | test [1;2;3] ~length:1 ~f:(opt_of_cond ((<) 1)) ~expect:[] "opt_of_cond _ > 1 length 1"; 804 | test [1;2;3] ~length:2 ~f:(opt_of_cond ((<) 1)) ~expect:[2] "opt_of_cond _ > 1 length 2"; 805 | test [1;2;3] ~length:3 ~f:(opt_of_cond ((<) 1)) ~expect:[2;3] "opt_of_cond _ > 1 length 3"; 806 | test [1;2;3] ~length:4 ~f:(opt_of_cond ((<) 1)) ~expect:[2;3] "opt_of_cond _ > 1 length 4"; 807 | test [1;2;3] ~from:2 ~length:2 ~f:(opt_of_cond ((<) 1)) ~expect:[3] "opt_of_cond _ > 1"; 808 | end; 809 | begin (* Test `filter` *) 810 | let test ?from ?length l ~f ~expect fmt = 811 | let name = ksprintf (fun s -> s) fmt in 812 | (* say "test: %s l : %d" name (List.length l); *) 813 | let s = List.filter_map l Chr.of_int |> Str.of_character_list in 814 | (* say "test: %s s: %d" name (Str.length s); *) 815 | let filtered = Str.filter ?from ?length s ~f:(fun c -> f (Chr.to_int c)) in 816 | let res_ints = Str.to_character_list filtered |> List.map ~f:Chr.to_int in 817 | let before = Str.to_character_list s |> List.map ~f:Chr.to_int in 818 | let pp l = List.map l (sprintf "%d") |> String.concat ~sep:";" in 819 | test_assertf (expect = res_ints) 820 | "test_filter: [%s=%s] → [%s] <> [%s] (%s)" 821 | (pp l) (pp before) (pp res_ints) (pp expect) 822 | name; 823 | in 824 | let always_true _ = true in 825 | let always_false _ = false in 826 | test [] ~f:always_true ~expect:[] "all empty"; 827 | test [1] ~f:always_true ~expect:[1] "true 1"; 828 | test [1;2;3] ~f:always_true ~expect:[1;2;3] "true 123"; 829 | test [1;1;1] ~f:always_true ~expect:[1;1;1] "some 111"; 830 | test [] ~f:always_false ~expect:[] "all empty"; 831 | test [1] ~f:always_false ~expect:[] "false 1"; 832 | test [1;2;3] ~f:always_false ~expect:[] "false 123"; 833 | let opt_of_cond c = fun x -> c x in 834 | test [1;2;3] ~f:(opt_of_cond ((<) 1)) ~expect:[2;3] "opt_of_cond _ > 1"; 835 | test [1;2;3] ~f:(opt_of_cond ((<) 2)) ~expect:[3] "opt_of_cond _ > 2"; 836 | test [1;2;3] ~from:1 ~f:(opt_of_cond ((<) 1)) ~expect:[2;3] "opt_of_cond _ > 1 from 1"; 837 | test [1;2;3] ~from:2 ~f:(opt_of_cond ((<) 1)) ~expect:[3] "opt_of_cond _ > 1 from 2"; 838 | test [1;2;3] ~from:3 ~f:(opt_of_cond ((<) 1)) ~expect:[] "opt_of_cond _ > 1 from 3"; 839 | test [1;2;3] ~from:4 ~f:(opt_of_cond ((<) 1)) ~expect:[] "opt_of_cond _ > 1 from 4"; 840 | test [1;2;3] ~length:0 ~f:(opt_of_cond ((<) 1)) ~expect:[] "opt_of_cond _ > 1 length 0"; 841 | test [1;2;3] ~length:1 ~f:(opt_of_cond ((<) 1)) ~expect:[] "opt_of_cond _ > 1 length 1"; 842 | test [1;2;3] ~length:2 ~f:(opt_of_cond ((<) 1)) ~expect:[2] "opt_of_cond _ > 1 length 2"; 843 | test [1;2;3] ~length:3 ~f:(opt_of_cond ((<) 1)) ~expect:[2;3] "opt_of_cond _ > 1 length 3"; 844 | test [1;2;3] ~length:4 ~f:(opt_of_cond ((<) 1)) ~expect:[2;3] "opt_of_cond _ > 1 length 4"; 845 | test [1;2;3] ~from:2 ~length:2 ~f:(opt_of_cond ((<) 1)) ~expect:[3] "opt_of_cond _ > 1"; 846 | end; 847 | 848 | 849 | begin (* Test the `split` function *) 850 | let test l ~on ~expect = 851 | let s = List.filter_map l Chr.of_int |> Str.of_character_list in 852 | let on_converted = 853 | match on with 854 | | `C c -> 855 | `Character (Option.value_exn ~msg:"Chr.of_int" (Chr.of_int c)) 856 | | `S l -> 857 | `String (List.filter_map l Chr.of_int |> Str.of_character_list) 858 | in 859 | let res = Str.split s ~on:on_converted in 860 | let res_list = List.map res ~f:str_to_int_list 861 | in 862 | test_assertf (res_list = expect) 863 | "split: l: %s = %s on:(%s)\n expect: {%s}\n res: {%s}: %s." 864 | (int_list_to_string l) 865 | (Str.to_string_hum s) 866 | (match on with 867 | | `C c -> sprintf "`Character %d" c 868 | | `S s -> sprintf "`Bytes %s" (int_list_to_string s)) 869 | (List.map ~f:int_list_to_string expect |> String.concat ~sep:" -- ") 870 | (List.map ~f:int_list_to_string res_list |> String.concat ~sep:" -- ") 871 | (List.map ~f:Str.to_string_hum res |> String.concat ~sep:"; ") 872 | in 873 | let on_one t ~expect = 874 | test t ~on:(`C 1) ~expect; 875 | test t ~on:(`S [1]) ~expect; 876 | in 877 | on_one [] ~expect:[[]]; 878 | on_one [2;3;] ~expect:[[2;3]]; 879 | on_one [1] ~expect:[[]; []]; 880 | on_one [2;1;2;3;4] ~expect:[[2]; [2;3;4]]; 881 | on_one [2;1;2;3;4;1] ~expect:[[2]; [2;3;4]; []]; 882 | on_one [1;2;1;2;3;4;1] ~expect:[[]; [2]; [2;3;4]; []]; 883 | on_one [1;1;2;1;2;3;4;1] ~expect:[[]; []; [2]; [2;3;4]; []]; 884 | 885 | let on_123 t ~expect = 886 | test t ~on:(`S [1;2;3]) ~expect; 887 | in 888 | on_123 [] ~expect:[ [] ]; 889 | on_123 [1] ~expect:[ [1] ]; 890 | on_123 [1;2;4;5] ~expect:[ [1;2;4;5;] ]; 891 | on_123 [1;2;3] ~expect:[[]; []]; 892 | on_123 [2;1;2;3;4] ~expect:[[2]; [4]]; 893 | on_123 [2;1;2;3;4;1] ~expect:[[2]; [4;1]]; 894 | on_123 [1;2;1;2;3;4;1] ~expect:[[1;2]; [4;1]]; 895 | on_123 [1;2;3;1;2;1;2;3;4;1] ~expect:[[]; [1;2]; [4;1]]; 896 | 897 | let on_empty t ~expect = 898 | test t ~on:(`S []) ~expect; 899 | in 900 | on_empty [] ~expect:[ [] ]; 901 | on_empty [1] ~expect:[ [1] ]; 902 | on_empty [1;2;4;5] ~expect:[ [1]; [2]; [4]; [5] ]; 903 | end; 904 | 905 | begin (* Test `find` *) 906 | let test ?from ?length ?should_find l ~f = 907 | let s = List.filter_map l Chr.of_int |> Str.of_character_list in 908 | let f x = f (Chr.to_int x) in 909 | let res = Str.find ?from ?length s ~f in 910 | test_assertf (res = should_find) 911 | "find: %s (%s, %s) expects %s but got %s" 912 | (str_to_int_list s |> int_list_to_string) 913 | (int_option_to_string from) 914 | (int_option_to_string length) 915 | (int_option_to_string should_find) 916 | (int_option_to_string res); 917 | if from = None then ( 918 | let from = Some 0 in 919 | let res = Str.find ?from ?length s ~f in 920 | test_assertf (res = should_find) 921 | "find: %s (%s →, %s) expects %s but got %s" 922 | (str_to_int_list s |> int_list_to_string) 923 | (int_option_to_string from) 924 | (int_option_to_string length) 925 | (int_option_to_string should_find) 926 | (int_option_to_string res); 927 | ); 928 | in 929 | test [] ~f:(fun _ -> true); 930 | test [] ~f:(fun _ -> false); 931 | test [1] ~f:(fun _ -> true) ~should_find:0; 932 | test [1] ~f:(fun _ -> false); 933 | test [1;2;3] ~f:(fun _ -> true) ~should_find:0; 934 | test [1;2;3] ~f:(fun _ -> false); 935 | test [1;2;3] ~f:(fun x -> x > 1) ~should_find:1; 936 | test [1;2;3] ~f:(fun x -> x < 1); 937 | test [1;1;1;2;3] ~from:2 ~f:(fun x -> x > 1) ~should_find:3; 938 | test [1;1;1;2;3] ~from:2 ~f:(fun x -> x < 1); 939 | test [1;1;1;2;3] ~from:2 ~f:(fun x -> x <= 1) ~should_find:2; 940 | test [1;2;3] ~from:2 ~f:(fun _ -> true) ~should_find:2; 941 | test [1;2;3] ~from:3 ~f:(fun _ -> true); 942 | test [1;2;3] ~from:(-2) ~f:(fun _ -> true) ~should_find:0; 943 | test [] ~length:0 ~f:(fun _ -> true); 944 | test [] ~length:0 ~f:(fun _ -> false); 945 | test [1;2;] ~length:0 ~f:(fun _ -> true); 946 | test [1;2;] ~length:0 ~f:(fun _ -> false); 947 | test [1;2;] ~length:1 ~f:(fun _ -> true) ~should_find:0; 948 | test [1;2;] ~length:1 ~f:(fun _ -> false); 949 | test [1;2;] ~from:1 ~length:1 ~f:(fun _ -> true) ~should_find:1; 950 | test [1;2;] ~from:2 ~length:1 ~f:(fun _ -> true); 951 | test [1;2;3;] ~from:2 ~length:1 ~f:(fun _ -> true) ~should_find:2; 952 | test [1;2;] ~from:1 ~length:2 ~f:(fun _ -> true) ~should_find:1; 953 | test [1;2;] ~from:0 ~length:(-1) ~f:(fun _ -> true); 954 | test [1;2;3;4] ~from:3 ~length:2 ~f:(fun _ -> true) ~should_find:3; 955 | end; 956 | 957 | begin (* Test `find_reverse` *) 958 | let test ?from ?length ?should_find l ~f = 959 | let s = List.filter_map l Chr.of_int |> Str.of_character_list in 960 | let f x = f (Chr.to_int x) in 961 | let res = Str.find_reverse ?from ?length s ~f in 962 | test_assertf (res = should_find) 963 | "find_reverse: %s (%s, %s) expects %s but got %s" 964 | (str_to_int_list s |> int_list_to_string) 965 | (int_option_to_string from) 966 | (int_option_to_string length) 967 | (int_option_to_string should_find) 968 | (int_option_to_string res); 969 | if from = None then ( 970 | let from = Some (Str.length s - 1) in 971 | let res = Str.find_reverse ?from ?length s ~f in 972 | test_assertf (res = should_find) 973 | "find_reverse: %s (%s → added, %s) expects %s but got %s" 974 | (str_to_int_list s |> int_list_to_string) 975 | (int_option_to_string from) 976 | (int_option_to_string length) 977 | (int_option_to_string should_find) 978 | (int_option_to_string res); 979 | ); 980 | in 981 | test [] ~f:(fun _ -> true); 982 | test [] ~f:(fun _ -> false); 983 | test [1] ~f:(fun _ -> true) ~should_find:0; 984 | test [1] ~f:(fun _ -> false); 985 | test [1;2;3] ~f:(fun _ -> true) ~should_find:2; 986 | test [1;2;3] ~f:(fun _ -> false); 987 | test [1;2;3] ~f:(fun x -> x <= 2) ~should_find:1; 988 | test [1;2;3] ~f:(fun x -> x < 1); 989 | test [1;1;1;2;3] ~from:2 ~f:(fun x -> x > 1); 990 | test [1;1;1;2;3] ~from:2 ~f:(fun x -> x < 1); 991 | test [1;1;1;2;3] ~from:2 ~f:(fun x -> x <= 1) ~should_find:2; 992 | test [1;2;3] ~from:2 ~f:(fun _ -> true) ~should_find:2; 993 | test [1;2;3] ~from:3 ~f:(fun _ -> true) ~should_find:2; 994 | test [1;2;3] ~from:(-2) ~f:(fun _ -> true); 995 | test [] ~length:0 ~f:(fun _ -> true); 996 | test [] ~length:0 ~f:(fun _ -> false); 997 | test [1;2;] ~length:0 ~f:(fun _ -> true); 998 | test [1;2;] ~length:0 ~f:(fun _ -> false); 999 | test [1;2;] ~length:1 ~f:(fun _ -> true) ~should_find:1; 1000 | test [1;2;] ~length:1 ~f:(fun _ -> false); 1001 | test [1;2;] ~from:1 ~length:1 ~f:(fun _ -> true) ~should_find:1; 1002 | test [1;2;] ~from:2 ~length:1 ~f:(fun _ -> true) ~should_find:1; 1003 | test [1;2;3;] ~from:2 ~length:1 ~f:(fun _ -> true) ~should_find:2; 1004 | test [1;2;] ~from:1 ~length:2 ~f:(fun _ -> true) ~should_find:1; 1005 | test [1;2;] ~from:0 ~length:(-1) ~f:(fun _ -> true); 1006 | test [1;2;3;4] ~from:3 ~length:2 ~f:(fun _ -> true) ~should_find:3; 1007 | end; 1008 | 1009 | begin (* Test `strip` *) 1010 | let test ?on ~whitespace l ~expect = 1011 | let s = List.filter_map l Chr.of_int |> Str.of_character_list in 1012 | let expect_str = 1013 | List.filter_map expect Chr.of_int |> Str.of_character_list in 1014 | let whitespace x = List.mem (Chr.to_int x) whitespace in 1015 | let res = Str.strip ?on ~whitespace s in 1016 | test_assertf (res = expect_str) 1017 | "strip: %s ~on:%s expects %s but got %s" 1018 | (str_to_int_list s |> int_list_to_string) 1019 | (match on with Some `Both -> "Both" | Some `Left -> "Left" 1020 | | Some `Right -> "Right" | None -> "None") 1021 | (expect |> int_list_to_string) 1022 | (str_to_int_list res |> int_list_to_string) 1023 | in 1024 | let whitespace = [0;1] in 1025 | test ?on:None ~whitespace [] ~expect:[]; 1026 | test ~on:`Both ~whitespace [] ~expect:[]; 1027 | test ~on:`Left ~whitespace [] ~expect:[]; 1028 | test ~on:`Right ~whitespace [] ~expect:[]; 1029 | test ?on:None ~whitespace [2] ~expect:[2]; 1030 | test ~on:`Both ~whitespace [2] ~expect:[2]; 1031 | test ~on:`Left ~whitespace [2] ~expect:[2]; 1032 | test ~on:`Right ~whitespace [2] ~expect:[2]; 1033 | test ?on:None ~whitespace [0;1;2;2] ~expect:[ 2;2]; 1034 | test ~on:`Both ~whitespace [0;1;2;2] ~expect:[ 2;2]; 1035 | test ~on:`Left ~whitespace [0;1;2;2] ~expect:[ 2;2]; 1036 | test ~on:`Right ~whitespace [0;1;2;2] ~expect:[0;1;2;2]; 1037 | test ?on:None ~whitespace [0;1;2;2;3;1;0;1] ~expect:[ 2;2;3; ]; 1038 | test ~on:`Both ~whitespace [0;1;2;2;3;1;0;1] ~expect:[ 2;2;3; ]; 1039 | test ~on:`Left ~whitespace [0;1;2;2;3;1;0;1] ~expect:[ 2;2;3;1;0;1]; 1040 | test ~on:`Right ~whitespace [0;1;2;2;3;1;0;1] ~expect:[0;1;2;2;3; ]; 1041 | test ?on:None ~whitespace [0;1;1;0;1] ~expect:[]; 1042 | test ~on:`Both ~whitespace [0;1;1;0;1] ~expect:[]; 1043 | test ~on:`Left ~whitespace [0;1;1;0;1] ~expect:[]; 1044 | test ~on:`Right ~whitespace [0;1;1;0;1] ~expect:[]; 1045 | let whitespace = [] in 1046 | test ?on:None ~whitespace [] ~expect:[]; 1047 | test ~on:`Both ~whitespace [] ~expect:[]; 1048 | test ~on:`Left ~whitespace [] ~expect:[]; 1049 | test ~on:`Right ~whitespace [] ~expect:[]; 1050 | test ?on:None ~whitespace [2;0;1;2] ~expect:[2;0;1;2]; 1051 | test ~on:`Both ~whitespace [2;0;1;2] ~expect:[2;0;1;2]; 1052 | test ~on:`Left ~whitespace [2;0;1;2] ~expect:[2;0;1;2]; 1053 | test ~on:`Right ~whitespace [2;0;1;2] ~expect:[2;0;1;2]; 1054 | end; 1055 | 1056 | begin (* Test `take_while{,_with_index}` *) 1057 | List.iter ~f:(fun subject -> 1058 | match Str.of_native_string subject with 1059 | | `Error _ -> () 1060 | | `Ok s -> 1061 | test_assertf (Str.take_while s ~f:(fun _ -> true) = s) 1062 | "take_while-true (%S)" subject; 1063 | test_assertf (Str.take_while s ~f:(fun _ -> false) = Str.empty) 1064 | "take_while-false (%S)" subject; 1065 | let length = Random.int (Str.length s + 1) in 1066 | test_assertf (Str.take_while_with_index s ~f:(fun idx _ -> idx < length) 1067 | = Str.sub_exn s ~index:0 ~length) 1068 | "take_while < length (%S)" subject; 1069 | let rint = Random.int 42 in 1070 | test_assertf ( 1071 | let prefix = Str.take_while s ~f:(fun c -> Chr.to_int c < rint) in 1072 | Str.fold prefix ~init:true ~f:(fun prev c -> 1073 | prev && Chr.to_int c < rint)) 1074 | "take_while: char < random-int (%S)" subject; 1075 | ) test_native_subjects; 1076 | end; 1077 | 1078 | let ints_to_str x = List.filter_map x Chr.of_int |> Str.of_character_list 1079 | and optionMap f = function | None -> None 1080 | | Some s -> f s 1081 | and defOptMap d f = function | None -> d 1082 | | Some s -> f s in 1083 | let pp_int_opt o = defOptMap "None" (fun x -> "Some " ^ string_of_int x) o in 1084 | 1085 | begin (* Test slice *) 1086 | let test ?start ?finish l ~expect = 1087 | let s = ints_to_str l in 1088 | let exp = optionMap (fun e -> Some (ints_to_str e)) expect 1089 | and res = Str.slice ?start ?finish s 1090 | in 1091 | test_assertf (res = exp) 1092 | "slice: %s ?start:(%s) ?finish:(%s) expects %s but got %s" 1093 | (int_list_to_string l) 1094 | (pp_int_opt start) 1095 | (pp_int_opt finish) 1096 | (defOptMap "None" int_list_to_string expect) 1097 | (defOptMap "None" (fun r -> str_to_int_list r |> int_list_to_string) res) 1098 | in 1099 | test [] ~expect:(Some []); 1100 | test [1] ~expect:(Some [1]); 1101 | test [1;2;3] ~expect:(Some [1;2;3]); 1102 | test ~start:0 [] ~expect:(Some []); 1103 | test ~start:0 [1] ~expect:(Some [1]); 1104 | test ~start:0 [1;2;3] ~expect:(Some [1;2;3]); 1105 | test ~start:0 ~finish:0 [] ~expect:(Some []); 1106 | test ~start:0 ~finish:1 [1] ~expect:(Some [1]); 1107 | test ~start:0 ~finish:3 [1;2;3] ~expect:(Some [1;2;3]); 1108 | test ~start:1 [1;2;3] ~expect:(Some [2;3]); 1109 | test ~start:2 [1;2;3] ~expect:(Some [3]); 1110 | test ~finish:1 [1;2;3] ~expect:(Some [1]); 1111 | test ~finish:2 [1;2;3] ~expect:(Some [1;2]); 1112 | test ~start:1 ~finish:1 [1;2;3] ~expect:(Some []); 1113 | test ~start:1 ~finish:2 [1;2;3] ~expect:(Some [2]); 1114 | 1115 | test ~start:(-1) [] ~expect:None; 1116 | test ~start:(-1) [1;2] ~expect:None; 1117 | test ~start:1 [] ~expect:None; 1118 | test ~start:2 [1;2] ~expect:None; 1119 | 1120 | test ~finish:(-1) [] ~expect:None; 1121 | test ~finish:(-1) [1;2] ~expect:None; 1122 | 1123 | test ~finish:1 [] ~expect:None; 1124 | test ~finish:2 [1;2] ~expect:(Some [1;2]); 1125 | test ~finish:3 [1;2] ~expect:None; 1126 | 1127 | end; 1128 | begin (* test `rev` *) 1129 | let into_str l = List.filter_map l Chr.of_int |> Str.of_character_list in 1130 | let pp l = List.map l (sprintf "%d") |> String.concat ~sep:";" in 1131 | let test l ~expect fmt = 1132 | let name = ksprintf (fun s -> s) fmt in 1133 | let s = into_str l in 1134 | let reversed = Str.rev s in 1135 | let res_ints = Str.to_character_list reversed |> List.map ~f:Chr.to_int in 1136 | let before = Str.to_character_list s |> List.map ~f:Chr.to_int in 1137 | test_assertf (expect = res_ints) 1138 | "test_mapi: [%s=%s] → [%s] <> [%s] (%s)" 1139 | (pp l) (pp before) (pp res_ints) (pp expect) 1140 | name; 1141 | in 1142 | test [] ~expect:[] "empty"; 1143 | test [1;2;3] ~expect:[3;2;1] "simple 123->321"; 1144 | test [1] ~expect:[1] "single 1"; 1145 | end; 1146 | begin (* Test map *) 1147 | let into_str l = List.filter_map l Chr.of_int |> Str.of_character_list in 1148 | let pp l = List.map l (sprintf "%d") |> String.concat ~sep:";" in 1149 | let test_explicit l ~f ~expect fmt = 1150 | let name = ksprintf (fun s -> s) fmt in 1151 | let s = into_str l in 1152 | let test_fn_exn x = 1153 | let res = f (Chr.to_int x) in 1154 | match Chr.of_int res with 1155 | | Some res -> res 1156 | | None -> failwith "bad char" 1157 | in 1158 | let mapped = Str.map s ~f:test_fn_exn in 1159 | let res_ints = Str.to_character_list mapped |> List.map ~f:Chr.to_int in 1160 | let before = Str.to_character_list s |> List.map ~f:Chr.to_int in 1161 | test_assertf (expect = res_ints) 1162 | "test_mapi: [%s=%s] → [%s] <> [%s] (%s)" 1163 | (pp l) (pp before) (pp res_ints) (pp expect) 1164 | name; 1165 | in 1166 | test_explicit [1;2;1] ~f:(fun x -> x + 1) ~expect:[2;3;2] "map"; 1167 | test_explicit [1;2;2] ~f:(fun x -> x + 1) ~expect:[2;3;3] "map"; 1168 | test_explicit [1;2;3;0] ~f:(fun x -> x) ~expect:[1;2;3;0] "map"; 1169 | (* Make sure map_slow works as well. *) 1170 | test_explicit (List.init 10000 (fun x -> x mod 50)) 1171 | ~f:(fun x -> (x + 1) mod 50) 1172 | ~expect:(List.init 10000 (fun x -> (x + 1) mod 50)) 1173 | "map large"; 1174 | end; 1175 | begin (* Test mapi *) 1176 | let char_upper = 199 (* Not really, but not / 5 *) 1177 | and char_lower = 33 in (* Before this come the odd ones in ASCII *) 1178 | let d = char_upper - char_lower in 1179 | let into_str l = List.filter_map l Chr.of_int |> Str.of_character_list in 1180 | let pp l = List.map l (sprintf "%d") |> String.concat ~sep:";" in 1181 | let int_to_char i = 1182 | match Chr.of_int ((i mod d) + char_lower) with 1183 | | None -> failwith "bad logic" 1184 | | Some c -> c 1185 | in 1186 | let rec make_list acc n = 1187 | if n < 0 then acc else make_list ((int_to_char n)::acc) (n - 1) 1188 | in 1189 | let test n = 1190 | let lst = make_list [] n in 1191 | let dum = Str.make (n + 1) (int_to_char 0) in 1192 | let exp = Str.of_character_list lst in 1193 | let res = Str.mapi dum ~f:(fun i _ -> int_to_char i) in 1194 | test_assertf (res = exp) "mapi: %d [%s] [%s]" 1195 | n (Str.to_native_string exp) (Str.to_native_string res) 1196 | in 1197 | let test_explicit l ~f ~expect fmt = 1198 | let name = ksprintf (fun s -> s) fmt in 1199 | let s = into_str l in 1200 | let test_fn_exn i x = 1201 | let res = f i (Chr.to_int x) in 1202 | match Chr.of_int res with 1203 | | Some res -> res 1204 | | None -> failwith "bad char" 1205 | in 1206 | let mapped = Str.mapi s ~f:test_fn_exn in 1207 | let res_ints = Str.to_character_list mapped |> List.map ~f:Chr.to_int in 1208 | let before = Str.to_character_list s |> List.map ~f:Chr.to_int in 1209 | test_assertf (expect = res_ints) 1210 | "test_mapi: [%s=%s] → [%s] <> [%s] (%s)" 1211 | (pp l) (pp before) (pp res_ints) (pp expect) 1212 | name; 1213 | in 1214 | test 10; 1215 | test 10000; (* to cover that slow case *) 1216 | test_explicit [1;2;1] ~f:(fun _ x -> x + 1) ~expect:[2;3;2] "mapi inc"; 1217 | test_explicit [1;2;2] ~f:(fun i x -> i) ~expect:[0;1;2] "mapi indexed"; 1218 | (* Make sure mapi_slow works as well. *) 1219 | test_explicit (List.init 10000 (fun x -> x mod 50)) 1220 | ~f:(fun _ x -> (x + 1) mod 50) 1221 | ~expect:(List.init 10000 (fun x -> (x + 1) mod 50)) 1222 | "mapi large"; 1223 | test_explicit (List.init 10000 (fun x -> x mod 50)) 1224 | ~f:(fun i _ -> i mod 50) 1225 | ~expect:(List.init 10000 (fun x -> x mod 50)) 1226 | "mapi large"; 1227 | end; 1228 | begin (* Test `map2_exn` *) 1229 | let into_str l = List.filter_map l Chr.of_int |> Str.of_character_list in 1230 | let pp l = List.map l (sprintf "%d") |> String.concat ~sep:";" in 1231 | let test l1 l2 ~f ~expect fmt = 1232 | let name = ksprintf (fun s -> s) fmt in 1233 | let s1 = into_str l1 in 1234 | let s2 = into_str l2 in 1235 | let test_fn_exn x y = 1236 | let res = f (Chr.to_int x) (Chr.to_int y) in 1237 | match Chr.of_int res with 1238 | | Some res -> res 1239 | | None -> failwith "bad char" 1240 | in 1241 | let mapped = Str.map2_exn s1 s2 ~f:test_fn_exn in 1242 | let res_ints = Str.to_character_list mapped |> List.map ~f:Chr.to_int in 1243 | let before1 = Str.to_character_list s1 |> List.map ~f:Chr.to_int in 1244 | let before2 = Str.to_character_list s2 |> List.map ~f:Chr.to_int in 1245 | test_assertf (expect = res_ints) 1246 | "test_map2_exn: [%s,%s=%s,%s] → [%s] <> [%s] (%s)" 1247 | (pp l1) (pp l2) (pp before1) (pp before2) (pp res_ints) (pp expect) 1248 | name; 1249 | in 1250 | let test_fail l1 l2 ~f fmt = 1251 | let name = ksprintf (fun s -> s) fmt in 1252 | let s1 = into_str l1 in 1253 | let s2 = into_str l2 in 1254 | let failed = 1255 | begin try 1256 | Str.map2_exn s1 s2 ~f:f |> ignore; 1257 | false 1258 | with _ -> true 1259 | end 1260 | in 1261 | test_assertf failed 1262 | "test_map2_exn: should have failed on [%s,%s] (%s)" 1263 | (pp l1) (pp l2) 1264 | name; 1265 | in 1266 | test [] [] ~f:(fun x _ -> x) ~expect:[] "all empty"; 1267 | test [1] [2] ~f:(fun x _ -> x) ~expect:[1] "1"; 1268 | test [1] [2] ~f:(fun _ y -> y) ~expect:[2] "2"; 1269 | test [1;2;3;4] [2;3;4;3] ~f:(fun _ y -> y) ~expect:[2;3;4;3] "2343"; 1270 | test [1;2;1;0] [2;1;1;3] ~f:(fun x y -> x + y) ~expect:[3;3;2;3] "3323"; 1271 | (* Make sure we hit map2_slow: *) 1272 | test (List.init 10000 (fun _ -> 0)) (List.init 10000 (fun _ -> 1)) 1273 | ~f:(fun x _ -> x) 1274 | ~expect:(List.init 10000 (fun _ -> 0)) "long str"; 1275 | test (List.init 10000 (fun _ -> 0)) (List.init 10000 (fun _ -> 1)) 1276 | ~f:(fun _ y -> y) 1277 | ~expect:(List.init 10000 (fun _ -> 1)) "long str"; 1278 | (* Make sure we fail on lists of different lengths: *) 1279 | test_fail [1;2] [] ~f:(fun x y -> x) "unequal length"; 1280 | test_fail [] [1] ~f:(fun x y -> y) "other side unequal length"; 1281 | test_fail [1;2;3] [1] ~f:(fun x y -> y) "nonempty unequal length"; 1282 | end; 1283 | begin (* Test `foldi` *) 1284 | let into_str l = List.filter_map l Chr.of_int |> Str.of_character_list in 1285 | let pp l = List.map l (sprintf "%d") |> String.concat ~sep:";" in 1286 | let test lst ~init ~expect fmt = 1287 | let name = ksprintf (fun s -> s) fmt in 1288 | let s = into_str lst in 1289 | let folded = Str.foldi s ~init ~f:(fun i a c -> i + a + Chr.to_int c) in 1290 | test_assertf (folded = expect) 1291 | "test_foldi: init %d + the indices and chars of [%s] → [%d] <> %d %s" 1292 | init (pp lst) folded expect name; 1293 | in 1294 | test [] ~init:0 ~expect:0 "empty"; 1295 | test [0] ~init:100 ~expect:100 "singleton, zero-indexed"; 1296 | test [0;0;0;0;0] ~init:0 ~expect:10 1297 | "adding the indices of 5 long string should be ten"; 1298 | test [100;100;100;100] ~init:0 ~expect:406 1299 | "test uses string values." 1300 | end; 1301 | begin (* Test `fold2_exn` *) 1302 | let into_str l = List.filter_map l Chr.of_int |> Str.of_character_list in 1303 | let pp l = List.map l (sprintf "%d") |> String.concat ~sep:";" in 1304 | let test l1 l2 ~f ~init ~expect fmt = 1305 | let name = ksprintf (fun s -> s) fmt in 1306 | let s1 = into_str l1 in 1307 | let s2 = into_str l2 in 1308 | let test_fn_exn i x y = f i (Chr.to_int x) (Chr.to_int y) in 1309 | let res = Str.fold2_exn s1 s2 ~f:test_fn_exn ~init:init in 1310 | let before1 = Str.to_character_list s1 |> List.map ~f:Chr.to_int in 1311 | let before2 = Str.to_character_list s2 |> List.map ~f:Chr.to_int in 1312 | test_assertf (expect = res) 1313 | "test_fold2_exn: [%s,%s=%s,%s] init:%s → %s <> %s (%s)" 1314 | (pp l1) (pp l2) (pp before1) (pp before2) ((sprintf "%d") init) 1315 | ((sprintf "%d") res) ((sprintf "%d") expect) 1316 | name; 1317 | in 1318 | let test_fail l1 l2 ~f ~init fmt = 1319 | let name = ksprintf (fun s -> s) fmt in 1320 | let s1 = into_str l1 in 1321 | let s2 = into_str l2 in 1322 | let failed = 1323 | begin try 1324 | Str.fold2_exn s1 s2 ~f:f ~init:init |> ignore; 1325 | false 1326 | with _ -> true 1327 | end 1328 | in 1329 | test_assertf failed 1330 | "test_fold2_exn: should have failed on [%s,%s] (%s)" 1331 | (pp l1) (pp l2) 1332 | name; 1333 | in 1334 | test [] [] ~f:(fun _ _ _ -> 1) ~init:0 ~expect:0 "nothing"; 1335 | test [1] [2] ~f:(fun i _ _ -> i) ~init:1 ~expect:1 "1"; 1336 | test [1] [2] ~f:(fun _ _ y -> y) ~init:1 ~expect:2 "2"; 1337 | test [0;1] [0;1] ~f:(fun i x y -> x + y + i) 1338 | ~init:1 ~expect:3 "3"; 1339 | (* Make sure we fail on lists of different lengths: *) 1340 | test_fail [1;2] [] ~f:(fun _ x y -> 1) ~init:0 "unequal length"; 1341 | test_fail [] [1] ~f:(fun _ x y -> 1) ~init:0 "other side unequal length"; 1342 | test_fail [1;2;3] [1] ~f:(fun _ x y -> 1) ~init:0 "nonempty unequal length"; 1343 | end; 1344 | begin (* Test is_prefix *) 1345 | let test l ~p ~expect = 1346 | let t = ints_to_str l 1347 | and prefix = ints_to_str p in 1348 | test_assertf (expect = Str.is_prefix t ~prefix) 1349 | "is_prefix: %s prefix:(%s) expects %B" 1350 | (int_list_to_string l) 1351 | (int_list_to_string p) 1352 | expect 1353 | in 1354 | test [] ~p:[] ~expect:true; 1355 | test [1;2;3] ~p:[] ~expect:true; 1356 | test [1;2;3] ~p:[1] ~expect:true; 1357 | test [1;2;3] ~p:[1;2;3] ~expect:true; 1358 | test [1;2;3] ~p:[4] ~expect:false; 1359 | test [] ~p:[1] ~expect:false; 1360 | test [1;2;3] ~p:[1;2;3;4] ~expect:false; 1361 | end; 1362 | begin (* Test is_suffix *) 1363 | let test l ~s ~expect = 1364 | let t = ints_to_str l 1365 | and suffix = ints_to_str s in 1366 | test_assertf (expect = Str.is_suffix t ~suffix) 1367 | "is_suffix: %s suffix:(%s) expects %B" 1368 | (int_list_to_string l) 1369 | (int_list_to_string s) 1370 | expect 1371 | in 1372 | test [] ~s:[] ~expect:true; 1373 | test [1;2;3] ~s:[] ~expect:true; 1374 | test [1;2;3] ~s:[3] ~expect:true; 1375 | test [1;2;3] ~s:[1;2;3] ~expect:true; 1376 | test [1;2;3] ~s:[4] ~expect:false; 1377 | test [] ~s:[1] ~expect:false; 1378 | test [1;2;3] ~s:[1;2;3;4] ~expect:false; 1379 | end; 1380 | begin (* Test chop_prefix *) 1381 | let test l ~p ~expect = 1382 | let t = ints_to_str l 1383 | and prefix = ints_to_str p in 1384 | let res = Str.chop_prefix t ~prefix 1385 | and exp = optionMap (fun x -> Some (ints_to_str x)) expect in 1386 | test_assertf (exp = res) 1387 | "chop_prefix: %s prefix:(%s) expected %s but got %s" 1388 | (int_list_to_string l) 1389 | (int_list_to_string p) 1390 | (defOptMap "None" int_list_to_string expect) 1391 | (defOptMap "None" Str.to_native_string res) 1392 | in 1393 | test [] ~p:[] ~expect:(Some []); 1394 | test [1;2;3] ~p:[] ~expect:(Some [1;2;3]); 1395 | test [1;2;3] ~p:[1] ~expect:(Some [2;3]); 1396 | test [1;2;3] ~p:[1;2;3] ~expect:(Some []); 1397 | test [1;2;3] ~p:[4] ~expect:None; 1398 | test [] ~p:[1] ~expect:None; 1399 | test [1;2;3] ~p:[1;2;3;4] ~expect:None; 1400 | end; 1401 | begin (* Test chop_suffix *) 1402 | let test l ~s ~expect = 1403 | let t = ints_to_str l 1404 | and suffix = ints_to_str s in 1405 | let res = Str.chop_suffix t ~suffix 1406 | and exp = optionMap (fun x -> Some (ints_to_str x)) expect in 1407 | test_assertf (exp = res) 1408 | "chop_suffix: %s suffix:(%s) expected %s but got %s" 1409 | (int_list_to_string l) 1410 | (int_list_to_string s) 1411 | (defOptMap "None" int_list_to_string expect) 1412 | (defOptMap "None" Str.to_native_string res) 1413 | in 1414 | test [] ~s:[] ~expect:(Some []); 1415 | test [1;2;3] ~s:[] ~expect:(Some [1;2;3]); 1416 | test [1;2;3] ~s:[3] ~expect:(Some [1;2]); 1417 | test [1;2;3] ~s:[1;2;3] ~expect:(Some []); 1418 | test [1;2;3] ~s:[4] ~expect:None; 1419 | test [] ~s:[1] ~expect:None; 1420 | test [1;2;3] ~s:[1;2;3;4] ~expect:None; 1421 | end; 1422 | 1423 | begin (* Test split_at, take and drop. *) 1424 | let test l n (le,ri) = 1425 | let s = ints_to_str l 1426 | and le' = ints_to_str le 1427 | and ri' = ints_to_str ri in 1428 | let (rl,rr) as res = Str.split_at s n 1429 | and tres = Str.take s n 1430 | and dres = Str.drop s n in 1431 | test_assertf (res = (le',ri') && tres = le' && dres = ri') 1432 | "split_at: %s %d expects (%s,%s) but got (%s,%s)" 1433 | (int_list_to_string l) 1434 | n 1435 | (int_list_to_string le) 1436 | (int_list_to_string ri) 1437 | (Str.to_native_string rl) 1438 | (Str.to_native_string rr) 1439 | in 1440 | test [] (-1) ([],[]); 1441 | test [] (0) ([],[]); 1442 | test [] (1) ([],[]); 1443 | test [1;2;3] (-1) ([],[1;2;3]); 1444 | test [1;2;3] (0) ([],[1;2;3]); 1445 | test [1;2;3] (1) ([1],[2;3]); 1446 | test [1;2;3] (2) ([1;2],[3]); 1447 | test [1;2;3] (3) ([1;2;3],[]); 1448 | test [1;2;3] (4) ([1;2;3],[]); 1449 | end; 1450 | 1451 | (* #### BENCHMARKS #### *) 1452 | 1453 | let converted_dna_reads = 1454 | let all = 1455 | List.filter_map dna_test_subjects (fun s -> 1456 | match Str.of_native_string s with 1457 | | `Ok cs -> Some cs 1458 | | `Error _ -> None) in 1459 | test_assertf (List.length dna_test_subjects = List.length all) 1460 | "Convert DNA (common denominator) %d <> %d" 1461 | (List.length dna_test_subjects) (List.length all); 1462 | all in 1463 | 1464 | let implementation = test_name in 1465 | Benchmark.declare 1466 | ~experiment:(sprintf "Concat%d" (List.length converted_dna_reads)) 1467 | ~implementation 1468 | (fun () -> 1469 | let _ = 1470 | Str.concat ~sep:Str.empty converted_dna_reads in 1471 | ()); 1472 | 1473 | let cated = 1474 | Str.concat ~sep:Str.empty converted_dna_reads in 1475 | Benchmark.declare 1476 | ~experiment:(sprintf "Length") 1477 | ~implementation 1478 | ~repeats:10000 1479 | (fun () -> ignore (Str.length cated)); 1480 | let sub = 1481 | List.nth_exn converted_dna_reads (List.length converted_dna_reads - 30) in 1482 | let sub_index = 1 in 1483 | let sub_length = Str.length sub - sub_index in 1484 | let from = 2 in 1485 | Benchmark.declare 1486 | ~experiment:(sprintf "Find") 1487 | ~implementation 1488 | ~repeats:5 1489 | (fun () -> ignore ( 1490 | Str.index_of_string cated ~sub ~from ~sub_index ~sub_length 1491 | )); 1492 | Benchmark.declare 1493 | ~experiment:(sprintf "R-Find") 1494 | ~implementation 1495 | ~repeats:5 1496 | (fun () -> ignore ( 1497 | Str.index_of_string_reverse cated ~sub ~from ~sub_index ~sub_length 1498 | )); 1499 | 1500 | Benchmark.declare 1501 | ~experiment:(sprintf "Split On Char") 1502 | ~implementation 1503 | ~repeats:40 1504 | (fun () -> 1505 | match Chr.of_int (int_of_char 'A') with 1506 | | None -> say "Skipping split test since can't convert 'A'" 1507 | | Some a -> ignore (List.iter converted_dna_reads 1508 | (fun s -> ignore (Str.split ~on:(`Character a) s)))); 1509 | 1510 | () 1511 | 1512 | (*M 1513 | 1514 | A UTF-8-Specific Test 1515 | --------------------- 1516 | 1517 | M*) 1518 | let utf8_specific_test () = 1519 | let module Utf8 = Int_utf8_character in 1520 | say "### UTF-8 Test"; 1521 | let ground_truth = [ 1522 | "$", 0x24; (* ASCII *) 1523 | "¢", 0xA2; (* Latin-something *) 1524 | "€", 0x20AC; (* Multi-byte *) 1525 | "\xF0\xA4\xAD\xA2", 0x24B62; (* Another example from Wikipedia *) 1526 | "í", 0xED; (* Spanish stuff *) 1527 | "œ", 0x153; (* French stuff *) 1528 | "ß", 0xDF; (* German Stuff *) 1529 | ] in 1530 | List.iter ground_truth (fun (s, i) -> 1531 | let actual_test = Utf8.to_native_string i in 1532 | test_assertf (actual_test = s) "utf8_specific_test: (%S, %d) Vs %S" 1533 | s i actual_test; 1534 | begin match Utf8.read_from_native_string ~buf:s ~index:0 with 1535 | | Some (v, sz) -> 1536 | test_assertf (v = i) 1537 | "utf8_specific_test: Utf8.read_from_native_string: %d <> %d" v i; 1538 | test_assertf (sz = String.length s) 1539 | "utf8_specific_test: Utf8.read_from_native_string: size %d Vs %S" sz s; 1540 | | None -> 1541 | test_assertf false "utf8_specific_test: Utf8.read_from_native_string fail" 1542 | end 1543 | ); 1544 | () 1545 | 1546 | (*M 1547 | 1548 | Test Instantiations 1549 | ------------------- 1550 | 1551 | M*) 1552 | let () = 1553 | do_basic_test (module struct 1554 | let test_name = "Both natives" 1555 | let can_have_wrong_char = false 1556 | module Chr = Native_character 1557 | module Str = Native_bytes 1558 | end); 1559 | do_basic_test (module struct 1560 | let test_name = "List of natives" 1561 | let can_have_wrong_char = false 1562 | module Chr = Native_character 1563 | module Str = List_of.Make (Native_character) 1564 | end); 1565 | do_basic_test (module struct 1566 | let test_name = "List of UTF-8 Integers" 1567 | let can_have_wrong_char = true 1568 | module Chr = Int_utf8_character 1569 | module Str = List_of.Make (Int_utf8_character) 1570 | end); 1571 | do_basic_test (module struct 1572 | let test_name = "Of_mutable(utf8-int array)" 1573 | let can_have_wrong_char = true 1574 | module Chr = Int_utf8_character 1575 | module Str = Of_mutable.Make (struct 1576 | type character = Chr.t 1577 | type t = int array 1578 | let empty = [| |] 1579 | let max_string_length = Some Sys.max_array_length 1580 | let length = Array.length 1581 | let make = Array.make 1582 | let get t i = t.(i) 1583 | let set t i c = t.(i) <- c 1584 | let blit = Array.blit 1585 | let is_whitespace = Chr.is_whitespace 1586 | let compare a b = 1587 | let open Array in 1588 | let len = min (length a) (length b) in 1589 | let res = ref 0 in 1590 | try 1591 | for i = 0 to len - 1 do 1592 | let cmp = compare (get a i) (get b i) in 1593 | if cmp = 0 1594 | then () 1595 | else (res := cmp; raise Not_found) 1596 | done; 1597 | compare (length a) (length b) 1598 | with _ -> !res 1599 | 1600 | let compare_char = Int.compare 1601 | 1602 | let of_native_substring natstr ~offset ~length = 1603 | Conversions.of_native_substring 1604 | ~empty ~init:(fun () -> ref []) 1605 | ~on_new_character:(fun x c -> x := c :: !x) 1606 | ~finalize:(fun x -> List.rev !x |> Array.of_list) 1607 | ~read_character_from_native_string:Chr.read_from_native_string 1608 | natstr ~offset ~length 1609 | 1610 | let of_native_string natstr = 1611 | Conversions.of_native_string 1612 | of_native_substring natstr 1613 | 1614 | let to_native_string l = 1615 | Conversions.to_native_string_knowing_size 1616 | ~future_size:(fun l -> 1617 | let s = ref 0 in 1618 | Array.iter l ~f:(fun c -> s := !s + Chr.size c); 1619 | !s) 1620 | ~iter:(fun a ~f -> Array.iter a ~f) 1621 | ~write_char_to_native_bytes:Chr.write_to_native_bytes 1622 | l 1623 | |> Bytes.to_string 1624 | 1625 | end) 1626 | end); 1627 | do_basic_test (module struct 1628 | let test_name = "Of_mutable(int8 Bigarray1.t)" 1629 | let can_have_wrong_char = false 1630 | open Bigarray 1631 | type char_bigarray = (char, int8_unsigned_elt, c_layout) Array1.t 1632 | module Chr = Native_character 1633 | module Str = Of_mutable.Make (struct 1634 | type character = Chr.t 1635 | type t = char_bigarray 1636 | let empty = Array1.create char c_layout 0 1637 | let max_string_length = None 1638 | let length = Array1.dim 1639 | let make len c = 1640 | let res = Array1.create char c_layout len in 1641 | Array1.fill res c; 1642 | res 1643 | let is_whitespace = Chr.is_whitespace 1644 | let get t i = Array1.get t i 1645 | let set t i c = Array1.set t i c 1646 | let blit ~src ~src_pos ~dst ~dst_pos ~len = 1647 | Array1.(blit (sub src src_pos len) (sub dst dst_pos len)) 1648 | 1649 | let compare a b = 1650 | let len = min (length a) (length b) in 1651 | let res = ref 0 in 1652 | try 1653 | for i = 0 to len - 1 do 1654 | let cmp = compare (get a i) (get b i) in 1655 | if cmp = 0 1656 | then () 1657 | else (res := cmp; raise Not_found) 1658 | done; 1659 | compare (length a) (length b) 1660 | with _ -> !res 1661 | 1662 | 1663 | let compare_char = Char.compare 1664 | 1665 | let of_native_substring natstr ~offset ~length = 1666 | Conversions.of_native_substring 1667 | ~empty ~init:(fun () -> ref []) 1668 | ~on_new_character:(fun x c -> x := c :: !x) 1669 | ~finalize:(fun x -> 1670 | Array1.of_array char c_layout (List.rev !x |> Array.of_list)) 1671 | ~read_character_from_native_string:Chr.read_from_native_string 1672 | natstr ~offset ~length 1673 | 1674 | let of_native_substring natstr ~offset ~length = 1675 | try 1676 | let res = Array1.create char c_layout length in 1677 | for i = 0 to length - 1 do 1678 | Array1.set res i (natstr.[i + offset]) 1679 | done; 1680 | `Ok res 1681 | with _ -> `Error `out_of_bounds 1682 | 1683 | let of_native_string natstr = 1684 | Conversions.of_native_string of_native_substring natstr 1685 | 1686 | let to_native_string l = 1687 | String.init (Array1.dim l) (Array1.get l) 1688 | 1689 | end) 1690 | end); 1691 | utf8_specific_test (); 1692 | say "\n## Benchmarks\n\n"; 1693 | say "- `uname -a`: "; 1694 | ignore (Unix.system "uname -a"); 1695 | say "- Word size: %d\n\n%s\n" Sys.word_size (Benchmark.to_string ()); 1696 | exit !return_code 1697 | --------------------------------------------------------------------------------