├── src ├── astring_top.mllib ├── astring.mllib ├── astring_top.ml ├── astring.ml ├── astring_top_init.ml ├── astring_unsafe.ml ├── astring_base.ml ├── astring_char.ml ├── astring_escape.ml ├── astring_sub.ml ├── astring_string.ml └── astring.mli ├── .merlin ├── .ocp-indent ├── doc ├── .merlin └── index.mld ├── .gitignore ├── _tags ├── pkg ├── pkg.ml └── META ├── test ├── total.ml ├── test.ml ├── examples.ml ├── testing.mli ├── test_char.ml ├── testing.ml └── test_string.ml ├── opam ├── LICENSE.md ├── CHANGES.md └── README.md /src/astring_top.mllib: -------------------------------------------------------------------------------- 1 | Astring_top -------------------------------------------------------------------------------- /.merlin: -------------------------------------------------------------------------------- 1 | S src 2 | S test 3 | B _build/** 4 | -------------------------------------------------------------------------------- /.ocp-indent: -------------------------------------------------------------------------------- 1 | strict_with=always,match_clause=4,strict_else=never -------------------------------------------------------------------------------- /doc/.merlin: -------------------------------------------------------------------------------- 1 | # Generated by brzo 2 | S ./** 3 | B /Users/dbuenzli/sync/repos/astring/doc/_b0/brzo/ocaml-doc/** -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _b0 2 | _build 3 | tmp 4 | CLOCK.org 5 | *~ 6 | \.\#* 7 | \#*# 8 | *.native 9 | *.byte 10 | *.install 11 | -------------------------------------------------------------------------------- /src/astring.mllib: -------------------------------------------------------------------------------- 1 | Astring_unsafe 2 | Astring_base 3 | Astring_escape 4 | Astring_char 5 | Astring_sub 6 | Astring_string 7 | Astring 8 | -------------------------------------------------------------------------------- /_tags: -------------------------------------------------------------------------------- 1 | true : bin_annot, safe_string 2 | <_b0> : -traverse 3 | : include 4 | : package(compiler-libs.toplevel) 5 | : include 6 | -------------------------------------------------------------------------------- /doc/index.mld: -------------------------------------------------------------------------------- 1 | {0 Astring {%html: %%VERSION%%%}} 2 | 3 | Astring exposes an alternative [String] module for OCaml. This module 4 | tries to balance minimality and expressiveness for basic, index-free, 5 | string processing and provides types and functions for substrings, 6 | string sets and string maps. 7 | 8 | {1:api API} 9 | 10 | {!modules: 11 | Astring 12 | } 13 | -------------------------------------------------------------------------------- /pkg/pkg.ml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ocaml 2 | #use "topfind" 3 | #require "topkg" 4 | open Topkg 5 | 6 | let () = 7 | Pkg.describe "astring" @@ fun c -> 8 | Ok [ Pkg.mllib ~api:["Astring"] "src/astring.mllib"; 9 | Pkg.mllib ~api:[] "src/astring_top.mllib"; 10 | Pkg.lib "src/astring_top_init.ml"; 11 | Pkg.doc "test/examples.ml"; 12 | Pkg.test "test/test"; 13 | Pkg.test "test/examples"; ] 14 | -------------------------------------------------------------------------------- /test/total.ml: -------------------------------------------------------------------------------- 1 | 2 | open Astring 3 | 4 | (* Total *) 5 | let find_all p s = 6 | let rec loop acc i = match String.find ~start:i p s with 7 | | None -> List.rev acc 8 | | Some i -> loop (i :: acc) (i + 1) 9 | in 10 | loop [] 0 11 | 12 | (* Not total *) 13 | let find_all p s = 14 | let rec loop acc i = 15 | if i > String.length s then List.rev acc else 16 | match String.find ~start:i p s with 17 | | None -> List.rev acc 18 | | Some i -> loop (i :: acc) (i + 1) 19 | in 20 | loop [] 0 21 | -------------------------------------------------------------------------------- /pkg/META: -------------------------------------------------------------------------------- 1 | description = "Alternative String module for OCaml" 2 | version = "%%VERSION_NUM%%" 3 | requires = "" 4 | archive(byte) = "astring.cma" 5 | archive(native) = "astring.cmxa" 6 | plugin(byte) = "astring.cma" 7 | plugin(native) = "astring.cmxs" 8 | 9 | package "top" ( 10 | description = "Astring toplevel support" 11 | version = "%%VERSION_NUM%%" 12 | requires = "astring" 13 | archive(byte) = "astring_top.cma" 14 | archive(native) = "astring_top.cmxa" 15 | plugin(byte) = "astring_top.cma" 16 | plugin(native) = "astring_top.cmxs" 17 | ) 18 | -------------------------------------------------------------------------------- /opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "Daniel Bünzli " 3 | authors: ["The astring programmers"] 4 | homepage: "https://erratique.ch/software/astring" 5 | doc: "https://erratique.ch/software/astring/doc" 6 | dev-repo: "git+http://erratique.ch/repos/astring.git" 7 | bug-reports: "https://github.com/dbuenzli/astring/issues" 8 | tags: [ "string" "org:erratique" ] 9 | license: "ISC" 10 | depends: [ 11 | "ocaml" {>= "4.05.0"} 12 | "ocamlfind" {build} 13 | "ocamlbuild" {build} 14 | "topkg" {build} ] 15 | build: [[ "ocaml" "pkg/pkg.ml" "build" "--pinned" "%{pinned}%" ]] 16 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 The astring programmers 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | v0.8.5 2020-08-08 Zagreb 2 | ------------------------ 3 | 4 | - Support OCaml 4.12 injectiviy annotation of Map.S (#18). 5 | Thanks to Jeremy Yallop for the patch. 6 | 7 | v0.8.4 2020-06-18 Zagreb 8 | ------------------------ 9 | 10 | - Handle `Pervasives`'s deprecation. 11 | - Require OCaml 4.05 12 | - Add conversions to/from Stdlib sets and maps. Thanks 13 | to Hezekiah M. Carty for the patch. 14 | 15 | v0.8.3 2016-09-12 Zagreb 16 | ------------------------ 17 | 18 | - Fix potential segfault on 32-bit platforms due to overflow in 19 | `String[.Sub].concat`. Spotted by Jeremy Yallop in the standard 20 | library. The same bug was present in Astring. 21 | 22 | v0.8.2 2016-08-26 Zagreb 23 | ------------------------ 24 | 25 | - Fix `String.Set.pp` not using the `sep` argument. 26 | - Build depend on topkg. 27 | - Relicense from BSD3 to ISC. 28 | 29 | v0.8.1 2015-02-22 La Forclaz (VS) 30 | --------------------------------- 31 | 32 | - Fix a bug in `String.Sub.span`. 33 | 34 | v0.8.0 2015-12-14 Cambridge (UK) 35 | -------------------------------- 36 | 37 | First release. 38 | -------------------------------------------------------------------------------- /src/astring_top.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | let () = ignore (Toploop.use_file Format.err_formatter "astring_top_init.ml") 7 | 8 | (*--------------------------------------------------------------------------- 9 | Copyright (c) 2015 The astring programmers 10 | 11 | Permission to use, copy, modify, and/or distribute this software for any 12 | purpose with or without fee is hereby granted, provided that the above 13 | copyright notice and this permission notice appear in all copies. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 16 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 17 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 18 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 20 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 21 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 22 | ---------------------------------------------------------------------------*) 23 | -------------------------------------------------------------------------------- /src/astring.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | let strf = Format.asprintf 7 | let ( ^ ) = Astring_string.append 8 | 9 | module Char = Astring_char 10 | module String = Astring_string 11 | 12 | (*--------------------------------------------------------------------------- 13 | Copyright (c) 2015 The astring programmers 14 | 15 | Permission to use, copy, modify, and/or distribute this software for any 16 | purpose with or without fee is hereby granted, provided that the above 17 | copyright notice and this permission notice appear in all copies. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 20 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 21 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 22 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 23 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 24 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 25 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 26 | ---------------------------------------------------------------------------*) 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Astring — Alternative String module for OCaml 2 | ------------------------------------------------------------------------------- 3 | %%VERSION%% 4 | 5 | Astring exposes an alternative `String` module for OCaml. This module 6 | tries to balance minimality and expressiveness for basic, index-free, 7 | string processing and provides types and functions for substrings, 8 | string sets and string maps. 9 | 10 | Remaining compatible with the OCaml `String` module is a non-goal. The 11 | `String` module exposed by Astring has exception safe functions, 12 | removes deprecated and rarely used functions, alters some signatures 13 | and names, adds a few missing functions and fully exploits OCaml's 14 | newfound string immutability. 15 | 16 | Astring depends only on the OCaml standard library. It is distributed 17 | under the ISC license. 18 | 19 | Home page: http://erratique.ch/software/astring 20 | 21 | ## Installation 22 | 23 | Astring can be installed with `opam`: 24 | 25 | opam install astring 26 | 27 | If you don't use `opam` consult the [`opam`](opam) file for build 28 | instructions. 29 | 30 | ## Documentation 31 | 32 | The documentation and API reference is automatically generated by 33 | `ocamldoc` from the interfaces. It can be consulted [online][doc] 34 | or via `odig doc astring`. 35 | 36 | [doc]: http://erratique.ch/software/astring/doc/ 37 | 38 | ## Sample programs 39 | 40 | If you installed Astring with `opam` sample programs are located in 41 | the directory `opam config var astring:doc`. 42 | -------------------------------------------------------------------------------- /test/test.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | let tests () = Testing.run 7 | [ Test_char.suite; 8 | Test_string.suite; 9 | Test_sub.suite; ] 10 | 11 | let run () = tests (); Testing.log_results () 12 | 13 | let () = if run () then exit 0 else exit 1 14 | 15 | (*--------------------------------------------------------------------------- 16 | Copyright (c) 2015 The astring programmers 17 | 18 | Permission to use, copy, modify, and/or distribute this software for any 19 | purpose with or without fee is hereby granted, provided that the above 20 | copyright notice and this permission notice appear in all copies. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 23 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 24 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 25 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 26 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 27 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 28 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 29 | ---------------------------------------------------------------------------*) 30 | -------------------------------------------------------------------------------- /src/astring_top_init.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Astring;; 7 | 8 | #install_printer Char.dump;; 9 | #install_printer String.dump;; 10 | #install_printer String.Sub.dump;; 11 | #install_printer String.Set.dump;; 12 | #install_printer String.Map.dump_string_map;; 13 | 14 | (*--------------------------------------------------------------------------- 15 | Copyright (c) 2015 The astring programmers 16 | 17 | Permission to use, copy, modify, and/or distribute this software for any 18 | purpose with or without fee is hereby granted, provided that the above 19 | copyright notice and this permission notice appear in all copies. 20 | 21 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 22 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 23 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 24 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 25 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 26 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 27 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 28 | ---------------------------------------------------------------------------*) 29 | -------------------------------------------------------------------------------- /src/astring_unsafe.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* Unsafe string and byte manipulations. If you don't believe the 7 | author's invariants, replacing with safe versions makes everything 8 | safe in the library. He won't be upset. *) 9 | 10 | let array_unsafe_get = Array.unsafe_get 11 | 12 | external char_unsafe_of_byte : int -> char = "%identity" 13 | external char_to_byte : char -> int = "%identity" 14 | 15 | let bytes_unsafe_set = Bytes.unsafe_set 16 | let bytes_unsafe_to_string = Bytes.unsafe_to_string 17 | let bytes_unsafe_blit_string s sfirst d dfirst len = 18 | Bytes.(unsafe_blit (unsafe_of_string s) sfirst d dfirst len) 19 | 20 | external string_length : string -> int = "%string_length" 21 | external string_equal : string -> string -> bool = "caml_string_equal" 22 | external string_compare : string -> string -> int = "caml_string_compare" 23 | external string_safe_get : string -> int -> char = "%string_safe_get" 24 | external string_unsafe_get : string -> int -> char = "%string_unsafe_get" 25 | 26 | let unsafe_string_sub s first len = 27 | let b = Bytes.create len in 28 | bytes_unsafe_blit_string s first b 0 len; 29 | Bytes.unsafe_to_string b 30 | 31 | (*--------------------------------------------------------------------------- 32 | Copyright (c) 2015 The astring programmers 33 | 34 | Permission to use, copy, modify, and/or distribute this software for any 35 | purpose with or without fee is hereby granted, provided that the above 36 | copyright notice and this permission notice appear in all copies. 37 | 38 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 39 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 40 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 41 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 42 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 43 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 44 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 45 | ---------------------------------------------------------------------------*) 46 | -------------------------------------------------------------------------------- /test/examples.ml: -------------------------------------------------------------------------------- 1 | (* This code is in the public domain *) 2 | 3 | open Astring 4 | 5 | (* Version number (v|V).major.minor[.patch][(+|-)info] *) 6 | 7 | let parse_version : string -> (int * int * int * string option) option = 8 | fun s -> try 9 | let parse_opt_v s = match String.Sub.head s with 10 | | Some ('v'|'V') -> String.Sub.tail s 11 | | Some _ -> s 12 | | None -> raise Exit 13 | in 14 | let parse_dot s = match String.Sub.head s with 15 | | Some '.' -> String.Sub.tail s 16 | | Some _ | None -> raise Exit 17 | in 18 | let parse_int s = 19 | match String.Sub.span ~min:1 ~sat:Char.Ascii.is_digit s with 20 | | (i, _) when String.Sub.is_empty i -> raise Exit 21 | | (i, s) -> 22 | match String.Sub.to_int i with 23 | | None -> raise Exit | Some i -> i, s 24 | in 25 | let maj, s = parse_int (parse_opt_v (String.sub s)) in 26 | let min, s = parse_int (parse_dot s) in 27 | let patch, s = match String.Sub.head s with 28 | | Some '.' -> parse_int (parse_dot s) 29 | | _ -> 0, s 30 | in 31 | let info = match String.Sub.head s with 32 | | Some ('+' | '-') -> Some (String.Sub.(to_string (tail s))) 33 | | Some _ -> raise Exit 34 | | None -> None 35 | in 36 | Some (maj, min, patch, info) 37 | with Exit -> None 38 | 39 | (* Key value bindings *) 40 | 41 | let parse_env : string -> string String.map option = 42 | fun s -> try 43 | let skip_white s = String.Sub.drop ~sat:Char.Ascii.is_white s in 44 | let parse_key s = 45 | let id_char c = Char.Ascii.is_letter c || c = '_' in 46 | match String.Sub.span ~min:1 ~sat:id_char s with 47 | | (key, _) when String.Sub.is_empty key -> raise Exit 48 | | (key, rem) -> (String.Sub.to_string key), rem 49 | in 50 | let parse_eq s = match String.Sub.head s with 51 | | Some '=' -> String.Sub.tail s 52 | | Some _ | None -> raise Exit 53 | in 54 | let parse_value s = match String.Sub.head s with 55 | | Some '"' -> (* quoted *) 56 | let is_data = function '\\' | '"' -> false | _ -> true in 57 | let rec loop acc s = 58 | let data, rem = String.Sub.span ~sat:is_data s in 59 | match String.Sub.head rem with 60 | | Some '"' -> 61 | let acc = List.rev (data :: acc) in 62 | String.Sub.(to_string @@ concat acc), (String.Sub.tail rem) 63 | | Some '\\' -> 64 | let rem = String.Sub.tail rem in 65 | begin match String.Sub.head rem with 66 | | Some ('"' | '\\' as c) -> 67 | let acc = String.(sub (of_char c)) :: data :: acc in 68 | loop acc (String.Sub.tail rem) 69 | | Some _ | None -> raise Exit 70 | end 71 | | None | Some _ -> raise Exit 72 | in 73 | loop [] (String.Sub.tail s) 74 | | Some _ -> 75 | let is_data c = not (Char.Ascii.is_white c) in 76 | let data, rem = String.Sub.span ~sat:is_data s in 77 | String.Sub.to_string data, rem 78 | | None -> "", s 79 | in 80 | let rec parse_bindings acc s = 81 | if String.Sub.is_empty s then acc else 82 | let key, s = parse_key s in 83 | let value, s = s |> skip_white |> parse_eq |> skip_white |> parse_value in 84 | parse_bindings (String.Map.add key value acc) (skip_white s) 85 | in 86 | Some (String.sub s |> skip_white |> parse_bindings String.Map.empty) 87 | with Exit -> None 88 | -------------------------------------------------------------------------------- /src/astring_base.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* Commonalities for strings and substrings *) 7 | 8 | open Astring_unsafe 9 | 10 | let strf = Format.asprintf 11 | 12 | (* Errors *) 13 | 14 | let err_empty_string = "the string is empty" 15 | let err_empty_sep = "~sep is an empty string" 16 | let err_neg_max max = strf "negative ~max (%d)" max 17 | let err_neg_min max = strf "negative ~min (%d)" max 18 | let err_neg_len len = strf "negative length (%d)" len 19 | let err_max_string_len = "Sys.max_string_length exceeded" 20 | 21 | (* Base *) 22 | 23 | let empty = "" 24 | 25 | (* Predicates *) 26 | 27 | let for_all sat s ~first ~last = 28 | let rec loop i = 29 | if i > last then true else 30 | if sat (string_unsafe_get s i) then loop (i + 1) else false 31 | in 32 | loop first 33 | 34 | let exists sat s ~first ~last = 35 | let rec loop i = 36 | if i > last then false else 37 | if sat (string_unsafe_get s i) then true else loop (i + 1) 38 | in 39 | loop first 40 | 41 | (* Traversing *) 42 | 43 | let fold_left f acc s ~first ~last = 44 | let rec loop acc i = 45 | if i > last then acc else 46 | loop (f acc (string_unsafe_get s i)) (i + 1) 47 | in 48 | loop acc first 49 | 50 | let fold_right f s acc ~first ~last = 51 | let rec loop i acc = 52 | if i < first then acc else 53 | loop (i - 1) (f (string_unsafe_get s i) acc) 54 | in 55 | loop last acc 56 | 57 | (* OCaml conversions *) 58 | 59 | let of_char c = 60 | let b = Bytes.create 1 in 61 | bytes_unsafe_set b 0 c; 62 | bytes_unsafe_to_string b 63 | 64 | let to_char s = match string_length s with 65 | | 0 -> None 66 | | 1 -> Some (string_unsafe_get s 0) 67 | | _ -> None 68 | 69 | let of_bool = string_of_bool 70 | let to_bool s = 71 | try Some (bool_of_string s) with Invalid_argument (* good joke *) _ -> None 72 | 73 | let of_int = string_of_int 74 | let to_int s = try Some (int_of_string s) with Failure _ -> None 75 | let of_nativeint = Nativeint.to_string 76 | let to_nativeint s = try Some (Nativeint.of_string s) with Failure _ -> None 77 | let of_int32 = Int32.to_string 78 | let to_int32 s = try Some (Int32.of_string s) with Failure _ -> None 79 | let of_int64 = Int64.to_string 80 | let to_int64 s = try Some (Int64.of_string s) with Failure _ -> None 81 | let of_float = string_of_float 82 | let to_float s = try Some (float_of_string s) with Failure _ -> None 83 | 84 | (*--------------------------------------------------------------------------- 85 | Copyright (c) 2015 The astring programmers 86 | 87 | Permission to use, copy, modify, and/or distribute this software for any 88 | purpose with or without fee is hereby granted, provided that the above 89 | copyright notice and this permission notice appear in all copies. 90 | 91 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 92 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 93 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 94 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 95 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 96 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 97 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 98 | ---------------------------------------------------------------------------*) 99 | -------------------------------------------------------------------------------- /src/astring_char.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | let err_byte b = Printf.sprintf "%d is not a byte" b 7 | 8 | (* Bytes *) 9 | 10 | type t = char 11 | 12 | let unsafe_of_byte = Astring_unsafe.char_unsafe_of_byte 13 | 14 | let of_byte b = 15 | if b < 0 || b > 255 then invalid_arg (err_byte b) else unsafe_of_byte b 16 | 17 | let of_int b = 18 | if b < 0 || b > 255 then None else (Some (unsafe_of_byte b)) 19 | 20 | let to_int = Astring_unsafe.char_to_byte 21 | 22 | let hash c = Hashtbl.hash c 23 | 24 | (* Predicates *) 25 | 26 | let equal : t -> t -> bool = fun c0 c1 -> c0 = c1 27 | let compare : t -> t -> int = fun c0 c1 -> compare c0 c1 28 | 29 | (* Bytes as US-ASCII characters *) 30 | 31 | module Ascii = struct 32 | let max_ascii = '\x7F' 33 | 34 | let is_valid : t -> bool = fun c -> c <= max_ascii 35 | 36 | let is_digit = function '0' .. '9' -> true | _ -> false 37 | 38 | let is_hex_digit = function 39 | | '0' .. '9' | 'A' .. 'F' | 'a' .. 'f' -> true 40 | | _ -> false 41 | 42 | let is_upper = function 'A' .. 'Z' -> true | _ -> false 43 | 44 | let is_lower = function 'a' .. 'z' -> true | _ -> false 45 | 46 | let is_letter = function 'A' .. 'Z' | 'a' .. 'z' -> true | _ -> false 47 | 48 | let is_alphanum = function 49 | | '0' .. '9' | 'A' .. 'Z' | 'a' .. 'z' -> true 50 | | _ -> false 51 | 52 | let is_white = function ' ' | '\t' .. '\r' -> true | _ -> false 53 | 54 | let is_blank = function ' ' | '\t' -> true | _ -> false 55 | 56 | let is_graphic = function '!' .. '~' -> true | _ -> false 57 | 58 | let is_print = function ' ' .. '~' -> true | _ -> false 59 | 60 | let is_control = function '\x00' .. '\x1F' | '\x7F' -> true | _ -> false 61 | 62 | let uppercase = function 63 | | 'a' .. 'z' as c -> unsafe_of_byte @@ to_int c - 0x20 64 | | c -> c 65 | 66 | let lowercase = function 67 | | 'A' .. 'Z' as c -> unsafe_of_byte @@ to_int c + 0x20 68 | | c -> c 69 | 70 | (* Escaping *) 71 | 72 | let escape = Astring_escape.char_escape 73 | let escape_char = Astring_escape.char_escape_char 74 | end 75 | 76 | (* Pretty printing *) 77 | 78 | let pp = Format.pp_print_char 79 | let dump ppf c = 80 | Format.pp_print_char ppf '\''; 81 | Format.pp_print_string ppf (Ascii.escape_char c); 82 | Format.pp_print_char ppf '\''; 83 | () 84 | 85 | (*--------------------------------------------------------------------------- 86 | Copyright (c) 2015 The astring programmers 87 | 88 | Permission to use, copy, modify, and/or distribute this software for any 89 | purpose with or without fee is hereby granted, provided that the above 90 | copyright notice and this permission notice appear in all copies. 91 | 92 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 93 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 94 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 95 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 96 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 97 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 98 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 99 | ---------------------------------------------------------------------------*) 100 | -------------------------------------------------------------------------------- /test/testing.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* {1 Value equality and pretty printing} *) 7 | 8 | type 'a eq = 'a -> 'a -> bool 9 | type 'a pp = Format.formatter -> 'a -> unit 10 | 11 | (* {1 Pretty printers} *) 12 | 13 | val pp_int : int pp 14 | val pp_bool : bool pp 15 | val pp_float : float pp 16 | val pp_char : char pp 17 | val pp_str : string pp 18 | val pp_list : 'a pp -> 'a list pp 19 | val pp_option : 'a pp -> 'a option pp 20 | 21 | (* {1 Logging} *) 22 | 23 | val log_part : ('a, Format.formatter, unit) format -> 'a 24 | val log : ?header:string -> ('a, Format.formatter, unit) format -> 'a 25 | val log_results : unit -> bool 26 | 27 | (* {1 Testing scopes} *) 28 | 29 | type test 30 | type suite 31 | 32 | val block : (unit -> unit) -> unit 33 | val test : string -> (unit -> unit) -> test 34 | val suite : string -> test list -> suite 35 | 36 | val run : suite list -> unit 37 | 38 | (* {1 Passing and failing tests} *) 39 | 40 | val pass : unit -> unit 41 | val fail : ('a, Format.formatter, unit, unit) format4 -> 'a 42 | 43 | (* {1 Checking values} *) 44 | 45 | val eq : eq:'a eq -> pp:'a pp -> 'a -> 'a -> unit 46 | val eq_char : char -> char -> unit 47 | val eq_str : string -> string -> unit 48 | val eq_bool : bool -> bool -> unit 49 | val eq_int : int -> int -> unit 50 | val eq_int32 : int32 -> int32 -> unit 51 | val eq_int64 : int64 -> int64 -> unit 52 | val eq_float : float -> float -> unit 53 | val eq_nan : float -> unit 54 | 55 | val eq_option : eq:'a eq -> pp:'a pp -> 'a option -> 'a option -> unit 56 | val eq_some : 'a option -> unit 57 | val eq_none : pp:'a pp -> 'a option -> unit 58 | 59 | val eq_list : eq:'a eq -> pp:'a pp -> 'a list -> 'a list -> unit 60 | 61 | (* {1 Tracing and checking function applications} *) 62 | 63 | type app (* holds information about the application *) 64 | 65 | val ( $ ) : 'a -> (app -> 'a -> 'b) -> 'b 66 | val ( @-> ) : 'a pp -> (app -> 'b -> 'c) -> app -> ('a -> 'b) -> 'a -> 'c 67 | 68 | val ret : 'a pp -> app -> 'a -> 'a 69 | val ret_eq : eq:'a eq -> 'a pp -> 'a -> app -> 'a -> 'a 70 | val ret_some : 'a pp -> app -> 'a option -> 'a option 71 | val ret_none : 'a pp -> app -> 'a option -> 'a option 72 | val ret_get_option : 'a pp -> app -> 'a option -> 'a 73 | 74 | val app_invalid : pp:'b pp -> ('a -> 'b) -> 'a -> unit 75 | val app_exn : pp:'b pp -> exn -> ('a -> 'b) -> 'a -> unit 76 | val app_raises : pp:'b pp -> ('a -> 'b) -> 'a -> unit 77 | 78 | (*--------------------------------------------------------------------------- 79 | Copyright (c) 2015 The astring programmers 80 | 81 | Permission to use, copy, modify, and/or distribute this software for any 82 | purpose with or without fee is hereby granted, provided that the above 83 | copyright notice and this permission notice appear in all copies. 84 | 85 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 86 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 87 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 88 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 89 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 90 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 91 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 92 | ---------------------------------------------------------------------------*) 93 | -------------------------------------------------------------------------------- /test/test_char.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Testing 7 | open Astring 8 | 9 | let eq = eq ~pp:Char.dump 10 | let eq_opt = eq_option ~pp:Char.dump ~eq:Char.equal 11 | let invalid = app_invalid ~pp:Char.dump 12 | 13 | let misc = test "Char.{of_byte,of_int,to_int}" @@ fun () -> 14 | invalid Char.of_byte (-1); 15 | invalid Char.of_byte (256); 16 | eq_opt (Char.of_int (-1)) None; 17 | eq_opt (Char.of_int 256) None; 18 | for i = 0 to 0xFF do 19 | let of_int = Char.of_int $ pp_int @-> ret_get_option Char.dump in 20 | eq_int (Char.to_int (of_int i)) i 21 | done; 22 | () 23 | 24 | let predicates = test "Char.{equal,compare}" @@ fun () -> 25 | eq_bool (Char.equal ' ' ' ') true; 26 | eq_bool (Char.equal ' ' 'a') false; 27 | eq_int (Char.compare ' ' 'a') (-1); 28 | eq_int (Char.compare ' ' ' ') (0); 29 | eq_int (Char.compare 'a' ' ') (1); 30 | eq_int (Char.compare '\x00' ' ') (-1); 31 | () 32 | 33 | let ascii_predicates = test "Char.Ascii.is_*" @@ fun () -> 34 | let pp_int ppf i = Format.fprintf ppf "%X" i in 35 | let test_pred p pi i = 36 | let pred p i = p (Char.of_byte i) in 37 | (pred p $ pp_int @-> ret_eq ~eq:(=) pp_bool (pi i)) i 38 | in 39 | let test p pi = for i = 0 to 255 do ignore (test_pred p pi i) done in 40 | test Char.Ascii.is_valid (fun i -> i <= 0x7F); 41 | test Char.Ascii.is_digit (fun i -> 0x30 <= i && i <= 0x39); 42 | test Char.Ascii.is_hex_digit (fun i -> (0x30 <= i && i <= 0x39) || 43 | (0x41 <= i && i <= 0x46) || 44 | (0x61 <= i && i <= 0x66)); 45 | test Char.Ascii.is_upper (fun i -> 0x41 <= i && i <= 0x5A); 46 | test Char.Ascii.is_lower (fun i -> 0x61 <= i && i <= 0x7A); 47 | test Char.Ascii.is_letter (fun i -> (0x41 <= i && i <= 0x5A) || 48 | (0x61 <= i && i <= 0x7A)); 49 | test Char.Ascii.is_alphanum (fun i -> (0x30 <= i && i <= 0x39) || 50 | (0x41 <= i && i <= 0x5A) || 51 | (0x61 <= i && i <= 0x7A)); 52 | test Char.Ascii.is_white (fun i -> (0x09 <= i && i <= 0x0D) || i = 0x20); 53 | test Char.Ascii.is_blank (fun i -> (i = 0x20 || i = 0x09)); 54 | test Char.Ascii.is_graphic (fun i -> (0x21 <= i && i <= 0x7E)); 55 | test Char.Ascii.is_print (fun i -> (0x21 <= i && i <= 0x7E) || i = 0x20); 56 | test Char.Ascii.is_control (fun i -> (0x00 <= i && i <= 0x1F) || i = 0x7F); 57 | () 58 | 59 | let ascii_transforms = test "Char.Ascii.{uppercase,lowercase}" @@ fun () -> 60 | for i = 0 to 255 do 61 | if (0x61 <= i && i <= 0x7A) 62 | then eq_char Char.(Ascii.uppercase @@ of_byte i) (Char.of_byte (i - 32)) 63 | else eq_char Char.(Ascii.uppercase @@ of_byte i) (Char.of_byte i) 64 | done; 65 | for i = 0 to 255 do 66 | if (0x41 <= i && i <= 0x5A) 67 | then eq_char Char.(Ascii.lowercase @@ of_byte i) (Char.of_byte (i + 32)) 68 | else eq_char Char.(Ascii.lowercase @@ of_byte i) (Char.of_byte i) 69 | done; 70 | () 71 | 72 | let ascii_escape = test "Char.Ascii.{escape,escape_char}" @@ fun () -> 73 | for i = 0 to 255 do 74 | let c = Char.of_byte i in 75 | let esc = Char.Ascii.escape c in 76 | begin match String.Ascii.unescape esc with 77 | | None -> fail "could not unescape"; 78 | | Some unesc -> 79 | eq_int (String.length unesc) 1; 80 | eq_char unesc.[0] (Char.of_byte i); 81 | end; 82 | if (0x00 <= i && i <= 0x1F) || (0x7F <= i && i <= 0xFF) 83 | then eq_str esc (Printf.sprintf "\\x%02X" i) 84 | else if (i = 0x5C) 85 | then eq_str esc "\\\\" 86 | else eq_str esc (Printf.sprintf "%c" c) 87 | done; 88 | for i = 0 to 255 do 89 | let c = Char.of_byte i in 90 | let esc = Char.Ascii.escape_char c in 91 | begin match String.Ascii.unescape_string esc with 92 | | None -> fail "could not unescape"; 93 | | Some unesc -> 94 | eq_int (String.length unesc) 1; 95 | eq_char unesc.[0] (Char.of_byte i); 96 | end; 97 | if (i = 0x08) then eq_str esc "\\b" else 98 | if (i = 0x09) then eq_str esc "\\t" else 99 | if (i = 0x0A) then eq_str esc "\\n" else 100 | if (i = 0x0D) then eq_str esc "\\r" else 101 | if (i = 0x27) then eq_str esc "\\'" else 102 | if (i = 0x5C) then eq_str esc "\\\\" else 103 | if (0x00 <= i && i <= 0x1F) || (0x7F <= i && i <= 0xFF) 104 | then eq_str esc (Printf.sprintf "\\x%02X" i) 105 | else eq_str esc (Printf.sprintf "%c" c) 106 | done; 107 | () 108 | 109 | let suite = suite "Char functions" 110 | [ misc; 111 | predicates; 112 | ascii_predicates; 113 | ascii_transforms; 114 | ascii_escape; ] 115 | 116 | (*--------------------------------------------------------------------------- 117 | Copyright (c) 2015 The astring programmers 118 | 119 | Permission to use, copy, modify, and/or distribute this software for any 120 | purpose with or without fee is hereby granted, provided that the above 121 | copyright notice and this permission notice appear in all copies. 122 | 123 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 124 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 125 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 126 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 127 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 128 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 129 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 130 | ---------------------------------------------------------------------------*) 131 | -------------------------------------------------------------------------------- /src/astring_escape.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Astring_unsafe 7 | 8 | let hex_digit = 9 | [|'0';'1';'2';'3';'4';'5';'6';'7';'8';'9';'A';'B';'C';'D';'E';'F'|] 10 | 11 | let hex_escape b k c = 12 | let byte = char_to_byte c in 13 | let hi = byte / 16 in 14 | let lo = byte mod 16 in 15 | bytes_unsafe_set b (k ) '\\'; 16 | bytes_unsafe_set b (k + 1) 'x'; 17 | bytes_unsafe_set b (k + 2) (array_unsafe_get hex_digit hi); 18 | bytes_unsafe_set b (k + 3) (array_unsafe_get hex_digit lo); 19 | () 20 | 21 | let letter_escape b k letter = 22 | bytes_unsafe_set b (k ) '\\'; 23 | bytes_unsafe_set b (k + 1) letter; 24 | () 25 | 26 | (* Character escapes *) 27 | 28 | let char_escape = function 29 | | '\\' -> "\\\\" 30 | | '\x20' .. '\x7E' as c -> 31 | let b = Bytes.create 1 in 32 | bytes_unsafe_set b 0 c; 33 | bytes_unsafe_to_string b 34 | | c (* hex escape *) -> 35 | let b = Bytes.create 4 in 36 | hex_escape b 0 c; 37 | bytes_unsafe_to_string b 38 | 39 | let char_escape_char = function 40 | | '\\' -> "\\\\" 41 | | '\'' -> "\\'" 42 | | '\b' -> "\\b" 43 | | '\t' -> "\\t" 44 | | '\n' -> "\\n" 45 | | '\r' -> "\\r" 46 | | '\x20' .. '\x7E' as c -> 47 | let b = Bytes.create 1 in 48 | bytes_unsafe_set b 0 c; 49 | bytes_unsafe_to_string b 50 | | c (* hex escape *) -> 51 | let b = Bytes.create 4 in 52 | hex_escape b 0 c; 53 | bytes_unsafe_to_string b 54 | 55 | (* String escapes *) 56 | 57 | let escape s = 58 | let max_idx = string_length s - 1 in 59 | let rec escaped_len i l = 60 | if i > max_idx then l else 61 | match string_unsafe_get s i with 62 | | '\\' -> escaped_len (i + 1) (l + 2) 63 | | '\x20' .. '\x7E' -> escaped_len (i + 1) (l + 1) 64 | | _ (* hex escape *) -> escaped_len (i + 1) (l + 4) 65 | in 66 | let escaped_len = escaped_len 0 0 in 67 | if escaped_len = string_length s then s else 68 | let b = Bytes.create escaped_len in 69 | let rec loop i k = 70 | if i > max_idx then bytes_unsafe_to_string b else 71 | match string_unsafe_get s i with 72 | | '\\' -> 73 | letter_escape b k '\\'; loop (i + 1) (k + 2) 74 | | '\x20' .. '\x7E' as c -> 75 | bytes_unsafe_set b k c; loop (i + 1) (k + 1) 76 | | c -> 77 | hex_escape b k c; loop (i + 1) (k + 4) 78 | in 79 | loop 0 0 80 | 81 | let escape_string s = 82 | let max_idx = string_length s - 1 in 83 | let rec escaped_len i l = 84 | if i > max_idx then l else 85 | match string_unsafe_get s i with 86 | | '\b' | '\t' | '\n' | '\r' | '\"' | '\\' -> 87 | escaped_len (i + 1) (l + 2) 88 | | '\x20' .. '\x7E' -> 89 | escaped_len (i + 1) (l + 1) 90 | | _ (* hex escape *) -> 91 | escaped_len (i + 1) (l + 4) 92 | in 93 | let escaped_len = escaped_len 0 0 in 94 | if escaped_len = string_length s then s else 95 | let b = Bytes.create escaped_len in 96 | let rec loop i k = 97 | if i > max_idx then bytes_unsafe_to_string b else 98 | match string_unsafe_get s i with 99 | | '\b' -> letter_escape b k 'b'; loop (i + 1) (k + 2) 100 | | '\t' -> letter_escape b k 't'; loop (i + 1) (k + 2) 101 | | '\n' -> letter_escape b k 'n'; loop (i + 1) (k + 2) 102 | | '\r' -> letter_escape b k 'r'; loop (i + 1) (k + 2) 103 | | '\"' -> letter_escape b k '"'; loop (i + 1) (k + 2) 104 | | '\\' -> letter_escape b k '\\'; loop (i + 1) (k + 2) 105 | | '\x20' .. '\x7E' as c -> 106 | bytes_unsafe_set b k c; loop (i + 1) (k + 1) 107 | | c -> 108 | hex_escape b k c; loop (i + 1) (k + 4) 109 | in 110 | loop 0 0 111 | 112 | (* Unescaping *) 113 | 114 | let is_hex_digit = function 115 | | '0' .. '9' | 'A' .. 'F' | 'a' .. 'f' -> true 116 | | _ -> false 117 | 118 | let hex_value = function 119 | | '0' .. '9' as c -> char_to_byte c - 0x30 120 | | 'A' .. 'F' as c -> 10 + (char_to_byte c - 0x41) 121 | | 'a' .. 'f' as c -> 10 + (char_to_byte c - 0x61) 122 | | _ -> assert false 123 | 124 | let unescaped_len ~ocaml s = (* derives length and checks syntax validity *) 125 | let max_idx = string_length s - 1 in 126 | let rec loop i l = 127 | if i > max_idx then Some l else 128 | if string_unsafe_get s i <> '\\' then loop (i + 1) (l + 1) else 129 | let i = i + 1 in 130 | if i > max_idx then None (* truncated escape *) else 131 | match string_unsafe_get s i with 132 | | '\\' -> loop (i + 1) (l + 1) 133 | | 'x' -> 134 | let i = i + 2 in 135 | if i > max_idx then None (* truncated escape *) else 136 | if not (is_hex_digit (string_unsafe_get s (i - 1)) && 137 | is_hex_digit (string_unsafe_get s (i ))) 138 | then None (* invalid escape *) 139 | else loop (i + 1) (l + 1) 140 | | ('b' | 't' | 'n' | 'r' | '"' | '\'') when ocaml -> loop (i + 1) (l + 1) 141 | | c -> None (* invalid escape *) 142 | in 143 | loop 0 0 144 | 145 | let _unescape ~ocaml s = match unescaped_len ~ocaml s with 146 | | None -> None 147 | | Some l when l = string_length s -> Some s 148 | | Some l -> 149 | let b = Bytes.create l in 150 | let max_idx = string_length s - 1 in 151 | let rec loop i k = 152 | if i > max_idx then Some (bytes_unsafe_to_string b) else 153 | let c = string_unsafe_get s i in 154 | if c <> '\\' then (bytes_unsafe_set b k c; loop (i + 1) (k + 1)) else 155 | let i = i + 1 (* validity checked by unescaped_len *) in 156 | match string_unsafe_get s i with 157 | | '\\' -> bytes_unsafe_set b k '\\'; loop (i + 1) (k + 1) 158 | | 'x' -> 159 | let i = i + 2 (* validity checked by unescaped_len *) in 160 | let hi = hex_value @@ string_unsafe_get s (i - 1) in 161 | let lo = hex_value @@ string_unsafe_get s (i ) in 162 | let c = char_unsafe_of_byte @@ (hi lsl 4) + lo in 163 | bytes_unsafe_set b k c; loop (i + 1) (k + 1) 164 | (* The following cases are never reached for ~ocaml:false *) 165 | | 'b' -> bytes_unsafe_set b k '\b'; loop (i + 1) (k + 1) 166 | | 't' -> bytes_unsafe_set b k '\t'; loop (i + 1) (k + 1) 167 | | 'n' -> bytes_unsafe_set b k '\n'; loop (i + 1) (k + 1) 168 | | 'r' -> bytes_unsafe_set b k '\r'; loop (i + 1) (k + 1) 169 | | '"' -> bytes_unsafe_set b k '\"'; loop (i + 1) (k + 1) 170 | | '\'' -> bytes_unsafe_set b k '\''; loop (i + 1) (k + 1) 171 | | c -> assert false (* because of unescaped_len *) 172 | in 173 | loop 0 0 174 | 175 | let unescape s = _unescape ~ocaml:false s 176 | let unescape_string s = _unescape ~ocaml:true s 177 | 178 | (*--------------------------------------------------------------------------- 179 | Copyright (c) 2015 The astring programmers 180 | 181 | Permission to use, copy, modify, and/or distribute this software for any 182 | purpose with or without fee is hereby granted, provided that the above 183 | copyright notice and this permission notice appear in all copies. 184 | 185 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 186 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 187 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 188 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 189 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 190 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 191 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 192 | ---------------------------------------------------------------------------*) 193 | -------------------------------------------------------------------------------- /test/testing.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* Value equality and pretty printing *) 7 | 8 | type 'a eq = 'a -> 'a -> bool 9 | type 'a pp = Format.formatter -> 'a -> unit 10 | 11 | (* Pretty printers *) 12 | 13 | let pp = Format.fprintf 14 | let pp_exn ppf v = pp ppf "%s" (Printexc.to_string v) 15 | let pp_bool ppf v = pp ppf "%b" v 16 | let pp_char ppf v = pp ppf "%C" v 17 | let pp_str ppf v = pp ppf "%S" v 18 | let pp_int = Format.pp_print_int 19 | let pp_float ppf v = pp ppf "%.10f" (* bof... *) v 20 | let pp_int32 ppf v = pp ppf "%ld" v 21 | let pp_int64 ppf v = pp ppf "%Ld" v 22 | let pp_text = Format.pp_print_text 23 | let pp_list pp_v ppf l = 24 | let pp_sep ppf () = pp ppf ";@," in 25 | pp ppf "@[<1>[%a]@]" (Format.pp_print_list ~pp_sep pp_v) l 26 | 27 | let pp_option pp_v ppf = function 28 | | None -> Format.fprintf ppf "None" 29 | | Some v -> Format.fprintf ppf "Some %a" pp_v v 30 | 31 | let pp_slot_loc ppf l = 32 | pp ppf "%s:%d.%d-%d:" 33 | l.Printexc.filename l.Printexc.line_number 34 | l.Printexc.start_char l.Printexc.end_char 35 | 36 | let pp_bt ppf bt = match Printexc.backtrace_slots bt with 37 | | None -> pp ppf "@,@[%a@]" pp_text "No backtrace. Did you compile with -g ?" 38 | | Some slots -> 39 | let rec loop = function 40 | | [] -> assert false 41 | | s :: ss -> 42 | begin match Printexc.Slot.location s with 43 | | None -> () 44 | | Some l when l.Printexc.filename = "test/testing.ml" || 45 | l.Printexc.filename = "test/test.ml" -> () 46 | | Some l -> pp ppf "@,%a" pp_slot_loc l 47 | end; 48 | if ss <> [] then (loop ss) else () 49 | in 50 | loop (Array.to_list slots) 51 | 52 | (* Assertion counters *) 53 | 54 | let fail_count = ref 0 55 | let pass_count = ref 0 56 | 57 | (* Logging *) 58 | 59 | let log_part fmt = Format.printf fmt 60 | let log ?header fmt = match header with 61 | | Some h -> Format.printf ("[%s] " ^^ fmt ^^ "@.") h 62 | | None -> Format.printf (fmt ^^ "@.") 63 | 64 | let log_results () = 65 | let total = !pass_count + !fail_count in 66 | match !fail_count with 67 | | 0 -> log ~header:"OK" "All %d assertions succeeded !@." total; true 68 | | 1 -> log ~header:"FAIL" "1 failure out of %d assertions" total; false 69 | | n -> log ~header:"FAIL" "%d failures out of %d assertions" 70 | !fail_count total; false 71 | 72 | let log_fail msg bt = 73 | log ~header:"FAIL" "@[@[%a@]%a@]" pp_text msg pp_bt bt 74 | 75 | let log_unexpected_exn ~header exn bt = 76 | log ~header:"SUITE" "@[@[ABORTED: unexpected exception:@]@,%a%a@]" 77 | pp_exn exn pp_bt bt 78 | 79 | (* Testing scopes *) 80 | 81 | exception Fail 82 | exception Fail_handled 83 | 84 | let block f = try f () with 85 | | Fail | Fail_handled -> () 86 | | exn -> 87 | let bt = Printexc.get_raw_backtrace () in 88 | incr fail_count; 89 | log_unexpected_exn ~header:"BLOCK" exn bt 90 | 91 | type test = string * (unit -> unit) 92 | 93 | let test n f = n, f 94 | let run_test (n, f) = 95 | log "* %s" n; 96 | try f () with 97 | | Fail | Fail_handled -> 98 | log ~header:"TEST" "ABORTED: a test failure blew the test scope" 99 | | exn -> 100 | let bt = Printexc.get_raw_backtrace () in 101 | incr fail_count; 102 | log_unexpected_exn ~header:"TEST" exn bt 103 | 104 | type suite = string * test list 105 | let suite n ts = n, ts 106 | let run_suite (n, ts) = try log "%s" n; List.iter run_test ts with 107 | | exn -> 108 | let bt = Printexc.get_raw_backtrace () in 109 | incr fail_count; 110 | log_unexpected_exn ~header:"SUITE" exn bt 111 | 112 | let run suites = List.iter run_suite suites 113 | 114 | (* Passing and failing tests *) 115 | 116 | let pass () = incr pass_count 117 | let fail fmt = 118 | let bt = Printexc.get_callstack 10 in 119 | let fail _ = log_fail (Format.flush_str_formatter ()) bt in 120 | (incr fail_count; Format.kfprintf fail Format.str_formatter fmt) 121 | 122 | (* Checking values *) 123 | 124 | let pp_neq pp_v ppf (v, v') = pp ppf "@[%a@]@ <>@ @[%a@]@]" pp_v v pp_v v' 125 | 126 | let fail_eq pp v v' = fail "%a" (pp_neq pp) (v, v') 127 | 128 | let eq ~eq ~pp v v' = if eq v v' then pass () else fail_eq pp v v' 129 | let eq_char = eq ~eq:(=) ~pp:pp_char 130 | let eq_str = eq ~eq:(=) ~pp:pp_str 131 | let eq_bool = eq ~eq:(=) ~pp:Format.pp_print_bool 132 | let eq_int = eq ~eq:(=) ~pp:Format.pp_print_int 133 | let eq_int32 = eq ~eq:(=) ~pp:pp_int32 134 | let eq_int64 = eq ~eq:(=) ~pp:pp_int64 135 | let eq_float = eq ~eq:(=) ~pp:pp_float 136 | let eq_nan f = 137 | if f <> f then pass () else fail "@[%a@]@ is@ not a NaN" pp_float f 138 | 139 | let eq_option ~eq:eq_v ~pp = 140 | let eq_opt v v' = match v, v' with 141 | | Some v, Some v' -> eq_v v v' 142 | | None, None -> true 143 | | _ -> false 144 | in 145 | let pp = pp_option pp in 146 | fun v v' -> eq ~eq:eq_opt ~pp v v' 147 | 148 | let eq_some = function 149 | | Some _ -> pass () 150 | | None -> fail "None <> Some _" 151 | 152 | let eq_none ~pp = function 153 | | None -> pass () 154 | | Some v -> fail "@[%a <>@ None@]" pp v 155 | 156 | let eq_list ~eq:eq_v ~pp:pp_v = 157 | let eql l l' = try List.for_all2 eq_v l l' with Invalid_argument _ -> false in 158 | fun l l' -> eq ~eq:eql ~pp:(pp_list pp_v) l l' 159 | 160 | (* Tracing and checking function applications. *) 161 | 162 | type app = (* Gathers information about the application *) 163 | { fail_count : int; (* fail_count checkpoint when the app starts *) 164 | pp_args : Format.formatter -> unit -> unit; } 165 | 166 | let ctx () = { fail_count = -1; pp_args = fun ppf () -> (); } 167 | 168 | let log_app_raised app exn = 169 | log "@[<2>@[%a@]==> raised %a" app.pp_args () pp_exn exn 170 | 171 | let pp_app app pp_v ppf v = 172 | pp ppf "@[<2>@[%a@]==>@ @[%a@]@]" app.pp_args () pp_v v 173 | 174 | let log_app app pp_v v = log "%a" (pp_app app pp_v) v 175 | 176 | let ( $ ) f k = k (ctx ()) f 177 | 178 | let ( @-> ) (pp_v : 'a pp) k app f v = 179 | let pp_args ppf () = app.pp_args ppf (); pp ppf "%a@ " pp_v v in 180 | let fc = if app.fail_count = -1 then !fail_count else app.fail_count in 181 | let app = { fail_count = fc; pp_args } in 182 | try k app (f v) with 183 | | Fail -> 184 | log_app app pp_v v; 185 | raise Fail_handled 186 | | Fail_handled as e -> raise e 187 | | exn -> 188 | log_app_raised app exn; 189 | fail "unexpected exception %a raised" pp_exn exn; 190 | raise Fail_handled 191 | 192 | let ret pp app v = 193 | if !fail_count <> app.fail_count then log_app app pp v; 194 | v 195 | 196 | let ret_eq ~eq pp r app v = 197 | if eq r v then (pass (); ret pp app v) else 198 | (fail "@[%a@,%a@]" (pp_neq pp) (r, v) (pp_app app pp) v; 199 | raise Fail_handled) 200 | 201 | let ret_none pp app v = match v with 202 | | None -> pass (); ret (pp_option pp) app v 203 | | Some _ -> ret_eq ~eq:(=) (pp_option pp) None app v 204 | 205 | let ret_some pp app v = match v with 206 | | Some _ as v -> pass (); ret (pp_option pp) app v 207 | | None as v -> 208 | fail "@[Some _ <> None@,%a@]" (pp_app app (pp_option pp)) v; 209 | raise Fail_handled 210 | 211 | let ret_get_option pp app v = match ret_some pp app v with 212 | | Some v -> v 213 | | None -> assert false 214 | 215 | (* I think we could handle the following functions on app traced ones 216 | by enriching the app type and have alternate functions to $ for 217 | handling these cases. Note that the only place were we can check 218 | for these things are in the @-> combinator *) 219 | 220 | let app_invalid ~pp f v = 221 | try 222 | let r = f v in 223 | fail "%a <> exception Invalid_arg _" pp r 224 | with 225 | | Invalid_argument _ -> pass () 226 | | exn -> fail "exception %a <> exception Invalid_arg _" pp_exn exn 227 | 228 | let app_exn ~pp e f v = 229 | try 230 | let r = f v in 231 | fail "%a <> exception %a" pp r pp_exn e 232 | with 233 | | exn when exn = e -> pass () 234 | | exn -> fail "exception %a <> exception %a_" pp_exn exn pp_exn e 235 | 236 | let app_raises ~pp f v = 237 | try 238 | let r = f v in 239 | fail "%a <> exception _ " pp r 240 | with 241 | | exn -> pass () 242 | 243 | (*--------------------------------------------------------------------------- 244 | Copyright (c) 2015 The astring programmers 245 | 246 | Permission to use, copy, modify, and/or distribute this software for any 247 | purpose with or without fee is hereby granted, provided that the above 248 | copyright notice and this permission notice appear in all copies. 249 | 250 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 251 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 252 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 253 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 254 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 255 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 256 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 257 | ---------------------------------------------------------------------------*) 258 | -------------------------------------------------------------------------------- /src/astring_sub.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Astring_unsafe 7 | 8 | let sunsafe_get = string_unsafe_get 9 | 10 | (* Errors *) 11 | 12 | let strf = Format.asprintf 13 | let err_base = "not on the same base string" 14 | let err_empty_sub pos = strf "empty substring [%d;%d]" pos pos 15 | let err_pos_range start stop len = 16 | strf "invalid start:%d stop:%d for position range [0;%d]" start stop len 17 | 18 | (* From strings *) 19 | 20 | let v ?(start = 0) ?stop s = 21 | let s_len = string_length s in 22 | let stop = match stop with None -> s_len | Some stop -> stop in 23 | if start < 0 || stop > s_len || stop < start 24 | then invalid_arg (err_pos_range start stop s_len) 25 | else (s, start, stop) 26 | 27 | let of_string_with_range ?(first = 0) ?(len = max_int) s = 28 | if len < 0 then invalid_arg (Astring_base.err_neg_len len) else 29 | let s_len = string_length s in 30 | let max_idx = s_len - 1 in 31 | let empty = function 32 | | first when first < 0 -> (s, 0, 0) 33 | | first when first > max_idx -> (s, s_len, s_len) 34 | | first -> (s, first, first) 35 | in 36 | if len = 0 then empty first else 37 | let last (* index *) = match len with 38 | | len when len = max_int -> max_idx 39 | | len -> 40 | let last = first + len - 1 in 41 | if last > max_idx then max_idx else last 42 | in 43 | let first = if first < 0 then 0 else first in 44 | if first > max_idx || last < 0 || first > last then empty first else 45 | (s, first, last + 1 (* position *)) 46 | 47 | let of_string_with_index_range ?(first = 0) ? last s = 48 | let s_len = string_length s in 49 | let max_idx = s_len - 1 in 50 | let empty = function 51 | | first when first < 0 -> (s, 0, 0) 52 | | first when first > max_idx -> (s, s_len, s_len) 53 | | first -> (s, first, first) 54 | in 55 | let last (* index *) = match last with 56 | | None -> max_idx 57 | | Some last -> if last > max_idx then max_idx else last 58 | in 59 | let first = if first < 0 then 0 else first in 60 | if first > max_idx || last < 0 || first > last then empty first else 61 | (s, first, last + 1 (* position *)) 62 | 63 | (* Substrings *) 64 | 65 | type t = string * int * int 66 | 67 | let empty = (Astring_base.empty, 0, 0) 68 | let start_pos (_, start, _) = start 69 | let stop_pos (_, _, stop) = stop 70 | let base_string (s, _, _) = s 71 | let length (_, start, stop) = stop - start 72 | let get (s, start, _) i = string_safe_get s (start + i) 73 | let get_byte s i = char_to_byte (get s i) 74 | let unsafe_get (s, start, _) i = string_unsafe_get s (start + i) 75 | let unsafe_get_byte s i = char_to_byte (unsafe_get s i) 76 | 77 | let head ?(rev = false) (s, start, stop) = 78 | if start = stop then None else 79 | Some (string_unsafe_get s (if rev then stop - 1 else start)) 80 | 81 | let get_head ?(rev = false) (s, start, stop) = 82 | if start = stop then invalid_arg (err_empty_sub start) else 83 | string_unsafe_get s (if rev then stop - 1 else start) 84 | 85 | let of_string s = v s 86 | let to_string (s, start, stop) = 87 | if start = stop then Astring_base.empty else 88 | if start = 0 && stop = string_length s then s else 89 | unsafe_string_sub s start (stop - start) 90 | 91 | let rebase (_, start, stop as sub) = (to_string sub, 0, stop - start) 92 | let hash s = Hashtbl.hash s 93 | 94 | (* Stretching substrings *) 95 | 96 | let start (s, start, _) = (s, start, start) 97 | let stop (s, _, stop) = (s, stop, stop) 98 | let base (s, _, _) = (s, 0, string_length s) 99 | 100 | let tail ?(rev = false) (s, start, stop as sub) = 101 | if start = stop then sub else 102 | if rev then (s, start, stop - 1) else (s, start + 1, stop) 103 | 104 | let fextend ?max ~sat (s, start, stop) = 105 | let max_idx = string_length s - 1 in 106 | let max_idx = match max with 107 | | None -> max_idx 108 | | Some max when max < 0 -> invalid_arg (Astring_base.err_neg_max max) 109 | | Some max -> let i = stop + max - 1 in if i > max_idx then max_idx else i 110 | in 111 | let rec loop i = 112 | if i > max_idx then (s, start, i) else 113 | if sat (string_unsafe_get s i) then loop (i + 1) else 114 | (s, start, i) 115 | in 116 | loop stop 117 | 118 | let rextend ?max ~sat (s, start, stop) = 119 | let min_idx = match max with 120 | | None -> 0 121 | | Some max when max < 0 -> invalid_arg (Astring_base.err_neg_max max) 122 | | Some max -> let i = start - max in if i < 0 then 0 else i 123 | in 124 | let rec loop i = 125 | if i < min_idx then (s, min_idx, stop) else 126 | if sat (string_unsafe_get s i) then loop (i - 1) else 127 | (s, i + 1, stop) 128 | in 129 | loop (start - 1) 130 | 131 | let extend ?(rev = false) ?max ?(sat = (fun _ -> true)) sub = match rev with 132 | | true -> rextend ?max ~sat sub 133 | | false -> fextend ?max ~sat sub 134 | 135 | let freduce ?max ~sat (s, start, stop as sub) = 136 | if start = stop then sub else 137 | let min_idx = match max with 138 | | None -> start 139 | | Some max when max < 0 -> invalid_arg (Astring_base.err_neg_max max) 140 | | Some max -> let i = stop - max in if i < start then start else i 141 | in 142 | let rec loop i = 143 | if i < min_idx then (s, start, min_idx) else 144 | if sat (string_unsafe_get s i) then loop (i - 1) else 145 | (s, start, i + 1) 146 | in 147 | loop (stop - 1) 148 | 149 | let rreduce ?max ~sat (s, start, stop as sub) = 150 | if start = stop then sub else 151 | let max_idx = stop - 1 in 152 | let max_idx = match max with 153 | | None -> max_idx 154 | | Some max when max < 0 -> invalid_arg (Astring_base.err_neg_max max) 155 | | Some max -> let i = start + max - 1 in if i > max_idx then max_idx else i 156 | in 157 | let rec loop i = 158 | if i > max_idx then (s, i, stop) else 159 | if sat (string_unsafe_get s i) then loop (i + 1) else 160 | (s, i, stop) 161 | in 162 | loop start 163 | 164 | let reduce ?(rev = false) ?max ?(sat = (fun _ -> true)) sub = match rev with 165 | | true -> rreduce ?max ~sat sub 166 | | false -> freduce ?max ~sat sub 167 | 168 | let extent (s0, start0, stop0) (s1, start1, stop1) = 169 | if s0 != s1 then invalid_arg err_base else 170 | let start = if start0 < start1 then start0 else start1 in 171 | let stop = if stop0 < stop1 then stop1 else stop0 in 172 | (s0, start, stop) 173 | 174 | let overlap (s0, start0, stop0) (s1, start1, stop1) = 175 | if s0 != s1 then invalid_arg err_base else 176 | if not (start0 <= stop1 && start1 <= stop0) then None else 177 | let start = if start0 < start1 then start1 else start0 in 178 | let stop = if stop0 < stop1 then stop0 else stop1 in 179 | Some (s0, start, stop) 180 | 181 | (* Appending substrings *) 182 | 183 | let append (s0, start0, _ as sub0) (s1, start1, _ as sub1) = 184 | let l0 = length sub0 in 185 | if l0 = 0 then rebase sub1 else 186 | let l1 = length sub1 in 187 | if l1 = 0 then rebase sub0 else 188 | let len = l0 + l1 in 189 | let b = Bytes.create len in 190 | bytes_unsafe_blit_string s0 start0 b 0 l0; 191 | bytes_unsafe_blit_string s1 start1 b l0 l1; 192 | (bytes_unsafe_to_string b, 0, len) 193 | 194 | let concat ?sep:(sep, sep_start, _ as sep_sub = empty) = function 195 | | [] -> empty 196 | | [s] -> rebase s 197 | | (s, start, _ as sub) :: ss -> 198 | let sub_len = length sub in 199 | let sep_len = length sep_sub in 200 | let rec cat_len sep_count l ss = 201 | if l < 0 then l else 202 | match ss with 203 | | s :: ss -> cat_len (sep_count + 1) (l + length s) ss 204 | | [] -> 205 | if sep_len = 0 then l else 206 | let max_sep_count = Sys.max_string_length / sep_len in 207 | if sep_count < 0 || sep_count > max_sep_count then -1 else 208 | sep_count * sep_len + l 209 | in 210 | let cat_len = cat_len 0 sub_len ss in 211 | if cat_len < 0 then invalid_arg Astring_base.err_max_string_len else 212 | let b = Bytes.create cat_len in 213 | bytes_unsafe_blit_string s start b 0 sub_len; 214 | let rec loop i = function 215 | | [] -> bytes_unsafe_to_string b 216 | | (str, str_start, _ as str_sub) :: ss -> 217 | let sep_pos = i in 218 | let str_pos = i + sep_len in 219 | let str_len = length str_sub in 220 | bytes_unsafe_blit_string sep sep_start b sep_pos sep_len; 221 | bytes_unsafe_blit_string str str_start b str_pos str_len; 222 | loop (str_pos + str_len) ss 223 | in 224 | (loop sub_len ss, 0, cat_len) 225 | 226 | (* Predicates *) 227 | 228 | let is_empty (_, start, stop) = stop - start = 0 229 | 230 | let is_prefix ~affix:(affix, astart, _ as affix_sub) (s, sstart, _ as s_sub) = 231 | let len_a = length affix_sub in 232 | let len_s = length s_sub in 233 | if len_a > len_s then false else 234 | let max_zidx (* zero based idx *) = len_a - 1 in 235 | let rec loop i = 236 | if i > max_zidx then true else 237 | if sunsafe_get affix (astart + i) <> sunsafe_get s (sstart + i) 238 | then false 239 | else loop (i + 1) 240 | in 241 | loop 0 242 | 243 | let is_infix ~affix:(affix, astart, _ as affix_sub) (s, sstart, _ as s_sub) = 244 | let len_a = length affix_sub in 245 | let len_s = length s_sub in 246 | if len_a > len_s then false else 247 | let max_zidx_a (* zero based idx *) = len_a - 1 in 248 | let max_zidx_s (* zero based idx *) = len_s - len_a in 249 | let rec loop i k = 250 | if i > max_zidx_s then false else 251 | if k > max_zidx_a then true else 252 | if k > 0 then 253 | if sunsafe_get affix (astart + k) = sunsafe_get s (sstart + i + k) 254 | then loop i (k + 1) 255 | else loop (i + 1) 0 256 | else if sunsafe_get affix astart = sunsafe_get s (sstart + i) 257 | then loop i 1 258 | else loop (i + 1) 0 259 | in 260 | loop 0 0 261 | 262 | let is_suffix ~affix:(affix, _, astop as affix_sub) (s, _, sstop as s_sub) = 263 | let len_a = length affix_sub in 264 | let len_s = length s_sub in 265 | if len_a > len_s then false else 266 | let max_zidx (* zero based idx *) = len_a - 1 in 267 | let max_idx_a = astop - 1 in 268 | let max_idx_s = sstop - 1 in 269 | let rec loop i = 270 | if i > max_zidx then true else 271 | if sunsafe_get affix (max_idx_a - i) <> sunsafe_get s (max_idx_s - i) 272 | then false 273 | else loop (i + 1) 274 | in 275 | loop 0 276 | 277 | let for_all sat (s, start, stop) = 278 | Astring_base.for_all sat s ~first:start ~last:(stop - 1) 279 | 280 | let exists sat (s, start, stop) = 281 | Astring_base.exists sat s ~first:start ~last:(stop - 1) 282 | 283 | let same_base (s0, _, _) (s1, _, _) = s0 == s1 284 | 285 | let equal_bytes (s0, start0, stop0) (s1, start1, stop1) = 286 | if s0 == s1 && start0 = start1 && stop0 = stop1 then true else 287 | let len0 = stop0 - start0 in 288 | let len1 = stop1 - start1 in 289 | if len0 <> len1 then false else 290 | let max_zidx = len0 - 1 in 291 | let rec loop i = 292 | if i > max_zidx then true else 293 | if sunsafe_get s0 (start0 + i) <> sunsafe_get s1 (start1 + i) 294 | then false 295 | else loop (i + 1) 296 | in 297 | loop 0 298 | 299 | let compare_bytes (s0, start0, stop0) (s1, start1, stop1) = 300 | if s0 == s1 && start0 = start1 && stop0 = stop1 then 0 else 301 | let len0 = stop0 - start0 in 302 | let len1 = stop1 - start1 in 303 | let min_len = if len0 < len1 then len0 else len1 in 304 | let max_i = min_len - 1 in 305 | let rec loop i = 306 | if i > max_i then compare len0 len1 else 307 | let c0 = sunsafe_get s0 (start0 + i) in 308 | let c1 = sunsafe_get s1 (start1 + i) in 309 | let cmp = compare c0 c1 in 310 | if cmp <> 0 then cmp else 311 | loop (i + 1) 312 | in 313 | loop 0 314 | 315 | let eq_pos : int -> int -> bool = fun p0 p1 -> p0 = p1 316 | let equal (s0, start0, stop0) (s1, start1, stop1) = 317 | if s0 != s1 then invalid_arg err_base else 318 | eq_pos start0 start1 && eq_pos stop0 stop1 319 | 320 | let compare_pos : int -> int -> int = compare 321 | let compare (s0, start0, stop0) (s1, start1, stop1) = 322 | if s0 != s1 then invalid_arg err_base else 323 | let c = compare_pos start0 start1 in 324 | if c <> 0 then c else 325 | compare_pos stop0 stop1 326 | 327 | (* Extracting substrings *) 328 | 329 | let with_range ?(first = 0) ?(len = max_int) (s, start, stop) = 330 | if len < 0 then invalid_arg (Astring_base.err_neg_len len) else 331 | let s_len = stop - start in 332 | let max_idx = s_len - 1 in 333 | let empty = function 334 | | first when first < 0 -> (s, start, start) 335 | | first when first > max_idx -> (s, stop, stop) 336 | | first -> (s, start + first, start + first) 337 | in 338 | if len = 0 then empty first else 339 | let last (* index *) = match len with 340 | | len when len = max_int -> max_idx 341 | | len -> 342 | let last = first + len - 1 in 343 | if last > max_idx then max_idx else last 344 | in 345 | let first = if first < 0 then 0 else first in 346 | if first > max_idx || last < 0 || first > last then empty first else 347 | (s, start + first, start + last + 1 (* position *)) 348 | 349 | let with_index_range ?(first = 0) ? last (s, start, stop) = 350 | let s_len = stop - start in 351 | let max_idx = s_len - 1 in 352 | let empty = function 353 | | first when first < 0 -> (s, start, start) 354 | | first when first > max_idx -> (s, stop, stop) 355 | | first -> (s, start + first, start + first) 356 | in 357 | let last (* index *) = match last with 358 | | None -> max_idx 359 | | Some last -> if last > max_idx then max_idx else last 360 | in 361 | let first = if first < 0 then 0 else first in 362 | if first > max_idx || last < 0 || first > last then empty first else 363 | (s, start + first, start + last + 1 (* position *)) 364 | 365 | let trim ?(drop = Astring_char.Ascii.is_white) (s, start, stop as sub) = 366 | let len = stop - start in 367 | if len = 0 then sub else 368 | let max_pos = stop in 369 | let max_idx = stop - 1 in 370 | let rec left_pos i = 371 | if i > max_idx then max_pos else 372 | if drop (sunsafe_get s i) then left_pos (i + 1) else i 373 | in 374 | let rec right_pos i = 375 | if i < start then start else 376 | if drop (sunsafe_get s i) then right_pos (i - 1) else (i + 1) 377 | in 378 | let left = left_pos start in 379 | if left = max_pos then (s, (start + stop) / 2, (start + stop) / 2) else 380 | let right = right_pos max_idx in 381 | if left = start && right = max_pos then sub else 382 | (s, left, right) 383 | 384 | let fspan ~min ~max ~sat (s, start, stop as sub) = 385 | if min < 0 then invalid_arg (Astring_base.err_neg_min min) else 386 | if max < 0 then invalid_arg (Astring_base.err_neg_max max) else 387 | if min > max || max = 0 then ((s, start, start), sub) else 388 | let max_idx = stop - 1 in 389 | let max_idx = 390 | let k = start + max - 1 in (if k > max_idx || k < 0 then max_idx else k) 391 | in 392 | let need_idx = start + min in 393 | let rec loop i = 394 | if i <= max_idx && sat (sunsafe_get s i) then loop (i + 1) else 395 | if i < need_idx || i = 0 then ((s, start, start), sub) else 396 | if i = stop then (sub, (s, stop, stop)) else 397 | (s, start, i), (s, i, stop) 398 | in 399 | loop start 400 | 401 | let rspan ~min ~max ~sat (s, start, stop as sub) = 402 | if min < 0 then invalid_arg (Astring_base.err_neg_min min) else 403 | if max < 0 then invalid_arg (Astring_base.err_neg_max max) else 404 | if min > max || max = 0 then (sub, (s, stop, stop)) else 405 | let max_idx = stop - 1 in 406 | let min_idx = let k = stop - max in if k < start then start else k in 407 | let need_idx = stop - min - 1 in 408 | let rec loop i = 409 | if i >= min_idx && sat (sunsafe_get s i) then loop (i - 1) else 410 | if i > need_idx || i = max_idx then (sub, (s, stop, stop)) else 411 | if i = start - 1 then ((s, start, start), sub) else 412 | (s, start, i + 1), (s, i + 1, stop) 413 | in 414 | loop max_idx 415 | 416 | let span ?(rev = false) ?(min = 0) ?(max = max_int) ?(sat = fun _ -> true) sub = 417 | match rev with 418 | | true -> rspan ~min ~max ~sat sub 419 | | false -> fspan ~min ~max ~sat sub 420 | 421 | let take ?(rev = false) ?min ?max ?sat s = 422 | (if rev then snd else fst) @@ span ~rev ?min ?max ?sat s 423 | 424 | let drop ?(rev = false) ?min ?max ?sat s = 425 | (if rev then fst else snd) @@ span ~rev ?min ?max ?sat s 426 | 427 | let fcut ~sep:(sep, sep_start, sep_stop) (s, start, stop) = 428 | let sep_len = sep_stop - sep_start in 429 | if sep_len = 0 then invalid_arg Astring_base.err_empty_sep else 430 | let max_sep_zidx = sep_len - 1 in 431 | let max_s_idx = stop - sep_len in 432 | let rec check_sep i k = 433 | if k > max_sep_zidx then Some ((s, start, i), (s, i + sep_len, stop)) 434 | else if sunsafe_get s (i + k) = sunsafe_get sep (sep_start + k) 435 | then check_sep i (k + 1) 436 | else scan (i + 1) 437 | and scan i = 438 | if i > max_s_idx then None else 439 | if sunsafe_get s i = sunsafe_get sep sep_start 440 | then check_sep i 1 441 | else scan (i + 1) 442 | in 443 | scan start 444 | 445 | let rcut ~sep:(sep, sep_start, sep_stop) (s, start, stop) = 446 | let sep_len = sep_stop - sep_start in 447 | if sep_len = 0 then invalid_arg Astring_base.err_empty_sep else 448 | let max_sep_zidx = sep_len - 1 in 449 | let max_s_idx = stop - 1 in 450 | let rec check_sep i k = 451 | if k > max_sep_zidx then Some ((s, start, i), (s, i + sep_len, stop)) 452 | else if sunsafe_get s (i + k) = sunsafe_get sep (sep_start + k) 453 | then check_sep i (k + 1) 454 | else rscan (i - 1) 455 | and rscan i = 456 | if i < start then None else 457 | if sunsafe_get s i = sunsafe_get sep sep_start 458 | then check_sep i 1 459 | else rscan (i - 1) 460 | in 461 | rscan (max_s_idx - max_sep_zidx) 462 | 463 | let cut ?(rev = false) ~sep s = match rev with 464 | | true -> rcut ~sep s 465 | | false -> fcut ~sep s 466 | 467 | let add_sub ~no_empty s ~start ~stop acc = 468 | if start = stop then (if no_empty then acc else (s, start, start) :: acc) else 469 | (s, start, stop) :: acc 470 | 471 | let fcuts ~no_empty ~sep:(sep, sep_start, sep_stop) (s, start, stop as sub) = 472 | let sep_len = sep_stop - sep_start in 473 | if sep_len = 0 then invalid_arg Astring_base.err_empty_sep else 474 | let s_len = stop - start in 475 | let max_sep_zidx = sep_len - 1 in 476 | let max_s_idx = stop - sep_len in 477 | let rec check_sep sstart i k acc = 478 | if k > max_sep_zidx then 479 | let new_start = i + sep_len in 480 | scan new_start new_start (add_sub ~no_empty s ~start:sstart ~stop:i acc) 481 | else 482 | if sunsafe_get s (i + k) = sunsafe_get sep (sep_start + k) 483 | then check_sep sstart i (k + 1) acc 484 | else scan sstart (i + 1) acc 485 | and scan sstart i acc = 486 | if i > max_s_idx then 487 | if sstart = start then (if no_empty && s_len = 0 then [] else [sub]) else 488 | List.rev (add_sub ~no_empty s ~start:sstart ~stop acc) 489 | else 490 | if sunsafe_get s i = sunsafe_get sep sep_start 491 | then check_sep sstart i 1 acc 492 | else scan sstart (i + 1) acc 493 | in 494 | scan start start [] 495 | 496 | let rcuts ~no_empty ~sep:(sep, sep_start, sep_stop) (s, start, stop as sub) = 497 | let sep_len = sep_stop - sep_start in 498 | if sep_len = 0 then invalid_arg Astring_base.err_empty_sep else 499 | let s_len = stop - start in 500 | let max_sep_zidx = sep_len - 1 in 501 | let max_s_idx = stop - 1 in 502 | let rec check_sep sstop i k acc = 503 | if k > max_sep_zidx then 504 | let start = i + sep_len in 505 | rscan i (i - sep_len) (add_sub ~no_empty s ~start ~stop:sstop acc) 506 | else 507 | if sunsafe_get s (i + k) = sunsafe_get sep (sep_start + k) 508 | then check_sep sstop i (k + 1) acc 509 | else rscan sstop (i - 1) acc 510 | and rscan sstop i acc = 511 | if i < start then 512 | if sstop = stop then (if no_empty && s_len = 0 then [] else [sub]) else 513 | add_sub ~no_empty s ~start ~stop:sstop acc 514 | else 515 | if sunsafe_get s i = sunsafe_get sep sep_start 516 | then check_sep sstop i 1 acc 517 | else rscan sstop (i - 1) acc 518 | in 519 | rscan stop (max_s_idx - max_sep_zidx) [] 520 | 521 | let cuts ?(rev = false) ?(empty = true) ~sep s = match rev with 522 | | true -> rcuts ~no_empty:(not empty) ~sep s 523 | | false -> fcuts ~no_empty:(not empty) ~sep s 524 | 525 | let fields 526 | ?(empty = false) ?(is_sep = Astring_char.Ascii.is_white) 527 | (s, start, stop as sub) 528 | = 529 | let no_empty = not empty in 530 | let max_pos = stop in 531 | let rec loop i end_pos acc = 532 | if i < start then begin 533 | if end_pos = max_pos 534 | then (if no_empty && max_pos = start then [] else [sub]) 535 | else add_sub ~no_empty s ~start ~stop:end_pos acc 536 | end else begin 537 | if not (is_sep (sunsafe_get s i)) then loop (i - 1) end_pos acc else 538 | loop (i - 1) i (add_sub ~no_empty s ~start:(i + 1) ~stop:end_pos acc) 539 | end 540 | in 541 | loop (max_pos - 1) max_pos [] 542 | 543 | (* Traversing *) 544 | 545 | let ffind sat (s, start, stop) = 546 | let max_idx = stop - 1 in 547 | let rec loop i = 548 | if i > max_idx then None else 549 | if sat (sunsafe_get s i) then Some (s, i, i + 1) else loop (i + 1) 550 | in 551 | loop start 552 | 553 | let rfind sat (s, start, stop) = 554 | let rec loop i = 555 | if i < start then None else 556 | if sat (sunsafe_get s i) then Some (s, i, i + 1) else loop (i - 1) 557 | in 558 | loop (stop - 1) 559 | 560 | let find ?(rev = false) sat sub = match rev with 561 | | true -> rfind sat sub 562 | | false -> ffind sat sub 563 | 564 | let ffind_sub ~sub:(sub, sub_start, sub_stop) (s, start, stop) = 565 | let len_sub = sub_stop - sub_start in 566 | let len_s = stop - start in 567 | if len_sub > len_s then None else 568 | let max_zidx_sub = len_sub - 1 in 569 | let max_idx_s = start + len_s - len_sub in 570 | let rec loop i k = 571 | if i > max_idx_s then None else 572 | if k > max_zidx_sub then Some (s, i, i + len_sub) else 573 | if k > 0 then 574 | if sunsafe_get sub (sub_start + k) = sunsafe_get s (i + k) 575 | then loop i (k + 1) 576 | else loop (i + 1) 0 577 | else if sunsafe_get sub sub_start = sunsafe_get s i then loop i 1 else 578 | loop (i + 1) 0 579 | in 580 | loop start 0 581 | 582 | let rfind_sub ~sub:(sub, sub_start, sub_stop) (s, start, stop) = 583 | let len_sub = sub_stop - sub_start in 584 | let len_s = stop - start in 585 | if len_sub > len_s then None else 586 | let max_zidx_sub = len_sub - 1 in 587 | let rec loop i k = 588 | if i < start then None else 589 | if k > max_zidx_sub then Some (s, i, i + len_sub) else 590 | if k > 0 then 591 | if sunsafe_get sub (sub_start + k) = sunsafe_get s (i + k) 592 | then loop i (k + 1) 593 | else loop (i - 1) 0 594 | else if sunsafe_get sub sub_start = sunsafe_get s i then loop i 1 else 595 | loop (i - 1) 0 596 | in 597 | loop (stop - len_sub) 0 598 | 599 | let find_sub ?(rev = false) ~sub start = match rev with 600 | | true -> rfind_sub ~sub start 601 | | false -> ffind_sub ~sub start 602 | 603 | let filter sat (s, start, stop) = 604 | let len = stop - start in 605 | if len = 0 then empty else 606 | let b = Bytes.create len in 607 | let max_idx = stop - 1 in 608 | let rec loop b k i = (* k is the write index in b *) 609 | if i > max_idx then 610 | ((if k = len then bytes_unsafe_to_string b else Bytes.sub_string b 0 k), 611 | 0, k) 612 | else 613 | let c = sunsafe_get s i in 614 | if sat c then (bytes_unsafe_set b k c; loop b (k + 1) (i + 1)) else 615 | loop b k (i + 1) 616 | in 617 | loop b 0 start 618 | 619 | let filter_map f (s, start, stop) = 620 | let len = stop - start in 621 | if len = 0 then empty else 622 | let b = Bytes.create len in 623 | let max_idx = stop - 1 in 624 | let rec loop b k i = (* k is the write index in b *) 625 | if i > max_idx then 626 | ((if k = len then bytes_unsafe_to_string b else Bytes.sub_string b 0 k), 627 | 0, k) 628 | else 629 | match f (sunsafe_get s i) with 630 | | None -> loop b k (i + 1) 631 | | Some c -> bytes_unsafe_set b k c; loop b (k + 1) (i + 1) 632 | in 633 | loop b 0 start 634 | 635 | let map f (s, start, stop) = 636 | let len = stop - start in 637 | if len = 0 then empty else 638 | let b = Bytes.create len in 639 | for i = 0 to len - 1 do 640 | bytes_unsafe_set b i (f (sunsafe_get s (start + i))) 641 | done; 642 | (bytes_unsafe_to_string b, 0, len) 643 | 644 | let mapi f (s, start, stop) = 645 | let len = stop - start in 646 | if len = 0 then empty else 647 | let b = Bytes.create len in 648 | for i = 0 to len - 1 do 649 | bytes_unsafe_set b i (f i (sunsafe_get s (start + i))) 650 | done; 651 | (bytes_unsafe_to_string b, 0, len) 652 | 653 | let fold_left f acc (s, start, stop) = 654 | Astring_base.fold_left f acc s ~first:start ~last:(stop - 1) 655 | 656 | let fold_right f (s, start, stop) acc = 657 | Astring_base.fold_right f s acc ~first:start ~last:(stop - 1) 658 | 659 | let iter f (s, start, stop) = 660 | for i = start to stop - 1 do f (sunsafe_get s i) done 661 | 662 | let iteri f (s, start, stop) = 663 | for i = start to stop - 1 do f (i - start) (sunsafe_get s i) done 664 | 665 | (* Pretty printing *) 666 | 667 | let pp ppf s = 668 | Format.pp_print_string ppf (to_string s) 669 | 670 | let dump ppf s = 671 | Format.pp_print_char ppf '"'; 672 | Format.pp_print_string ppf (Astring_escape.escape_string (to_string s)); 673 | Format.pp_print_char ppf '"'; 674 | () 675 | 676 | let dump_raw ppf (s, start, stop) = 677 | Format.fprintf ppf "@[<1>(@[<1>(base@ \"%s\")@]@ @[<1>(start@ %d)@]@ \ 678 | @[(stop@ %d)@])@]" 679 | (Astring_escape.escape_string s) start stop 680 | 681 | (* OCaml base type conversions *) 682 | 683 | let of_char c = v (Astring_base.of_char c) 684 | let to_char s = Astring_base.to_char (to_string s) 685 | let of_bool b = v (Astring_base.of_bool b) 686 | let to_bool s = Astring_base.to_bool (to_string s) 687 | let of_int i = v (Astring_base.of_int i) 688 | let to_int s = Astring_base.to_int (to_string s) 689 | let of_nativeint i = v (Astring_base.of_nativeint i) 690 | let to_nativeint s = Astring_base.to_nativeint (to_string s) 691 | let of_int32 i = v (Astring_base.of_int32 i) 692 | let to_int32 s = Astring_base.to_int32 (to_string s) 693 | let of_int64 i = v (Astring_base.of_int64 i) 694 | let to_int64 s = Astring_base.to_int64 (to_string s) 695 | let of_float f = v (Astring_base.of_float f) 696 | let to_float s = Astring_base.to_float (to_string s) 697 | 698 | (*--------------------------------------------------------------------------- 699 | Copyright (c) 2015 The astring programmers 700 | 701 | Permission to use, copy, modify, and/or distribute this software for any 702 | purpose with or without fee is hereby granted, provided that the above 703 | copyright notice and this permission notice appear in all copies. 704 | 705 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 706 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 707 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 708 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 709 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 710 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 711 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 712 | ---------------------------------------------------------------------------*) 713 | -------------------------------------------------------------------------------- /src/astring_string.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Astring_unsafe 7 | 8 | let strf = Format.asprintf 9 | 10 | (* String *) 11 | 12 | type t = string 13 | 14 | let empty = Astring_base.empty 15 | let v ~len f = 16 | let b = Bytes.create len in 17 | for i = 0 to len - 1 do bytes_unsafe_set b i (f i) done; 18 | bytes_unsafe_to_string b 19 | 20 | let length = string_length 21 | let get = string_safe_get 22 | let get_byte s i = char_to_byte (get s i) 23 | let unsafe_get = string_unsafe_get 24 | let unsafe_get_byte s i = char_to_byte (unsafe_get s i) 25 | 26 | let head ?(rev = false) s = 27 | let len = length s in 28 | if len = 0 then None else 29 | Some (string_unsafe_get s (if rev then len - 1 else 0)) 30 | 31 | let get_head ?(rev = false) s = 32 | let len = length s in 33 | if len = 0 then invalid_arg Astring_base.err_empty_string else 34 | string_unsafe_get s (if rev then len - 1 else 0) 35 | 36 | let hash c = Hashtbl.hash c 37 | 38 | (* Appending strings *) 39 | 40 | let append s0 s1 = 41 | let l0 = length s0 in 42 | if l0 = 0 then s1 else 43 | let l1 = length s1 in 44 | if l1 = 0 then s0 else 45 | let b = Bytes.create (l0 + l1) in 46 | bytes_unsafe_blit_string s0 0 b 0 l0; 47 | bytes_unsafe_blit_string s1 0 b l0 l1; 48 | bytes_unsafe_to_string b 49 | 50 | let concat ?(sep = empty) = function 51 | | [] -> empty 52 | | [s] -> s 53 | | s :: ss -> 54 | let s_len = length s in 55 | let sep_len = length sep in 56 | let rec cat_len sep_count l ss = 57 | if l < 0 then l else 58 | match ss with 59 | | s :: ss -> cat_len (sep_count + 1) (l + length s) ss 60 | | [] -> 61 | if sep_len = 0 then l else 62 | let max_sep_count = Sys.max_string_length / sep_len in 63 | if sep_count < 0 || sep_count > max_sep_count then -1 else 64 | sep_count * sep_len + l 65 | in 66 | let cat_len = cat_len 0 s_len ss in 67 | if cat_len < 0 then invalid_arg Astring_base.err_max_string_len else 68 | let b = Bytes.create cat_len in 69 | bytes_unsafe_blit_string s 0 b 0 s_len; 70 | let rec loop i = function 71 | | [] -> bytes_unsafe_to_string b 72 | | str :: ss -> 73 | let sep_first = i in 74 | let str_first = i + sep_len in 75 | let str_len = length str in 76 | bytes_unsafe_blit_string sep 0 b sep_first sep_len; 77 | bytes_unsafe_blit_string str 0 b str_first str_len; 78 | loop (str_first + str_len) ss 79 | in 80 | loop s_len ss 81 | 82 | (* Predicates *) 83 | 84 | let is_empty s = length s = 0 85 | 86 | let is_prefix ~affix s = 87 | let len_a = length affix in 88 | let len_s = length s in 89 | if len_a > len_s then false else 90 | let max_idx_a = len_a - 1 in 91 | let rec loop i = 92 | if i > max_idx_a then true else 93 | if unsafe_get affix i <> unsafe_get s i then false else loop (i + 1) 94 | in 95 | loop 0 96 | 97 | let is_infix ~affix s = 98 | let len_a = length affix in 99 | let len_s = length s in 100 | if len_a > len_s then false else 101 | let max_idx_a = len_a - 1 in 102 | let max_idx_s = len_s - len_a in 103 | let rec loop i k = 104 | if i > max_idx_s then false else 105 | if k > max_idx_a then true else 106 | if k > 0 then 107 | if unsafe_get affix k = unsafe_get s (i + k) 108 | then loop i (k + 1) else loop (i + 1) 0 109 | else 110 | if unsafe_get affix 0 = unsafe_get s i 111 | then loop i 1 else loop (i + 1) 0 112 | in 113 | loop 0 0 114 | 115 | let is_suffix ~affix s = 116 | let max_idx_a = length affix - 1 in 117 | let max_idx_s = length s - 1 in 118 | if max_idx_a > max_idx_s then false else 119 | let rec loop i = 120 | if i > max_idx_a then true else 121 | if unsafe_get affix (max_idx_a - i) <> unsafe_get s (max_idx_s - i) 122 | then false 123 | else loop (i + 1) 124 | in 125 | loop 0 126 | 127 | let for_all sat s = Astring_base.for_all sat s ~first:0 ~last:(length s - 1) 128 | let exists sat s = Astring_base.exists sat s ~first:0 ~last:(length s - 1) 129 | let equal = string_equal 130 | let compare = string_compare 131 | 132 | (* Extracting substrings *) 133 | 134 | let with_range ?(first = 0) ?(len = max_int) s = 135 | if len < 0 then invalid_arg (Astring_base.err_neg_len len) else 136 | if len = 0 then empty else 137 | let s_len = length s in 138 | let max_idx = s_len - 1 in 139 | let last = match len with 140 | | len when len = max_int -> max_idx 141 | | len -> 142 | let last = first + len - 1 in 143 | if last > max_idx then max_idx else last 144 | in 145 | let first = if first < 0 then 0 else first in 146 | if first > max_idx || last < 0 || first > last then empty else 147 | if first = 0 && last = max_idx then s else 148 | unsafe_string_sub s first (last + 1 - first) 149 | 150 | let with_index_range ?(first = 0) ?last s = 151 | let s_len = length s in 152 | let max_idx = s_len - 1 in 153 | let last = match last with 154 | | None -> max_idx 155 | | Some last -> if last > max_idx then max_idx else last 156 | in 157 | let first = if first < 0 then 0 else first in 158 | if first > max_idx || last < 0 || first > last then empty else 159 | if first = 0 && last = max_idx then s else 160 | unsafe_string_sub s first (last + 1 - first) 161 | 162 | let trim ?(drop = Astring_char.Ascii.is_white) s = 163 | let len = length s in 164 | if len = 0 then s else 165 | let max_idx = len - 1 in 166 | let rec left_pos i = 167 | if i > max_idx then len else 168 | if drop (unsafe_get s i) then left_pos (i + 1) else i 169 | in 170 | let rec right_pos i = 171 | if i < 0 then 0 else 172 | if drop (unsafe_get s i) then right_pos (i - 1) else (i + 1) 173 | in 174 | let left = left_pos 0 in 175 | if left = len then empty else 176 | let right = right_pos max_idx in 177 | if left = 0 && right = len then s else 178 | unsafe_string_sub s left (right - left) 179 | 180 | let fspan ?(min = 0) ?(max = max_int) ?(sat = fun _ -> true) s = 181 | if min < 0 then invalid_arg (Astring_base.err_neg_min min) else 182 | if max < 0 then invalid_arg (Astring_base.err_neg_max max) else 183 | if min > max || max = 0 then (empty, s) else 184 | let len = length s in 185 | let max_idx = len - 1 in 186 | let max_idx = let k = max - 1 in (if k > max_idx then max_idx else k) in 187 | let need_idx = min in 188 | let rec loop i = 189 | if i <= max_idx && sat (unsafe_get s i) then loop (i + 1) else 190 | if i < need_idx || i = 0 then (empty, s) else 191 | if i = len then (s, empty) else 192 | unsafe_string_sub s 0 i, unsafe_string_sub s i (len - i) 193 | in 194 | loop 0 195 | 196 | let rspan ?(min = 0) ?(max = max_int) ?(sat = fun _ -> true) s = 197 | if min < 0 then invalid_arg (Astring_base.err_neg_min min) else 198 | if max < 0 then invalid_arg (Astring_base.err_neg_max max) else 199 | if min > max || max = 0 then (s, empty) else 200 | let len = length s in 201 | let max_idx = len - 1 in 202 | let min_idx = let k = len - max in (if k < 0 then 0 else k) in 203 | let need_idx = max_idx - min in 204 | let rec loop i = 205 | if i >= min_idx && sat (unsafe_get s i) then loop (i - 1) else 206 | if i > need_idx || i = max_idx then (s, empty) else 207 | if i = -1 then (empty, s) else 208 | let cut = i + 1 in 209 | unsafe_string_sub s 0 cut, unsafe_string_sub s cut (len - cut) 210 | in 211 | loop max_idx 212 | 213 | let span ?(rev = false) ?min ?max ?sat s = match rev with 214 | | true -> rspan ?min ?max ?sat s 215 | | false -> fspan ?min ?max ?sat s 216 | 217 | (* N.B. c&p of fspan *) 218 | let ftake ?(min = 0) ?(max = max_int) ?(sat = fun _ -> true) s = 219 | if min < 0 then invalid_arg (Astring_base.err_neg_min min) else 220 | if max < 0 then invalid_arg (Astring_base.err_neg_max max) else 221 | if min > max || max = 0 then empty else 222 | let len = length s in 223 | let max_idx = len - 1 in 224 | let max_idx = let k = max - 1 in (if k > max_idx then max_idx else k) in 225 | let need_idx = min in 226 | let rec loop i = 227 | if i <= max_idx && sat (unsafe_get s i) then loop (i + 1) else 228 | if i < need_idx || i = 0 then empty else 229 | if i = len then s else 230 | unsafe_string_sub s 0 i 231 | in 232 | loop 0 233 | 234 | (* N.B. c&p of rspan *) 235 | let rtake ?(min = 0) ?(max = max_int) ?(sat = fun _ -> true) s = 236 | if min < 0 then invalid_arg (Astring_base.err_neg_min min) else 237 | if max < 0 then invalid_arg (Astring_base.err_neg_max max) else 238 | if min > max || max = 0 then empty else 239 | let len = length s in 240 | let max_idx = len - 1 in 241 | let min_idx = let k = len - max in (if k < 0 then 0 else k) in 242 | let need_idx = max_idx - min in 243 | let rec loop i = 244 | if i >= min_idx && sat (unsafe_get s i) then loop (i - 1) else 245 | if i > need_idx || i = max_idx then empty else 246 | if i = -1 then s else 247 | let cut = i + 1 in 248 | unsafe_string_sub s cut (len - cut) 249 | in 250 | loop max_idx 251 | 252 | let take ?(rev = false) ?min ?max ?sat s = match rev with 253 | | true -> rtake ?min ?max ?sat s 254 | | false -> ftake ?min ?max ?sat s 255 | 256 | (* N.B. c&p of fspan *) 257 | let fdrop ?(min = 0) ?(max = max_int) ?(sat = fun _ -> true) s = 258 | if min < 0 then invalid_arg (Astring_base.err_neg_min min) else 259 | if max < 0 then invalid_arg (Astring_base.err_neg_max max) else 260 | if min > max || max = 0 then s else 261 | let len = length s in 262 | let max_idx = len - 1 in 263 | let max_idx = let k = max - 1 in (if k > max_idx then max_idx else k) in 264 | let need_idx = min in 265 | let rec loop i = 266 | if i <= max_idx && sat (unsafe_get s i) then loop (i + 1) else 267 | if i < need_idx || i = 0 then s else 268 | if i = len then empty else 269 | unsafe_string_sub s i (len - i) 270 | in 271 | loop 0 272 | 273 | (* N.B. c&p of rspan *) 274 | let rdrop ?(min = 0) ?(max = max_int) ?(sat = fun _ -> true) s = 275 | if min < 0 then invalid_arg (Astring_base.err_neg_min min) else 276 | if max < 0 then invalid_arg (Astring_base.err_neg_max max) else 277 | if min > max || max = 0 then s else 278 | let len = length s in 279 | let max_idx = len - 1 in 280 | let min_idx = let k = len - max in (if k < 0 then 0 else k) in 281 | let need_idx = max_idx - min in 282 | let rec loop i = 283 | if i >= min_idx && sat (unsafe_get s i) then loop (i - 1) else 284 | if i > need_idx || i = max_idx then s else 285 | if i = -1 then empty else 286 | let cut = i + 1 in 287 | unsafe_string_sub s 0 cut 288 | in 289 | loop max_idx 290 | 291 | let drop ?(rev = false) ?min ?max ?sat s = match rev with 292 | | true -> rdrop ?min ?max ?sat s 293 | | false -> fdrop ?min ?max ?sat s 294 | 295 | let fcut ~sep s = 296 | let sep_len = length sep in 297 | if sep_len = 0 then invalid_arg Astring_base.err_empty_sep else 298 | let s_len = length s in 299 | let max_sep_idx = sep_len - 1 in 300 | let max_s_idx = s_len - sep_len in 301 | let rec check_sep i k = 302 | if k > max_sep_idx then 303 | let r_start = i + sep_len in 304 | Some (unsafe_string_sub s 0 i, 305 | unsafe_string_sub s r_start (s_len - r_start)) 306 | else 307 | if unsafe_get s (i + k) = unsafe_get sep k 308 | then check_sep i (k + 1) 309 | else scan (i + 1) 310 | and scan i = 311 | if i > max_s_idx then None else 312 | if unsafe_get s i = unsafe_get sep 0 then check_sep i 1 else scan (i + 1) 313 | in 314 | scan 0 315 | 316 | let rcut ~sep s = 317 | let sep_len = length sep in 318 | if sep_len = 0 then invalid_arg Astring_base.err_empty_sep else 319 | let s_len = length s in 320 | let max_sep_idx = sep_len - 1 in 321 | let max_s_idx = s_len - 1 in 322 | let rec check_sep i k = 323 | if k > max_sep_idx then 324 | let r_start = i + sep_len in 325 | Some (unsafe_string_sub s 0 i, 326 | unsafe_string_sub s r_start (s_len - r_start)) 327 | else 328 | if unsafe_get s (i + k) = unsafe_get sep k 329 | then check_sep i (k + 1) 330 | else rscan (i - 1) 331 | and rscan i = 332 | if i < 0 then None else 333 | if unsafe_get s i = unsafe_get sep 0 then check_sep i 1 else rscan (i - 1) 334 | in 335 | rscan (max_s_idx - max_sep_idx) 336 | 337 | let cut ?(rev = false) ~sep s = if rev then rcut ~sep s else fcut ~sep s 338 | 339 | let add_sub ~no_empty s ~start ~stop acc = 340 | if start = stop then (if no_empty then acc else empty :: acc) else 341 | unsafe_string_sub s start (stop - start) :: acc 342 | 343 | let fcuts ~no_empty ~sep s = 344 | let sep_len = length sep in 345 | if sep_len = 0 then invalid_arg Astring_base.err_empty_sep else 346 | let s_len = length s in 347 | let max_sep_idx = sep_len - 1 in 348 | let max_s_idx = s_len - sep_len in 349 | let rec check_sep start i k acc = 350 | if k > max_sep_idx then 351 | let new_start = i + sep_len in 352 | scan new_start new_start (add_sub ~no_empty s ~start ~stop:i acc) 353 | else 354 | if unsafe_get s (i + k) = unsafe_get sep k 355 | then check_sep start i (k + 1) acc 356 | else scan start (i + 1) acc 357 | and scan start i acc = 358 | if i > max_s_idx then 359 | if start = 0 then (if no_empty && s_len = 0 then [] else [s]) else 360 | List.rev (add_sub ~no_empty s ~start ~stop:s_len acc) 361 | else 362 | if unsafe_get s i = unsafe_get sep 0 363 | then check_sep start i 1 acc 364 | else scan start (i + 1) acc 365 | in 366 | scan 0 0 [] 367 | 368 | let rcuts ~no_empty ~sep s = 369 | let sep_len = length sep in 370 | if sep_len = 0 then invalid_arg Astring_base.err_empty_sep else 371 | let s_len = length s in 372 | let max_sep_idx = sep_len - 1 in 373 | let max_s_idx = s_len - 1 in 374 | let rec check_sep stop i k acc = 375 | if k > max_sep_idx then 376 | let start = i + sep_len in 377 | rscan i (i - sep_len) (add_sub ~no_empty s ~start ~stop acc) 378 | else if unsafe_get s (i + k) = unsafe_get sep k 379 | then check_sep stop i (k + 1) acc 380 | else rscan stop (i - 1) acc 381 | and rscan stop i acc = 382 | if i < 0 then 383 | if stop = s_len then (if no_empty && s_len = 0 then [] else [s]) else 384 | add_sub ~no_empty s ~start:0 ~stop:stop acc 385 | else if unsafe_get s i = unsafe_get sep 0 386 | then check_sep stop i 1 acc 387 | else rscan stop (i - 1) acc 388 | in 389 | rscan s_len (max_s_idx - max_sep_idx) [] 390 | 391 | let cuts ?(rev = false) ?(empty = true) ~sep s = match rev with 392 | | true -> rcuts ~no_empty:(not empty) ~sep s 393 | | false -> fcuts ~no_empty:(not empty) ~sep s 394 | 395 | let fields ?(empty = true) ?(is_sep = Astring_char.Ascii.is_white) s = 396 | let no_empty = not empty in 397 | let max_pos = length s in 398 | let rec loop i end_pos acc = 399 | if i < 0 then begin 400 | if end_pos = max_pos 401 | then (if no_empty && max_pos = 0 then [] else [s]) 402 | else add_sub ~no_empty s ~start:0 ~stop:end_pos acc 403 | end else begin 404 | if not (is_sep (unsafe_get s i)) then loop (i - 1) end_pos acc else 405 | loop (i - 1) i (add_sub ~no_empty s ~start:(i + 1) ~stop:end_pos acc) 406 | end 407 | in 408 | loop (max_pos - 1) max_pos [] 409 | 410 | (* Substrings *) 411 | 412 | type sub = Astring_sub.t 413 | 414 | module Sub = Astring_sub 415 | 416 | let sub = Sub.v 417 | let sub_with_range = Sub.of_string_with_range 418 | let sub_with_index_range = Sub.of_string_with_index_range 419 | 420 | (* Traversing *) 421 | 422 | let ffind ?start sat s = 423 | let max_idx = length s - 1 in 424 | let rec loop i = 425 | if i > max_idx then None else 426 | if sat (unsafe_get s i) then Some i else loop (i + 1) 427 | in 428 | match start with 429 | | None -> loop 0 430 | | Some i when i < 0 -> loop 0 431 | | Some i -> loop i 432 | 433 | let rfind ?start sat s = 434 | let max_idx = length s - 1 in 435 | let rec loop i = 436 | if i < 0 then None else 437 | if sat (unsafe_get s i) then Some i else loop (i - 1) 438 | in 439 | match start with 440 | | None -> loop max_idx 441 | | Some i when i > max_idx -> loop max_idx 442 | | Some i -> loop i 443 | 444 | let find ?(rev = false) ?start sat s = match rev with 445 | | false -> ffind ?start sat s 446 | | true -> rfind ?start sat s 447 | 448 | let ffind_sub ?start ~sub s = 449 | let len_sub = length sub in 450 | let len_s = length s in 451 | let max_idx_sub = len_sub - 1 in 452 | let max_idx_s = if len_sub <> 0 then len_s - len_sub else len_s - 1 in 453 | let rec loop i k = 454 | if i > max_idx_s then None else 455 | if k > max_idx_sub then Some i else 456 | if k > 0 then 457 | if unsafe_get sub k = unsafe_get s (i + k) 458 | then loop i (k + 1) else loop (i + 1) 0 459 | else 460 | if unsafe_get sub 0 = unsafe_get s i 461 | then loop i 1 else loop (i + 1) 0 462 | in 463 | match start with 464 | | None -> loop 0 0 465 | | Some i when i < 0 -> loop 0 0 466 | | Some i -> loop i 0 467 | 468 | let rfind_sub ?start ~sub s = 469 | let len_sub = length sub in 470 | let len_s = length s in 471 | let max_idx_sub = len_sub - 1 in 472 | let max_idx_s = if len_sub <> 0 then len_s - len_sub else len_s - 1 in 473 | let rec loop i k = 474 | if i < 0 then None else 475 | if k > max_idx_sub then Some i else 476 | if k > 0 then 477 | if unsafe_get sub k = unsafe_get s (i + k) 478 | then loop i (k + 1) else loop (i - 1) 0 479 | else 480 | if unsafe_get sub 0 = unsafe_get s i 481 | then loop i 1 else loop (i - 1) 0 482 | in 483 | match start with 484 | | None -> loop max_idx_s 0 485 | | Some i when i > max_idx_s -> loop max_idx_s 0 486 | | Some i -> loop i 0 487 | 488 | let find_sub ?(rev = false) ?start ~sub s = match rev with 489 | | false -> ffind_sub ?start ~sub s 490 | | true -> rfind_sub ?start ~sub s 491 | 492 | let filter sat s = 493 | let max_idx = length s - 1 in 494 | let rec with_buf b k i = (* k is the write index in b *) 495 | if i > max_idx then Bytes.sub_string b 0 k else 496 | let c = unsafe_get s i in 497 | if sat c then (bytes_unsafe_set b k c; with_buf b (k + 1) (i + 1)) else 498 | with_buf b k (i + 1) 499 | in 500 | let rec try_no_alloc i = 501 | if i > max_idx then s else 502 | if (sat (unsafe_get s i)) then try_no_alloc (i + 1) else 503 | if i = max_idx then unsafe_string_sub s 0 i else 504 | let b = Bytes.of_string s in (* copy and overwrite starting from i *) 505 | with_buf b i (i + 1) 506 | in 507 | try_no_alloc 0 508 | 509 | let filter_map f s = 510 | let max_idx = length s - 1 in 511 | let rec with_buf b k i = (* k is the write index in b *) 512 | if i > max_idx then 513 | (if k > max_idx then bytes_unsafe_to_string b else Bytes.sub_string b 0 k) 514 | else 515 | match f (unsafe_get s i) with 516 | | None -> with_buf b k (i + 1) 517 | | Some c -> bytes_unsafe_set b k c; with_buf b (k + 1) (i + 1) 518 | in 519 | let rec try_no_alloc i = 520 | if i > max_idx then s else 521 | let c = unsafe_get s i in 522 | match f c with 523 | | None -> 524 | if i = max_idx then unsafe_string_sub s 0 i else 525 | let b = Bytes.of_string s in 526 | with_buf b i (i + 1) 527 | | Some cm when cm <> c -> 528 | let b = Bytes.of_string s in 529 | bytes_unsafe_set b i cm; 530 | with_buf b (i + 1) (i + 1) 531 | | Some _ -> 532 | try_no_alloc (i + 1) 533 | in 534 | try_no_alloc 0 535 | 536 | let map f s = 537 | let max_idx = length s - 1 in 538 | let rec with_buf b i = 539 | if i > max_idx then bytes_unsafe_to_string b else 540 | (bytes_unsafe_set b i (f (unsafe_get s i)); with_buf b (i + 1)) 541 | in 542 | let rec try_no_alloc i = 543 | if i > max_idx then s else 544 | let c = unsafe_get s i in 545 | match f c with 546 | | cm when cm <> c -> 547 | let b = Bytes.of_string s in 548 | bytes_unsafe_set b i cm; 549 | with_buf b (i + 1) 550 | | _ -> 551 | try_no_alloc (i + 1) 552 | in 553 | try_no_alloc 0 554 | 555 | let mapi f s = 556 | let max_idx = length s - 1 in 557 | let rec with_buf b i = 558 | if i > max_idx then bytes_unsafe_to_string b else 559 | (bytes_unsafe_set b i (f i (unsafe_get s i)); with_buf b (i + 1)) 560 | in 561 | let rec try_no_alloc i = 562 | if i > max_idx then s else 563 | let c = unsafe_get s i in 564 | match f i c with 565 | | cm when cm <> c -> 566 | let b = Bytes.of_string s in 567 | bytes_unsafe_set b i cm; 568 | with_buf b (i + 1) 569 | | _ -> 570 | try_no_alloc (i + 1) 571 | in 572 | try_no_alloc 0 573 | 574 | let fold_left f acc s = 575 | Astring_base.fold_left f acc s ~first:0 ~last:(length s - 1) 576 | 577 | let fold_right f s acc = 578 | Astring_base.fold_right f s acc ~first:0 ~last:(length s - 1) 579 | 580 | let iter f s = for i = 0 to length s - 1 do f (unsafe_get s i) done 581 | let iteri f s = for i = 0 to length s - 1 do f i (unsafe_get s i) done 582 | 583 | (* Strings as US-ASCII code point sequences *) 584 | 585 | module Ascii = struct 586 | 587 | let is_valid s = 588 | let max_idx = length s - 1 in 589 | let rec loop i = 590 | if i > max_idx then true else 591 | if unsafe_get s i > Astring_char.Ascii.max_ascii then false else 592 | loop (i + 1) 593 | in 594 | loop 0 595 | 596 | (* Casing transforms *) 597 | 598 | let caseify is_not_case to_case s = 599 | let max_idx = length s - 1 in 600 | let caseify b i = 601 | for k = i to max_idx do 602 | bytes_unsafe_set b k (to_case (unsafe_get s k)) 603 | done; 604 | bytes_unsafe_to_string b 605 | in 606 | let rec try_no_alloc i = 607 | if i > max_idx then s else 608 | if is_not_case (unsafe_get s i) then caseify (Bytes.of_string s) i else 609 | try_no_alloc (i + 1) 610 | in 611 | try_no_alloc 0 612 | 613 | let uppercase s = 614 | caseify Astring_char.Ascii.is_lower Astring_char.Ascii.uppercase s 615 | 616 | let lowercase s = 617 | caseify Astring_char.Ascii.is_upper Astring_char.Ascii.lowercase s 618 | 619 | let caseify_first is_not_case to_case s = 620 | if length s = 0 then s else 621 | let c = unsafe_get s 0 in 622 | if not (is_not_case c) then s else 623 | let b = Bytes.of_string s in 624 | bytes_unsafe_set b 0 (to_case c); 625 | bytes_unsafe_to_string b 626 | 627 | let capitalize s = 628 | caseify_first Astring_char.Ascii.is_lower Astring_char.Ascii.uppercase s 629 | 630 | let uncapitalize s = 631 | caseify_first Astring_char.Ascii.is_upper Astring_char.Ascii.lowercase s 632 | 633 | (* Escape *) 634 | 635 | let escape = Astring_escape.escape 636 | let unescape = Astring_escape.unescape 637 | let escape_string = Astring_escape.escape_string 638 | let unescape_string = Astring_escape.unescape_string 639 | end 640 | 641 | (* Pretty printing *) 642 | 643 | let pp = Format.pp_print_string 644 | let dump ppf s = 645 | Format.pp_print_char ppf '"'; 646 | Format.pp_print_string ppf (Ascii.escape_string s); 647 | Format.pp_print_char ppf '"'; 648 | () 649 | 650 | (* String sets and maps *) 651 | 652 | module Set = struct 653 | include Set.Make (String) 654 | 655 | let pp ?sep:(pp_sep = Format.pp_print_cut) pp_elt ppf ss = 656 | let pp_elt elt is_first = 657 | if is_first then () else pp_sep ppf (); 658 | pp_elt ppf elt; false 659 | in 660 | ignore (fold pp_elt ss true) 661 | 662 | let dump_str = dump 663 | let dump ppf ss = 664 | let pp_elt elt is_first = 665 | if is_first then () else Format.fprintf ppf "@ "; 666 | Format.fprintf ppf "%a" dump_str elt; 667 | false 668 | in 669 | Format.fprintf ppf "@[<1>{"; 670 | ignore (fold pp_elt ss true); 671 | Format.fprintf ppf "}@]"; 672 | () 673 | 674 | let err_empty () = invalid_arg "empty set" 675 | let err_absent s ss = 676 | invalid_arg (strf "%a not in set %a" dump_str s dump ss) 677 | 678 | let get_min_elt ss = try min_elt ss with Not_found -> err_empty () 679 | let min_elt ss = try Some (min_elt ss) with Not_found -> None 680 | 681 | let get_max_elt ss = try max_elt ss with Not_found -> err_empty () 682 | let max_elt ss = try Some (max_elt ss) with Not_found -> None 683 | 684 | let get_any_elt ss = try choose ss with Not_found -> err_empty () 685 | let choose ss = try Some (choose ss) with Not_found -> None 686 | 687 | let get s ss = try find s ss with Not_found -> err_absent s ss 688 | let find s ss = try Some (find s ss) with Not_found -> None 689 | 690 | let of_list = List.fold_left (fun acc s -> add s acc) empty 691 | 692 | let of_stdlib_set s = s 693 | let to_stdlib_set s = s 694 | end 695 | 696 | module Map = struct 697 | include Map.Make (String) 698 | 699 | let err_empty () = invalid_arg "empty map" 700 | let err_absent s = invalid_arg (strf "%a is not bound in map" dump s) 701 | 702 | let get_min_binding m = try min_binding m with Not_found -> err_empty () 703 | let min_binding m = try Some (min_binding m) with Not_found -> None 704 | 705 | let get_max_binding m = try max_binding m with Not_found -> err_empty () 706 | let max_binding m = try Some (max_binding m) with Not_found -> None 707 | 708 | let get_any_binding m = try choose m with Not_found -> err_empty () 709 | let choose m = try Some (choose m) with Not_found -> None 710 | 711 | let get k s = try find k s with Not_found -> err_absent k 712 | let find k m = try Some (find k m) with Not_found -> None 713 | 714 | let dom m = fold (fun k _ acc -> Set.add k acc) m Set.empty 715 | 716 | let of_list bs = List.fold_left (fun m (k,v) -> add k v m) empty bs 717 | 718 | let of_stdlib_map m = m 719 | let to_stdlib_map m = m 720 | 721 | let pp ?sep:(pp_sep = Format.pp_print_cut) pp_binding ppf (m : 'a t) = 722 | let pp_binding k v is_first = 723 | if is_first then () else pp_sep ppf (); 724 | pp_binding ppf (k, v); false 725 | in 726 | ignore (fold pp_binding m true) 727 | 728 | let dump_str = dump 729 | let dump pp_v ppf m = 730 | let pp_binding k v is_first = 731 | if is_first then () else Format.fprintf ppf "@ "; 732 | Format.fprintf ppf "@[<1>(@[%a@],@ @[%a@])@]" dump k pp_v v; 733 | false 734 | in 735 | Format.fprintf ppf "@[<1>{"; 736 | ignore (fold pp_binding m true); 737 | Format.fprintf ppf "}@]"; 738 | () 739 | 740 | let dump_string_map ppf m = dump dump_str ppf m 741 | end 742 | 743 | type set = Set.t 744 | type 'a map = 'a Map.t 745 | 746 | (* Uniqueness *) 747 | 748 | let uniquify ss = 749 | let add (seen, ss as acc) v = 750 | if Set.mem v seen then acc else (Set.add v seen, v :: ss) 751 | in 752 | List.rev (snd (List.fold_left add (Set.empty, []) ss)) 753 | 754 | (* OCaml base type conversions *) 755 | 756 | let of_char = Astring_base.of_char 757 | let to_char = Astring_base.to_char 758 | let of_bool = Astring_base.of_bool 759 | let to_bool = Astring_base.to_bool 760 | let of_int = Astring_base.of_int 761 | let to_int = Astring_base.to_int 762 | let of_nativeint = Astring_base.of_nativeint 763 | let to_nativeint = Astring_base.to_nativeint 764 | let of_int32 = Astring_base.of_int32 765 | let to_int32 = Astring_base.to_int32 766 | let of_int64 = Astring_base.of_int64 767 | let to_int64 = Astring_base.to_int64 768 | let of_float = Astring_base.of_float 769 | let to_float = Astring_base.to_float 770 | 771 | (*--------------------------------------------------------------------------- 772 | Copyright (c) 2015 The astring programmers 773 | 774 | Permission to use, copy, modify, and/or distribute this software for any 775 | purpose with or without fee is hereby granted, provided that the above 776 | copyright notice and this permission notice appear in all copies. 777 | 778 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 779 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 780 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 781 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 782 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 783 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 784 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 785 | ---------------------------------------------------------------------------*) 786 | -------------------------------------------------------------------------------- /src/astring.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Alternative [Char] and [String] modules. 7 | 8 | Open the module to use it. This defines {{!strf}one value} in your 9 | scope, redefines the [(^)] operator, the [Char] module and the [String] 10 | module. 11 | 12 | Consult the {{!diff}differences} with the OCaml 13 | {{!Stdlib.String}[String]} 14 | module, the {{!port}porting guide} and a few {{!examples}examples}. *) 15 | 16 | (** {1 String} *) 17 | 18 | val strf : ('a, Format.formatter, unit, string) format4 -> 'a 19 | (** [strf] is {!Format.asprintf}. *) 20 | 21 | val ( ^ ) : string -> string -> string 22 | (** [s ^ s'] is {!val:String.append}. *) 23 | 24 | (** Characters (bytes in fact). *) 25 | module Char : sig 26 | 27 | (** {1 Bytes} *) 28 | 29 | type t = char 30 | (** The type for bytes. *) 31 | 32 | val of_byte : int -> char 33 | (** [of_byte b] is a byte from [b]. 34 | 35 | @raise Invalid_argument if [b] is not in the range \[[0x00];[0xFF]\]. *) 36 | 37 | (**/**) 38 | val unsafe_of_byte : int -> char 39 | (**/**) 40 | 41 | val of_int : int -> char option 42 | (** [of_int b] is a byte from [b]. [None] is returned if [b] is not in the 43 | range \[[0x00];[0xFF]\]. *) 44 | 45 | val to_int : char -> int 46 | (** [to_int b] is the byte [b] as an integer. *) 47 | 48 | val hash : char -> int 49 | (** [hash] is {!Hashtbl.hash}. *) 50 | 51 | (** {1:pred Predicates} *) 52 | 53 | val equal : char -> char -> bool 54 | (** [equal b b'] is [b = b']. *) 55 | 56 | val compare : char -> char -> int 57 | (** [compare b b'] is {!Stdlib.compare}[ b b']. *) 58 | 59 | (** {1 Bytes as US-ASCII characters} *) 60 | 61 | (** US-ASCII character support 62 | 63 | The following functions act only on US-ASCII code points, that 64 | is on the bytes in range \[[0x00];[0x7F]\]. The functions can 65 | be safely used on UTF-8 encoded strings, they will of course 66 | only deal with US-ASCII related matters. 67 | 68 | {b References.} 69 | {ul 70 | {- Vint Cerf. 71 | {{:http://tools.ietf.org/html/rfc20} 72 | {e ASCII format for Network Interchange}}. RFC 20, 1969.}} *) 73 | module Ascii : sig 74 | 75 | (** {1 Predicates} *) 76 | 77 | val is_valid : char -> bool 78 | (** [is_valid c] is [true] iff [c] is an US-ASCII character, 79 | that is a byte in the range \[[0x00];[0x7F]\]. *) 80 | 81 | val is_digit : char -> bool 82 | (** [is_digit c] is [true] iff [c] is an US-ASCII digit 83 | ['0'] ... ['9'], that is a byte in the range \[[0x30];[0x39]\]. *) 84 | 85 | val is_hex_digit : char -> bool 86 | (** [is_hex_digit c] is [true] iff [c] is an US-ASCII hexadecimal 87 | digit ['0'] ... ['9'], ['a'] ... ['f'], ['A'] ... ['F'], 88 | that is a byte in one of the ranges \[[0x30];[0x39]\], 89 | \[[0x41];[0x46]\], \[[0x61];[0x66]\]. *) 90 | 91 | val is_upper : char -> bool 92 | (** [is_upper c] is [true] iff [c] is an US-ASCII uppercase 93 | letter ['A'] ... ['Z'], that is a byte in the range 94 | \[[0x41];[0x5A]\]. *) 95 | 96 | val is_lower : char -> bool 97 | (** [is_lower c] is [true] iff [c] is an US-ASCII lowercase 98 | letter ['a'] ... ['z'], that is a byte in the range 99 | \[[0x61];[0x7A]\]. *) 100 | 101 | val is_letter : char -> bool 102 | (** [is_letter c] is [is_lower c || is_upper c]. *) 103 | 104 | val is_alphanum : char -> bool 105 | (** [is_alphanum c] is [is_letter c || is_digit c]. *) 106 | 107 | val is_white : char -> bool 108 | (** [is_white c] is [true] iff [c] is an US-ASCII white space 109 | character, that is one of space [' '] ([0x20]), tab ['\t'] 110 | ([0x09]), newline ['\n'] ([0x0A]), vertical tab ([0x0B]), form 111 | feed ([0x0C]), carriage return ['\r'] ([0x0D]). *) 112 | 113 | val is_blank : char -> bool 114 | (** [is_blank c] is [true] iff [c] is an US-ASCII blank character, 115 | that is either space [' '] ([0x20]) or tab ['\t'] ([0x09]). *) 116 | 117 | val is_graphic : char -> bool 118 | (** [is_graphic c] is [true] iff [c] is an US-ASCII graphic 119 | character that is a byte in the range \[[0x21];[0x7E]\]. *) 120 | 121 | val is_print : char -> bool 122 | (** [is_print c] is [is_graphic c || c = ' ']. *) 123 | 124 | val is_control : char -> bool 125 | (** [is_control c] is [true] iff [c] is an US-ASCII control character, 126 | that is a byte in the range \[[0x00];[0x1F]\] or [0x7F]. *) 127 | 128 | (** {1 Casing transforms} *) 129 | 130 | val uppercase : char -> char 131 | (** [uppercase c] is [c] with US-ASCII characters ['a'] to ['z'] mapped 132 | to ['A'] to ['Z']. *) 133 | 134 | val lowercase : char -> char 135 | (** [lowercase c] is [c] with US-ASCII characters ['A'] to ['Z'] mapped 136 | to ['a'] to ['z']. *) 137 | 138 | (** {1 Escaping to printable US-ASCII} *) 139 | 140 | val escape : char -> string 141 | (** [escape c] escapes [c] with: 142 | {ul 143 | {- ['\\'] ([0x5C]) escaped to the sequence ["\\\\"] ([0x5C],[0x5C]).} 144 | {- Any byte in the ranges \[[0x00];[0x1F]\] and 145 | \[[0x7F];[0xFF]\] escaped by an {e hexadecimal} ["\xHH"] 146 | escape with [H] a capital hexadecimal number. These bytes 147 | are the US-ASCII control characters and non US-ASCII bytes.} 148 | {- Any other byte is left unchanged.}} 149 | 150 | Use {!String.Ascii.unescape} to unescape. *) 151 | 152 | val escape_char : char -> string 153 | (** [escape_char c] is like {!escape} except is escapes [s] according 154 | to OCaml's lexical conventions for characters with: 155 | {ul 156 | {- ['\b'] ([0x08]) escaped to the sequence ["\\b"] ([0x5C,0x62]).} 157 | {- ['\t'] ([0x09]) escaped to the sequence ["\\t"] ([0x5C,0x74]).} 158 | {- ['\n'] ([0x0A]) escaped to the sequence ["\\n"] ([0x5C,0x6E]).} 159 | {- ['\r'] ([0x0D]) escaped to the sequence ["\\r"] ([0x5C,0x72]).} 160 | {- ['\\''] ([0x27]) escaped to the sequence ["\\'"] ([0x5C,0x27]).} 161 | {- Other bytes follow the rules of {!escape}}} 162 | 163 | Use {!String.Ascii.unescape_string} to unescape. *) 164 | end 165 | 166 | (** {1:pp Pretty printing} *) 167 | 168 | val pp : Format.formatter -> char -> unit 169 | (** [pp ppf c] prints [c] on [ppf]. *) 170 | 171 | val dump : Format.formatter -> char -> unit 172 | (** [dump ppf c] prints [c] as a syntactically valid OCaml 173 | char on [ppf] using {!Ascii.escape_char} *) 174 | end 175 | 176 | (** Strings, {{!Sub}substrings}, string {{!Set}sets} and {{!Map}maps}. 177 | 178 | A string [s] of length [l] is a zero-based indexed sequence of [l] 179 | bytes. An index [i] of [s] is an integer in the range 180 | \[[0];[l-1]\], it represents the [i]th byte of [s] which can be 181 | accessed using the string indexing operator [s.[i]]. 182 | 183 | {b Important.} OCaml's [string]s became immutable since 4.02. 184 | Whenever possible compile your code with the [-safe-string] 185 | option. This module does not expose any mutable operation on 186 | strings and {b assumes} strings are immutable. See the 187 | {{!port}porting guide}. *) 188 | module String : sig 189 | 190 | (** {1 String} *) 191 | 192 | type t = string 193 | (** The type for strings. Finite sequences of immutable bytes. *) 194 | 195 | val empty : string 196 | (** [empty] is an empty string. *) 197 | 198 | val v : len:int -> (int -> char) -> string 199 | (** [v len f] is a string [s] of length [len] with [s.[i] = f 200 | i] for all indices [i] of [s]. [f] is invoked 201 | in increasing index order. 202 | 203 | @raise Invalid_argument if [len] is not in the range \[[0]; 204 | {!Sys.max_string_length}\]. *) 205 | 206 | val length : string -> int 207 | (** [length s] is the number of bytes in [s]. *) 208 | 209 | val get : string -> int -> char 210 | (** [get s i] is the byte of [s]' at index [i]. This is 211 | equivalent to the [s.[i]] notation. 212 | 213 | @raise Invalid_argument if [i] is not an index of [s]. *) 214 | 215 | val get_byte : string -> int -> int 216 | (** [get_byte s i] is [Char.to_int (get s i)] *) 217 | 218 | (**/**) 219 | val unsafe_get : string -> int -> char 220 | val unsafe_get_byte : string -> int -> int 221 | (**/**) 222 | 223 | val head : ?rev:bool -> string -> char option 224 | (** [head s] is [Some (get s h)] with [h = 0] if [rev = false] (default) or 225 | [h = length s - 1] if [rev = true]. [None] is returned if [s] is 226 | empty. *) 227 | 228 | val get_head : ?rev:bool -> string -> char 229 | (** [get_head s] is like {!head} but @raise Invalid_argument if [s] 230 | is empty. *) 231 | 232 | val hash : string -> int 233 | (** [hash s] is {!Hashtbl.hash}[ s]. *) 234 | 235 | (** {1:append Appending strings} *) 236 | 237 | val append : string -> string -> string 238 | (** [append s s'] appends [s'] to [s]. This is equivalent to 239 | [s ^ s']. 240 | 241 | @raise Invalid_argument if the result is longer than 242 | {!Sys.max_string_length}. *) 243 | 244 | val concat : ?sep:string -> string list -> string 245 | (** [concat ~sep ss] concatenates the list of strings [ss], separating 246 | each consecutive elements in the list [ss] with [sep] (defaults to 247 | {!empty}). 248 | 249 | @raise Invalid_argument if the result is longer than 250 | {!Sys.max_string_length}. *) 251 | 252 | (** {1 Predicates} *) 253 | 254 | val is_empty : string -> bool 255 | (** [is_empty s] is [length s = 0]. *) 256 | 257 | val is_prefix : affix:string -> string -> bool 258 | (** [is_prefix ~affix s] is [true] iff [affix.[i] = s.[i]] for 259 | all indices [i] of [affix]. *) 260 | 261 | val is_infix : affix:string -> string -> bool 262 | (** [is_infix ~affix s] is [true] iff there exists an index [j] in [s] such 263 | that for all indices [i] of [affix] we have [affix.[i] = s.[j + i]]. *) 264 | 265 | val is_suffix : affix:string -> string -> bool 266 | (** [is_suffix ~affix s] is true iff [affix.[n - i] = s.[m - i]] for all 267 | indices [i] of [affix] with [n = String.length affix - 1] and [m = 268 | String.length s - 1]. *) 269 | 270 | val for_all : (char -> bool) -> string -> bool 271 | (** [for_all p s] is [true] iff for all indices [i] of [s], [p s.[i] 272 | = true]. *) 273 | 274 | val exists : (char -> bool) -> string -> bool 275 | (** [exists p s] is [true] iff there exists an index [i] of [s] with 276 | [p s.[i] = true]. *) 277 | 278 | val equal : string -> string -> bool 279 | (** [equal s s'] is [s = s']. *) 280 | 281 | val compare : string -> string -> int 282 | (** [compare s s'] is [Stdlib.compare s s'], it compares the 283 | byte sequences of [s] and [s'] in lexicographical order. *) 284 | 285 | (** {1:extract Extracting substrings} 286 | 287 | {b Tip.} These functions extract substrings as new strings. Using 288 | {{!Sub}substrings} may be less wasteful and more flexible. *) 289 | 290 | val with_range : ?first:int -> ?len:int -> string -> string 291 | (** [with_range ~first ~len s] are the consecutive bytes of [s] whose 292 | indices exist in the range \[[first];[first + len - 1]\]. 293 | 294 | [first] defaults to [0] and [len] to [max_int]. Note that 295 | [first] can be any integer and [len] any positive integer. 296 | 297 | @raise Invalid_argument if [len] is negative. *) 298 | 299 | val with_index_range : ?first:int -> ?last:int -> string -> string 300 | (** [with_index_range ~first ~last s] are the consecutive bytes of 301 | [s] whose indices exist in the range \[[first];[last]\]. 302 | 303 | [first] defaults to [0] and [last] to [String.length s - 1]. 304 | 305 | Note that both [first] and [last] can be any integer. If 306 | [first > last] the interval is empty and the empty string 307 | is returned. *) 308 | 309 | val trim : ?drop:(char -> bool) -> string -> string 310 | (** [trim ~drop s] is [s] with prefix and suffix bytes satisfying 311 | [drop] in [s] removed. [drop] defaults to {!Char.Ascii.is_white}. *) 312 | 313 | val span : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> 314 | string -> (string * string) 315 | (** [span ~rev ~min ~max ~sat s] is [(l, r)] where: 316 | {ul 317 | {- if [rev] is [false] (default), [l] is at least [min] 318 | and at most [max] consecutive [sat] satisfying initial bytes of 319 | [s] or {!empty} if there are no such bytes. [r] are the remaining 320 | bytes of [s].} 321 | {- if [rev] is [true], [r] is at least [min] and at most [max] 322 | consecutive [sat] satisfying final bytes of [s] or {!empty} 323 | if there are no such bytes. [l] are the remaining 324 | the bytes of [s].}} 325 | If [max] is unspecified the span is unlimited. If [min] 326 | is unspecified it defaults to [0]. If [min > max] the condition 327 | can't be satisfied and the left or right span, depending on [rev], is 328 | always empty. [sat] defaults to [(fun _ -> true)]. 329 | 330 | The invariant [l ^ r = s] holds. 331 | 332 | @raise Invalid_argument if [max] or [min] is negative. *) 333 | 334 | val take : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> 335 | string -> string 336 | (** [take ~rev ~min ~max ~sat s] is the matching span of {!span} without 337 | the remaining one. In other words: 338 | {[(if rev then snd else fst) @@ span ~rev ~min ~max ~sat s]} *) 339 | 340 | val drop : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> 341 | string -> string 342 | (** [drop ~rev ~min ~max ~sat s] is the remaining span of {!span} without 343 | the matching span. In other words: 344 | {[(if rev then fst else snd) @@ span ~rev ~min ~max ~sat s]} *) 345 | 346 | val cut : ?rev:bool -> sep:string -> string -> (string * string) option 347 | (** [cut ~sep s] is either the pair [Some (l,r)] of the two 348 | (possibly empty) substrings of [s] that are delimited by the 349 | first match of the non empty separator string [sep] or [None] if 350 | [sep] can't be matched in [s]. Matching starts from the 351 | beginning of [s] ([rev] is [false], default) or the end ([rev] 352 | is [true]). 353 | 354 | The invariant [l ^ sep ^ r = s] holds. 355 | 356 | @raise Invalid_argument if [sep] is the empty string. *) 357 | 358 | val cuts : ?rev:bool -> ?empty:bool -> sep:string -> string -> string list 359 | (** [cuts sep s] is the list of all substrings of [s] that are 360 | delimited by matches of the non empty separator string 361 | [sep]. Empty substrings are omitted in the list if [empty] is 362 | [false] (defaults to [true]). 363 | 364 | Matching separators in [s] starts from the beginning of [s] 365 | ([rev] is [false], default) or the end ([rev] is [true]). Once 366 | one is found, the separator is skipped and matching starts 367 | again, that is separator matches can't overlap. If there is no 368 | separator match in [s], the list [[s]] is returned. 369 | 370 | The following invariants hold: 371 | {ul 372 | {- [concat ~sep (cuts ~empty:true ~sep s) = s]} 373 | {- [cuts ~empty:true ~sep s <> []]}} 374 | 375 | @raise Invalid_argument if [sep] is the empty string. *) 376 | 377 | val fields : ?empty:bool -> ?is_sep:(char -> bool) -> string -> string list 378 | (** [fields ~empty ~is_sep s] is the list of (possibly empty) 379 | substrings that are delimited by bytes for which [is_sep] is 380 | [true]. Empty substrings are omitted in the list if [empty] is 381 | [false] (defaults to [true]). [is_sep] defaults to 382 | {!Char.Ascii.is_white}. *) 383 | 384 | (** {1:subs Substrings} *) 385 | 386 | type sub 387 | (** The type for {{!Sub}substrings}. *) 388 | 389 | val sub : ?start:int -> ?stop:int -> string -> sub 390 | (** [sub] is {!Sub.v}. *) 391 | 392 | val sub_with_range : ?first:int -> ?len:int -> string -> sub 393 | (** [sub_with_range] is like {!with_range} but returns a substring 394 | value. If [first] is smaller than [0] the empty string at the start 395 | of [s] is returned. If [first] is greater than the last index of [s] 396 | the empty string at the end of [s] is returned. *) 397 | 398 | val sub_with_index_range : ?first:int -> ?last:int -> string -> sub 399 | (** [sub_with_index_range] is like {!with_index_range} but returns 400 | a substring value. If [first] and [last] are smaller than [0] 401 | the empty string at the start of [s] is returned. If [first] and 402 | is greater than the last index of [s] the empty string at 403 | the end of [s] is returned. If [first > last] and [first] is an 404 | index of [s] the empty string at [first] is returned. *) 405 | 406 | (** Substrings. 407 | 408 | A substring defines a possibly empty subsequence of bytes in 409 | a {e base} string. 410 | 411 | The positions of a string [s] of length [l] are the slits found 412 | before each byte and after the last byte of the string. They 413 | are labelled from left to right by increasing number in the 414 | range \[[0];[l]\]. 415 | {v 416 | positions 0 1 2 3 4 l-1 l 417 | +---+---+---+---+ +-----+ 418 | indices | 0 | 1 | 2 | 3 | ... | l-1 | 419 | +---+---+---+---+ +-----+ 420 | v} 421 | 422 | The [i]th byte index is between positions [i] and [i+1]. 423 | 424 | Formally we define a substring of [s] as being a subsequence 425 | of bytes defined by a {e start} and a {e stop} position. The 426 | former is always smaller or equal to the latter. When both 427 | positions are equal the substring is {e empty}. Note that for a 428 | given base string there are as many empty substrings as there 429 | are positions in the string. 430 | 431 | Like in strings, we index the bytes of a substring using 432 | zero-based indices. 433 | 434 | See how to {{!examples}use} substrings to parse data. *) 435 | module Sub : sig 436 | 437 | (** {1 Substrings} *) 438 | 439 | type t = sub 440 | (** The type for substrings. *) 441 | 442 | val empty : sub 443 | (** [empty] is the empty substring of the empty string {!String.empty}. *) 444 | 445 | val v : ?start:int -> ?stop:int -> string -> sub 446 | (** [v ~start ~stop s] is the substring of [s] that starts 447 | at position [start] (defaults to [0]) and stops at position 448 | [stop] (defaults to [String.length s]). 449 | 450 | @raise Invalid_argument if [start] or [stop] are not positions of 451 | [s] or if [stop < start]. *) 452 | 453 | val start_pos : sub -> int 454 | (** [start_pos s] is [s]'s start position in the base string. *) 455 | 456 | val stop_pos : sub -> int 457 | (** [stop_pos s] is [s]'s stop position in the base string. *) 458 | 459 | val base_string : sub -> string 460 | (** [base_string s] is [s]'s base string. *) 461 | 462 | val length : sub -> int 463 | (** [length s] is the number of bytes in [s]. *) 464 | 465 | val get : sub -> int -> char 466 | (** [get s i] is the byte of [s] at its zero-based index [i]. 467 | 468 | @raise Invalid_argument if [i] is not an index of [s]. *) 469 | 470 | val get_byte : sub -> int -> int 471 | (** [get_byte s i] is [Char.to_int (get s i)]. *) 472 | 473 | (**/**) 474 | val unsafe_get : sub -> int -> char 475 | val unsafe_get_byte : sub -> int -> int 476 | (**/**) 477 | 478 | val head : ?rev:bool -> sub -> char option 479 | (** [head s] is [Some (get s h)] with [h = 0] if [rev = false] (default) or 480 | [h = length s - 1] if [rev = true]. [None] is returned if [s] is 481 | empty. *) 482 | 483 | val get_head : ?rev:bool -> sub -> char 484 | (** [get_head s] is like {!head} but @raise Invalid_argument if [s] 485 | is empty. *) 486 | 487 | val of_string : string -> sub 488 | (** [of_string s] is [v s] *) 489 | 490 | val to_string : sub -> string 491 | (** [to_string s] is the bytes of [s] as a string. *) 492 | 493 | val rebase : sub -> sub 494 | (** [rebase s] is [v (to_string s)]. This puts [s] on a base 495 | string made solely of its bytes. *) 496 | 497 | val hash : sub -> int 498 | (** [hash s] is {!Hashtbl.hash s}. *) 499 | 500 | (** {1:stretch Stretching substrings} 501 | 502 | See the {{!fig}graphical guide}. *) 503 | 504 | val start : sub -> sub 505 | (** [start s] is the empty substring at the start position of [s]. *) 506 | 507 | val stop : sub -> sub 508 | (** [stop s] is the empty substring at the stop position of [s]. *) 509 | 510 | val base : sub -> sub 511 | (** [base s] is a substring that spans the whole base string of [s]. *) 512 | 513 | val tail : ?rev:bool -> sub -> sub 514 | (** [tail s] is [s] without its first ([rev] is [false], default) 515 | or last ([rev] is [true]) byte or [s] if it is empty. *) 516 | 517 | val extend : ?rev:bool -> ?max:int -> ?sat:(char -> bool) -> sub -> sub 518 | (** [extend ~rev ~max ~sat s] extends [s] by at most [max] 519 | consecutive [sat] satisfiying bytes of the base string located 520 | after [stop s] ([rev] is [false], default) or before [start s] 521 | ([rev] is [true]). If [max] is unspecified the extension is 522 | limited by the extents of the base string of [s]. [sat] 523 | defaults to [fun _ -> true]. 524 | 525 | @raise Invalid_argument if [max] is negative. *) 526 | 527 | val reduce : ?rev:bool -> ?max:int -> ?sat:(char -> bool) -> sub -> sub 528 | (** [reduce ~rev ~max ~sat s] reduces [s] by at most [max] 529 | consecutive [sat] satisfying bytes of [s] located before [stop 530 | s] ([rev] is [false], default) or after [start s] ([rev] is 531 | [true]). If [max] is unspecified the reduction is limited by 532 | the extents of the substring [s]. [sat] defaults to [fun _ -> 533 | true]. 534 | 535 | @raise Invalid_argument if [max] is negative. *) 536 | 537 | val extent : sub -> sub -> sub 538 | (** [extent s s'] is the smallest substring that includes all the 539 | positions of [s] and [s']. 540 | 541 | @raise Invalid_argument if [s] and [s'] are not on the same base 542 | string according to physical equality. *) 543 | 544 | val overlap : sub -> sub -> sub option 545 | (** [overlap s s'] is the smallest substring that includes all the 546 | positions common to [s] and [s'] or [None] if there are no 547 | such positions. Note that the overlap substring may be empty. 548 | 549 | @raise Invalid_argument if [s] and [s'] are not on the same base 550 | string according to physical equality. *) 551 | 552 | (** {1:append Appending substrings} *) 553 | 554 | val append : sub -> sub -> sub 555 | (** [append s s'] is like {!String.append}. The substrings can be 556 | on different bases and the result is on a base string that holds 557 | exactly the appended bytes. *) 558 | 559 | val concat : ?sep:sub -> sub list -> sub 560 | (** [concat ~sep ss] is like {!String.concat}. The substrings can 561 | all be on different bases and the result is on a base string that 562 | holds exactly the concatenated bytes. *) 563 | 564 | (** {1:pred Predicates} *) 565 | 566 | val is_empty : sub -> bool 567 | (** [is_empty s] is [length s = 0]. *) 568 | 569 | val is_prefix : affix:sub -> sub -> bool 570 | (** [is_prefix] is like {!String.is_prefix}. Only bytes 571 | are compared, [affix] can be on a different base string. *) 572 | 573 | val is_infix : affix:sub -> sub -> bool 574 | (** [is_infix] is like {!String.is_infix}. Only bytes 575 | are compared, [affix] can be on a different base string. *) 576 | 577 | val is_suffix : affix:sub -> sub -> bool 578 | (** [is_suffix] is like {!String.is_suffix}. Only bytes 579 | are compared, [affix] can be on a different base string. *) 580 | 581 | val for_all : (char -> bool) -> sub -> bool 582 | (** [for_all] is like {!String.for_all} on the substring. *) 583 | 584 | val exists : (char -> bool) -> sub -> bool 585 | (** [exists] is like {!String.exists} on the substring. *) 586 | 587 | val same_base : sub -> sub -> bool 588 | (** [same_base s s'] is [true] iff the substrings [s] and [s'] 589 | have the same base string according to physical equality. *) 590 | 591 | val equal_bytes : sub -> sub -> bool 592 | (** [equal_bytes s s'] is [true] iff the substrings [s] and [s'] have 593 | exactly the same bytes. The substrings can be on a different 594 | base string. *) 595 | 596 | val compare_bytes : sub -> sub -> int 597 | (** [compare_bytes s s'] compares the bytes of [s] and [s]' in 598 | lexicographical order. The substrings can be on a different 599 | base string. *) 600 | 601 | val equal : sub -> sub -> bool 602 | (** [equal s s'] is [true] iff [s] and [s'] have the same positions. 603 | 604 | @raise Invalid_argument if [s] and [s'] are not on the same base 605 | string according to physical equality. *) 606 | 607 | val compare : sub -> sub -> int 608 | (** [compare s s'] compares the positions of [s] and [s'] in 609 | lexicographical order. 610 | 611 | @raise Invalid_argument if [s] and [s'] are not on the same base 612 | string according to physical equality. *) 613 | 614 | (** {1:extract Extracting substrings} 615 | 616 | Extracted substrings are always on the same base string as the 617 | substring [s] acted upon. *) 618 | 619 | val with_range : ?first:int -> ?len:int -> sub -> sub 620 | (** [with_range] is like {!String.sub_with_range}. The indices are the 621 | substring's zero-based ones, not those in the base string. *) 622 | 623 | val with_index_range : ?first:int -> ?last:int -> sub -> sub 624 | (** [with_index_range] is like {!String.sub_with_index_range}. The 625 | indices are the substring's zero-based ones, not those in the 626 | base string. *) 627 | 628 | val trim : ?drop:(char -> bool) -> sub -> sub 629 | (** [trim] is like {!String.trim}. If all bytes are dropped returns 630 | an empty string located in the middle of the argument. *) 631 | 632 | val span : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> 633 | sub -> (sub * sub) 634 | (** [span] is like {!String.span}. For a substring [s] a left 635 | empty span is [start s] and a right empty span is [stop s]. *) 636 | 637 | val take : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> 638 | sub -> sub 639 | (** [take] is like {!String.take}. *) 640 | 641 | val drop : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> 642 | sub -> sub 643 | (** [drop] is like {!String.drop}. *) 644 | 645 | val cut : ?rev:bool -> sep:sub -> sub -> (sub * sub) option 646 | (** [cut] is like {!String.cut}. [sep] can be on a different base string *) 647 | 648 | val cuts : ?rev:bool -> ?empty:bool -> sep:sub -> sub -> sub list 649 | (** [cuts] is like {!String.cuts}. [sep] can be on a different base 650 | string *) 651 | 652 | val fields : ?empty:bool -> ?is_sep:(char -> bool) -> sub -> sub list 653 | (** [fields] is like {!String.fields}. *) 654 | 655 | (** {1:traverse Traversing substrings} *) 656 | 657 | val find : ?rev:bool -> (char -> bool) -> sub -> sub option 658 | (** [find ~rev sat s] is the substring of [s] (if any) that spans the 659 | first byte that satisfies [sat] in [s] after position [start s] 660 | ([rev] is [false], default) or before [stop s] ([rev] is [true]). 661 | [None] is returned if there is no matching byte in [s]. *) 662 | 663 | val find_sub :?rev:bool -> sub:sub -> sub -> sub option 664 | (** [find_sub ~rev ~sub s] is the substring of [s] (if any) that 665 | spans the first match of [sub] in [s] after position [start s] 666 | ([rev] is [false], default) or before [stop s] ([rev] is 667 | [true]). Only bytes are compared and [sub] can be on a 668 | different base string. [None] is returned if there is no match of 669 | [sub] in [s]. *) 670 | 671 | val filter : (char -> bool) -> sub -> sub 672 | (** [filter sat s] is like {!String.filter}. The result is on a 673 | base string that holds only the filtered bytes. *) 674 | 675 | val filter_map : (char -> char option) -> sub -> sub 676 | (** [filter_map f s] is like {!String.filter_map}. The result is on a 677 | base string that holds only the filtered bytes. *) 678 | 679 | val map : (char -> char) -> sub -> sub 680 | (** [map] is like {!String.map}. The result is on a base string that 681 | holds only the mapped bytes. *) 682 | 683 | val mapi : (int -> char -> char) -> sub -> sub 684 | (** [mapi] is like {!String.mapi}. The result is on a base string that 685 | holds only the mapped bytes. The indices are the substring's 686 | zero-based ones, not those in the base string. *) 687 | 688 | val fold_left : ('a -> char -> 'a) -> 'a -> sub -> 'a 689 | (** [fold_left] is like {!String.fold_left}. *) 690 | 691 | val fold_right : (char -> 'a -> 'a) -> sub -> 'a -> 'a 692 | (** [fold_right] is like {!String.fold_right}. *) 693 | 694 | val iter : (char -> unit) -> sub -> unit 695 | (** [iter] is like {!String.iter}. *) 696 | 697 | val iteri : (int -> char -> unit) -> sub -> unit 698 | (** [iteri] is like {!String.iteri}. The indices are the 699 | substring's zero-based ones, not those in the base string. *) 700 | 701 | (** {1:pp Pretty printing} *) 702 | 703 | val pp : Format.formatter -> sub -> unit 704 | (** [pp ppf s] prints [s]'s bytes on [ppf]. *) 705 | 706 | val dump : Format.formatter -> sub -> unit 707 | (** [dump ppf s] prints [s] as a syntactically valid OCaml string 708 | on [ppf] using {!Ascii.escape_string}. *) 709 | 710 | val dump_raw : Format.formatter -> sub -> unit 711 | (** [dump_raw ppf s] prints an unspecified raw internal 712 | representation of [s] on ppf. *) 713 | 714 | (** {1:convert OCaml base type conversions} *) 715 | 716 | val of_char : char -> sub 717 | (** [of_char c] is a string that contains the byte [c]. *) 718 | 719 | val to_char : sub -> char option 720 | (** [to_char s] is the single byte in [s] or [None] if there is no byte 721 | or more than one in [s]. *) 722 | 723 | val of_bool : bool -> sub 724 | (** [of_bool b] is a string representation for [b]. Relies on 725 | {!Stdlib.string_of_bool}. *) 726 | 727 | val to_bool : sub -> bool option 728 | (** [to_bool s] is a [bool] from [s], if any. Relies on 729 | {!Stdlib.bool_of_string}. *) 730 | 731 | val of_int : int -> sub 732 | (** [of_int i] is a string representation for [i]. Relies on 733 | {!Stdlib.string_of_int}. *) 734 | 735 | val to_int : sub -> int option 736 | (** [to_int] is an [int] from [s], if any. Relies on 737 | {!Stdlib.int_of_string}. *) 738 | 739 | val of_nativeint : nativeint -> sub 740 | (** [of_nativeint i] is a string representation for [i]. Relies on 741 | {!Nativeint.of_string}. *) 742 | 743 | val to_nativeint : sub -> nativeint option 744 | (** [to_nativeint] is an [nativeint] from [s], if any. Relies on 745 | {!Nativeint.to_string}. *) 746 | 747 | val of_int32 : int32 -> sub 748 | (** [of_int32 i] is a string representation for [i]. Relies on 749 | {!Int32.of_string}. *) 750 | 751 | val to_int32 : sub -> int32 option 752 | (** [to_int32] is an [int32] from [s], if any. Relies on 753 | {!Int32.to_string}. *) 754 | 755 | val of_int64 : int64 -> sub 756 | (** [of_int64 i] is a string representation for [i]. Relies on 757 | {!Int64.of_string}. *) 758 | 759 | val to_int64 : sub -> int64 option 760 | (** [to_int64] is an [int64] from [s], if any. Relies on 761 | {!Int64.to_string}. *) 762 | 763 | val of_float : float -> sub 764 | (** [of_float f] is a string representation for [f]. Relies on 765 | {!Stdlib.string_of_float}. *) 766 | 767 | val to_float : sub -> float option 768 | (** [to_float s] is a [float] from [s], if any. Relies 769 | on {!Stdlib.float_of_string}. *) 770 | 771 | (** {1:fig Substring stretching graphical guide} 772 | 773 | {v 774 | +---+---+---+---+---+---+---+---+---+---+---+ 775 | | R | e | v | o | l | t | | n | o | w | ! | 776 | +---+---+---+---+---+---+---+---+---+---+---+ 777 | |---------------| a 778 | | start a 779 | | stop a 780 | |-----------| tail a 781 | |-----------| tail ~rev:true a 782 | |-----------------------------------| extend a 783 | |-----------------------| extend ~rev:true a 784 | |-------------------------------------------| base a 785 | |-----------| b 786 | | start b 787 | | stop b 788 | |-------| tail b 789 | |-------| tail ~rev:true b 790 | |-------------------------------------------| extend b 791 | |-----------| extend ~rev:true b 792 | |-------------------------------------------| base b 793 | |-----------------------| extent a b 794 | |---| overlap a b 795 | | c 796 | | start c 797 | | stop c 798 | | tail c 799 | | tail ~rev:true c 800 | |---------------| extend c 801 | |---------------------------| extend ~rev:true c 802 | |-------------------------------------------| base c 803 | |-------------------| extent a c 804 | None overlap a c 805 | |---------------| d 806 | | start d 807 | | stop d 808 | |-----------| tail d 809 | |-----------| tail ~rev:true d 810 | |---------------| extend d 811 | |-------------------------------------------| extend ~rev:true d 812 | |-------------------------------------------| base d 813 | |---------------| extent d c 814 | | overlap d c 815 | v} *) 816 | end 817 | 818 | (** {1:traverse Traversing strings} *) 819 | 820 | val find : ?rev:bool -> ?start:int -> (char -> bool) -> string -> int option 821 | (** [find ~rev ~start sat s] is: 822 | {ul 823 | {- If [rev] is [false] (default). The smallest index [i], if any, 824 | greater or equal to [start] such that [sat s.[i]] is [true]. 825 | [start] defaults to [0].} 826 | {- If [rev] is [true]. The greatest index [i], if any, smaller or equal 827 | to [start] such that [sat s.[i]] is [true]. 828 | [start] defaults to [String.length s - 1].}} 829 | Note that [start] can be any integer. *) 830 | 831 | val find_sub :?rev:bool -> ?start:int -> sub:string -> string -> int option 832 | (** [find_sub ~rev ~start ~sub s] is: 833 | {ul 834 | {- If [rev] is [false] (default). The smallest index [i], if any, 835 | greater or equal to [start] such that [sub] can be found starting 836 | at [i] in [s] that is [s.[i] = sub.[0]], [s.[i+1] = sub.[1]], ... 837 | [start] defaults to [0].} 838 | {- If [rev] is [true]. The greatest index [i], if any, smaller 839 | or equal to [start] such that [sub] can be found starting at 840 | [i] in [s] that is [s.[i] = sub.[0]], [s.[i+1] = sub.[1]], ... 841 | [start] defaults to [String.length s - 1].}} 842 | Note that [start] can be any integer. *) 843 | 844 | val filter : (char -> bool) -> string -> string 845 | (** [filter sat s] is the string made of the bytes of [s] that satisfy [sat], 846 | in the same order. *) 847 | 848 | val filter_map : (char -> char option) -> string -> string 849 | (** [filter_map f s] is the string made of the bytes of [s] as mapped by 850 | [f], in the same order. *) 851 | 852 | val map : (char -> char) -> string -> string 853 | (** [map f s] is [s'] with [s'.[i] = f s.[i]] for all indices [i] 854 | of [s]. [f] is invoked in increasing index order. *) 855 | 856 | val mapi : (int -> char -> char) -> string -> string 857 | (** [mapi f s] is [s'] with [s'.[i] = f i s.[i]] for all indices [i] 858 | of [s]. [f] is invoked in increasing index order. *) 859 | 860 | val fold_left : ('a -> char -> 'a) -> 'a -> string -> 'a 861 | (** [fold_left f acc s] is 862 | [f (]...[(f (f acc s.[0]) s.[1])]...[) s.[m]] 863 | with [m = String.length s - 1]. *) 864 | 865 | val fold_right : (char -> 'a -> 'a) -> string -> 'a -> 'a 866 | (** [fold_right f s acc] is 867 | [f s.[0] (f s.[1] (]...[(f s.[m] acc) )]...[)] 868 | with [m = String.length s - 1]. *) 869 | 870 | val iter : (char -> unit) -> string -> unit 871 | (** [iter f s] is [f s.[0]; f s.[1];] ... 872 | [f s.[m]] with [m = String.length s - 1]. *) 873 | 874 | val iteri : (int -> char -> unit) -> string -> unit 875 | (** [iteri f s] is [f 0 s.[0]; f 1 s.[1];] ... 876 | [f m s.[m]] with [m = String.length s - 1]. *) 877 | 878 | (** {1:unique Uniqueness} *) 879 | 880 | val uniquify : string list -> string list 881 | (** [uniquify ss] is [ss] without duplicates, the list order is 882 | preserved. *) 883 | 884 | (** {1:ascii Strings as US-ASCII character sequences} *) 885 | 886 | (** US-ASCII string support. 887 | 888 | {b References.} 889 | {ul 890 | {- Vint Cerf. 891 | {{:http://tools.ietf.org/html/rfc20} 892 | {e ASCII format for Network Interchange}}. RFC 20, 1969.}} *) 893 | module Ascii : sig 894 | 895 | (** {1:pred Predicates} *) 896 | 897 | val is_valid : string -> bool 898 | (** [is_valid s] is [true] iff only for all indices [i] of [s], 899 | [s.[i]] is an US-ASCII character, i.e. a byte in the range 900 | \[[0x00];[0x7F]\]. *) 901 | 902 | (** {1:case Casing transforms} 903 | 904 | The following functions act only on US-ASCII code points that 905 | is on bytes in range \[[0x00];[0x7F]\], leaving any other byte 906 | intact. The functions can be safely used on UTF-8 encoded 907 | strings; they will of course only deal with US-ASCII 908 | casings. *) 909 | 910 | val uppercase : string -> string 911 | (** [uppercase s] is [s] with US-ASCII characters ['a'] to ['z'] mapped 912 | to ['A'] to ['Z']. *) 913 | 914 | val lowercase : string -> string 915 | (** [lowercase s] is [s] with US-ASCII characters ['A'] to ['Z'] mapped 916 | to ['a'] to ['z']. *) 917 | 918 | val capitalize : string -> string 919 | (** [capitalize s] is like {!uppercase} but performs the map only 920 | on [s.[0]]. *) 921 | 922 | val uncapitalize : string -> string 923 | (** [uncapitalize s] is like {!lowercase} but performs the map only 924 | on [s.[0]]. *) 925 | 926 | (** {1:esc Escaping to printable US-ASCII} *) 927 | 928 | val escape : string -> string 929 | (** [escape s] is [s] with: 930 | {ul 931 | {- Any ['\\'] ([0x5C]) escaped to the sequence 932 | ["\\\\"] ([0x5C],[0x5C]).} 933 | {- Any byte in the ranges \[[0x00];[0x1F]\] and 934 | \[[0x7F];[0xFF]\] escaped by an {e hexadecimal} ["\xHH"] 935 | escape with [H] a capital hexadecimal number. These bytes 936 | are the US-ASCII control characters and non US-ASCII bytes.} 937 | {- Any other byte is left unchanged.}} *) 938 | 939 | val unescape : string -> string option 940 | (** [unescape s] unescapes what {!escape} did. The letters of hex 941 | escapes can be upper, lower or mixed case, and any two letter 942 | hex escape is decoded to its corresponding byte. Any other 943 | escape not defined by {!escape} or truncated escape makes the 944 | function return [None]. 945 | 946 | The invariant [unescape (escape s) = Some s] holds. *) 947 | 948 | val escape_string : string -> string 949 | (** [escape_string s] is like {!escape} except it escapes [s] 950 | according to OCaml's lexical conventions for strings with: 951 | {ul 952 | {- Any ['\b'] ([0x08]) escaped to the sequence ["\\b"] ([0x5C,0x62]).} 953 | {- Any ['\t'] ([0x09]) escaped to the sequence ["\\t"] ([0x5C,0x74]).} 954 | {- Any ['\n'] ([0x0A]) escaped to the sequence ["\\n"] ([0x5C,0x6E]).} 955 | {- Any ['\r'] ([0x0D]) escaped to the sequence ["\\r"] ([0x5C,0x72]).} 956 | {- Any ['\"'] ([0x22]) escaped to the sequence ["\\\""] ([0x5C,0x22]).} 957 | {- Any other byte follows the rules of {!escape}}} *) 958 | 959 | val unescape_string : string -> string option 960 | (** [unescape_string] is to {!escape_string} what {!unescape} 961 | is to {!escape} and also additionally unescapes 962 | the sequence ["\\'"] ([0x5C,0x27]) to ["'"] ([0x27]). *) 963 | end 964 | 965 | (** {1:pp Pretty printing} *) 966 | 967 | val pp : Format.formatter -> string -> unit 968 | (** [pp ppf s] prints [s]'s bytes on [ppf]. *) 969 | 970 | val dump : Format.formatter -> string -> unit 971 | (** [dump ppf s] prints [s] as a syntactically valid OCaml string on 972 | [ppf] using {!Ascii.escape_string}. *) 973 | 974 | (** {1 String sets and maps} *) 975 | 976 | type set 977 | (** The type for string sets. *) 978 | 979 | (** String sets. *) 980 | module Set : sig 981 | 982 | (** {1 String sets} *) 983 | 984 | include Set.S with type elt := string 985 | and type t := set 986 | 987 | type t = set 988 | 989 | val min_elt : set -> string option 990 | (** Exception safe {!Set.S.min_elt}. *) 991 | 992 | val get_min_elt : set -> string 993 | (** [get_min_elt] is like {!min_elt} but @raise Invalid_argument 994 | on the empty set. *) 995 | 996 | val max_elt : set -> string option 997 | (** Exception safe {!Set.S.max_elt}. *) 998 | 999 | val get_max_elt : set -> string 1000 | (** [get_max_elt] is like {!max_elt} but @raise Invalid_argument 1001 | on the empty set. *) 1002 | 1003 | val choose : set -> string option 1004 | (** Exception safe {!Set.S.choose}. *) 1005 | 1006 | val get_any_elt : set -> string 1007 | (** [get_any_elt] is like {!choose} but @raise Invalid_argument on the 1008 | empty set. *) 1009 | 1010 | val find : string -> set -> string option 1011 | (** Exception safe {!Set.S.find}. *) 1012 | 1013 | val get : string -> set -> string 1014 | (** [get] is like {!Set.S.find} but @raise Invalid_argument if 1015 | [elt] is not in [s]. *) 1016 | 1017 | val of_list : string list -> set 1018 | (** [of_list ss] is a set from the list [ss]. *) 1019 | 1020 | val of_stdlib_set : Set.Make(String).t -> set 1021 | (** [of_stdlib_set s] is a set from the stdlib-compatible set [s]. *) 1022 | 1023 | val to_stdlib_set : set -> Set.Make(String).t 1024 | (** [to_stdlib_set s] is the stdlib-compatible set equivalent to [s]. *) 1025 | 1026 | val pp : ?sep:(Format.formatter -> unit -> unit) -> 1027 | (Format.formatter -> string -> unit) -> 1028 | Format.formatter -> set -> unit 1029 | (** [pp ~sep pp_elt ppf ss] formats the elements of [ss] on 1030 | [ppf]. Each element is formatted with [pp_elt] and elements 1031 | are separated by [~sep] (defaults to 1032 | {!Format.pp_print_cut}. If the set is empty leaves [ppf] 1033 | untouched. *) 1034 | 1035 | val dump : Format.formatter -> set -> unit 1036 | (** [dump ppf ss] prints an unspecified representation of [ss] on 1037 | [ppf]. *) 1038 | end 1039 | 1040 | (** String maps. *) 1041 | module Map : sig 1042 | 1043 | (** {1 String maps} *) 1044 | 1045 | include Map.S with type key := string 1046 | 1047 | val min_binding : 'a t -> (string * 'a) option 1048 | (** Exception safe {!Map.S.min_binding}. *) 1049 | 1050 | val get_min_binding : 'a t -> (string * 'a) 1051 | (** [get_min_binding] is like {!min_binding} but @raise Invalid_argument 1052 | on the empty map. *) 1053 | 1054 | val max_binding : 'a t -> (string * 'a) option 1055 | (** Exception safe {!Map.S.max_binding}. *) 1056 | 1057 | val get_max_binding : 'a t -> string * 'a 1058 | (** [get_max_binding] is like {!max_binding} but @raise Invalid_argument 1059 | on the empty map. *) 1060 | 1061 | val choose : 'a t -> (string * 'a) option 1062 | (** Exception safe {!Map.S.choose}. *) 1063 | 1064 | val get_any_binding : 'a t -> (string * 'a) 1065 | (** [get_any_binding] is like {!choose} but @raise Invalid_argument 1066 | on the empty map. *) 1067 | 1068 | val find : string -> 'a t -> 'a option 1069 | (** Exception safe {!Map.S.find}. *) 1070 | 1071 | val get : string -> 'a t -> 'a 1072 | (** [get k m] is like {!Map.S.find} but raises [Invalid_argument] if 1073 | [k] is not bound in [m]. *) 1074 | 1075 | val dom : 'a t -> set 1076 | (** [dom m] is the domain of [m]. *) 1077 | 1078 | val of_list : (string * 'a) list -> 'a t 1079 | (** [of_list bs] is [List.fold_left (fun m (k, v) -> add k v m) empty 1080 | bs]. *) 1081 | 1082 | val of_stdlib_map : 'a Map.Make(String).t -> 'a t 1083 | (** [of_stdlib_map m] is a map from the stdlib-compatible map [m]. *) 1084 | 1085 | val to_stdlib_map : 'a t -> 'a Map.Make(String).t 1086 | (** [to_stdlib_map m] is the stdlib-compatible map equivalent to [m]. *) 1087 | 1088 | val pp : ?sep:(Format.formatter -> unit -> unit) -> 1089 | (Format.formatter -> string * 'a -> unit) -> Format.formatter -> 1090 | 'a t -> unit 1091 | (** [pp ~sep pp_binding ppf m] formats the bindings of [m] on 1092 | [ppf]. Each binding is formatted with [pp_binding] and 1093 | bindings are separated by [sep] (defaults to 1094 | {!Format.pp_print_cut}). If the map is empty leaves [ppf] 1095 | untouched. *) 1096 | 1097 | val dump : (Format.formatter -> 'a -> unit) -> Format.formatter -> 1098 | 'a t -> unit 1099 | (** [dump pp_v ppf m] prints an unspecified representation of [m] on 1100 | [ppf] using [pp_v] to print the map codomain elements. *) 1101 | 1102 | val dump_string_map : Format.formatter -> string t -> unit 1103 | (** [dump_string_map ppf m] prints an unspecified representation of the 1104 | string map [m] on [ppf]. *) 1105 | end 1106 | 1107 | type +'a map = 'a Map.t 1108 | (** The type for maps from strings to values of type 'a. *) 1109 | 1110 | (** {1:convert OCaml base type conversions} *) 1111 | 1112 | val of_char : char -> string 1113 | (** [of_char c] is a string that contains the byte [c]. *) 1114 | 1115 | val to_char : string -> char option 1116 | (** [to_char s] is the single byte in [s] or [None] if there is no byte 1117 | or more than one in [s]. *) 1118 | 1119 | val of_bool : bool -> string 1120 | (** [of_bool b] is a string representation for [b]. Relies on 1121 | {!Stdlib.string_of_bool}. *) 1122 | 1123 | val to_bool : string -> bool option 1124 | (** [to_bool s] is a [bool] from [s], if any. Relies on 1125 | {!Stdlib.bool_of_string}. *) 1126 | 1127 | val of_int : int -> string 1128 | (** [of_int i] is a string representation for [i]. Relies on 1129 | {!Stdlib.string_of_int}. *) 1130 | 1131 | val to_int : string -> int option 1132 | (** [to_int] is an [int] from [s], if any. Relies on 1133 | {!Stdlib.int_of_string}. *) 1134 | 1135 | val of_nativeint : nativeint -> string 1136 | (** [of_nativeint i] is a string representation for [i]. Relies on 1137 | {!Nativeint.of_string}. *) 1138 | 1139 | val to_nativeint : string -> nativeint option 1140 | (** [to_nativeint] is an [nativeint] from [s], if any. Relies on 1141 | {!Nativeint.to_string}. *) 1142 | 1143 | val of_int32 : int32 -> string 1144 | (** [of_int32 i] is a string representation for [i]. Relies on 1145 | {!Int32.of_string}. *) 1146 | 1147 | val to_int32 : string -> int32 option 1148 | (** [to_int32] is an [int32] from [s], if any. Relies on 1149 | {!Int32.to_string}. *) 1150 | 1151 | val of_int64 : int64 -> string 1152 | (** [of_int64 i] is a string representation for [i]. Relies on 1153 | {!Int64.of_string}. *) 1154 | 1155 | val to_int64 : string -> int64 option 1156 | (** [to_int64] is an [int64] from [s], if any. Relies on 1157 | {!Int64.to_string}. *) 1158 | 1159 | val of_float : float -> string 1160 | (** [of_float f] is a string representation for [f]. Relies on 1161 | {!Stdlib.string_of_float}. *) 1162 | 1163 | val to_float : string -> float option 1164 | (** [to_float s] is a [float] from [s], if any. Relies 1165 | on {!Stdlib.float_of_string}. *) 1166 | end 1167 | 1168 | (** {1:diff Differences with the OCaml [String] module} 1169 | 1170 | First note that it is not a goal of {!Astring} to maintain 1171 | compatibility with the OCaml {{!Stdlib.String}[String]} module. 1172 | 1173 | In [Astring]: 1174 | {ul 1175 | {- Strings are assumed to be immutable.} 1176 | {- Deprecated functions are not included.} 1177 | {- Some rarely used functions are dropped, some signatures and names 1178 | are altered, a few often needed functions are added.} 1179 | {- Scanning functions are not doubled for supporting forward and 1180 | reverse directions. Both directions are supported via a single 1181 | function and an optional [rev] argument.} 1182 | {- Functions do not raise [Not_found]. They return [option] values 1183 | instead.} 1184 | {- Functions escaping bytes to printable US-ASCII characters use 1185 | capital hexadecimal escapes rather than decimal ones.} 1186 | {- US-ASCII string support is collected in the {!Char.Ascii} and 1187 | {!String.Ascii} submodules. The functions make sure to operate 1188 | only on the US-ASCII code points (rather than 1189 | {{:http://www.ecma-international.org/publications/standards/Ecma-094.htm}ISO/IEC 1190 | 8859-1} code points). This means they can safely be used on 1191 | UTF-8 encoded strings, they will of course only deal with the 1192 | US-ASCII subset U+0000 to U+007F of 1193 | {{:http://unicode.org/glossary/#unicode_scalar_value} Unicode 1194 | scalar values}.} 1195 | {- The module has pre-applied exception safe {!String.Set} 1196 | and {!String.Map} submodules.}} 1197 | 1198 | {1:port Porting guide} 1199 | 1200 | Opening [Astring] at the top of a module that uses the OCaml 1201 | standard library in a project that compiles with [-safe-string] 1202 | will either result in typing errors or compatible behaviour except 1203 | for uses of the {!String.trim} function, {{!porttrim}see below}. 1204 | 1205 | If for some reason you can't compile your project with 1206 | [-safe-string] this {b may} not be a problem. However you have to 1207 | make sure that your code does not depend on fresh strings being 1208 | returned by functions of the [String] module. The functions of 1209 | {!Astring.String} assume strings to be immutable and thus do not 1210 | always allocate fresh strings for their results. This is the case 1211 | for example for the {!( ^ )} operator redefinition: no string is 1212 | allocated whenever one of its arguments is an empty string. That 1213 | being said it is still better to first make your project compile 1214 | with [-safe-string] and then port to [Astring]. 1215 | 1216 | The 1217 | {{:http://caml.inria.fr/pub/docs/manual-ocaml/libref/String.html#VALsub}[String.sub]} function 1218 | is renamed to {!String.with_range}. If you are working with 1219 | {!String.find} you may find it easier to use 1220 | {!String.with_index_range} which takes indices as arguments and is thus 1221 | directly usable with the result of {!String.find}. But in general 1222 | index based string processing should be frowned upon and replaced 1223 | by {{!String.extract} substring extraction} combinators. 1224 | 1225 | {2:porttrim Porting [String.trim] usages} 1226 | 1227 | The standard OCaml [String.trim] function only trims the 1228 | characters [' '], ['\t'], ['\n'], ['\012'], ['\r']. In 1229 | [Astring] the {{!Char.Ascii.is_white}default set} adds 1230 | vertical tab ([0x0B]) to the set to match the behaviour of 1231 | the C [isspace(3)] function. 1232 | 1233 | If you want to preserve the behaviour of the original function you 1234 | can replace any use of [String.trim] with the following 1235 | [std_ocaml_trim] function: 1236 | {[ 1237 | let std_ocaml_trim s = 1238 | let drop = function 1239 | | ' ' | '\n' | '\012' | '\r' | '\t' -> true 1240 | | _ -> false 1241 | in 1242 | String.trim ~drop s 1243 | ]} 1244 | 1245 | {1:examples Examples} 1246 | 1247 | We show how to use {{!String.Sub}substrings} to quickly devise LL(1) 1248 | parsers. To keep it simple we do not implement precise error 1249 | report, but note that it would be easy to add it by replacing the 1250 | [raise Exit] calls by an exception with more information: we have 1251 | everything at hand at these call points to report good error 1252 | messages. 1253 | 1254 | The first example parses version numbers structured as follows: 1255 | {[ 1256 | [v|V]major.minor[.patch][(+|-)info] 1257 | ]} 1258 | an unreadable {!Str} regular expression for this would be: 1259 | {[ 1260 | "[vV]?\\([0-9]+\\)\\.\\([0-9]+\\)\\(\\.\\([0-9]+\\)\\)?\\([+-]\\(.*\\)\\)?" 1261 | ]} 1262 | Using substrings is certainly less terse but note that the parser is 1263 | made of reusable sub-functions. 1264 | {[ 1265 | let parse_version : string -> (int * int * int * string option) option = 1266 | fun s -> try 1267 | let parse_opt_v s = match String.Sub.head s with 1268 | | Some ('v'|'V') -> String.Sub.tail s 1269 | | Some _ -> s 1270 | | None -> raise Exit 1271 | in 1272 | let parse_dot s = match String.Sub.head s with 1273 | | Some '.' -> String.Sub.tail s 1274 | | Some _ | None -> raise Exit 1275 | in 1276 | let parse_int s = 1277 | match String.Sub.span ~min:1 ~sat:Char.Ascii.is_digit s with 1278 | | (i, _) when String.Sub.is_empty i -> raise Exit 1279 | | (i, s) -> 1280 | match String.Sub.to_int i with 1281 | | None -> raise Exit | Some i -> i, s 1282 | in 1283 | let maj, s = parse_int (parse_opt_v (String.sub s)) in 1284 | let min, s = parse_int (parse_dot s) in 1285 | let patch, s = match String.Sub.head s with 1286 | | Some '.' -> parse_int (parse_dot s) 1287 | | _ -> 0, s 1288 | in 1289 | let info = match String.Sub.head s with 1290 | | Some ('+' | '-') -> Some (String.Sub.(to_string (tail s))) 1291 | | Some _ -> raise Exit 1292 | | None -> None 1293 | in 1294 | Some (maj, min, patch, info) 1295 | with Exit -> None 1296 | ]} 1297 | 1298 | The second example parses space separated key-value bindings 1299 | environments of the form: 1300 | {[ 1301 | key0 = value0 key2 = value2 ...]} 1302 | To support values with spaces, values can be quoted between two 1303 | ['"'] characters. If they are quoted then any ["\\\""] subsequence 1304 | ([0x2F],[0x22]) is interpreted as the character ['"'] ([0x22]) and 1305 | ["\\\\"] ([0x2F],[0x2F]) is interpreted as the character ['\\'] 1306 | ([0x22]). 1307 | 1308 | {[ 1309 | let parse_env : string -> string String.map option = 1310 | fun s -> try 1311 | let skip_white s = String.Sub.drop ~sat:Char.Ascii.is_white s in 1312 | let parse_key s = 1313 | let id_char c = Char.Ascii.is_letter c || c = '_' in 1314 | match String.Sub.span ~min:1 ~sat:id_char s with 1315 | | (key, _) when String.Sub.is_empty key -> raise Exit 1316 | | (key, rem) -> (String.Sub.to_string key), rem 1317 | in 1318 | let parse_eq s = match String.Sub.head s with 1319 | | Some '=' -> String.Sub.tail s 1320 | | Some _ | None -> raise Exit 1321 | in 1322 | let parse_value s = match String.Sub.head s with 1323 | | Some '"' -> (* quoted *) 1324 | let is_data = function '\\' | '"' -> false | _ -> true in 1325 | let rec loop acc s = 1326 | let data, rem = String.Sub.span ~sat:is_data s in 1327 | match String.Sub.head rem with 1328 | | Some '"' -> 1329 | let acc = List.rev (data :: acc) in 1330 | String.Sub.(to_string @@ concat acc), (String.Sub.tail rem) 1331 | | Some '\\' -> 1332 | let rem = String.Sub.tail rem in 1333 | begin match String.Sub.head rem with 1334 | | Some ('"' | '\\' as c) -> 1335 | let acc = String.(sub (of_char c)) :: data :: acc in 1336 | loop acc (String.Sub.tail rem) 1337 | | Some _ | None -> raise Exit 1338 | end 1339 | | None | Some _ -> raise Exit 1340 | in 1341 | loop [] (String.Sub.tail s) 1342 | | Some _ -> 1343 | let is_data c = not (Char.Ascii.is_white c) in 1344 | let data, rem = String.Sub.span ~sat:is_data s in 1345 | String.Sub.to_string data, rem 1346 | | None -> "", s 1347 | in 1348 | let rec parse_bindings acc s = 1349 | if String.Sub.is_empty s then acc else 1350 | let key, s = parse_key s in 1351 | let value, s = s |> skip_white |> parse_eq |> skip_white |> parse_value in 1352 | parse_bindings (String.Map.add key value acc) (skip_white s) 1353 | in 1354 | Some (String.sub s |> skip_white |> parse_bindings String.Map.empty) 1355 | with Exit -> None 1356 | ]} 1357 | 1358 | *) 1359 | 1360 | (*--------------------------------------------------------------------------- 1361 | Copyright (c) 2015 The astring programmers 1362 | 1363 | Permission to use, copy, modify, and/or distribute this software for any 1364 | purpose with or without fee is hereby granted, provided that the above 1365 | copyright notice and this permission notice appear in all copies. 1366 | 1367 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 1368 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1369 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 1370 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1371 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1372 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1373 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1374 | ---------------------------------------------------------------------------*) 1375 | -------------------------------------------------------------------------------- /test/test_string.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2015 The astring programmers. All rights reserved. 3 | Distributed under the ISC license, see terms at the end of the file. 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Testing 7 | open Astring 8 | 9 | let pp_pair ppf (a, b) = 10 | Format.fprintf ppf "@[<1>(%a,%a)@]" String.dump a String.dump b 11 | 12 | let misc = test "Misc. base functions" @@ fun () -> 13 | eq_str String.empty ""; 14 | app_invalid ~pp:pp_str (String.v ~len:(-1)) (fun i -> 'a'); 15 | app_invalid ~pp:pp_str (String.v ~len:(Sys.max_string_length + 1)) 16 | (fun i -> 'a'); 17 | eq_int (String.length "") 0; 18 | eq_int (String.length "1") 1; 19 | eq_int (String.length "12") 2; 20 | eq_char (String.get "12" 0) '1'; 21 | eq_char (String.get "12" 1) '2'; 22 | app_invalid ~pp:pp_char (String.get "12") 3; 23 | app_invalid ~pp:pp_char (String.get "12") (-1); 24 | eq_int (String.get_byte "12" 0) 0x31; 25 | eq_int (String.get_byte "12" 1) 0x32; 26 | app_invalid ~pp:pp_int (String.get_byte "12") 3; 27 | app_invalid ~pp:pp_int (String.get_byte "12") (-1); 28 | eq_str (String.v ~len:3 (fun i -> Char.of_byte (0x30 + i))) "012"; 29 | eq_str (String.v ~len:0 (fun i -> Char.of_byte (0x30 + i))) ""; 30 | () 31 | 32 | let head = test "String.[get_]head" @@ fun () -> 33 | let eq_ochar = eq_option ~eq:(=) ~pp:pp_char in 34 | eq_ochar (String.head "") None; 35 | eq_ochar (String.head ~rev:true "") None; 36 | eq_ochar (String.head "bc") (Some 'b'); 37 | eq_ochar (String.head ~rev:true "bc") (Some 'c'); 38 | eq_char (String.get_head "bc") 'b'; 39 | eq_char (String.get_head ~rev:true "bc") 'c'; 40 | app_invalid ~pp:pp_char String.get_head ""; 41 | () 42 | 43 | (* Appending strings *) 44 | 45 | let append = test "String.append" @@ fun () -> 46 | let no_allocl s s' = eq_bool (s ^ s' == s) true in 47 | let no_allocr s s' = eq_bool (s ^ s' == s') true in 48 | no_allocl String.empty String.empty; 49 | no_allocr String.empty String.empty; 50 | no_allocl "bla" ""; 51 | no_allocr "" "bli"; 52 | eq_str (String.append "a" "") "a"; 53 | eq_str (String.append "" "a") "a"; 54 | eq_str (String.append "ab" "") "ab"; 55 | eq_str (String.append "" "ab") "ab"; 56 | eq_str (String.append "ab" "cd") "abcd"; 57 | eq_str (String.append "cd" "ab") "cdab"; 58 | () 59 | 60 | let concat = test "String.concat" @@ fun () -> 61 | let no_alloc ~sep s = eq_bool (String.concat ~sep [s] == s) true in 62 | no_alloc ~sep:"" ""; 63 | no_alloc ~sep:"-" ""; 64 | no_alloc ~sep:"" "abc"; 65 | no_alloc ~sep:"-" "abc"; 66 | eq_str (String.concat ~sep:"" []) ""; 67 | eq_str (String.concat ~sep:"" [""]) ""; 68 | eq_str (String.concat ~sep:"" ["";""]) ""; 69 | eq_str (String.concat ~sep:"" ["a";"b";]) "ab"; 70 | eq_str (String.concat ~sep:"" ["a";"b";"";"c"]) "abc"; 71 | eq_str (String.concat ~sep:"-" []) ""; 72 | eq_str (String.concat ~sep:"-" [""]) ""; 73 | eq_str (String.concat ~sep:"-" ["a"]) "a"; 74 | eq_str (String.concat ~sep:"-" ["a";""]) "a-"; 75 | eq_str (String.concat ~sep:"-" ["";"a"]) "-a"; 76 | eq_str (String.concat ~sep:"-" ["";"a";""]) "-a-"; 77 | eq_str (String.concat ~sep:"-" ["a";"b";"c"]) "a-b-c"; 78 | eq_str (String.concat ~sep:"--" ["a";"b";"c"]) "a--b--c"; 79 | eq_str (String.concat ~sep:"ab" ["a";"b";"c"]) "aabbabc"; 80 | eq_str (String.concat ["a";"b";""; "c"]) "abc"; 81 | () 82 | 83 | (* Predicates *) 84 | 85 | let is_empty = test "String.is_empty" @@ fun () -> 86 | eq_bool (String.is_empty "") true; 87 | eq_bool (String.is_empty "heyho") false; 88 | () 89 | 90 | let is_prefix = test "String.is_prefix" @@ fun () -> 91 | eq_bool (String.is_prefix ~affix:"" "") true; 92 | eq_bool (String.is_prefix ~affix:"" "habla") true; 93 | eq_bool (String.is_prefix ~affix:"ha" "") false; 94 | eq_bool (String.is_prefix ~affix:"ha" "h") false; 95 | eq_bool (String.is_prefix ~affix:"ha" "ha") true; 96 | eq_bool (String.is_prefix ~affix:"ha" "hab") true; 97 | eq_bool (String.is_prefix ~affix:"ha" "habla") true; 98 | eq_bool (String.is_prefix ~affix:"ha" "abla") false; 99 | () 100 | 101 | let is_infix = test "String.is_infix" @@ fun () -> 102 | eq_bool (String.is_infix ~affix:"" "") true; 103 | eq_bool (String.is_infix ~affix:"" "habla") true; 104 | eq_bool (String.is_infix ~affix:"ha" "") false; 105 | eq_bool (String.is_infix ~affix:"ha" "h") false; 106 | eq_bool (String.is_infix ~affix:"ha" "ha") true; 107 | eq_bool (String.is_infix ~affix:"ha" "hab") true; 108 | eq_bool (String.is_infix ~affix:"ha" "hub") false; 109 | eq_bool (String.is_infix ~affix:"ha" "hubhab") true; 110 | eq_bool (String.is_infix ~affix:"ha" "hubh") false; 111 | eq_bool (String.is_infix ~affix:"ha" "hubha") true; 112 | eq_bool (String.is_infix ~affix:"ha" "hubhb") false; 113 | eq_bool (String.is_infix ~affix:"ha" "abla") false; 114 | eq_bool (String.is_infix ~affix:"ha" "ablah") false; 115 | () 116 | 117 | let is_suffix = test "String.is_suffix" @@ fun () -> 118 | eq_bool (String.is_suffix ~affix:"" "") true; 119 | eq_bool (String.is_suffix ~affix:"" "adsf") true; 120 | eq_bool (String.is_suffix ~affix:"ha" "") false; 121 | eq_bool (String.is_suffix ~affix:"ha" "a") false; 122 | eq_bool (String.is_suffix ~affix:"ha" "h") false; 123 | eq_bool (String.is_suffix ~affix:"ha" "ah") false; 124 | eq_bool (String.is_suffix ~affix:"ha" "ha") true; 125 | eq_bool (String.is_suffix ~affix:"ha" "aha") true; 126 | eq_bool (String.is_suffix ~affix:"ha" "haha") true; 127 | eq_bool (String.is_suffix ~affix:"ha" "hahb") false; 128 | () 129 | 130 | let for_all = test "String.for_all" @@ fun () -> 131 | eq_bool (String.for_all (fun _ -> false) "") true; 132 | eq_bool (String.for_all (fun _ -> true) "") true; 133 | eq_bool (String.for_all (fun c -> Char.to_int c < 0x34) "123") true; 134 | eq_bool (String.for_all (fun c -> Char.to_int c < 0x34) "412") false; 135 | eq_bool (String.for_all (fun c -> Char.to_int c < 0x34) "142") false; 136 | eq_bool (String.for_all (fun c -> Char.to_int c < 0x34) "124") false; 137 | () 138 | 139 | let exists = test "String.exists" @@ fun () -> 140 | eq_bool (String.exists (fun _ -> false) "") false; 141 | eq_bool (String.exists (fun _ -> true) "") false; 142 | eq_bool (String.exists (fun c -> Char.to_int c < 0x34) "541") true; 143 | eq_bool (String.exists (fun c -> Char.to_int c < 0x34) "541") true; 144 | eq_bool (String.exists (fun c -> Char.to_int c < 0x34) "154") true; 145 | eq_bool (String.exists (fun c -> Char.to_int c < 0x34) "654") false; 146 | () 147 | 148 | let equal = test "String.equal" @@ fun () -> 149 | eq_bool (String.equal "" "") true; 150 | eq_bool (String.equal "" "a") false; 151 | eq_bool (String.equal "a" "") false; 152 | eq_bool (String.equal "ab" "ab") true; 153 | eq_bool (String.equal "cd" "ab") false; 154 | () 155 | 156 | let compare = test "String.compare" @@ fun () -> 157 | eq_int (String.compare "" "ab") (-1); 158 | eq_int (String.compare "" "") (0); 159 | eq_int (String.compare "ab" "") (1); 160 | eq_int (String.compare "ab" "abc") (-1); 161 | () 162 | 163 | (* Extracting substrings *) 164 | 165 | let with_range = test "String.with_range" @@ fun () -> 166 | let no_alloc ?first ?len s = 167 | eq_bool (String.with_range s ?first ?len == s || 168 | String.(equal empty s)) true 169 | in 170 | let is_empty ?first ?len s = 171 | let s = String.with_range ?first ?len s in 172 | eq_str s String.empty; 173 | eq_bool (s == String.empty) true; 174 | in 175 | let invalid ?first ?len s = 176 | app_invalid ~pp:pp_str (String.with_range ?first ?len) s 177 | in 178 | let eq_range ?first ?len s s' = eq_str (String.with_range ?first ?len s) s' in 179 | no_alloc ""; 180 | invalid "" ~len:(-1); 181 | no_alloc "" ~len:0; 182 | no_alloc "" ~len:1; 183 | no_alloc "" ~len:2; 184 | no_alloc "" ~first:(-1); 185 | no_alloc "" ~first:0; 186 | no_alloc "" ~first:1; 187 | invalid "" ~first:(-1) ~len:(-1); 188 | no_alloc "" ~first:(-1) ~len:0; 189 | no_alloc "" ~first:(-1) ~len:1; 190 | invalid "" ~first:0 ~len:(-1); 191 | no_alloc "" ~first:0 ~len:0; 192 | no_alloc "" ~first:0 ~len:1; 193 | invalid "" ~first:1 ~len:(-1); 194 | no_alloc "" ~first:1 ~len:0; 195 | no_alloc "" ~first:1 ~len:1; 196 | no_alloc "a"; 197 | invalid "a" ~len:(-1); 198 | is_empty "a" ~len:0; 199 | no_alloc "a" ~len:1; 200 | no_alloc "a" ~len:2; 201 | no_alloc "a" ~first:(-1); 202 | no_alloc "a" ~first:0; 203 | is_empty "a" ~first:1; 204 | invalid "a" ~first:(-1) ~len:(-1); 205 | is_empty "a" ~first:(-1) ~len:0; 206 | is_empty "a" ~first:(-1) ~len:1; 207 | no_alloc "a" ~first:(-1) ~len:2; 208 | no_alloc "a" ~first:(-1) ~len:3; 209 | invalid "a" ~first:0 ~len:(-1); 210 | is_empty "a" ~first:0 ~len:0; 211 | no_alloc "a" ~first:0 ~len:1; 212 | no_alloc "a" ~first:0 ~len:2; 213 | no_alloc "a" ~first:0 ~len:3; 214 | invalid "a" ~first:1 ~len:(-1); 215 | is_empty "a" ~first:1 ~len:0; 216 | is_empty "a" ~first:1 ~len:1; 217 | is_empty "a" ~first:1 ~len:2; 218 | is_empty "a" ~first:1 ~len:3; 219 | no_alloc "ab"; 220 | invalid "ab" ~len:(-1); 221 | is_empty "ab" ~len:0; 222 | eq_range "ab" ~len:1 "a"; 223 | no_alloc "ab" ~len:2; 224 | no_alloc "ab" ~len:3; 225 | no_alloc "ab" ~first:(-1); 226 | no_alloc "ab" ~first:0; 227 | eq_range "ab" ~first:1 "b"; 228 | is_empty "ab" ~first:2; 229 | invalid "ab" ~first:(-1) ~len:(-1); 230 | is_empty "ab" ~first:(-1) ~len:0; 231 | is_empty "ab" ~first:(-1) ~len:1; 232 | eq_range "ab" ~first:(-1) ~len:2 "a"; 233 | no_alloc "ab" ~first:(-1) ~len:3; 234 | no_alloc "ab" ~first:(-1) ~len:4; 235 | invalid "ab" ~first:0 ~len:(-1); 236 | is_empty "ab" ~first:0 ~len:0; 237 | eq_range "ab" ~first:0 ~len:1 "a"; 238 | no_alloc "ab" ~first:0 ~len:2; 239 | no_alloc "ab" ~first:0 ~len:3; 240 | no_alloc "ab" ~first:0 ~len:4; 241 | invalid "ab" ~first:1 ~len:(-1); 242 | is_empty "ab" ~first:1 ~len:0; 243 | eq_range "ab" ~first:1 ~len:1 "b"; 244 | eq_range "ab" ~first:1 ~len:2 "b"; 245 | eq_range "ab" ~first:1 ~len:3 "b"; 246 | eq_range "ab" ~first:1 ~len:4 "b"; 247 | invalid "ab" ~first:2 ~len:(-1); 248 | is_empty "ab" ~first:2 ~len:0; 249 | is_empty "ab" ~first:2 ~len:1; 250 | is_empty "ab" ~first:2 ~len:2; 251 | is_empty "ab" ~first:2 ~len:3; 252 | is_empty "ab" ~first:2 ~len:4; 253 | no_alloc "abc"; 254 | invalid "abc" ~len:(-1); 255 | is_empty "abc" ~len:0; 256 | eq_range "abc" ~len:1 "a"; 257 | eq_range "abc" ~len:2 "ab"; 258 | no_alloc "abc" ~len:3; 259 | no_alloc "abc" ~len:4; 260 | no_alloc "abc" ~first:(-1); 261 | no_alloc "abc" ~first:0; 262 | eq_range "abc" ~first:1 "bc"; 263 | eq_range "abc" ~first:2 "c"; 264 | is_empty "abc" ~first:3; 265 | invalid "abc" ~first:(-1) ~len:(-1); 266 | is_empty "abc" ~first:(-1) ~len:0; 267 | is_empty "abc" ~first:(-1) ~len:1; 268 | eq_range "abc" ~first:(-1) ~len:2 "a"; 269 | eq_range "abc" ~first:(-1) ~len:3 "ab"; 270 | eq_range "abc" ~first:(-1) ~len:4 "abc"; 271 | no_alloc "abc" ~first:(-1) ~len:5; 272 | invalid "abc" ~first:0 ~len:(-1); 273 | is_empty "abc" ~first:0 ~len:0; 274 | eq_range "abc" ~first:0 ~len:1 "a"; 275 | eq_range "abc" ~first:0 ~len:2 "ab"; 276 | no_alloc "abc" ~first:0 ~len:3; 277 | no_alloc "abc" ~first:0 ~len:4; 278 | no_alloc "abc" ~first:0 ~len:5; 279 | invalid "abc" ~first:1 ~len:(-1); 280 | is_empty "abc" ~first:1 ~len:0; 281 | eq_range "abc" ~first:1 ~len:1 "b"; 282 | eq_range "abc" ~first:1 ~len:2 "bc"; 283 | eq_range "abc" ~first:1 ~len:3 "bc"; 284 | eq_range "abc" ~first:1 ~len:4 "bc"; 285 | eq_range "abc" ~first:1 ~len:5 "bc"; 286 | invalid "abc" ~first:2 ~len:(-1); 287 | is_empty "abc" ~first:2 ~len:0; 288 | eq_range "abc" ~first:2 ~len:1 "c"; 289 | eq_range "abc" ~first:2 ~len:2 "c"; 290 | eq_range "abc" ~first:2 ~len:3 "c"; 291 | eq_range "abc" ~first:2 ~len:4 "c"; 292 | eq_range "abc" ~first:2 ~len:5 "c"; 293 | invalid "abc" ~first:3 ~len:(-1); 294 | is_empty "abc" ~first:3 ~len:0; 295 | is_empty "abc" ~first:3 ~len:1; 296 | is_empty "abc" ~first:3 ~len:2; 297 | is_empty "abc" ~first:3 ~len:3; 298 | is_empty "abc" ~first:3 ~len:4; 299 | is_empty "abc" ~first:3 ~len:5; 300 | () 301 | 302 | let with_index_range = test "String.with_index_range" @@ fun () -> 303 | let no_alloc ?first ?last s = 304 | eq_bool (String.with_index_range s ?first ?last == s || 305 | String.(equal empty s)) true 306 | in 307 | let is_empty ?first ?last s = 308 | let s = String.with_index_range ?first ?last s in 309 | eq_str s String.empty; 310 | eq_bool (s == String.empty) true; 311 | in 312 | let eq_range ?first ?last s s' = 313 | eq_str (String.with_index_range ?first ?last s) s' 314 | in 315 | no_alloc ""; 316 | no_alloc "" ~first:(-1); 317 | no_alloc "" ~first:0; 318 | no_alloc "" ~first:1; 319 | no_alloc "" ~first:2; 320 | no_alloc "" ~last:(-1); 321 | no_alloc "" ~last:0; 322 | no_alloc "" ~last:1; 323 | no_alloc "" ~last:2; 324 | no_alloc "" ~first:(-1) ~last:(-1); 325 | no_alloc "" ~first:(-1) ~last:0; 326 | no_alloc "" ~first:(-1) ~last:1; 327 | no_alloc "" ~first:0 ~last:(-1); 328 | no_alloc "" ~first:0 ~last:0; 329 | no_alloc "" ~first:0 ~last:1; 330 | no_alloc "" ~first:1 ~last:(-1); 331 | no_alloc "" ~first:1 ~last:0; 332 | no_alloc "" ~first:1 ~last:1; 333 | no_alloc "a"; 334 | no_alloc "a" ~first:(-1); 335 | no_alloc "a" ~first:0; 336 | is_empty "a" ~first:1; 337 | is_empty "a" ~first:2; 338 | is_empty "a" ~last:(-1); 339 | no_alloc "a" ~last:0; 340 | no_alloc "a" ~last:1; 341 | no_alloc "a" ~last:2; 342 | is_empty "a" ~first:(-1) ~last:(-1); 343 | no_alloc "a" ~first:(-1) ~last:0; 344 | no_alloc "a" ~first:(-1) ~last:1; 345 | no_alloc "a" ~first:(-1) ~last:2; 346 | no_alloc "a" ~first:(-1) ~last:3; 347 | is_empty "a" ~first:0 ~last:(-1); 348 | no_alloc "a" ~first:0 ~last:0; 349 | no_alloc "a" ~first:0 ~last:1; 350 | no_alloc "a" ~first:0 ~last:2; 351 | no_alloc "a" ~first:0 ~last:3; 352 | is_empty "a" ~first:1 ~last:(-1); 353 | is_empty "a" ~first:1 ~last:0; 354 | is_empty "a" ~first:1 ~last:1; 355 | is_empty "a" ~first:1 ~last:2; 356 | is_empty "a" ~first:1 ~last:3; 357 | no_alloc "ab"; 358 | no_alloc "ab" ~first:(-1); 359 | no_alloc "ab" ~first:0; 360 | eq_range "ab" ~first:1 "b"; 361 | is_empty "ab" ~first:2; 362 | is_empty "ab" ~last:(-1); 363 | eq_range "ab" ~last:0 "a"; 364 | no_alloc "ab" ~last:1; 365 | no_alloc "ab" ~last:2; 366 | no_alloc "ab" ~last:3; 367 | is_empty "ab" ~first:(-1) ~last:(-1); 368 | eq_range "ab" ~first:(-1) ~last:0 "a"; 369 | no_alloc "ab" ~first:(-1) ~last:1; 370 | no_alloc "ab" ~first:(-1) ~last:2; 371 | no_alloc "ab" ~first:(-1) ~last:3; 372 | no_alloc "ab" ~first:(-1) ~last:4; 373 | is_empty "ab" ~first:0 ~last:(-1); 374 | eq_range "ab" ~first:0 ~last:0 "a"; 375 | no_alloc "ab" ~first:0 ~last:1; 376 | no_alloc "ab" ~first:0 ~last:2; 377 | no_alloc "ab" ~first:0 ~last:3; 378 | no_alloc "ab" ~first:0 ~last:4; 379 | is_empty "ab" ~first:1 ~last:(-1); 380 | is_empty "ab" ~first:1 ~last:0; 381 | eq_range "ab" ~first:1 ~last:1 "b"; 382 | eq_range "ab" ~first:1 ~last:2 "b"; 383 | eq_range "ab" ~first:1 ~last:3 "b"; 384 | eq_range "ab" ~first:1 ~last:4 "b"; 385 | is_empty "ab" ~first:2 ~last:(-1); 386 | is_empty "ab" ~first:2 ~last:0; 387 | is_empty "ab" ~first:2 ~last:1; 388 | is_empty "ab" ~first:2 ~last:2; 389 | is_empty "ab" ~first:2 ~last:3; 390 | is_empty "ab" ~first:2 ~last:4; 391 | no_alloc "abc"; 392 | no_alloc "abc" ~first:(-1); 393 | no_alloc "abc" ~first:0; 394 | eq_range "abc" ~first:1 "bc"; 395 | eq_range "abc" ~first:2 "c"; 396 | is_empty "abc" ~first:3; 397 | is_empty "abc" ~last:(-1); 398 | eq_range "abc" ~last:0 "a"; 399 | eq_range "abc" ~last:1 "ab"; 400 | no_alloc "abc" ~last:2; 401 | no_alloc "abc" ~last:3; 402 | no_alloc "abc" ~last:4; 403 | is_empty "abc" ~first:(-1) ~last:(-1); 404 | eq_range "abc" ~first:(-1) ~last:0 "a"; 405 | eq_range "abc" ~first:(-1) ~last:1 "ab"; 406 | no_alloc "abc" ~first:(-1) ~last:2; 407 | no_alloc "abc" ~first:(-1) ~last:3; 408 | no_alloc "abc" ~first:(-1) ~last:4; 409 | no_alloc "abc" ~first:(-1) ~last:5; 410 | is_empty "abc" ~first:0 ~last:(-1); 411 | eq_range "abc" ~first:0 ~last:0 "a"; 412 | eq_range "abc" ~first:0 ~last:1 "ab"; 413 | no_alloc "abc" ~first:0 ~last:2; 414 | no_alloc "abc" ~first:0 ~last:3; 415 | no_alloc "abc" ~first:0 ~last:4; 416 | no_alloc "abc" ~first:0 ~last:5; 417 | is_empty "abc" ~first:1 ~last:(-1); 418 | is_empty "abc" ~first:1 ~last:0; 419 | eq_range "abc" ~first:1 ~last:1 "b"; 420 | eq_range "abc" ~first:1 ~last:2 "bc"; 421 | eq_range "abc" ~first:1 ~last:3 "bc"; 422 | eq_range "abc" ~first:1 ~last:4 "bc"; 423 | eq_range "abc" ~first:1 ~last:5 "bc"; 424 | is_empty "abc" ~first:2 ~last:(-1); 425 | is_empty "abc" ~first:2 ~last:0; 426 | is_empty "abc" ~first:2 ~last:1; 427 | eq_range "abc" ~first:2 ~last:2 "c"; 428 | eq_range "abc" ~first:2 ~last:3 "c"; 429 | eq_range "abc" ~first:2 ~last:4 "c"; 430 | eq_range "abc" ~first:2 ~last:5 "c"; 431 | is_empty "abc" ~first:3 ~last:(-1); 432 | is_empty "abc" ~first:3 ~last:0; 433 | is_empty "abc" ~first:3 ~last:1; 434 | is_empty "abc" ~first:3 ~last:2; 435 | is_empty "abc" ~first:3 ~last:3; 436 | is_empty "abc" ~first:3 ~last:4; 437 | is_empty "abc" ~first:3 ~last:5; 438 | () 439 | 440 | let trim = test "String.trim" @@ fun () -> 441 | let drop_a c = c = 'a' in 442 | let no_alloc ?drop s = eq_bool (String.trim ?drop s == s) true in 443 | no_alloc ""; 444 | no_alloc ~drop:drop_a ""; 445 | no_alloc "bc"; 446 | no_alloc ~drop:drop_a "bc"; 447 | eq_str (String.trim "\t abcd \r ") "abcd"; 448 | no_alloc ~drop:drop_a "\x00 abcd \x1F "; 449 | no_alloc "aaaabcdaaaa"; 450 | eq_str (String.trim ~drop:drop_a "aaaabcdaaaa") "bcd"; 451 | eq_str (String.trim ~drop:drop_a "aaaabcd") "bcd"; 452 | eq_str (String.trim ~drop:drop_a "bcdaaaa") "bcd"; 453 | eq_str (String.trim ~drop:drop_a "aaaa") ""; 454 | eq_str (String.trim " ") ""; 455 | () 456 | 457 | let span = test "String.{span,take,drop}" @@ fun () -> 458 | let eq_pair (l0, r0) (l1, r1) = String.equal l0 l1 && String.equal r0 r1 in 459 | let eq_pair = eq ~eq:eq_pair ~pp:pp_pair in 460 | let eq ?(rev = false) ?min ?max ?sat s (sl, sr as spec) = 461 | let (l, r as pair) = String.span ~rev ?min ?max ?sat s in 462 | let t = String.take ~rev ?min ?max ?sat s in 463 | let d = String.drop ~rev ?min ?max ?sat s in 464 | eq_pair pair spec; 465 | eq_str t (if rev then sr else sl); 466 | eq_str d (if rev then sl else sr); 467 | if sl = "" then begin 468 | eq_bool (l == String.empty) true; 469 | eq_bool (r == s) true; 470 | eq_bool ((if rev then d else t) == String.empty) true; 471 | end; 472 | if sr = "" then begin 473 | eq_bool (r == String.empty) true; 474 | eq_bool (l == s) true; 475 | eq_bool ((if rev then t else d) == String.empty) true; 476 | end 477 | in 478 | let invalid ?rev ?min ?max ?sat s = 479 | app_invalid ~pp:pp_pair (String.span ?rev ?min ?max ?sat) s 480 | in 481 | invalid ~rev:false ~min:(-1) ""; 482 | invalid ~rev:true ~min:(-1) ""; 483 | invalid ~rev:false ~max:(-1) ""; 484 | invalid ~rev:true ~max:(-1) ""; 485 | eq ~rev:false String.empty ("",""); 486 | eq ~rev:true String.empty ("",""); 487 | eq ~rev:false ~min:0 ~max:0 String.empty ("",""); 488 | eq ~rev:true ~min:0 ~max:0 String.empty ("",""); 489 | eq ~rev:false ~min:1 ~max:0 String.empty ("",""); 490 | eq ~rev:true ~min:1 ~max:0 String.empty ("",""); 491 | eq ~rev:false ~max:0 "ab_cd" ("","ab_cd"); 492 | eq ~rev:true ~max:0 "ab_cd" ("ab_cd",""); 493 | eq ~rev:false ~max:2 "ab_cd" ("ab", "_cd"); 494 | eq ~rev:true ~max:2 "ab_cd" ("ab_", "cd"); 495 | eq ~rev:false ~min:6 "ab_cd" ("", "ab_cd"); 496 | eq ~rev:true ~min:6 "ab_cd" ("ab_cd", ""); 497 | eq ~rev:false "ab_cd" ("ab_cd", ""); 498 | eq ~rev:true "ab_cd" ("", "ab_cd"); 499 | eq ~rev:false ~max:30 "ab_cd" ("ab_cd", ""); 500 | eq ~rev:true ~max:30 "ab_cd" ("", "ab_cd"); 501 | eq ~rev:false ~sat:Char.Ascii.is_white "ab_cd" ("","ab_cd"); 502 | eq ~rev:true ~sat:Char.Ascii.is_white "ab_cd" ("ab_cd",""); 503 | eq ~rev:false ~sat:Char.Ascii.is_letter "ab_cd" ("ab", "_cd"); 504 | eq ~rev:true ~sat:Char.Ascii.is_letter "ab_cd" ("ab_", "cd"); 505 | eq ~rev:false ~sat:Char.Ascii.is_letter ~max:0 "ab_cd" ("", "ab_cd"); 506 | eq ~rev:true ~sat:Char.Ascii.is_letter ~max:0 "ab_cd" ("ab_cd", ""); 507 | eq ~rev:false ~sat:Char.Ascii.is_letter ~max:1 "ab_cd" ("a", "b_cd"); 508 | eq ~rev:true ~sat:Char.Ascii.is_letter ~max:1 "ab_cd" ("ab_c", "d"); 509 | eq ~rev:false ~sat:Char.Ascii.is_letter ~min:2 ~max:1 "ab_cd" ("", "ab_cd"); 510 | eq ~rev:true ~sat:Char.Ascii.is_letter ~min:2 ~max:1 "ab_cd" ("ab_cd", ""); 511 | eq ~rev:false ~sat:Char.Ascii.is_letter ~min:3 "ab_cd" ("", "ab_cd"); 512 | eq ~rev:true ~sat:Char.Ascii.is_letter ~min:3 "ab_cd" ("ab_cd", ""); 513 | () 514 | 515 | let cut = test "String.cut" @@ fun () -> 516 | let ppp = pp_option pp_pair in 517 | let eqo = eq_option ~eq:(=) ~pp:pp_pair in 518 | app_invalid ~pp:ppp (String.cut ~sep:"") ""; 519 | app_invalid ~pp:ppp (String.cut ~sep:"") "123"; 520 | eqo (String.cut "," "") None; 521 | eqo (String.cut "," ",") (Some ("", "")); 522 | eqo (String.cut "," ",,") (Some ("", ",")); 523 | eqo (String.cut "," ",,,") (Some ("", ",,")); 524 | eqo (String.cut "," "123") None; 525 | eqo (String.cut "," ",123") (Some ("", "123")); 526 | eqo (String.cut "," "123,") (Some ("123", "")); 527 | eqo (String.cut "," "1,2,3") (Some ("1", "2,3")); 528 | eqo (String.cut "," " 1,2,3") (Some (" 1", "2,3")); 529 | eqo (String.cut "<>" "") None; 530 | eqo (String.cut "<>" "<>") (Some ("", "")); 531 | eqo (String.cut "<>" "<><>") (Some ("", "<>")); 532 | eqo (String.cut "<>" "<><><>") (Some ("", "<><>")); 533 | eqo (String.cut ~rev:true ~sep:"<>" "1") None; 534 | eqo (String.cut "<>" "123") None; 535 | eqo (String.cut "<>" "<>123") (Some ("", "123")); 536 | eqo (String.cut "<>" "123<>") (Some ("123", "")); 537 | eqo (String.cut "<>" "1<>2<>3") (Some ("1", "2<>3")); 538 | eqo (String.cut "<>" " 1<>2<>3") (Some (" 1", "2<>3")); 539 | eqo (String.cut "<>" ">>><>>>><>>>><>>>>") (Some (">>>", ">>><>>>><>>>>")); 540 | eqo (String.cut "<->" "<->>->") (Some ("", ">->")); 541 | eqo (String.cut ~rev:true ~sep:"<->" "<-") None; 542 | eqo (String.cut "aa" "aa") (Some ("", "")); 543 | eqo (String.cut "aa" "aaa") (Some ("", "a")); 544 | eqo (String.cut "aa" "aaaa") (Some ("", "aa")); 545 | eqo (String.cut "aa" "aaaaa") (Some ("", "aaa";)); 546 | eqo (String.cut "aa" "aaaaaa") (Some ("", "aaaa")); 547 | eqo (String.cut ~sep:"ab" "faaaa") None; 548 | let rev = true in 549 | app_invalid ~pp:ppp (String.cut ~rev ~sep:"") ""; 550 | app_invalid ~pp:ppp (String.cut ~rev ~sep:"") "123"; 551 | eqo (String.cut ~rev ~sep:"," "") None; 552 | eqo (String.cut ~rev ~sep:"," ",") (Some ("", "")); 553 | eqo (String.cut ~rev ~sep:"," ",,") (Some (",", "")); 554 | eqo (String.cut ~rev ~sep:"," ",,,") (Some (",,", "")); 555 | eqo (String.cut ~rev ~sep:"," "123") None; 556 | eqo (String.cut ~rev ~sep:"," ",123") (Some ("", "123")); 557 | eqo (String.cut ~rev ~sep:"," "123,") (Some ("123", "")); 558 | eqo (String.cut ~rev ~sep:"," "1,2,3") (Some ("1,2", "3")); 559 | eqo (String.cut ~rev ~sep:"," "1,2,3 ") (Some ("1,2", "3 ")); 560 | eqo (String.cut ~rev ~sep:"<>" "") None; 561 | eqo (String.cut ~rev ~sep:"<>" "<>") (Some ("", "")); 562 | eqo (String.cut ~rev ~sep:"<>" "<><>") (Some ("<>", "")); 563 | eqo (String.cut ~rev ~sep:"<>" "<><><>") (Some ("<><>", "")); 564 | eqo (String.cut ~rev ~sep:"<>" "1") None; 565 | eqo (String.cut ~rev ~sep:"<>" "123") None; 566 | eqo (String.cut ~rev ~sep:"<>" "<>123") (Some ("", "123")); 567 | eqo (String.cut ~rev ~sep:"<>" "123<>") (Some ("123", "")); 568 | eqo (String.cut ~rev ~sep:"<>" "1<>2<>3") (Some ("1<>2", "3")); 569 | eqo (String.cut ~rev ~sep:"<>" "1<>2<>3 ") (Some ("1<>2", "3 ")); 570 | eqo (String.cut ~rev ~sep:"<>" ">>><>>>><>>>><>>>>") 571 | (Some (">>><>>>><>>>>", ">>>")); 572 | eqo (String.cut ~rev ~sep:"<->" "<->>->") (Some ("", ">->")); 573 | eqo (String.cut ~rev ~sep:"<->" "<-") None; 574 | eqo (String.cut ~rev ~sep:"aa" "aa") (Some ("", "")); 575 | eqo (String.cut ~rev ~sep:"aa" "aaa") (Some ("a", "")); 576 | eqo (String.cut ~rev ~sep:"aa" "aaaa") (Some ("aa", "")); 577 | eqo (String.cut ~rev ~sep:"aa" "aaaaa") (Some ("aaa", "";)); 578 | eqo (String.cut ~rev ~sep:"aa" "aaaaaa") (Some ("aaaa", "")); 579 | eqo (String.cut ~rev ~sep:"ab" "afaaaa") None; 580 | () 581 | 582 | let cuts = test "String.cuts" @@ fun () -> 583 | let ppl = pp_list String.dump in 584 | let eql = eq_list ~eq:String.equal ~pp:String.dump in 585 | let no_alloc ?rev ~sep s = 586 | eq_bool (List.hd (String.cuts ?rev ~sep s) == s) true 587 | in 588 | app_invalid ~pp:ppl (String.cuts ~sep:"") ""; 589 | app_invalid ~pp:ppl (String.cuts ~sep:"") "123"; 590 | no_alloc ~sep:"," ""; 591 | no_alloc ~sep:"," "abcd"; 592 | eql (String.cuts ~empty:true ~sep:"," "") [""]; 593 | eql (String.cuts ~empty:false ~sep:"," "") []; 594 | eql (String.cuts ~empty:true ~sep:"," ",") [""; ""]; 595 | eql (String.cuts ~empty:false ~sep:"," ",") []; 596 | eql (String.cuts ~empty:true ~sep:"," ",,") [""; ""; ""]; 597 | eql (String.cuts ~empty:false ~sep:"," ",,") []; 598 | eql (String.cuts ~empty:true ~sep:"," ",,,") [""; ""; ""; ""]; 599 | eql (String.cuts ~empty:false ~sep:"," ",,,") []; 600 | eql (String.cuts ~empty:true ~sep:"," "123") ["123"]; 601 | eql (String.cuts ~empty:false ~sep:"," "123") ["123"]; 602 | eql (String.cuts ~empty:true ~sep:"," ",123") [""; "123"]; 603 | eql (String.cuts ~empty:false ~sep:"," ",123") ["123"]; 604 | eql (String.cuts ~empty:true ~sep:"," "123,") ["123"; ""]; 605 | eql (String.cuts ~empty:false ~sep:"," "123,") ["123";]; 606 | eql (String.cuts ~empty:true ~sep:"," "1,2,3") ["1"; "2"; "3"]; 607 | eql (String.cuts ~empty:false ~sep:"," "1,2,3") ["1"; "2"; "3"]; 608 | eql (String.cuts ~empty:true ~sep:"," "1, 2, 3") ["1"; " 2"; " 3"]; 609 | eql (String.cuts ~empty:false ~sep:"," "1, 2, 3") ["1"; " 2"; " 3"]; 610 | eql (String.cuts ~empty:true ~sep:"," ",1,2,,3,") [""; "1"; "2"; ""; "3"; ""]; 611 | eql (String.cuts ~empty:false ~sep:"," ",1,2,,3,") ["1"; "2"; "3";]; 612 | eql (String.cuts ~empty:true ~sep:"," ", 1, 2,, 3,") 613 | [""; " 1"; " 2"; ""; " 3"; ""]; 614 | eql (String.cuts ~empty:false ~sep:"," ", 1, 2,, 3,") [" 1"; " 2";" 3";]; 615 | eql (String.cuts ~empty:true ~sep:"<>" "") [""]; 616 | eql (String.cuts ~empty:false ~sep:"<>" "") []; 617 | eql (String.cuts ~empty:true ~sep:"<>" "<>") [""; ""]; 618 | eql (String.cuts ~empty:false ~sep:"<>" "<>") []; 619 | eql (String.cuts ~empty:true ~sep:"<>" "<><>") [""; ""; ""]; 620 | eql (String.cuts ~empty:false ~sep:"<>" "<><>") []; 621 | eql (String.cuts ~empty:true ~sep:"<>" "<><><>") [""; ""; ""; ""]; 622 | eql (String.cuts ~empty:false ~sep:"<>" "<><><>") []; 623 | eql (String.cuts ~empty:true ~sep:"<>" "123") [ "123" ]; 624 | eql (String.cuts ~empty:false ~sep:"<>" "123") [ "123" ]; 625 | eql (String.cuts ~empty:true ~sep:"<>" "<>123") [""; "123"]; 626 | eql (String.cuts ~empty:false ~sep:"<>" "<>123") ["123"]; 627 | eql (String.cuts ~empty:true ~sep:"<>" "123<>") ["123"; ""]; 628 | eql (String.cuts ~empty:false ~sep:"<>" "123<>") ["123"]; 629 | eql (String.cuts ~empty:true ~sep:"<>" "1<>2<>3") ["1"; "2"; "3"]; 630 | eql (String.cuts ~empty:false ~sep:"<>" "1<>2<>3") ["1"; "2"; "3"]; 631 | eql (String.cuts ~empty:true ~sep:"<>" "1<> 2<> 3") ["1"; " 2"; " 3"]; 632 | eql (String.cuts ~empty:false ~sep:"<>" "1<> 2<> 3") ["1"; " 2"; " 3"]; 633 | eql (String.cuts ~empty:true ~sep:"<>" "<>1<>2<><>3<>") 634 | [""; "1"; "2"; ""; "3"; ""]; 635 | eql (String.cuts ~empty:false ~sep:"<>" "<>1<>2<><>3<>") ["1"; "2";"3";]; 636 | eql (String.cuts ~empty:true ~sep:"<>" "<> 1<> 2<><> 3<>") 637 | [""; " 1"; " 2"; ""; " 3";""]; 638 | eql (String.cuts ~empty:false ~sep:"<>" "<> 1<> 2<><> 3<>")[" 1"; " 2"; " 3"]; 639 | eql (String.cuts ~empty:true ~sep:"<>" ">>><>>>><>>>><>>>>") 640 | [">>>"; ">>>"; ">>>"; ">>>" ]; 641 | eql (String.cuts ~empty:false ~sep:"<>" ">>><>>>><>>>><>>>>") 642 | [">>>"; ">>>"; ">>>"; ">>>" ]; 643 | eql (String.cuts ~empty:true ~sep:"<->" "<->>->") [""; ">->"]; 644 | eql (String.cuts ~empty:false ~sep:"<->" "<->>->") [">->"]; 645 | eql (String.cuts ~empty:true ~sep:"aa" "aa") [""; ""]; 646 | eql (String.cuts ~empty:false ~sep:"aa" "aa") []; 647 | eql (String.cuts ~empty:true ~sep:"aa" "aaa") [""; "a"]; 648 | eql (String.cuts ~empty:false ~sep:"aa" "aaa") ["a"]; 649 | eql (String.cuts ~empty:true ~sep:"aa" "aaaa") [""; ""; ""]; 650 | eql (String.cuts ~empty:false ~sep:"aa" "aaaa") []; 651 | eql (String.cuts ~empty:true ~sep:"aa" "aaaaa") [""; ""; "a"]; 652 | eql (String.cuts ~empty:false ~sep:"aa" "aaaaa") ["a"]; 653 | eql (String.cuts ~empty:true ~sep:"aa" "aaaaaa") [""; ""; ""; ""]; 654 | eql (String.cuts ~empty:false ~sep:"aa" "aaaaaa") []; 655 | let rev = true in 656 | app_invalid ~pp:ppl (String.cuts ~rev ~sep:"") ""; 657 | app_invalid ~pp:ppl (String.cuts ~rev ~sep:"") "123"; 658 | no_alloc ~rev ~sep:"," ""; 659 | no_alloc ~rev ~sep:"," "abcd"; 660 | eql (String.cuts ~rev ~empty:true ~sep:"," "") [""]; 661 | eql (String.cuts ~rev ~empty:false ~sep:"," "") []; 662 | eql (String.cuts ~rev ~empty:true ~sep:"," ",") [""; ""]; 663 | eql (String.cuts ~rev ~empty:false ~sep:"," ",") []; 664 | eql (String.cuts ~rev ~empty:true ~sep:"," ",,") [""; ""; ""]; 665 | eql (String.cuts ~rev ~empty:false ~sep:"," ",,") []; 666 | eql (String.cuts ~rev ~empty:true ~sep:"," ",,,") [""; ""; ""; ""]; 667 | eql (String.cuts ~rev ~empty:false ~sep:"," ",,,") []; 668 | eql (String.cuts ~rev ~empty:true ~sep:"," "123") ["123"]; 669 | eql (String.cuts ~rev ~empty:false ~sep:"," "123") ["123"]; 670 | eql (String.cuts ~rev ~empty:true ~sep:"," ",123") [""; "123"]; 671 | eql (String.cuts ~rev ~empty:false ~sep:"," ",123") ["123"]; 672 | eql (String.cuts ~rev ~empty:true ~sep:"," "123,") ["123"; ""]; 673 | eql (String.cuts ~rev ~empty:false ~sep:"," "123,") ["123";]; 674 | eql (String.cuts ~rev ~empty:true ~sep:"," "1,2,3") ["1"; "2"; "3"]; 675 | eql (String.cuts ~rev ~empty:false ~sep:"," "1,2,3") ["1"; "2"; "3"]; 676 | eql (String.cuts ~rev ~empty:true ~sep:"," "1, 2, 3") ["1"; " 2"; " 3"]; 677 | eql (String.cuts ~rev ~empty:false ~sep:"," "1, 2, 3") ["1"; " 2"; " 3"]; 678 | eql (String.cuts ~rev ~empty:true ~sep:"," ",1,2,,3,") 679 | [""; "1"; "2"; ""; "3"; ""]; 680 | eql (String.cuts ~rev ~empty:false ~sep:"," ",1,2,,3,") ["1"; "2"; "3"]; 681 | eql (String.cuts ~rev ~empty:true ~sep:"," ", 1, 2,, 3,") 682 | [""; " 1"; " 2"; ""; " 3"; ""]; 683 | eql (String.cuts ~rev ~empty:false ~sep:"," ", 1, 2,, 3,") [" 1"; " 2"; " 3"]; 684 | eql (String.cuts ~rev ~empty:true ~sep:"<>" "") [""]; 685 | eql (String.cuts ~rev ~empty:false ~sep:"<>" "") []; 686 | eql (String.cuts ~rev ~empty:true ~sep:"<>" "<>") [""; ""]; 687 | eql (String.cuts ~rev ~empty:false ~sep:"<>" "<>") []; 688 | eql (String.cuts ~rev ~empty:true ~sep:"<>" "<><>") [""; ""; ""]; 689 | eql (String.cuts ~rev ~empty:false ~sep:"<>" "<><>") []; 690 | eql (String.cuts ~rev ~empty:true ~sep:"<>" "<><><>") [""; ""; ""; ""]; 691 | eql (String.cuts ~rev ~empty:false ~sep:"<>" "<><><>") []; 692 | eql (String.cuts ~rev ~empty:true ~sep:"<>" "123") [ "123" ]; 693 | eql (String.cuts ~rev ~empty:false ~sep:"<>" "123") [ "123" ]; 694 | eql (String.cuts ~rev ~empty:true ~sep:"<>" "<>123") [""; "123"]; 695 | eql (String.cuts ~rev ~empty:false ~sep:"<>" "<>123") ["123"]; 696 | eql (String.cuts ~rev ~empty:true ~sep:"<>" "123<>") ["123"; ""]; 697 | eql (String.cuts ~rev ~empty:false ~sep:"<>" "123<>") ["123";]; 698 | eql (String.cuts ~rev ~empty:true ~sep:"<>" "1<>2<>3") ["1"; "2"; "3"]; 699 | eql (String.cuts ~rev ~empty:false ~sep:"<>" "1<>2<>3") ["1"; "2"; "3"]; 700 | eql (String.cuts ~rev ~empty:true ~sep:"<>" "1<> 2<> 3") ["1"; " 2"; " 3"]; 701 | eql (String.cuts ~rev ~empty:false ~sep:"<>" "1<> 2<> 3") ["1"; " 2"; " 3"]; 702 | eql (String.cuts ~rev ~empty:true ~sep:"<>" "<>1<>2<><>3<>") 703 | [""; "1"; "2"; ""; "3"; ""]; 704 | eql (String.cuts ~rev ~empty:false ~sep:"<>" "<>1<>2<><>3<>") 705 | ["1"; "2"; "3"]; 706 | eql (String.cuts ~rev ~empty:true ~sep:"<>" "<> 1<> 2<><> 3<>") 707 | [""; " 1"; " 2"; ""; " 3";""]; 708 | eql (String.cuts ~rev ~empty:false ~sep:"<>" "<> 1<> 2<><> 3<>") 709 | [" 1"; " 2"; " 3";]; 710 | eql (String.cuts ~rev ~empty:true ~sep:"<>" ">>><>>>><>>>><>>>>") 711 | [">>>"; ">>>"; ">>>"; ">>>" ]; 712 | eql (String.cuts ~rev ~empty:false ~sep:"<>" ">>><>>>><>>>><>>>>") 713 | [">>>"; ">>>"; ">>>"; ">>>" ]; 714 | eql (String.cuts ~rev ~empty:true ~sep:"<->" "<->>->") [""; ">->"]; 715 | eql (String.cuts ~rev ~empty:false ~sep:"<->" "<->>->") [">->"]; 716 | eql (String.cuts ~rev ~empty:true ~sep:"aa" "aa") [""; ""]; 717 | eql (String.cuts ~rev ~empty:false ~sep:"aa" "aa") []; 718 | eql (String.cuts ~rev ~empty:true ~sep:"aa" "aaa") ["a"; ""]; 719 | eql (String.cuts ~rev ~empty:false ~sep:"aa" "aaa") ["a"]; 720 | eql (String.cuts ~rev ~empty:true ~sep:"aa" "aaaa") [""; ""; ""]; 721 | eql (String.cuts ~rev ~empty:false ~sep:"aa" "aaaa") []; 722 | eql (String.cuts ~rev ~empty:true ~sep:"aa" "aaaaa") ["a"; ""; "";]; 723 | eql (String.cuts ~rev ~empty:false ~sep:"aa" "aaaaa") ["a";]; 724 | eql (String.cuts ~rev ~empty:true ~sep:"aa" "aaaaaa") [""; ""; ""; ""]; 725 | eql (String.cuts ~rev ~empty:false ~sep:"aa" "aaaaaa") []; 726 | () 727 | 728 | let fields = test "String.fields" @@ fun () -> 729 | let eql = eq_list ~eq:String.equal ~pp:String.dump in 730 | let no_alloc ?empty ?is_sep s = 731 | eq_bool (List.hd (String.fields ?empty ?is_sep s) == s) true 732 | in 733 | let is_a c = c = 'a' in 734 | no_alloc ~empty:true "a"; 735 | no_alloc ~empty:false "a"; 736 | no_alloc ~empty:true "abc"; 737 | no_alloc ~empty:false "abc"; 738 | no_alloc ~empty:true ~is_sep:is_a "bcdf"; 739 | no_alloc ~empty:false ~is_sep:is_a "bcdf"; 740 | eql (String.fields ~empty:true "") [""]; 741 | eql (String.fields ~empty:false "") []; 742 | eql (String.fields ~empty:true "\n\r") ["";"";""]; 743 | eql (String.fields ~empty:false "\n\r") []; 744 | eql (String.fields ~empty:true " \n\rabc") ["";"";"";"abc"]; 745 | eql (String.fields ~empty:false " \n\rabc") ["abc"]; 746 | eql (String.fields ~empty:true " \n\racd de") ["";"";"";"acd";"de"]; 747 | eql (String.fields ~empty:false " \n\racd de") ["acd";"de"]; 748 | eql (String.fields ~empty:true " \n\racd de ") ["";"";"";"acd";"de";""]; 749 | eql (String.fields ~empty:false " \n\racd de ") ["acd";"de"]; 750 | eql (String.fields ~empty:true "\n\racd\nde \r") ["";"";"acd";"de";"";""]; 751 | eql (String.fields ~empty:false "\n\racd\nde \r") ["acd";"de"]; 752 | eql (String.fields ~empty:true ~is_sep:is_a "") [""]; 753 | eql (String.fields ~empty:false ~is_sep:is_a "") []; 754 | eql (String.fields ~empty:true ~is_sep:is_a "abaac aaa") 755 | ["";"b";"";"c ";"";"";""]; 756 | eql (String.fields ~empty:false ~is_sep:is_a "abaac aaa") ["b"; "c "]; 757 | eql (String.fields ~empty:true ~is_sep:is_a "aaaa") ["";"";"";"";""]; 758 | eql (String.fields ~empty:false ~is_sep:is_a "aaaa") []; 759 | eql (String.fields ~empty:true ~is_sep:is_a "aaaa ") ["";"";"";"";" "]; 760 | eql (String.fields ~empty:false ~is_sep:is_a "aaaa ") [" "]; 761 | eql (String.fields ~empty:true ~is_sep:is_a "aaaab") ["";"";"";"";"b"]; 762 | eql (String.fields ~empty:false ~is_sep:is_a "aaaab") ["b"]; 763 | eql (String.fields ~empty:true ~is_sep:is_a "baaaa") ["b";"";"";"";""]; 764 | eql (String.fields ~empty:false ~is_sep:is_a "baaaa") ["b"]; 765 | eql (String.fields ~empty:true ~is_sep:is_a "abaaaa") ["";"b";"";"";"";""]; 766 | eql (String.fields ~empty:false ~is_sep:is_a "abaaaa") ["b"]; 767 | eql (String.fields ~empty:true ~is_sep:is_a "aba") ["";"b";""]; 768 | eql (String.fields ~empty:false ~is_sep:is_a "aba") ["b"]; 769 | eql (String.fields ~empty:false "tokenize me please") 770 | ["tokenize"; "me"; "please"]; 771 | () 772 | 773 | (* Traversing strings *) 774 | 775 | let find = test "String.find" @@ fun () -> 776 | let eq = eq_option ~eq:(=) ~pp:pp_int in 777 | let a c = c = 'a' in 778 | eq (String.find ~rev:false a "") None; 779 | eq (String.find ~rev:false ~start:(-1) a "") None; 780 | eq (String.find ~rev:false ~start:0 a "") None; 781 | eq (String.find ~rev:false ~start:1 a "") None; 782 | eq (String.find ~rev:true a "") None; 783 | eq (String.find ~rev:true ~start:(-1) a "") None; 784 | eq (String.find ~rev:true ~start:0 a "") None; 785 | eq (String.find ~rev:true ~start:1 a "") None; 786 | eq (String.find ~rev:false ~start:(-1) a "a") (Some 0); 787 | eq (String.find ~rev:false ~start:0 a "a") (Some 0); 788 | eq (String.find ~rev:false ~start:1 a "a") None; 789 | eq (String.find ~rev:true ~start:(-1) a "a") None; 790 | eq (String.find ~rev:true ~start:0 a "a") (Some 0); 791 | eq (String.find ~rev:true ~start:1 a "a") (Some 0); 792 | eq (String.find ~rev:false ~start:(-1) a "ba") (Some 1); 793 | eq (String.find ~rev:false ~start:0 a "ba") (Some 1); 794 | eq (String.find ~rev:false ~start:1 a "ba") (Some 1); 795 | eq (String.find ~rev:false ~start:2 a "ba") None; 796 | eq (String.find ~rev:true ~start:(-1) a "ba") None; 797 | eq (String.find ~rev:true ~start:0 a "ba") None; 798 | eq (String.find ~rev:true ~start:1 a "ba") (Some 1); 799 | eq (String.find ~rev:true ~start:2 a "ba") (Some 1); 800 | eq (String.find ~rev:true ~start:3 a "ba") (Some 1); 801 | eq (String.find ~rev:false a "aba") (Some 0); 802 | eq (String.find ~rev:false ~start:(-1) a "aba") (Some 0); 803 | eq (String.find ~rev:false ~start:0 a "aba") (Some 0); 804 | eq (String.find ~rev:false ~start:1 a "aba") (Some 2); 805 | eq (String.find ~rev:false ~start:2 a "aba") (Some 2); 806 | eq (String.find ~rev:false ~start:3 a "aba") None; 807 | eq (String.find ~rev:false ~start:4 a "aba") None; 808 | eq (String.find ~rev:true a "aba") (Some 2); 809 | eq (String.find ~rev:true ~start:(-1) a "aba") None; 810 | eq (String.find ~rev:true ~start:0 a "aba") (Some 0); 811 | eq (String.find ~rev:true ~start:1 a "aba") (Some 0); 812 | eq (String.find ~rev:true ~start:2 a "aba") (Some 2); 813 | eq (String.find ~rev:true ~start:3 a "aba") (Some 2); 814 | eq (String.find ~rev:true ~start:4 a "aba") (Some 2); 815 | eq (String.find ~rev:false a "bab") (Some 1); 816 | eq (String.find ~rev:false ~start:(-1) a "bab") (Some 1); 817 | eq (String.find ~rev:false ~start:0 a "bab") (Some 1); 818 | eq (String.find ~rev:false ~start:1 a "bab") (Some 1); 819 | eq (String.find ~rev:false ~start:2 a "bab") None; 820 | eq (String.find ~rev:false ~start:3 a "bab") None; 821 | eq (String.find ~rev:false ~start:4 a "bab") None; 822 | eq (String.find ~rev:true a "bab") (Some 1); 823 | eq (String.find ~rev:true ~start:(-1) a "bab") None; 824 | eq (String.find ~rev:true ~start:0 a "bab") None; 825 | eq (String.find ~rev:true ~start:1 a "bab") (Some 1); 826 | eq (String.find ~rev:true ~start:2 a "bab") (Some 1); 827 | eq (String.find ~rev:true ~start:3 a "bab") (Some 1); 828 | eq (String.find ~rev:true ~start:4 a "bab") (Some 1); 829 | eq (String.find ~rev:false a "baab") (Some 1); 830 | eq (String.find ~rev:false ~start:(-1) a "baab") (Some 1); 831 | eq (String.find ~rev:false ~start:0 a "baab") (Some 1); 832 | eq (String.find ~rev:false ~start:1 a "baab") (Some 1); 833 | eq (String.find ~rev:false ~start:2 a "baab") (Some 2); 834 | eq (String.find ~rev:false ~start:3 a "baab") None; 835 | eq (String.find ~rev:false ~start:4 a "baab") None; 836 | eq (String.find ~rev:false ~start:5 a "baab") None; 837 | eq (String.find ~rev:true ~start:(-1) a "baab") None; 838 | eq (String.find ~rev:true ~start:0 a "baab") None; 839 | eq (String.find ~rev:true ~start:1 a "baab") (Some 1); 840 | eq (String.find ~rev:true ~start:2 a "baab") (Some 2); 841 | eq (String.find ~rev:true ~start:3 a "baab") (Some 2); 842 | eq (String.find ~rev:true ~start:4 a "baab") (Some 2); 843 | eq (String.find ~rev:true ~start:5 a "baab") (Some 2); 844 | () 845 | 846 | let find_sub = test "String.find_sub" @@ fun () -> 847 | let eq = eq_option ~eq:(=) ~pp:pp_int in 848 | eq (String.find_sub ~rev:false ~sub:"" "ab") (Some 0); 849 | eq (String.find_sub ~rev:false ~start:(-1) ~sub:"" "ab") (Some 0); 850 | eq (String.find_sub ~rev:false ~start:0 ~sub:"" "ab") (Some 0); 851 | eq (String.find_sub ~rev:false ~start:1 ~sub:"" "ab") (Some 1); 852 | eq (String.find_sub ~rev:false ~start:2 ~sub:"" "ab") None; 853 | eq (String.find_sub ~rev:true ~sub:"" "ab") (Some 1); 854 | eq (String.find_sub ~rev:true ~start:(-1) ~sub:"" "ab") None; 855 | eq (String.find_sub ~rev:true ~start:0 ~sub:"" "ab") (Some 0); 856 | eq (String.find_sub ~rev:true ~start:1 ~sub:"" "ab") (Some 1); 857 | eq (String.find_sub ~rev:true ~start:2 ~sub:"" "ab") (Some 1); 858 | eq (String.find_sub ~rev:false ~sub:"" "") None; 859 | eq (String.find_sub ~rev:false ~start:(-1) ~sub:"" "") None; 860 | eq (String.find_sub ~rev:false ~start:0 ~sub:"" "") None; 861 | eq (String.find_sub ~rev:false ~start:1 ~sub:"" "") None; 862 | eq (String.find_sub ~rev:true ~sub:"" "") None; 863 | eq (String.find_sub ~rev:true ~start:(-1) ~sub:"" "") None; 864 | eq (String.find_sub ~rev:true ~start:0 ~sub:"" "") None; 865 | eq (String.find_sub ~rev:true ~start:1 ~sub:"" "") None; 866 | eq (String.find_sub ~rev:false ~sub:"ab" "") None; 867 | eq (String.find_sub ~rev:false ~start:(-1) ~sub:"ab" "") None; 868 | eq (String.find_sub ~rev:false ~start:0 ~sub:"ab" "") None; 869 | eq (String.find_sub ~rev:false ~start:1 ~sub:"ab" "") None; 870 | eq (String.find_sub ~rev:true ~sub:"ab" "") None; 871 | eq (String.find_sub ~rev:true ~start:(-1) ~sub:"ab" "") None; 872 | eq (String.find_sub ~rev:true ~start:0 ~sub:"ab" "") None; 873 | eq (String.find_sub ~rev:true ~start:1 ~sub:"ab" "") None; 874 | eq (String.find_sub ~rev:false ~sub:"ab" "a") None; 875 | eq (String.find_sub ~rev:false ~start:0 ~sub:"ab" "a") None; 876 | eq (String.find_sub ~rev:false ~start:1 ~sub:"ab" "a") None; 877 | eq (String.find_sub ~rev:false ~start:2 ~sub:"ab" "a") None; 878 | eq (String.find_sub ~rev:true ~sub:"ab" "a") None; 879 | eq (String.find_sub ~rev:true ~start:0 ~sub:"ab" "a") None; 880 | eq (String.find_sub ~rev:true ~start:1 ~sub:"ab" "a") None; 881 | eq (String.find_sub ~rev:true ~start:2 ~sub:"ab" "a") None; 882 | eq (String.find_sub ~rev:false ~start:(-1) ~sub:"ab" "ab") (Some 0); 883 | eq (String.find_sub ~rev:false ~start:0 ~sub:"ab" "ab") (Some 0); 884 | eq (String.find_sub ~rev:false ~start:1 ~sub:"ab" "ab") None; 885 | eq (String.find_sub ~rev:false ~start:2 ~sub:"ab" "ab") None; 886 | eq (String.find_sub ~rev:true ~sub:"ab" "ab") (Some 0); 887 | eq (String.find_sub ~rev:true ~start:(-1) ~sub:"ab" "ab") None; 888 | eq (String.find_sub ~rev:true ~start:0 ~sub:"ab" "ab") (Some 0); 889 | eq (String.find_sub ~rev:true ~start:1 ~sub:"ab" "ab") (Some 0); 890 | eq (String.find_sub ~rev:true ~start:2 ~sub:"ab" "ab") (Some 0); 891 | eq (String.find_sub ~rev:true ~start:3 ~sub:"ab" "ab") (Some 0); 892 | eq (String.find_sub ~rev:false ~sub:"ab" "aba") (Some 0); 893 | eq (String.find_sub ~rev:false ~start:(-1) ~sub:"ab" "aba") (Some 0); 894 | eq (String.find_sub ~rev:false ~start:0 ~sub:"ab" "aba") (Some 0); 895 | eq (String.find_sub ~rev:false ~start:1 ~sub:"ab" "aba") None; 896 | eq (String.find_sub ~rev:false ~start:2 ~sub:"ab" "aba") None; 897 | eq (String.find_sub ~rev:false ~start:3 ~sub:"ab" "aba") None; 898 | eq (String.find_sub ~rev:false ~start:4 ~sub:"ab" "aba") None; 899 | eq (String.find_sub ~rev:true ~sub:"ab" "aba") (Some 0); 900 | eq (String.find_sub ~rev:true ~start:(-1) ~sub:"ab" "aba") None; 901 | eq (String.find_sub ~rev:true ~start:0 ~sub:"ab" "aba") (Some 0); 902 | eq (String.find_sub ~rev:true ~start:1 ~sub:"ab" "aba") (Some 0); 903 | eq (String.find_sub ~rev:true ~start:2 ~sub:"ab" "aba") (Some 0); 904 | eq (String.find_sub ~rev:true ~start:3 ~sub:"ab" "aba") (Some 0); 905 | eq (String.find_sub ~rev:true ~start:4 ~sub:"ab" "aba") (Some 0); 906 | eq (String.find_sub ~rev:false ~sub:"ab" "bab") (Some 1); 907 | eq (String.find_sub ~rev:false ~start:(-1) ~sub:"ab" "bab") (Some 1); 908 | eq (String.find_sub ~rev:false ~start:0 ~sub:"ab" "bab") (Some 1); 909 | eq (String.find_sub ~rev:false ~start:1 ~sub:"ab" "bab") (Some 1); 910 | eq (String.find_sub ~rev:false ~start:2 ~sub:"ab" "bab") None; 911 | eq (String.find_sub ~rev:false ~start:3 ~sub:"ab" "bab") None; 912 | eq (String.find_sub ~rev:false ~start:4 ~sub:"ab" "bab") None; 913 | eq (String.find_sub ~rev:true ~sub:"ab" "bab") (Some 1); 914 | eq (String.find_sub ~rev:true ~start:(-1) ~sub:"ab" "bab") None; 915 | eq (String.find_sub ~rev:true ~start:0 ~sub:"ab" "bab") None; 916 | eq (String.find_sub ~rev:true ~start:1 ~sub:"ab" "bab") (Some 1); 917 | eq (String.find_sub ~rev:true ~start:2 ~sub:"ab" "bab") (Some 1); 918 | eq (String.find_sub ~rev:true ~start:3 ~sub:"ab" "bab") (Some 1); 919 | eq (String.find_sub ~rev:true ~start:4 ~sub:"ab" "bab") (Some 1); 920 | eq (String.find_sub ~rev:false ~sub:"ab" "abab") (Some 0); 921 | eq (String.find_sub ~rev:false ~start:(-1) ~sub:"ab" "abab") (Some 0); 922 | eq (String.find_sub ~rev:false ~start:0 ~sub:"ab" "abab") (Some 0); 923 | eq (String.find_sub ~rev:false ~start:1 ~sub:"ab" "abab") (Some 2); 924 | eq (String.find_sub ~rev:false ~start:2 ~sub:"ab" "abab") (Some 2); 925 | eq (String.find_sub ~rev:false ~start:3 ~sub:"ab" "abab") None; 926 | eq (String.find_sub ~rev:false ~start:4 ~sub:"ab" "abab") None; 927 | eq (String.find_sub ~rev:false ~start:5 ~sub:"ab" "abab") None; 928 | eq (String.find_sub ~rev:true ~sub:"ab" "abab") (Some 2); 929 | eq (String.find_sub ~rev:true ~start:(-1) ~sub:"ab" "abab") None; 930 | eq (String.find_sub ~rev:true ~start:0 ~sub:"ab" "abab") (Some 0); 931 | eq (String.find_sub ~rev:true ~start:1 ~sub:"ab" "abab") (Some 0); 932 | eq (String.find_sub ~rev:true ~start:2 ~sub:"ab" "abab") (Some 2); 933 | eq (String.find_sub ~rev:true ~start:3 ~sub:"ab" "abab") (Some 2); 934 | eq (String.find_sub ~rev:true ~start:4 ~sub:"ab" "abab") (Some 2); 935 | eq (String.find_sub ~rev:true ~start:5 ~sub:"ab" "abab") (Some 2); 936 | () 937 | 938 | let filter = test "String.filter[_map]" @@ fun () -> 939 | let no_alloc k f s = eq_bool (k f s == s) true in 940 | no_alloc String.filter (fun _ -> true) ""; 941 | no_alloc String.filter (fun _ -> true) "abcd"; 942 | no_alloc String.filter_map (fun c -> Some c) ""; 943 | no_alloc String.filter_map (fun c -> Some c) "abcd"; 944 | let gen_filter : 945 | 'a. ('a -> string -> string) -> 'a -> unit = 946 | fun filter a -> 947 | no_alloc filter a ""; 948 | no_alloc filter a "a"; 949 | no_alloc filter a "aa"; 950 | no_alloc filter a "aaa"; 951 | eq_str (filter a "ab") "a"; 952 | eq_str (filter a "ba") "a"; 953 | eq_str (filter a "abc") "a"; 954 | eq_str (filter a "bac") "a"; 955 | eq_str (filter a "bca") "a"; 956 | eq_str (filter a "aba") "aa"; 957 | eq_str (filter a "aab") "aa"; 958 | eq_str (filter a "baa") "aa"; 959 | eq_str (filter a "aabc") "aa"; 960 | eq_str (filter a "abac") "aa"; 961 | eq_str (filter a "abca") "aa"; 962 | eq_str (filter a "baca") "aa"; 963 | eq_str (filter a "bcaa") "aa"; 964 | in 965 | gen_filter String.filter (fun c -> c = 'a'); 966 | gen_filter String.filter_map (fun c -> if c = 'a' then Some c else None); 967 | let subst_a = function 'a' -> Some 'z' | c -> Some c in 968 | no_alloc String.filter_map subst_a ""; 969 | no_alloc String.filter_map subst_a "b"; 970 | no_alloc String.filter_map subst_a "bcd"; 971 | eq_str (String.filter_map subst_a "a") "z"; 972 | eq_str (String.filter_map subst_a "aa") "zz"; 973 | eq_str (String.filter_map subst_a "aaa") "zzz"; 974 | eq_str (String.filter_map subst_a "ab") "zb"; 975 | eq_str (String.filter_map subst_a "ba") "bz"; 976 | eq_str (String.filter_map subst_a "abc") "zbc"; 977 | eq_str (String.filter_map subst_a "bac") "bzc"; 978 | eq_str (String.filter_map subst_a "bca") "bcz"; 979 | eq_str (String.filter_map subst_a "aba") "zbz"; 980 | eq_str (String.filter_map subst_a "aab") "zzb"; 981 | eq_str (String.filter_map subst_a "baa") "bzz"; 982 | eq_str (String.filter_map subst_a "aabc") "zzbc"; 983 | eq_str (String.filter_map subst_a "abac") "zbzc"; 984 | eq_str (String.filter_map subst_a "abca") "zbcz"; 985 | eq_str (String.filter_map subst_a "baca") "bzcz"; 986 | eq_str (String.filter_map subst_a "bcaa") "bczz"; 987 | let subst_a_del_b = function 'a' -> Some 'z' | 'b' -> None | c -> Some c in 988 | no_alloc String.filter_map subst_a_del_b ""; 989 | no_alloc String.filter_map subst_a_del_b "c"; 990 | no_alloc String.filter_map subst_a_del_b "cd"; 991 | eq_str (String.filter_map subst_a_del_b "a") "z"; 992 | eq_str (String.filter_map subst_a_del_b "aa") "zz"; 993 | eq_str (String.filter_map subst_a_del_b "aaa") "zzz"; 994 | eq_str (String.filter_map subst_a_del_b "ab") "z"; 995 | eq_str (String.filter_map subst_a_del_b "ba") "z"; 996 | eq_str (String.filter_map subst_a_del_b "abc") "zc"; 997 | eq_str (String.filter_map subst_a_del_b "bac") "zc"; 998 | eq_str (String.filter_map subst_a_del_b "bca") "cz"; 999 | eq_str (String.filter_map subst_a_del_b "aba") "zz"; 1000 | eq_str (String.filter_map subst_a_del_b "aab") "zz"; 1001 | eq_str (String.filter_map subst_a_del_b "baa") "zz"; 1002 | eq_str (String.filter_map subst_a_del_b "aabc") "zzc"; 1003 | eq_str (String.filter_map subst_a_del_b "abac") "zzc"; 1004 | eq_str (String.filter_map subst_a_del_b "abca") "zcz"; 1005 | eq_str (String.filter_map subst_a_del_b "baca") "zcz"; 1006 | eq_str (String.filter_map subst_a_del_b "bcaa") "czz"; 1007 | () 1008 | 1009 | let map = test "String.map[i]" @@ fun () -> 1010 | let next_letter c = Char.(of_byte @@ to_int c + 1) in 1011 | let no_alloc map f s = eq_bool (map f s == s) true in 1012 | no_alloc String.map (fun c -> c) String.empty; 1013 | no_alloc String.map (fun c -> c) "abcd"; 1014 | eq_str (String.map (fun c -> fail "invoked"; c) "") ""; 1015 | eq_str (String.map next_letter "abcd") "bcde"; 1016 | no_alloc String.mapi (fun _ c -> c) String.empty; 1017 | no_alloc String.mapi (fun _ c -> c) "abcd"; 1018 | eq_str (String.mapi (fun _ c -> fail "invoked"; c) "") ""; 1019 | eq_str (String.mapi (fun i c -> Char.(of_byte @@ to_int c + i)) "abcd") 1020 | "aceg"; 1021 | () 1022 | 1023 | let fold = test "String.fold_{left,right}" @@ fun () -> 1024 | let eql = eq_list ~eq:(=) ~pp:pp_char in 1025 | String.fold_left (fun _ _ -> fail "invoked") () ""; 1026 | eql (String.fold_left (fun acc c -> c :: acc) [] "") []; 1027 | eql (String.fold_left (fun acc c -> c :: acc) [] "abc") ['c';'b';'a']; 1028 | String.fold_right (fun _ _ -> fail "invoked") "" (); 1029 | eql (String.fold_right (fun c acc -> c :: acc) "" []) []; 1030 | eql (String.fold_right (fun c acc -> c :: acc) "abc" []) ['a';'b';'c']; 1031 | () 1032 | 1033 | let iter = test "String.iter[i]" @@ fun () -> 1034 | let s = "abcd" in 1035 | String.iter (fun _ -> fail "invoked") ""; 1036 | String.iteri (fun _ _ -> fail "invoked") ""; 1037 | (let i = ref 0 in String.iter (fun c -> eq_char s.[!i] c; incr i) s); 1038 | String.iteri (fun i c -> eq_char s.[i] c) s; 1039 | () 1040 | 1041 | (* Ascii support *) 1042 | 1043 | let ascii_is_valid = test "String.Ascii.is_valid" @@ fun () -> 1044 | eq_bool (String.Ascii.is_valid "") true; 1045 | eq_bool (String.Ascii.is_valid "a") true; 1046 | eq_bool (String.(Ascii.is_valid (v ~len:(0x7F + 1) 1047 | (fun i -> Char.of_byte i)))) true; 1048 | () 1049 | 1050 | let ascii_casing = 1051 | test "String.Ascii.{uppercase,lowercase,capitalize,uncapitalize}" 1052 | @@ fun () -> 1053 | let no_alloc f s = eq_bool (f s == s) true in 1054 | no_alloc String.Ascii.uppercase ""; 1055 | no_alloc String.Ascii.uppercase "HEHEY \x7F\xFF\x00\x0A"; 1056 | eq_str (String.Ascii.uppercase "HeHey \x7F\xFF\x00\x0A") 1057 | "HEHEY \x7F\xFF\x00\x0A"; 1058 | eq_str (String.Ascii.uppercase "abcdefghijklmnopqrstuvwxyz") 1059 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 1060 | no_alloc String.Ascii.lowercase ""; 1061 | no_alloc String.Ascii.lowercase "hehey \x7F\xFF\x00\x0A"; 1062 | eq_str (String.Ascii.lowercase "hEhEY \x7F\xFF\x00\x0A") 1063 | "hehey \x7F\xFF\x00\x0A"; 1064 | eq_str (String.Ascii.lowercase "ABCDEFGHIJKLMNOPQRSTUVWXYZ") 1065 | "abcdefghijklmnopqrstuvwxyz"; 1066 | no_alloc String.Ascii.capitalize ""; 1067 | no_alloc String.Ascii.capitalize "Hehey"; 1068 | no_alloc String.Ascii.capitalize "\x00hehey"; 1069 | eq_str (String.Ascii.capitalize "hehey") "Hehey"; 1070 | no_alloc String.Ascii.uncapitalize ""; 1071 | no_alloc String.Ascii.uncapitalize "hehey"; 1072 | no_alloc String.Ascii.uncapitalize "\x00hehey"; 1073 | eq_str (String.Ascii.uncapitalize "Hehey") "hehey"; 1074 | () 1075 | 1076 | let ascii_escapes = test "String.Ascii.escape[_string]" @@ fun () -> 1077 | let no_alloc s = eq_bool ((String.Ascii.escape s) == s) true in 1078 | no_alloc ""; 1079 | no_alloc "abcd"; 1080 | no_alloc "~"; 1081 | no_alloc " "; 1082 | eq_str (String.Ascii.escape "\x00abc") "\\x00abc"; 1083 | eq_str (String.Ascii.escape "\nabc") "\\x0Aabc"; 1084 | eq_str (String.Ascii.escape "\nab\xFFc") "\\x0Aab\\xFFc"; 1085 | eq_str (String.Ascii.escape "\nab\xFF") "\\x0Aab\\xFF"; 1086 | eq_str (String.Ascii.escape "\nab\\") "\\x0Aab\\\\"; 1087 | eq_str (String.Ascii.escape "\\") "\\\\"; 1088 | eq_str (String.Ascii.escape "\\\x00\x1F\x7F\xFF") "\\\\\\x00\\x1F\\x7F\\xFF"; 1089 | let no_alloc s = 1090 | eq_bool ((String.Ascii.escape_string s) == s) true 1091 | in 1092 | no_alloc ""; 1093 | no_alloc "abcd"; 1094 | no_alloc "~"; 1095 | no_alloc " "; 1096 | eq_str (String.Ascii.escape_string "\x00abc") "\\x00abc"; 1097 | eq_str (String.Ascii.escape_string "\nabc") "\\nabc"; 1098 | eq_str (String.Ascii.escape_string "\nab\xFFc") "\\nab\\xFFc"; 1099 | eq_str (String.Ascii.escape_string "\nab\xFF") "\\nab\\xFF"; 1100 | eq_str (String.Ascii.escape_string "\nab\\") "\\nab\\\\"; 1101 | eq_str (String.Ascii.escape_string "\\") "\\\\"; 1102 | eq_str (String.Ascii.escape_string "\b\t\n\r\"\\\x00\x1F\x7F\xFF") 1103 | "\\b\\t\\n\\r\\\"\\\\\\x00\\x1F\\x7F\\xFF"; 1104 | () 1105 | 1106 | let ascii_unescapes = test "String.Ascii.unescape[_string]" @@ fun () -> 1107 | let no_alloc unescape s = match unescape s with 1108 | | None -> fail "expected (Some %S)" s 1109 | | Some s' -> eq_bool (s == s') true 1110 | in 1111 | let eq_o = eq_option ~eq:String.equal ~pp:pp_str in 1112 | no_alloc String.Ascii.unescape ""; 1113 | no_alloc String.Ascii.unescape "abcd"; 1114 | no_alloc String.Ascii.unescape "~"; 1115 | no_alloc String.Ascii.unescape " "; 1116 | eq_o (String.Ascii.unescape "\\x00abc") (Some "\x00abc"); 1117 | eq_o (String.Ascii.unescape "\\x0Aabc") (Some "\nabc"); 1118 | eq_o (String.Ascii.unescape "\\x0Aab\\xFFc") (Some "\nab\xFFc"); 1119 | eq_o (String.Ascii.unescape "\\x0Aab\\xFF") (Some "\nab\xFF"); 1120 | eq_o (String.Ascii.unescape "\\x0Aab\\\\") (Some "\nab\\"); 1121 | eq_o (String.Ascii.unescape "a\\\\") (Some "a\\"); 1122 | eq_o (String.Ascii.unescape "\\\\") (Some "\\"); 1123 | eq_o (String.Ascii.unescape "a\\\\\\x00\\x1F\\x7F\\xFF") 1124 | (Some "a\\\x00\x1F\x7F\xFF"); 1125 | eq_o (String.Ascii.unescape "\\x61") (Some "a"); 1126 | eq_o (String.Ascii.unescape "\\x20") (Some " "); 1127 | eq_o (String.Ascii.unescape "\\x2") None; 1128 | eq_o (String.Ascii.unescape "\\x") None; 1129 | eq_o (String.Ascii.unescape "\\") None; 1130 | eq_o (String.Ascii.unescape "a\\b") None; 1131 | eq_o (String.Ascii.unescape "a\\t") None; 1132 | eq_o (String.Ascii.unescape "b\\n") None; 1133 | eq_o (String.Ascii.unescape "b\\r") None; 1134 | eq_o (String.Ascii.unescape "b\\\"") None; 1135 | eq_o (String.Ascii.unescape "b\\z") None; 1136 | eq_o (String.Ascii.unescape "b\\1") None; 1137 | no_alloc String.Ascii.unescape_string ""; 1138 | no_alloc String.Ascii.unescape_string "abcd"; 1139 | no_alloc String.Ascii.unescape_string "~"; 1140 | no_alloc String.Ascii.unescape_string " "; 1141 | eq_o (String.Ascii.unescape_string "\\x00abc") (Some "\x00abc"); 1142 | eq_o (String.Ascii.unescape_string "\\nabc") (Some "\nabc"); 1143 | eq_o (String.Ascii.unescape_string "\\nab\\xFFc") (Some "\nab\xFFc"); 1144 | eq_o (String.Ascii.unescape_string "\\nab\\xFF") (Some "\nab\xFF"); 1145 | eq_o (String.Ascii.unescape_string "\\nab\\\\") (Some "\nab\\"); 1146 | eq_o (String.Ascii.unescape_string "a\\\\") (Some "a\\"); 1147 | eq_o (String.Ascii.unescape_string "\\\\") (Some "\\"); 1148 | eq_o (String.Ascii.unescape_string 1149 | "\\b\\t\\n\\r\\\"\\\\\\x00\\x1F\\x7F\\xFF") 1150 | (Some "\b\t\n\r\"\\\x00\x1F\x7F\xFF"); 1151 | eq_o (String.Ascii.unescape_string "\\x61") (Some "a"); 1152 | eq_o (String.Ascii.unescape_string "\\x20") (Some " "); 1153 | eq_o (String.Ascii.unescape_string "\\x2") None; 1154 | eq_o (String.Ascii.unescape_string "\\x") None; 1155 | eq_o (String.Ascii.unescape_string "\\") None; 1156 | eq_o (String.Ascii.unescape_string "a\\b") (Some "a\b"); 1157 | eq_o (String.Ascii.unescape_string "a\\t") (Some "a\t"); 1158 | eq_o (String.Ascii.unescape_string "b\\n") (Some "b\n"); 1159 | eq_o (String.Ascii.unescape_string "b\\r") (Some "b\r"); 1160 | eq_o (String.Ascii.unescape_string "b\\\"") (Some "b\""); 1161 | eq_o (String.Ascii.unescape_string "b\\\'") (Some "b'"); 1162 | eq_o (String.Ascii.unescape_string "b\\z") None; 1163 | eq_o (String.Ascii.unescape_string "b\\1") None; 1164 | () 1165 | 1166 | (* Uniqueness *) 1167 | 1168 | let uniquify = test "String.uniquify" @@ fun () -> 1169 | let eq = eq_list ~eq:(=) ~pp:pp_str in 1170 | eq (String.uniquify []) []; 1171 | eq (String.uniquify ["a";"b";"c"]) ["a";"b";"c"]; 1172 | eq (String.uniquify ["a";"a";"b";"c"]) ["a";"b";"c"]; 1173 | eq (String.uniquify ["a";"b";"a";"c"]) ["a";"b";"c"]; 1174 | eq (String.uniquify ["a";"b";"c";"a"]) ["a";"b";"c"]; 1175 | eq (String.uniquify ["b";"a";"b";"c"]) ["b";"a";"c"]; 1176 | eq (String.uniquify ["a";"b";"b";"c"]) ["a";"b";"c"]; 1177 | eq (String.uniquify ["a";"b";"c";"b"]) ["a";"b";"c"]; 1178 | () 1179 | 1180 | let suite = suite "String functions" 1181 | [ misc; 1182 | head; 1183 | append; 1184 | concat; 1185 | is_empty; 1186 | is_prefix; 1187 | is_infix; 1188 | is_suffix; 1189 | for_all; 1190 | exists; 1191 | equal; 1192 | compare; 1193 | with_range; 1194 | with_index_range; 1195 | trim; 1196 | span; 1197 | cut; 1198 | cuts; 1199 | fields; 1200 | find; 1201 | find_sub; 1202 | filter; 1203 | map; 1204 | iter; 1205 | fold; 1206 | ascii_is_valid; 1207 | ascii_casing; 1208 | ascii_escapes; 1209 | ascii_unescapes; 1210 | uniquify; ] 1211 | 1212 | (*--------------------------------------------------------------------------- 1213 | Copyright (c) 2015 The astring programmers 1214 | 1215 | Permission to use, copy, modify, and/or distribute this software for any 1216 | purpose with or without fee is hereby granted, provided that the above 1217 | copyright notice and this permission notice appear in all copies. 1218 | 1219 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 1220 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1221 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 1222 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1223 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1224 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1225 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1226 | ---------------------------------------------------------------------------*) 1227 | --------------------------------------------------------------------------------