├── .gitignore ├── .header ├── .merlin ├── .ocamlinit ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── bigstring-unix.opam ├── bigstring.opam ├── dune-project ├── src ├── bigstring.ml ├── bigstring.mli ├── bigstring_unix.ml ├── bigstring_unix.mli ├── bigstring_unix_stubs.c ├── configure.ml └── dune └── test ├── dune └── test.ml /.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | *.install 3 | .merlin 4 | 5 | # qtest 6 | qtest* 7 | -------------------------------------------------------------------------------- /.header: -------------------------------------------------------------------------------- 1 | (* This file is free software, copyright Simon Cruanes. See file "LICENSE" for more details. *) 2 | -------------------------------------------------------------------------------- /.merlin: -------------------------------------------------------------------------------- 1 | S src 2 | B _build/** 3 | PKG bigarray 4 | PKG bytes 5 | FLG -w +a-4-44 6 | -------------------------------------------------------------------------------- /.ocamlinit: -------------------------------------------------------------------------------- 1 | #use "topfind";; 2 | #require "bigarray";; 3 | #directory "_build/src";; 4 | #load "bigstring.cma";; 5 | #install_printer Bigstring.print;; 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | install: wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-docker.sh 3 | script: bash -ex .travis-docker.sh 4 | services: 5 | - docker 6 | env: 7 | global: 8 | - PINS="bigstring:. bigstring-unix:." 9 | - DISTRO="ubuntu-16.04" 10 | matrix: 11 | #- PACKAGE="bigstring" OCAML_VERSION="4.02.3" DEPOPTS="base-unix" 12 | - PACKAGE="bigstring" OCAML_VERSION="4.03" 13 | - PACKAGE="bigstring" OCAML_VERSION="4.04" 14 | - PACKAGE="bigstring" OCAML_VERSION="4.05" 15 | - PACKAGE="bigstring" OCAML_VERSION="4.06" 16 | - PACKAGE="bigstring" OCAML_VERSION="4.08" 17 | - PACKAGE="bigstring" OCAML_VERSION="4.09" 18 | - PACKAGE="bigstring-unix" OCAML_VERSION="4.03" 19 | - PACKAGE="bigstring-unix" OCAML_VERSION="4.04" 20 | - PACKAGE="bigstring-unix" OCAML_VERSION="4.05" 21 | - PACKAGE="bigstring-unix" OCAML_VERSION="4.06" 22 | - PACKAGE="bigstring-unix" OCAML_VERSION="4.08" 23 | - PACKAGE="bigstring-unix" OCAML_VERSION="4.09" 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Simon Cruanes 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: build test 2 | 3 | build: 4 | @dune build @install 5 | 6 | test: 7 | @dune runtest 8 | 9 | clean: 10 | @dune clean 11 | 12 | .PHONY: clean all test build 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bigstring [![Build Status](https://travis-ci.org/c-cube/ocaml-bigstring.svg?branch=master)](https://travis-ci.org/c-cube/ocaml-bigstring) 2 | 3 | **NOTE**: I recommend using [bigstringaf](https://github.com/inhabitedtype/bigstringaf) now. It has fast operations written in C and is better maintained. 4 | 5 | A set of utils for dealing with `bigarrays` of `char` as if they were proper 6 | OCaml strings. 7 | 8 | ## Usage 9 | 10 | ```ocaml 11 | #require "bigstring";; 12 | #install_printer Bigstring.print;; 13 | module B = Bigstring;; 14 | 15 | # let s1 = B.of_string " abcd ";; 16 | val s1 : B.t = " abcd " 17 | 18 | # let s2 = B.trim s1;; 19 | val s2 : B.t = "abcd" 20 | 21 | # B.index ~c:'b' s2 ;; 22 | - : int = 1 23 | 24 | # let str = "__";; 25 | val str : string = "__" 26 | 27 | # B.blit_of_string str 0 s2 1 2;; 28 | - : unit = () 29 | 30 | # s2;; 31 | - : B.t = "a__d" 32 | ``` 33 | 34 | ## Documentation 35 | 36 | http://c-cube.github.io/ocaml-bigstring/ 37 | 38 | ## License 39 | 40 | This code is free, under the BSD license. 41 | -------------------------------------------------------------------------------- /bigstring-unix.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | name: "bigstring-unix" 3 | version: "0.3" 4 | authors: "Simon Cruanes " 5 | maintainer: "Simon Cruanes " 6 | synopsis: "I/O functions for bigstrings using file descriptors and memory-maps" 7 | tags: [ "bigstring" "bigarray" ] 8 | homepage: "https://github.com/c-cube/ocaml-bigstring/" 9 | bug-reports: "https://github.com/c-cube/ocaml-bigstring/issues" 10 | dev-repo: "git://github.com/c-cube/ocaml-bigstring.git" 11 | build: [ 12 | [ "dune" "build" "-p" name "-j" jobs ] 13 | [ "dune" "runtest" "-p" name "-j" jobs ] {with-test} 14 | ] 15 | depends: [ 16 | "dune" {>= "1.2"} 17 | "base-bigarray" 18 | "base-unix" 19 | "ocaml" {>= "4.03.0"} 20 | "alcotest" {with-test} 21 | "bigstring" {with-test} 22 | ] 23 | 24 | -------------------------------------------------------------------------------- /bigstring.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | name: "bigstring" 3 | version: "0.3" 4 | synopsis: "A set of utils for dealing with `bigarrays` of `char`" 5 | authors: "Simon Cruanes " 6 | maintainer: "Simon Cruanes " 7 | tags: [ "bigstring" "bigarray" ] 8 | homepage: "https://github.com/c-cube/ocaml-bigstring/" 9 | bug-reports: "https://github.com/c-cube/ocaml-bigstring/issues" 10 | dev-repo: "git://github.com/c-cube/ocaml-bigstring.git" 11 | build: [ 12 | [ "dune" "build" "-p" name "-j" jobs ] 13 | [ "dune" "runtest" "-p" name "-j" jobs ] {with-test} 14 | ] 15 | depends: [ 16 | "dune" {>= "1.2"} 17 | "base-bigarray" 18 | "base-bytes" 19 | "ocaml" {>= "4.03.0"} 20 | "alcotest" {with-test} 21 | "bigstring-unix" {with-test} 22 | ] 23 | 24 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 1.2) 2 | (name bigstring) 3 | -------------------------------------------------------------------------------- /src/bigstring.ml: -------------------------------------------------------------------------------- 1 | 2 | (* This file is free software, copyright Simon Cruanes. See file "LICENSE" for more details. *) 3 | 4 | (** {1 Interface to 1-dimension Bigarrays of bytes (char)} *) 5 | 6 | type 'a gen = unit -> 'a option 7 | type 'a sequence = ('a -> unit) -> unit 8 | type 'a printer = Format.formatter -> 'a -> unit 9 | 10 | module B = Bigarray.Array1 11 | 12 | type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t 13 | 14 | let create size = B.create Bigarray.char Bigarray.c_layout size 15 | 16 | let empty = create 0 17 | 18 | let make sz c = 19 | let buf = create sz in 20 | B.fill buf c; 21 | buf 22 | 23 | let init size f = 24 | let a = create size in 25 | for i = 0 to size-1 do 26 | B.unsafe_set a i (f i) 27 | done; 28 | a 29 | 30 | let fill = B.fill 31 | 32 | let fill_slice s c i len = fill (B.sub s i len) c 33 | 34 | let get = B.get 35 | 36 | let unsafe_get = B.unsafe_get 37 | 38 | let set = B.set 39 | 40 | let unsafe_set = B.unsafe_set 41 | 42 | let size = B.dim 43 | let length = B.dim 44 | 45 | let sub = B.sub 46 | 47 | let blit a i b j len = 48 | let a' = sub a i len in 49 | let b' = sub b j len in 50 | B.blit a' b' 51 | 52 | let copy a = 53 | let b = create (size a) in 54 | B.blit a b; 55 | b 56 | 57 | (*$T 58 | copy (of_string "abcd") |> to_string = "abcd" 59 | *) 60 | 61 | let fold f acc a = 62 | let rec aux f acc a i len = 63 | if i = len then acc 64 | else 65 | let acc = f acc (get a i) in 66 | aux f acc a (i+1) len 67 | in 68 | aux f acc a 0 (size a) 69 | 70 | let foldi f acc a = 71 | let rec aux f acc a i len = 72 | if i = len then acc 73 | else 74 | let acc = f acc i (get a i) in 75 | aux f acc a (i+1) len 76 | in 77 | aux f acc a 0 (size a) 78 | 79 | let iter f a = 80 | let n = size a in 81 | for i = 0 to n-1 do 82 | f (B.unsafe_get a i) 83 | done 84 | 85 | let iteri f a = 86 | let n = size a in 87 | for i = 0 to n-1 do 88 | f i (B.unsafe_get a i) 89 | done 90 | 91 | let rec equal_rec a b i len = 92 | i = len 93 | || 94 | ( get a i = get b i && equal_rec a b (i+1) len) 95 | 96 | let equal a b = 97 | size a = size b 98 | && 99 | equal_rec a b 0 (size a) 100 | 101 | (*$Q 102 | Q.(pair printable_string printable_string) (fun (s1, s2) -> \ 103 | let a1 = of_string s1 and a2 = of_string s2 in \ 104 | equal a1 a2 = (s1 = s2)) 105 | *) 106 | 107 | let rec compare_rec a b i len_a len_b = 108 | if i=len_a && i=len_b then 0 109 | else if i=len_a then -1 110 | else if i=len_b then 1 111 | else 112 | match Char.compare (get a i) (get b i) with 113 | | 0 -> compare_rec a b (i+1) len_a len_b 114 | | n -> n 115 | 116 | let compare a b = 117 | compare_rec a b 0 (size a) (size b) 118 | 119 | (*$T 120 | compare (of_string "abc") (of_string "abd") < 0 121 | compare (of_string "abc") (of_string "abcd") < 0 122 | compare (of_string "abcd") (of_string "abc") > 0 123 | compare (of_string "abc") (of_string "b") < 0 124 | *) 125 | 126 | (*$Q 127 | Q.(pair string string) (fun (s1, s2) -> \ 128 | let a1 = of_string s1 and a2 = of_string s2 in \ 129 | (compare a1 a2 <= 0) = (String.compare s1 s2 <= 0)) 130 | *) 131 | 132 | (** {2 Conversions} *) 133 | 134 | let to_bytes a = 135 | Bytes.init (size a) (fun i -> B.unsafe_get a i) 136 | 137 | let of_bytes b = 138 | init (Bytes.length b) (fun i -> Bytes.get b i) 139 | 140 | let of_bytes_slice b i len = 141 | if i < 0 || i+len > Bytes.length b then invalid_arg "Bigstring.of_bytes"; 142 | init len (fun j -> Bytes.get b (i+j)) 143 | 144 | let sub_bytes a i len = 145 | if i < 0 || i+len > size a then invalid_arg "Bigstring.sub_bytes"; 146 | Bytes.init len (fun j -> B.get a (i+j)) 147 | 148 | let blit_to_bytes a i b j len = 149 | if i < 0 || j < 0 || i+len > size a || j+len > Bytes.length b 150 | then invalid_arg "Bigstring.blit_to_bytes"; 151 | for x=0 to len-1 do 152 | Bytes.set b (j+x) (B.get a (i+x)) 153 | done 154 | 155 | let blit_of_bytes a i b j len = 156 | if i < 0 || j < 0 || i+len > Bytes.length a || j+len > size b 157 | then invalid_arg "Bigstring.blit_of_bytes"; 158 | for x=0 to len-1 do 159 | B.set b (j+x) (Bytes.get a (i+x)) 160 | done 161 | 162 | (* naive replacement for {!String.init}, which is only available after 4.02 *) 163 | let str_init_ n f = 164 | let bytes = Bytes.init n f in 165 | Bytes.unsafe_to_string bytes 166 | 167 | let to_string a = 168 | str_init_ (size a) (fun i -> B.unsafe_get a i) 169 | 170 | let of_string s = 171 | init (String.length s) (fun i -> String.get s i) 172 | 173 | let of_string_slice s i len = 174 | if i < 0 || i+len > String.length s then invalid_arg "Bigstring.of_string_slice"; 175 | init len (fun j -> String.get s (i+j)) 176 | 177 | let of_buffer b = 178 | let len = Buffer.length b in 179 | init len (Buffer.nth b) 180 | 181 | let of_gen g = 182 | (* read [g] into some buffer *) 183 | let rec aux_ b g = match g() with 184 | | None -> () 185 | | Some c -> Buffer.add_char b c; aux_ b g 186 | in 187 | let b = Buffer.create 64 in 188 | aux_ b g; 189 | of_buffer b 190 | 191 | let sub_string a i len = 192 | if i < 0 || i+len > size a then invalid_arg "Bigstring.sub_string"; 193 | str_init_ len (fun j -> B.get a (i+j)) 194 | 195 | (*$T 196 | of_string_slice "abcde" 1 3 |> to_string = "bcd" 197 | *) 198 | 199 | let blit_of_string a i b j len = 200 | if i < 0 || j < 0 || i+len > String.length a || j+len > size b 201 | then invalid_arg "Bigstring.blit_of_string"; 202 | for x=0 to len-1 do 203 | B.set b (j+x) (String.get a (i+x)) 204 | done 205 | 206 | let blit_of_buffer buf i s j len = 207 | if i < 0 || j < 0 || i+len > Buffer.length buf || j+len > size s 208 | then invalid_arg "Bigstring.blit_of_buffer"; 209 | for x=0 to len-1 do 210 | B.set s (j+x) (Buffer.nth buf (i+x)) 211 | done 212 | 213 | let to_seq a k = iter k a 214 | 215 | let to_gen a = 216 | let i = ref 0 in 217 | let n = size a in 218 | fun () -> 219 | if !i = n then None 220 | else ( 221 | let x = get a !i in 222 | incr i; 223 | Some x 224 | ) 225 | 226 | (*$T 227 | of_string "abcd" |> to_gen |> of_gen |> to_string = "abcd" 228 | *) 229 | 230 | let to_seq_slice a i len = 231 | to_seq (sub a i len) 232 | 233 | let to_gen_slice a i len = 234 | to_gen (sub a i len) 235 | 236 | let to_buffer s buf = iter (Buffer.add_char buf) s 237 | 238 | let print out s = 239 | Format.pp_print_char out '"'; 240 | iter 241 | (function 242 | | '\n' -> Format.pp_print_string out "\\n" 243 | | '\t' -> Format.pp_print_string out "\\t" 244 | | '\\' -> Format.pp_print_string out "\\\\" 245 | | '\000' -> Format.pp_print_string out "\\000" 246 | | c -> Format.pp_print_char out c) 247 | s; 248 | Format.pp_print_char out '"' 249 | 250 | (*$= & ~printer:(fun s->s) 251 | (Format.asprintf "%a" print (create 3)) "\"\\000\\000\\000\"" 252 | (Format.asprintf "%a" print (init 3 (fun i->Char.chr (i+65)))) "\"ABC\"" 253 | *) 254 | 255 | (** {2 Utils} *) 256 | 257 | let concat sep l = 258 | let len_sep = String.length sep in 259 | (* compute length of result *) 260 | let len = 261 | List.fold_left 262 | (fun n s -> 263 | let n = if n>0 then n+len_sep else n in (* add length of separator *) 264 | n + length s) 265 | 0 l 266 | in 267 | (* allocate result *) 268 | let res = create len in 269 | let i = ref 0 in 270 | let j = ref 0 in 271 | List.iter 272 | (fun s -> 273 | if !j > 0 then ( 274 | blit_of_string sep 0 res !i len_sep; 275 | i := !i + len_sep 276 | ); 277 | incr j; 278 | blit s 0 res !i (length s); 279 | i := !i + length s) 280 | l; 281 | assert (!i = len); 282 | res 283 | 284 | (*$T 285 | concat ";" [of_string "ab"; of_string "cd"; of_string "ef"] |> to_string = "ab;cd;ef" 286 | concat "yolo" [] |> to_string = "" 287 | concat "" [of_string "a"; of_string "bc"; of_string ""; of_string "d"] |> to_string = "abcd" 288 | *) 289 | 290 | let map ~f s = init (length s) (fun i -> f (unsafe_get s i)) 291 | 292 | let mapi ~f s = init (length s) (fun i -> f i (unsafe_get s i)) 293 | 294 | let lowercase s = map ~f:Char.lowercase_ascii s 295 | 296 | let uppercase s = map ~f:Char.uppercase_ascii s 297 | 298 | let index_pred ~f s = 299 | let rec aux f s i = 300 | if i=size s then raise Not_found 301 | else if f (unsafe_get s i) then i 302 | else aux f s (i+1) 303 | in 304 | aux f s 0 305 | 306 | let rindex_pred ~f s = 307 | let rec aux f s i = 308 | if i= ~-1 then raise Not_found 309 | else if f (unsafe_get s i) then i 310 | else aux f s (i-1) 311 | in 312 | aux f s (size s-1) 313 | 314 | let index s ~c = index_pred s ~f:(fun c' -> c=c') 315 | 316 | (*$T 317 | index (of_string "abcdabcd") ~c:'a' = 0 318 | index (of_string "abcdabcd") ~c:'c' = 2 319 | try ignore (index (of_string "abcdabcd") ~c:'e'); false with Not_found -> true 320 | *) 321 | 322 | let rindex s ~c = rindex_pred s ~f:(fun c' -> c=c') 323 | 324 | (*$T 325 | rindex (of_string "abcdabcd") ~c:'a' = 4 326 | rindex (of_string "abcdabcd") ~c:'c' = 6 327 | try ignore (rindex (of_string "abcdabcd") ~c:'e'); false with Not_found -> true 328 | *) 329 | 330 | let contains s ~c = 331 | try ignore (index s ~c); true 332 | with Not_found -> false 333 | 334 | (*$T 335 | of_string "abcd" |> contains ~c:'a' 336 | of_string "abcd" |> contains ~c:'c' 337 | not (of_string "abcd" |> contains ~c:'e') 338 | *) 339 | 340 | let is_not_white_ = function ' ' | '\t' | '\r' | '\n' | '\012' -> false | _ -> true 341 | 342 | let trim s = 343 | try 344 | let i = index_pred s ~f:is_not_white_ in 345 | let j = rindex_pred s ~f:is_not_white_ in 346 | assert (i empty 350 | 351 | (*$T 352 | trim empty |> to_string = "" 353 | of_string " \t\n\r\012 " |> trim |> to_string = "" 354 | of_string " hello world \n" |> trim |> to_string = "hello world" 355 | of_string "hello world \n" |> trim |> to_string = "hello world" 356 | of_string " hello world" |> trim |> to_string = "hello world" 357 | *) 358 | 359 | let for_all ~f s = 360 | try ignore (index_pred s ~f:(fun c -> not (f c))); false 361 | with Not_found -> true 362 | 363 | let exists ~f s = 364 | try ignore (index_pred s ~f); true 365 | with Not_found -> false 366 | 367 | let split_gen ~by s = 368 | let stop = ref false in 369 | let cur_sub = ref s in (* suffix slice of [s] *) 370 | (* line generator *) 371 | let g() = 372 | if !stop then None 373 | else 374 | try 375 | let i = index ~c:by !cur_sub in 376 | let slice = B.sub !cur_sub 0 i in 377 | cur_sub := B.sub !cur_sub (i+1) (size !cur_sub - i-1); 378 | Some slice 379 | with Not_found -> 380 | stop := true; 381 | if size !cur_sub > 0 then Some !cur_sub else None 382 | in 383 | g 384 | 385 | let split ~by s = 386 | let rec gen_to_list acc g = match g() with 387 | | None -> List.rev acc 388 | | Some x -> gen_to_list (x::acc) g 389 | in 390 | gen_to_list [] (split_gen ~by s) 391 | 392 | let lines_gen s = split_gen ~by:'\n' s 393 | let lines s = split ~by:'\n' s 394 | 395 | (*$T 396 | empty |> lines = [] 397 | of_string "ab\ncde\nfg\nh" |> lines |> List.map to_string = ["ab"; "cde"; "fg"; "h"] 398 | *) 399 | -------------------------------------------------------------------------------- /src/bigstring.mli: -------------------------------------------------------------------------------- 1 | 2 | (* This file is free software, copyright Simon Cruanes. See file "LICENSE" for more details. *) 3 | 4 | type 'a gen = unit -> 'a option 5 | type 'a sequence = ('a -> unit) -> unit 6 | type 'a printer = Format.formatter -> 'a -> unit 7 | 8 | (** {1 Interface to 1-dimension Bigarrays of bytes (char)} 9 | 10 | A "bigstring" here is simply a bigarray of chars. It can be used instead 11 | of regular strings when IO involve calling C (or another language), 12 | when very large strings are required, or for memory-mapping. *) 13 | 14 | type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t 15 | 16 | val create : int -> t 17 | (** Create a new bigstring of the given size. Content is arbitrary. *) 18 | 19 | val make : int -> char -> t 20 | (** Create a new bigstring of the given size filled with [c]. *) 21 | 22 | val empty : t 23 | (** Empty string *) 24 | 25 | val init : int -> (int -> char) -> t 26 | (** Initialize with the given function (called at every index). 27 | [init n f] is the string [f 0, f 1, ..., f (n-1)]. *) 28 | 29 | val fill : t -> char -> unit 30 | (** Fill the string with the given byte. *) 31 | 32 | val fill_slice : t -> char -> int -> int -> unit 33 | (** [fill_slice s c i len] is the same as [fill (sub s i len) c], it 34 | fills the slice from [i] to [i+len-1] of [s] with the char [c] *) 35 | 36 | val size : t -> int 37 | (** Number of bytes *) 38 | 39 | val length : t -> int 40 | (** Alias for {!size}. *) 41 | 42 | val get : t -> int -> char 43 | (** Obtain the byte at the given index. 44 | @raise Invalid_argument if the index is invalid *) 45 | 46 | val unsafe_get : t -> int -> char 47 | (** Same as {!get}, but without bound check. Can fail arbitrarily (including 48 | segfault) if used improperly. *) 49 | 50 | val set : t -> int -> char -> unit 51 | (** Change the byte at the given index. 52 | @raise Invalid_argument if the index is invalid *) 53 | 54 | val unsafe_set : t -> int -> char -> unit 55 | (** Same as {!set}, but without bound check. Can fail arbitrarily (including 56 | segfault) if used improperly. *) 57 | 58 | val blit : t -> int -> t -> int -> int -> unit 59 | (** Blit a slice of the bigstring into another. 60 | [blit s1 i1 s2 i2 len] means that elements from [s1] whose indices 61 | range from [i1] to [i1+len-1] are copied into the slots of [s2] 62 | whose indices range from [i2] to [i2+len-1]. This is similar to 63 | {!String.blit} or {!Bytes.blit} or {!Array.blit}. *) 64 | 65 | val copy : t -> t 66 | (** Copy of the string *) 67 | 68 | val sub : t -> int -> int -> t 69 | (** [sub s i len] takes a slice of length [len] from the string [s], starting 70 | at offset [i]. The slice shares the same memory as [s], meaning that 71 | modifications of the slice will modify [s] as well. 72 | Slicing is cheap since it does not involve copying the whole range. 73 | @raise Invalid_argument if [i, len] doesn't designate a valid substring *) 74 | 75 | val fold : ('a -> char -> 'a) -> 'a -> t -> 'a 76 | (** Fold on every char in increasing order *) 77 | 78 | val foldi : ('a -> int -> char -> 'a) -> 'a -> t -> 'a 79 | (** Fold on every char in increasing order *) 80 | 81 | val iter : (char -> unit) -> t -> unit 82 | (** Iterate on every char in increasing order *) 83 | 84 | val iteri : (int -> char -> unit) -> t -> unit 85 | (** Iterate on every char in increasing order *) 86 | 87 | val equal : t -> t -> bool 88 | (** Equality of content. *) 89 | 90 | val compare : t -> t -> int 91 | (** Lexicographic order *) 92 | 93 | (** {2 Conversions} *) 94 | 95 | val to_bytes : t -> Bytes.t 96 | 97 | val of_bytes : Bytes.t -> t 98 | 99 | val of_bytes_slice : Bytes.t -> int -> int -> t 100 | 101 | val sub_bytes : t -> int -> int -> Bytes.t 102 | 103 | val blit_to_bytes : t -> int -> Bytes.t -> int -> int -> unit 104 | 105 | val blit_of_bytes : Bytes.t -> int -> t -> int -> int -> unit 106 | 107 | val blit_of_buffer : Buffer.t -> int -> t -> int -> int -> unit 108 | 109 | val to_string : t -> string 110 | 111 | val of_string : string -> t 112 | 113 | val of_string_slice : string -> int -> int -> t 114 | 115 | val of_buffer : Buffer.t -> t 116 | (** [of_buffer b] creates a string that contains the same as [b] *) 117 | 118 | val of_gen : char gen -> t 119 | 120 | val sub_string : t -> int -> int -> string 121 | 122 | val blit_of_string : string -> int -> t -> int -> int -> unit 123 | 124 | val to_seq : t -> char sequence 125 | 126 | val to_gen : t -> char gen 127 | 128 | val to_seq_slice : t -> int -> int -> char sequence 129 | 130 | val to_gen_slice : t -> int -> int -> char gen 131 | 132 | val to_buffer : t -> Buffer.t -> unit 133 | (** Add the whole string at the end of the buffer *) 134 | 135 | val print : t printer 136 | (** Pretty-print the string into a formatter, surrounded with '"' *) 137 | 138 | (** {2 Utils} *) 139 | 140 | val concat : string -> t list -> t 141 | (** [concat set l] concatenates the list [l], inserting [sep] between 142 | each pair of string. *) 143 | 144 | val map : f:(char -> char) -> t -> t 145 | 146 | val mapi : f:(int -> char -> char) -> t -> t 147 | 148 | val lowercase : t -> t 149 | (** Copy of the string with all characters in lowercase (see {!Char.lowercase}) *) 150 | 151 | val uppercase : t -> t 152 | (** Copy of the string with all characters in uppercase (see {!Char.uppercase}) *) 153 | 154 | val trim : t -> t 155 | (** [trim s] returns a slice of [s] without the leading and trailing 156 | whitespaces, where whitespaces are defined identically to {!String.trim}. 157 | note that it does not copy the substring, but returns a slice! 158 | @return a slice of [s], or empty if [s] is totally composed of whitespaces *) 159 | 160 | val index : t -> c:char -> int 161 | (** [index s ~c] returns the index of the first 162 | occurrence of character [c] in string [s]. 163 | @raise Not_found if [c] does not occurr in [s] *) 164 | 165 | val rindex : t -> c:char -> int 166 | (** [rindex s ~c] returns the index of the last 167 | occurrence of character [c] in string [s]. 168 | @raise Not_found if [c] does not occurr in [s] *) 169 | 170 | val index_pred : f:(char -> bool) -> t -> int 171 | (** [index_pred ~f s] returns the index of the first char in [s] that 172 | satisfies [s]. 173 | @raise Not_found if no character in [s] satisfies [p] *) 174 | 175 | val rindex_pred : f:(char -> bool) -> t -> int 176 | (** [rindex_pred ~f s] returns the index of the last char in [s] that 177 | satisfies [s]. 178 | @raise Not_found if no character in [s] satisfies [p] *) 179 | 180 | val contains : t -> c:char -> bool 181 | (** [String.contains s c] tests if character [c] appears in the string [s]. *) 182 | 183 | val for_all : f:(char -> bool) -> t -> bool 184 | (** True for all chars? *) 185 | 186 | val exists : f:(char -> bool) -> t -> bool 187 | (** True for some char? *) 188 | 189 | val split : by:char -> t -> t list 190 | (** [split s ~by] splits [s] along the occurrences of [by]. *) 191 | 192 | val split_gen : by:char -> t -> t gen 193 | (** Same as {!split} but returns a generator *) 194 | 195 | val lines : t -> t list 196 | (** [lines s] returns a list of the lines of [s] (splits along '\n') *) 197 | 198 | val lines_gen : t -> t gen 199 | (** [lines_gen s] returns a generator of the lines of [s] (splits along '\n') 200 | where every line is a slice of [s] *) 201 | -------------------------------------------------------------------------------- /src/bigstring_unix.ml: -------------------------------------------------------------------------------- 1 | 2 | (* This file is free software, copyright Simon Cruanes. See file "LICENSE" for more details. *) 3 | 4 | type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t 5 | 6 | (** {2 I/O} *) 7 | 8 | let get_bounds name ?(off=0) ?len t = 9 | let buffer_len = Bigarray.Array1.dim t in 10 | let len = match len with 11 | | Some len -> len 12 | | None -> buffer_len - off 13 | in 14 | if len < 0 || off < 0 || buffer_len - off < len 15 | then invalid_arg ("Bigstring_unix." ^ name) 16 | else (off, len) 17 | 18 | external read_fd : Unix.file_descr -> t -> int -> int -> int = "ocaml_bigstring_unix_read" 19 | external write_fd : Unix.file_descr -> t -> int -> int -> int = "ocaml_bigstring_unix_write" 20 | 21 | let read fd ?off ?len t = 22 | let off, len = get_bounds "read" ?off ?len t in 23 | read_fd fd t off len 24 | and write fd ?off ?len t = 25 | let off, len = get_bounds "write" ?off ?len t in 26 | write_fd fd t off len 27 | 28 | 29 | (** {2 Memory-map} *) 30 | 31 | let map_file_descr ?pos ?(shared=false) fd len = 32 | Bigarray.array1_of_genarray @@ 33 | Bigstring_compat.map_file fd ?pos Bigarray.char Bigarray.c_layout shared [|len|] 34 | 35 | let with_map_file ?pos ?len ?(mode=0o644) ?(flags=[Open_rdonly]) ?shared name f = 36 | let ic = open_in_gen flags mode name in 37 | let len = match len with 38 | | None -> in_channel_length ic 39 | | Some n -> n 40 | in 41 | let a = map_file_descr ?pos ?shared (Unix.descr_of_in_channel ic) len in 42 | try 43 | let x = f a in 44 | close_in ic; 45 | x 46 | with e -> 47 | close_in ic; 48 | raise e 49 | -------------------------------------------------------------------------------- /src/bigstring_unix.mli: -------------------------------------------------------------------------------- 1 | 2 | (* This file is free software, copyright Simon Cruanes. See file "LICENSE" for more details. *) 3 | 4 | type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t 5 | (** This is equivalent to [Bigstring.t]. It is redifined here to avoid depending 6 | on [Bigstring]. 7 | @since 0.3 *) 8 | 9 | (** {2 I/O} *) 10 | 11 | (** These I/O functions are missing from the Bigarray library. 12 | They release the runtime during I/O. *) 13 | 14 | val read : Unix.file_descr -> ?off:int -> ?len:int -> t -> int 15 | val write : Unix.file_descr -> ?off:int -> ?len:int -> t -> int 16 | 17 | 18 | (** {2 Memory-map} *) 19 | 20 | val with_map_file : 21 | ?pos:int64 -> ?len:int -> ?mode:int -> ?flags:open_flag list -> ?shared:bool -> 22 | string -> (t -> 'a) -> 'a 23 | (** [with_map_file name f] maps the file into memory, opening it, and 24 | call [f] with a slice [pos.... pos+len] of the bytes of the file 25 | where [len] is the length of the file if not provided. 26 | When [f] returns, the file is closed. 27 | @param pos offset in the file (default 0) 28 | @param shared if true, modifications are shared between processes that 29 | have mapped this file (requires the filedescr to be open in write mode). 30 | @param mode the mode for the file, if it's created 31 | @param flags opening flags (default rdonly) 32 | see {!Bigarray.Array1.map_file} for more details *) 33 | 34 | val map_file_descr : ?pos:int64 -> ?shared:bool -> Unix.file_descr -> int -> t 35 | (** [map_file_descr descr len] is a lower-level access to an underlying file descriptor. 36 | @param shared if true, modifications are shared between processes that 37 | have mapped this file (requires the filedescr to be open in write mode). 38 | see {!Bigarray.Array1.map_file} for more details *) 39 | -------------------------------------------------------------------------------- /src/bigstring_unix_stubs.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, Christopher Zimmermann 3 | * 4 | * Permission to use, copy, modify, and/or distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 11 | * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 13 | * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 14 | * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #ifndef Handle_val 24 | #include 25 | #endif 26 | 27 | CAMLprim value 28 | ocaml_bigstring_unix_read(value vfd, value vba, value voff, value vlen) 29 | { 30 | void *iobuf = ((char *)Caml_ba_data_val(vba)) + Unsigned_long_val(voff); 31 | #ifdef Handle_val 32 | unsigned len; 33 | int err; 34 | 35 | if (Descr_kind_val(vfd) == KIND_SOCKET) { 36 | SOCKET s = Socket_val(vfd); 37 | caml_release_runtime_system(); 38 | if ((err = recv(s, iobuf, Unsigned_int_val(vlen), 0)) == SOCKET_ERROR) 39 | err = WSAGetLastError(); 40 | else { 41 | len = err; 42 | err = 0; 43 | } 44 | caml_acquire_runtime_system(); 45 | } else { 46 | HANDLE h = Handle_val(vfd); 47 | caml_release_runtime_system(); 48 | if (ReadFile(h, iobuf, Unsigned_int_val(vlen), &len, NULL)) 49 | err = 0; 50 | else { 51 | check_error: 52 | switch (err = GetLastError()) { 53 | case ERROR_BROKEN_PIPE: 54 | /* This is no error, but just a closed pipe. */ 55 | err = len = 0; 56 | break; 57 | case ERROR_MORE_DATA: 58 | #if 1 59 | do { 60 | char buf[1024]; 61 | unsigned dummy_len; 62 | if (ReadFile(h, buf, sizeof(buf), &dummy_len, NULL)) 63 | break; 64 | else 65 | goto check_error; 66 | } while (0); 67 | #else 68 | err = 0; 69 | #endif 70 | default: 71 | break; 72 | } 73 | } 74 | caml_acquire_runtime_system(); 75 | } 76 | /* GetLastError() and WSAGetLastError() error numbers _are_ compatible, 77 | * although not documented this behaviour will hopefully never change. */ 78 | if (err) { 79 | win32_maperr(err); 80 | uerror("Bigstringaf.read", Nothing); 81 | } 82 | else 83 | return Val_int(len); 84 | 85 | #else 86 | ssize_t ret; 87 | 88 | caml_release_runtime_system(); 89 | ret = read(Int_val(vfd), iobuf, Unsigned_long_val(vlen)); 90 | caml_acquire_runtime_system(); 91 | if (ret < 0) uerror("Bigstringaf.read", Nothing); 92 | return Val_long(ret); 93 | #endif 94 | } 95 | 96 | CAMLprim value 97 | ocaml_bigstring_unix_write(value vfd, value vba, value voff, value vlen) 98 | { 99 | char *iobuf = ((char *)Caml_ba_data_val(vba)) + Unsigned_long_val(voff); 100 | #ifdef Handle_val 101 | unsigned len; 102 | int err; 103 | 104 | if (Descr_kind_val(vfd) == KIND_SOCKET) { 105 | SOCKET s = Socket_val(vfd); 106 | caml_release_runtime_system(); 107 | if ((err = send(s, iobuf, Unsigned_int_val(vlen), 0)) == SOCKET_ERROR) 108 | err = WSAGetLastError(); 109 | else { 110 | len = err; 111 | err = 0; 112 | } 113 | caml_acquire_runtime_system(); 114 | } else { 115 | HANDLE h = Handle_val(vfd); 116 | caml_release_runtime_system(); 117 | if (WriteFile(h, iobuf, Unsigned_int_val(vlen), &len, NULL)) 118 | err = 0; 119 | else 120 | err = GetLastError(); 121 | caml_acquire_runtime_system(); 122 | } 123 | /* GetLastError() and WSAGetLastError() error numbers _are_ compatible, 124 | * although not documented this behaviour will hopefully never change. */ 125 | if (err) { 126 | win32_maperr(err); 127 | uerror("Bigstringaf.write", Nothing); 128 | } 129 | return Val_long(len); 130 | 131 | #else 132 | ssize_t ret; 133 | 134 | caml_release_runtime_system(); 135 | ret = write(Int_val(vfd), iobuf, Unsigned_long_val(vlen)); 136 | caml_acquire_runtime_system(); 137 | if (ret < 0) uerror("Bigstringaf.write", Nothing); 138 | return Val_long(ret); 139 | #endif 140 | } 141 | -------------------------------------------------------------------------------- /src/configure.ml: -------------------------------------------------------------------------------- 1 | let () = 2 | print_string "let map_file = "; 3 | if String.compare Sys.ocaml_version "4.06.0" < 0 4 | then print_endline "Bigarray.Genarray.map_file" 5 | else print_endline "Unix.map_file" 6 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name configure) 3 | (modules Configure)) 4 | 5 | (rule 6 | (targets bigstring_compat.ml) 7 | (action (with-stdout-to %{targets} 8 | (run ./configure.bc)))) 9 | 10 | (library 11 | (name bigstring) 12 | (public_name bigstring) 13 | (modules Bigstring) 14 | (synopsis "Bigstring built on top of bigarrays, and convenient functions.") 15 | (libraries bytes bigarray)) 16 | 17 | (library 18 | (name bigstring_unix) 19 | (public_name bigstring-unix) 20 | (modules Bigstring_compat Bigstring_unix) 21 | (flags :standard -warn-error -3) ; deprecation 22 | (c_names bigstring_unix_stubs) 23 | (synopsis "Bigstrings from Unix memory mapping.") 24 | (libraries bigarray unix)) 25 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name test) 3 | (modules test) 4 | (flags :standard -thread) 5 | (libraries alcotest bigstring bigstring-unix threads) 6 | (action 7 | ; This enables runtime checks for use-after-free and double-free. 8 | (setenv MALLOC_CHECK_ 3 9 | (setenv MALLOC_PERTURB_ 195 10 | (run %{test} --color=always -q))) 11 | ) 12 | ) 13 | -------------------------------------------------------------------------------- /test/test.ml: -------------------------------------------------------------------------------- 1 | module Unix_ = Unix 2 | open Alcotest 3 | 4 | let socketpair _dom ty proto = 5 | let open Unix_ in 6 | let s = socket PF_INET ty proto 7 | and c = socket PF_INET ty proto in 8 | bind s (ADDR_INET (inet_addr_loopback, 0)); 9 | let saddr = getsockname s in 10 | match ty with 11 | | SOCK_DGRAM -> 12 | connect c saddr; 13 | let caddr = getsockname c in 14 | connect s caddr; 15 | begin match select [c;s] [] [] 0. with 16 | | [], [], [] -> s, c 17 | | _ -> failwith "Unix.socketpair: unexpected data" 18 | end 19 | | SOCK_SEQPACKET 20 | | SOCK_STREAM -> 21 | listen s 1; 22 | set_nonblock c; 23 | (try connect c saddr; assert false 24 | with Unix_error ((EINPROGRESS | EWOULDBLOCK), _, _) -> ()); 25 | clear_nonblock c; 26 | let s, caddr = 27 | match select [s] [] [] 0.1 with 28 | | [s'], [], [] when s' == s -> 29 | let ret = accept s in 30 | close s; 31 | ret 32 | | [], [], [] -> failwith "Unix.socketpair: timeout waiting for client" 33 | | _ -> assert false 34 | in 35 | assert begin 36 | match caddr with 37 | | ADDR_INET (addr, _) when addr = inet_addr_loopback -> true 38 | | _ -> false 39 | end; 40 | begin match select [] [c] [c] 0.1 with 41 | | [], [c'], _ when c == c' -> () 42 | | [], [], [c'] when c == c' -> 43 | begin match getsockopt_error c with 44 | | Some e -> raise (Unix_error (e, "socketpair", "")) 45 | | None -> assert false 46 | end 47 | | [], [], [] -> failwith "Unix.socketpair: timeout waiting for server" 48 | | _ -> assert false 49 | end; 50 | s, c 51 | | SOCK_RAW -> 52 | invalid_arg "Unix.socketpair: SOCK_RAW not supported" 53 | 54 | let bigstring_unix = 55 | let module Unix = Unix_ in 56 | let empty = char_of_int 0xdf in 57 | let source, drain = socketpair Unix.PF_UNIX Unix.SOCK_STREAM 0 in 58 | Unix.set_nonblock source; 59 | Unix.set_nonblock drain; 60 | let check_read ?(shift=0) ?off ?len () = 61 | let off' = match off with None -> 0 | Some off -> off in 62 | let buf = Bigstring.make 50 empty in 63 | let pre = Bigstring.sub buf 0 (10+off') in 64 | let sub = Bigstring.sub buf 10 30 in 65 | let check_buf read_len = 66 | Gc.compact (); 67 | let read_sub = Bigstring.sub sub off' read_len in 68 | let post = Bigstring.sub buf (10+off'+read_len) (40 - off' - read_len) in 69 | Bigstring.iter (check char "pre untouched" empty) pre; 70 | Bigstring.iteri (fun i c -> check char "correct read" (char_of_int (i + 97 + shift)) c) read_sub; 71 | Bigstring.iter (check char "post untouched" empty) post; 72 | in 73 | match Bigstring_unix.read source sub ?off ?len with 74 | | exception (Unix.Unix_error(Unix.EAGAIN, _, _) as e) -> 75 | check_buf 0; 76 | raise e 77 | | exception (Unix.Unix_error(Unix.EWOULDBLOCK, _, _) as e) -> 78 | check_buf 0; 79 | raise e 80 | | read_len -> 81 | check bool "sensible read size" true (read_len <= 30 - off' && read_len >= 0); 82 | check_buf read_len; 83 | read_len 84 | in 85 | let write ?off ?len () = 86 | let buf = Bytes.create 1024 in 87 | while try Unix.read source buf 0 (Bytes.length buf) |> ignore; true with _ -> false do () done; 88 | let count = Bigstring.init 20 (fun i -> char_of_int (i+97)) in 89 | Bigstring_unix.write drain count ?off ?len 90 | |> match len with None -> ignore |Some len -> check int "write length" len 91 | in 92 | "Bigstring_unix", 93 | [ "read", `Quick, begin fun () -> 94 | write ~len:15 (); check_read () |> check int "read length" 15; 95 | write ~len:15 (); check_read ~off:22 () |> check int "read length" 8; 96 | write ~len:15 (); check_read ~len:6 () |> check int "read length" 6; 97 | write ~len:15 (); check_read ~off:3 ~len:6 () |> check int "read length" 6; 98 | write ~len:7 (); check_read () |> check int "read length" 7; 99 | end 100 | ; "write", `Quick, begin fun () -> 101 | write (); check_read () |> check int "read length" 20; 102 | write ~off:0 ~len:10 (); check_read () |> check int "read length" 10; 103 | write ~off:2 ~len:10 (); check_read ~shift:2 () |> check int "read length" 10; 104 | write ~off:2 ~len:8 (); check_read ~shift:2 () |> check int "read length" 8; 105 | write ~off:2 (); check_read ~shift:2 () |> check int "read length" 18; 106 | end 107 | ; "bounds check", `Quick, begin fun () -> 108 | let buf = Bigstring.create 0 in 109 | check_raises "bounds -off" (Invalid_argument "Bigstring_unix.read") 110 | (fun () -> ignore @@ Bigstring_unix.read source buf ~off:~-1); 111 | check_raises "bounds -len" (Invalid_argument "Bigstring_unix.read") 112 | (fun () -> ignore @@ Bigstring_unix.read source buf ~len:~-1); 113 | check_raises "bounds off >" (Invalid_argument "Bigstring_unix.read") 114 | (fun () -> ignore @@ Bigstring_unix.read source buf ~off:21); 115 | check_raises "bounds len >" (Invalid_argument "Bigstring_unix.read") 116 | (fun () -> ignore @@ Bigstring_unix.read source buf ~len:21); 117 | check_raises "bounds off len >" (Invalid_argument "Bigstring_unix.read") 118 | (fun () -> ignore @@ Bigstring_unix.read source buf ~off:15 ~len:6); 119 | check_raises "bounds -off" (Invalid_argument "Bigstring_unix.write") 120 | (fun () -> write ~off:~-1 ()); 121 | check_raises "bounds -len" (Invalid_argument "Bigstring_unix.write") 122 | (fun () -> write ~len:~-1 ()); 123 | check_raises "bounds off >" (Invalid_argument "Bigstring_unix.write") 124 | (fun () -> write ~off:21 ()); 125 | check_raises "bounds len >" (Invalid_argument "Bigstring_unix.write") 126 | (fun () -> write ~len:21 ()); 127 | check_raises "bounds off len >" (Invalid_argument "Bigstring_unix.write") 128 | (fun () -> write ~off:15 ~len:6 ()); 129 | end 130 | ] 131 | 132 | let () = 133 | run "Bigstring" 134 | [ bigstring_unix ] 135 | --------------------------------------------------------------------------------