├── .gitignore ├── README.md ├── bin ├── digest.ml └── dune ├── dune-project └── lib ├── dune ├── fiber.ml └── fiber.mli /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | setup.data 3 | setup.log 4 | doc/*.html 5 | *.native 6 | *.byte 7 | *.so 8 | lib/decompress_conf.ml 9 | *.tar.gz 10 | _tests 11 | lib_test/files 12 | zpipe 13 | c/dpipe 14 | *.merlin 15 | *.install -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MiniFiber 2 | 3 | By `Marshal` module and with `Unix.fork`, I think it's possible to launch real 4 | parallel computation with OCaml. This library wants to provide the simplest way 5 | to execute in parallel (such as hash algorithm) computation without a huge 6 | code-base (currently, only `Unix` and `Marshal` are needed). 7 | 8 | It's the OCaml _multicore_ for poor people - because we use `Unix.fork`. 9 | 10 | The code comes from `dune` and some of my knowledge but I'm not sure about the 11 | ready-to-production aspect - guy, `fork` ... and shared data are limited to the 12 | size of used pipe. 13 | 14 | ## An example 15 | 16 | In `bin/` an example is to launch SHA1 on multiple files. The program should 17 | take the advantage of multiple cores (one per files). 18 | 19 | ```sh 20 | $ dune exec bin/digest.exe -- file1 file2 file3 21 | ``` 22 | -------------------------------------------------------------------------------- /bin/digest.ml: -------------------------------------------------------------------------------- 1 | open Fiber 2 | 3 | let io_buffer_size = 65536 4 | 5 | let ( <.> ) f g = fun x -> f (g x) 6 | 7 | let digest_file : string -> unit -> (string * Digestif.SHA1.t, [> Rresult.R.msg ]) result 8 | = fun filename () -> 9 | try 10 | let tp = Bytes.create io_buffer_size in 11 | let fd = Unix.openfile filename Unix.[ O_RDONLY ] 0o600 in 12 | let rec go ctx = 13 | let len = Unix.read fd tp 0 io_buffer_size in 14 | if len = 0 then ctx 15 | else 16 | let ctx = Digestif.SHA1.feed_bytes ctx tp ~off:0 ~len in 17 | go ctx in 18 | let ctx = go Digestif.SHA1.empty in 19 | Unix.close fd ; Rresult.R.ok (filename, Digestif.SHA1.get ctx) 20 | with Unix.Unix_error (err, _, _) -> 21 | Rresult.R.error_msgf "%s: %s" filename (Unix.error_message err) 22 | 23 | let print = function 24 | | Ok (Ok (filename, hash)) -> Format.printf "%a %s\n%!" Digestif.SHA1.pp hash filename 25 | | Ok (Error (`Msg err)) -> Format.eprintf "%s\n%!" err 26 | | Error exit_code -> Format.eprintf "%s: %d\n%!" Sys.argv.(0) exit_code 27 | 28 | let run filenames = 29 | let fiber = parallel_map filenames ~f:(run_process <.> digest_file) in 30 | List.iter print (run fiber) 31 | 32 | let () = match Sys.argv with 33 | | [| _ |] -> Format.eprintf "%s [ ...]\n%!" Sys.argv.(0) 34 | | [||] -> assert false 35 | | _ -> 36 | let[@warning "-8"] _ :: filenames = Array.to_list Sys.argv in 37 | run filenames 38 | -------------------------------------------------------------------------------- /bin/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name digest) 3 | (libraries rresult digestif fiber)) 4 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.6) 2 | -------------------------------------------------------------------------------- /lib/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name fiber) 3 | (libraries unix)) 4 | -------------------------------------------------------------------------------- /lib/fiber.ml: -------------------------------------------------------------------------------- 1 | type 'a t = ('a -> unit) -> unit 2 | 3 | let return x k = k x 4 | let ( >>> ) a b k = a (fun () -> b k) 5 | let ( >>= ) t f k = t (fun x -> f x k) 6 | let ( >>| ) t f k = t (fun x -> k (f x)) 7 | 8 | let both a b = 9 | a >>= fun a -> 10 | b >>= fun b -> 11 | return (a, b) 12 | 13 | module Ivar = struct 14 | type 'a state = 15 | | Full of 'a 16 | | Empty of ('a -> unit) Queue.t 17 | 18 | type 'a t = { mutable state : 'a state } 19 | 20 | let create () = { state= Empty (Queue.create ()) } 21 | 22 | let fill t x = 23 | match t.state with 24 | | Full _ -> failwith "Ivar.fill" 25 | | Empty q -> t.state <- Full x ; Queue.iter (fun f -> f x) q 26 | 27 | let read t k = 28 | match t.state with 29 | | Full x -> k x 30 | | Empty q -> Queue.push k q 31 | end 32 | 33 | type 'a ivar = 'a Ivar.t 34 | 35 | module Future = struct 36 | let wait = Ivar.read 37 | end 38 | 39 | let fork f k = 40 | let ivar = Ivar.create () in 41 | f () (fun x -> Ivar.fill ivar x) ; 42 | k ivar 43 | 44 | let fork_and_join f g = 45 | fork f >>= fun a -> 46 | fork g >>= fun b -> both (Future.wait a) (Future.wait b) 47 | 48 | let fork_and_join_unit f g = 49 | fork f >>= fun a -> 50 | fork g >>= fun b -> 51 | Future.wait a >>> Future.wait b 52 | 53 | let rec parallel_map l ~f = 54 | match l with 55 | | [] -> return [] 56 | | x :: l -> 57 | fork (fun () -> f x) >>= fun future -> 58 | parallel_map l ~f >>= fun l -> 59 | Future.wait future >>= fun x -> return (x :: l) 60 | 61 | let rec parallel_iter l ~f = 62 | match l with 63 | | [] -> return () 64 | | x :: l -> 65 | fork (fun () -> f x) >>= fun future -> 66 | parallel_iter l ~f >>= fun () -> Future.wait future 67 | 68 | let safe_close fd = 69 | try Unix.close fd with Unix.Unix_error _ -> () 70 | 71 | let create_process prgn = 72 | let out0, out1 = Unix.pipe () in 73 | match Unix.fork () with 74 | | 0 -> 75 | Unix.close out0 ; 76 | let oc = Unix.out_channel_of_descr out1 in 77 | ( try 78 | Marshal.to_channel oc (prgn ()) [ Marshal.No_sharing ] ; 79 | flush oc ; Unix.close out1 ; exit 0 80 | with _ -> exit 127 ) 81 | | pid -> 82 | Unix.close out1 ; 83 | out0, pid 84 | 85 | let concurrency = 4 86 | let running = Hashtbl.create ~random:false concurrency 87 | let waiting_for_slot = Queue.create () 88 | 89 | let throttle () = 90 | if Hashtbl.length running >= concurrency 91 | then 92 | let ivar = Ivar.create () in 93 | Queue.push ivar waiting_for_slot ; 94 | Ivar.read ivar 95 | else return () 96 | 97 | let restart_throttle () = 98 | while Hashtbl.length running < concurrency 99 | && (not (Queue.is_empty waiting_for_slot)) 100 | do Ivar.fill (Queue.pop waiting_for_slot) () done 101 | 102 | let run_process prgn = 103 | throttle () >>= fun () -> 104 | let fd, pid = create_process prgn in 105 | let ivar = Ivar.create () in 106 | Hashtbl.add running pid ivar ; 107 | Ivar.read ivar >>= fun status -> 108 | let ic = Unix.in_channel_of_descr fd in 109 | let res = Marshal.from_channel ic in 110 | safe_close fd ; 111 | match status with 112 | | Unix.WEXITED 0 -> return (Ok res) 113 | | Unix.WEXITED n -> return (Error n) 114 | | Unix.WSIGNALED _ -> return (Error 255) 115 | | Unix.WSTOPPED _ -> assert false 116 | 117 | let run fiber = 118 | let result = ref None in 119 | fiber (fun x -> result := Some x) ; 120 | let rec loop () = 121 | if Hashtbl.length running > 0 122 | then ( let pid, status = Unix.wait () in 123 | let ivar = Hashtbl.find running pid in 124 | Hashtbl.remove running pid ; 125 | Ivar.fill ivar status ; 126 | restart_throttle () ; 127 | loop () ) 128 | else match !result with 129 | | Some x -> x 130 | | None -> failwith "fiber" in 131 | loop () 132 | -------------------------------------------------------------------------------- /lib/fiber.mli: -------------------------------------------------------------------------------- 1 | type 'a t 2 | type 'a ivar 3 | 4 | val return : 'a -> 'a t 5 | val ( >>= ) : 'a t -> ('a -> 'b t) -> 'b t 6 | val ( >>| ) : 'a t -> ('a -> 'b) -> 'b t 7 | val ( >>> ) : unit t -> unit t -> unit t 8 | 9 | val both : 'a t -> 'b t -> ('a * 'b) t 10 | 11 | val fork : (unit -> 'a t) -> 'a ivar t 12 | val fork_and_join : (unit -> 'a t) -> (unit -> 'b t) -> ('a * 'b) t 13 | val fork_and_join_unit : (unit -> unit t) -> (unit -> unit t) -> unit t 14 | 15 | val parallel_map : 'a list -> f:('a -> 'b t) -> 'b list t 16 | val parallel_iter : 'a list -> f:('a -> unit t) -> unit t 17 | 18 | val run_process : (unit -> 'a) -> ('a, int) result t 19 | 20 | val run : 'a t -> 'a 21 | --------------------------------------------------------------------------------