├── .gitignore ├── lib ├── jbuild ├── jobqueue.mli └── jobqueue.ml ├── test ├── jbuild └── test_jobqueue.ml ├── Makefile ├── jobqueue.opam ├── README.md ├── .ocp-indent ├── OMakefile └── LICENSE.md /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | /_build 3 | .merlin 4 | *.install 5 | -------------------------------------------------------------------------------- /lib/jbuild: -------------------------------------------------------------------------------- 1 | (jbuild_version 1) 2 | 3 | (library 4 | ((name jobqueue) 5 | (public_name jobqueue) 6 | (libraries (lwt lwt.unix)) 7 | (synopsis "Queue of jobs to be forked off into their own process."))) 8 | -------------------------------------------------------------------------------- /test/jbuild: -------------------------------------------------------------------------------- 1 | (jbuild_version 1) 2 | 3 | (executable 4 | ((name test_jobqueue) 5 | (libraries (alcotest jobqueue)))) 6 | 7 | (alias 8 | ((name runtest) 9 | (deps (test_jobqueue.exe)) 10 | (action (run ${<} -q --color=always)))) 11 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build install uninstall reinstall test clean 2 | 3 | build: 4 | jbuilder build @install 5 | 6 | install: 7 | jbuilder install 8 | 9 | uninstall: 10 | jbuilder uninstall 11 | 12 | reinstall: 13 | $(MAKE) uninstall 14 | $(MAKE) install 15 | 16 | test: 17 | jbuilder runtest -f 18 | 19 | clean: 20 | jbuilder clean 21 | -------------------------------------------------------------------------------- /jobqueue.opam: -------------------------------------------------------------------------------- 1 | opam-version: "1.2" 2 | maintainer: "martin@mjambon.com" 3 | authors: ["Martin Jambon"] 4 | homepage: "https://github.com/mjambon/jobqueue" 5 | bug-reports: "https://github.com/mjambon/jobqueue/issues" 6 | dev-repo: "https://github.com/mjambon/jobqueue.git" 7 | build: [ 8 | ["jbuilder" "build" "-p" name "-j" jobs] 9 | ] 10 | 11 | build-test: [ 12 | ["jbuilder" "runtest" "-p" name] 13 | ] 14 | depends: ["jbuilder" {build}] 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | jobqueue 2 | == 3 | 4 | This is an OCaml library for detaching CPU-intensive jobs into their 5 | own process. The maximum number of jobs running in parallel is 6 | configurable. 7 | 8 | Properties that may not exist in other libraries include: 9 | 10 | * ability to continuously submit new jobs 11 | * no need for the input data to be serializable 12 | 13 | Implementation 14 | -- 15 | 16 | A process is created by a call to `fork()`, allowing the child process 17 | to inherit all the data it needs from its parent without 18 | complications. Only the result of its execution is serialized, which 19 | is done with the `Marshal` module, and passed back to the parent process 20 | using a pipe. 21 | 22 | Concurrency is managed with the `Lwt` library. 23 | 24 | Due to the reliance on `fork()`, this library won't work on Windows. 25 | 26 | Authors 27 | -- 28 | 29 | The original Jobqueue module was written by Martin Jambon at Esper 30 | and released to the public in 2017. 31 | -------------------------------------------------------------------------------- /.ocp-indent: -------------------------------------------------------------------------------- 1 | # See https://github.com/OCamlPro/ocp-indent/blob/master/.ocp-indent for more 2 | 3 | # Indent for clauses inside a pattern-match (after the arrow): 4 | # match foo with 5 | # | _ -> 6 | # ^^^^bar 7 | # the default is 2, which aligns the pattern and the expression 8 | match_clause = 4 9 | 10 | # When nesting expressions on the same line, their indentation are in 11 | # some cases stacked, so that it remains correct if you close them one 12 | # at a line. This may lead to large indents in complex code though, so 13 | # this parameter can be used to set a maximum value. Note that it only 14 | # affects indentation after function arrows and opening parens at end 15 | # of line. 16 | # 17 | # for example (left: `none`; right: `4`) 18 | # let f = g (h (i (fun x -> # let f = g (h (i (fun x -> 19 | # x) # x) 20 | # ) # ) 21 | # ) # ) 22 | max_indent = 2 23 | -------------------------------------------------------------------------------- /OMakefile: -------------------------------------------------------------------------------- 1 | OCAML_LIBS = $(OCAML_LIBS_util) 2 | 3 | FILES[] = 4 | util_test 5 | util_abstract_value 6 | util_string 7 | util_search 8 | util_json 9 | util_json_tuple 10 | util_float 11 | # Universal time and RFC-3339 formatting 12 | util_time 13 | # Switch to specified timezone, list timezones 14 | util_timezone 15 | # Calendar times (relative to some unspecified timezone or origin) 16 | util_dateonly 17 | util_timeonly 18 | util_localtime 19 | util_workday 20 | util_weekend 21 | util_enc 22 | util_hex 23 | util_sha256 24 | util_hmac_sha256 25 | util_rng 26 | util_exn 27 | util_text 28 | util_http_client 29 | util_signal 30 | util_url 31 | util_html 32 | util_csv 33 | util_conc 34 | util_list 35 | util_ref 36 | util_geo 37 | util_model 38 | util_shutdown 39 | util_stream 40 | util_lwt 41 | util_lwt_stream 42 | util_lwt_main 43 | util_half_lazy 44 | util_late_init 45 | util_cache 46 | util_shell 47 | util_jobqueue 48 | util_counter 49 | util_linux 50 | util_prio 51 | util_gzip 52 | 53 | OCamlLibrary(util, $(FILES)) 54 | Meta(util) 55 | 56 | .DEFAULT: META util.cma util.cmxa 57 | 58 | .PHONY: clean 59 | clean: 60 | rm -f *.o *.a *.cm* *~ *.annot *.run *.opt META $(ATDGEN_OUTFILES) 61 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright © 2017 Esper Technologies, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 3. The name of the author may not be used to endorse or promote products 14 | derived from this software without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR 17 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /lib/jobqueue.mli: -------------------------------------------------------------------------------- 1 | (** 2 | Run CPU-intensive jobs in their own process and restrict 3 | how many such jobs may run in parallel and how long they take 4 | to complete. 5 | *) 6 | 7 | (** The type of a job queue. *) 8 | type t 9 | 10 | (** Return the number of jobs that have been submitted and haven't 11 | started yet. This can be used by the application to throttle job 12 | submission so as to avoid using too much memory. 13 | *) 14 | val pending : t -> int 15 | 16 | (** Return the number of jobs currently running. *) 17 | val running : t -> int 18 | 19 | type 'a result = [ 20 | (** Normal result of a job. *) 21 | | `Value of 'a 22 | 23 | (** Exception that was raised by the job. 24 | This exception is meant to be printed out only. 25 | The exception cannot be used in pattern-matching or in comparisons, 26 | due to implementation limitations having to do with serialization. *) 27 | | `User_exception of exn 28 | 29 | (** Indicates that the job timed out. *) 30 | | `Timeout 31 | 32 | (** Some other error occurred. *) 33 | | `Error of string 34 | ] 35 | 36 | (** Extract the value or raise a [Failure] exception. *) 37 | val value_exn : 'a result -> 'a 38 | 39 | (** 40 | Create a queue of jobs to be run in their own process. 41 | 42 | @param max_running is the maximum number of jobs to run at the same 43 | time. The default is 1. 44 | *) 45 | val create : ?max_running:int -> unit -> t 46 | 47 | (** Return the [max_running] parameter of a queue. *) 48 | val max_running : t -> int 49 | 50 | (** 51 | Run a computation in its own process using a call to [Unix.fork()]. 52 | The result is serialized using [Marshal] and returned back to the 53 | parent using a pipe. 54 | 55 | Beware that any change in global variables won't be shared 56 | with the parent process. 57 | 58 | @param timeout the time limit for the execution of the job. There's 59 | no timeout by default. 60 | *) 61 | val submit : ?timeout:float -> t -> (unit -> 'a) -> 'a result Lwt.t 62 | 63 | (** 64 | Parallel list map. 65 | [map q l f] applies [f] to each element of the list, each in a detached 66 | process. 67 | *) 68 | val map : 69 | ?timeout:float -> 70 | t -> 71 | 'a list -> 72 | ('a -> 'b) -> 73 | 'b result list Lwt.t 74 | -------------------------------------------------------------------------------- /lib/jobqueue.ml: -------------------------------------------------------------------------------- 1 | (* 2 | Queue of jobs run within their own process with cpu time limit. 3 | *) 4 | 5 | open Printf 6 | open Lwt 7 | 8 | type 'a result = [ 9 | | `Value of 'a 10 | | `Timeout 11 | | `User_exception of exn 12 | (* Exception raised by the user's function. 13 | It went through marshalling, so pattern matching on it won't work. *) 14 | | `Error of string 15 | (* Error internal to the implementation of this module. *) 16 | ] 17 | 18 | let value_exn = function 19 | | `Value x -> x 20 | | `Timeout -> 21 | failwith "Jobqueue timeout" 22 | | `User_exception e -> 23 | failwith (sprintf "Jobqueue user exception: %s" 24 | (Printexc.to_string e)) 25 | | `Error s -> 26 | failwith (sprintf "Jobqueue error: %s" s) 27 | 28 | let run_child fd_out f = 29 | let result : _ result = 30 | try `Value (f ()) 31 | with e -> `User_exception e 32 | in 33 | let oc = Unix.out_channel_of_descr fd_out in 34 | Marshal.to_channel oc result []; 35 | close_out oc 36 | 37 | (* 38 | Run an lwt thread with a timeout. 39 | If the timeout is reached, None is returned and it is the responsibility 40 | for the caller to kill the job that's still running in the background. 41 | *) 42 | let with_timeout ~timeout x = 43 | let sleep = 44 | Lwt_unix.sleep timeout >>= fun () -> 45 | return `Timeout 46 | in 47 | Lwt.choose [ sleep; x ] 48 | 49 | let with_opt_timeout opt_timeout x = 50 | match opt_timeout with 51 | | None -> x 52 | | Some timeout -> with_timeout ~timeout x 53 | 54 | let rec waitpid pid = 55 | match 56 | try Some (Lwt_unix.waitpid [] pid) 57 | with Unix.Unix_error (Unix.EINTR, _, _) -> None 58 | with 59 | | Some result -> result 60 | | None -> waitpid pid 61 | 62 | let terminate_child pid = 63 | Unix.kill pid Sys.sigkill 64 | 65 | let run_parent opt_timeout ic child_pid = 66 | with_opt_timeout opt_timeout (Lwt_io.read_value ic) >>= fun result -> 67 | Lwt_io.close ic >>= fun () -> 68 | (match result with 69 | | `Timeout -> terminate_child child_pid 70 | | `Value _ 71 | | `User_exception _ -> () 72 | | `Error _ -> assert false 73 | ); 74 | waitpid child_pid >>= fun (pid, process_status) -> 75 | match process_status with 76 | | Unix.WEXITED 0 -> 77 | return result 78 | | Unix.WSIGNALED n when n = Sys.sigkill -> 79 | return result 80 | 81 | | Unix.WEXITED n -> 82 | return (`Error (sprintf "Child process exited with code %i" n)) 83 | | Unix.WSIGNALED n -> 84 | return (`Error (sprintf "Child process killed by signal %i" n)) 85 | | Unix.WSTOPPED n -> 86 | return (`Error (sprintf "Child process stopped by signal %i" n)) 87 | 88 | let run_job opt_timeout (f : unit -> 'a) : 'a result Lwt.t = 89 | Lwt_io.flush_all () >>= fun () -> 90 | let fd_read_from_child, fd_write_to_parent = Lwt_unix.pipe_in () in 91 | let child_pid = Lwt_unix.fork () in 92 | if child_pid = 0 then ( 93 | (* Child process *) 94 | try 95 | (* Everything is done synchronously in the child from here. 96 | Not sure how to deal with Lwt's main loop that's still running. *) 97 | ignore (Lwt_unix.close fd_read_from_child); (* does this work? *) 98 | run_child fd_write_to_parent f; 99 | exit 0 100 | with e -> 101 | eprintf "Fatal exception in child process created by Jobqueue: %s\n%!" 102 | (Printexc.to_string e); 103 | exit 1 104 | ) 105 | else ( 106 | (* Parent process *) 107 | let fd_read_from_child = 108 | Lwt_io.of_fd ~mode:Lwt_io.input fd_read_from_child in 109 | Unix.close fd_write_to_parent; 110 | run_parent opt_timeout fd_read_from_child child_pid 111 | ) 112 | 113 | type t = { 114 | mutable pending : int; (* informational only *) 115 | mutable running : int; 116 | avail_condition : unit Lwt_condition.t; 117 | max_running : int; 118 | } 119 | 120 | let pending q = q.pending 121 | let running q = q.running 122 | let max_running q = q.max_running 123 | 124 | let create ?(max_running = 1) () = 125 | if max_running <= 0 then 126 | invalid_arg "Jobqueue.create" 127 | else { 128 | pending = 0; 129 | running = 0; 130 | avail_condition = Lwt_condition.create (); 131 | max_running; 132 | } 133 | 134 | let submit ?timeout:opt_timeout queue f : _ result Lwt.t = 135 | assert (queue.pending >= 0); 136 | assert (queue.running >= 0); 137 | let run () = 138 | queue.pending <- queue.pending - 1; 139 | queue.running <- queue.running + 1; 140 | Lwt.finalize 141 | (fun () -> run_job opt_timeout f) 142 | (fun () -> 143 | queue.running <- queue.running - 1; 144 | Lwt_condition.signal queue.avail_condition (); 145 | return () 146 | ) 147 | in 148 | queue.pending <- queue.pending + 1; 149 | if queue.running >= queue.max_running then 150 | Lwt_condition.wait queue.avail_condition >>= fun () -> 151 | run () 152 | else 153 | run () 154 | 155 | let map ?timeout queue l f = 156 | let promises = 157 | List.fold_left (fun acc x -> 158 | let job () = f x in 159 | submit ?timeout queue job :: acc 160 | ) [] l 161 | |> List.rev 162 | in 163 | Lwt_list.map_s (fun x -> x) promises 164 | -------------------------------------------------------------------------------- /test/test_jobqueue.ml: -------------------------------------------------------------------------------- 1 | (* 2 | Test suite for the Jobqueue module/library. 3 | *) 4 | 5 | open Printf 6 | open Lwt 7 | 8 | (* Tests *) 9 | 10 | let sleep t = ignore (Unix.select [] [] [] t) 11 | 12 | let test_pid () = 13 | let q = Jobqueue.create () in 14 | let parent_pid = Unix.getpid () in 15 | Lwt_main.run ( 16 | Jobqueue.submit q (fun () -> 17 | Unix.getpid () 18 | ) 19 | >>= function 20 | | `Value child_pid -> 21 | assert (Unix.getpid () = parent_pid); 22 | assert (child_pid <> parent_pid); 23 | return () 24 | | _ -> 25 | assert false 26 | ) 27 | 28 | let test_order () = 29 | let main () = 30 | let q = Jobqueue.create ~max_running:2 () in 31 | let order = 32 | let n = ref 0 in 33 | fun () -> incr n; !n 34 | in 35 | let job1 () = sleep 0.2 in 36 | let job2 () = sleep 0.5 in 37 | let job3 () = sleep 0.1 in 38 | let job4 () = sleep 0.1 in 39 | let submit job = 40 | Jobqueue.submit q job >>= function 41 | | `Value () -> 42 | return (order ()) 43 | | _ -> 44 | assert false 45 | in 46 | (* Submit the jobs in this specific order. *) 47 | let x1 = submit job1 in 48 | let x2 = submit job2 in 49 | (* Queue is now full, job3 will have to wait. *) 50 | let x3 = submit job3 in 51 | (* job4 should start after job3. *) 52 | let x4 = submit job4 in 53 | Lwt_list.map_p (fun x -> x) [x1; x2; x3; x4] 54 | in 55 | let result = Lwt_main.run (main ()) in 56 | (* Check order of arrival. Jobs 3 and 4 should both finish before job 2. *) 57 | assert (result = [1; 4; 2; 3]) 58 | 59 | let test_timeout () = 60 | let main () = 61 | let q = Jobqueue.create ~max_running:5 () in 62 | let job () = sleep 0.11 in 63 | Jobqueue.submit ~timeout:0.1 q job >>= function 64 | | `Timeout -> return () 65 | | `Value _ -> assert false 66 | | `User_exception _ -> assert false 67 | | `Error s -> 68 | eprintf "Error: %s\n%!" s; 69 | assert false 70 | in 71 | Lwt_main.run (main ()) 72 | 73 | let test_values () = 74 | let main () = 75 | let q = Jobqueue.create ~max_running:5 () in 76 | let job () = 77 | Array.init 3 (fun i -> i) 78 | in 79 | Jobqueue.submit ~timeout:0.1 q job >>= function 80 | | `Value [| 0; 1; 2 |] -> return () 81 | | `Value _ -> assert false 82 | | `Timeout -> assert false 83 | | `User_exception _ -> assert false 84 | | `Error _ -> assert false 85 | in 86 | Lwt_main.run (main ()) 87 | 88 | let test_exceptions () = 89 | let main () = 90 | let q = Jobqueue.create ~max_running:5 () in 91 | let job () = failwith "this is a test" in 92 | Jobqueue.submit ~timeout:1. q job >>= function 93 | | `User_exception _ -> return () 94 | | `Value _ -> assert false 95 | | `Timeout -> assert false 96 | | `Error _ -> assert false 97 | in 98 | Lwt_main.run (main ()) 99 | 100 | let test_map () = 101 | let main () = 102 | let q = Jobqueue.create ~max_running:2 () in 103 | Jobqueue.map q [0; 1; 2; 3; 4; 5] (fun x -> x * x) >>= fun l -> 104 | return (List.map Jobqueue.value_exn l) 105 | in 106 | assert (Lwt_main.run (main ()) 107 | = [0; 1; 4; 9; 16; 25]) 108 | 109 | (* 110 | Go over the default limit of 1024 file descriptors per process to 111 | check we're not leaking any. 112 | *) 113 | let test_many_jobs () = 114 | let num_todo = 2000 in 115 | let main () = 116 | let q = Jobqueue.create ~max_running:4 () in 117 | let inputs = Array.(to_list (init num_todo (fun i -> i))) in 118 | Jobqueue.map q inputs (fun i -> 119 | printf "I am job %i, process %i.\n%!" i (Unix.getpid ()) 120 | ) >>= fun l -> 121 | ignore (List.map Jobqueue.value_exn l); 122 | return () 123 | in 124 | Lwt_main.run (main ()) 125 | 126 | (* 127 | Avoid having too many items into the queue at the same time. 128 | *) 129 | let test_throttling () = 130 | let input_list = [1; 2; 3; 4; 5; 6; 7; 8; 9; 10] in 131 | let inputs = Lwt_stream.of_list input_list in 132 | let max_running = 2 in 133 | let max_pending = 3 in 134 | let q = Jobqueue.create ~max_running () in 135 | let print_stats () = 136 | printf "pending: %i, running: %i\n%!" 137 | (Jobqueue.pending q) (Jobqueue.running q) 138 | in 139 | let done_ = ref 0 in 140 | let rec consume_inputs previous_jobs = 141 | print_stats (); 142 | assert (Jobqueue.pending q <= max_pending); 143 | assert (Jobqueue.running q <= max_running); 144 | if Jobqueue.pending q < max_pending then 145 | Lwt_stream.get inputs >>= function 146 | | None -> return () 147 | | Some x -> 148 | let job = 149 | Jobqueue.submit q (fun () -> sleep 0.001) >>= fun result -> 150 | incr done_; 151 | consume_inputs previous_jobs 152 | in 153 | Lwt.join [job; consume_inputs previous_jobs] 154 | else 155 | previous_jobs 156 | in 157 | Lwt_main.run (consume_inputs (return ())); 158 | assert (!done_ = List.length input_list) 159 | 160 | let jobqueue_suite = [ 161 | "pid", `Quick, test_pid; 162 | "order", `Quick, test_order; 163 | "timeout", `Quick, test_timeout; 164 | "values", `Quick, test_values; 165 | "exceptions", `Quick, test_exceptions; 166 | "map", `Quick, test_map; 167 | "many jobs", `Quick, test_many_jobs; 168 | "throttling", `Quick, test_throttling; 169 | ] 170 | 171 | let suites = [ 172 | "Jobqueue", jobqueue_suite; 173 | ] 174 | 175 | let () = 176 | Printexc.record_backtrace true; 177 | Alcotest.run "jobqueue" suites 178 | --------------------------------------------------------------------------------