├── doc
│   ├── dune
│   ├── migrating-off-managed.org
│   └── main.org
├── dune-project
├── .ocamlformat
├── .gitignore
├── test
│   ├── fd.mli
│   ├── qtest.mli
│   ├── env_test.mli
│   ├── krb_expert.mli
│   ├── run_exn.mli
│   ├── timeouts.mli
│   ├── wrap_test.mli
│   ├── breaking_things.mli
│   ├── copy_executable.mli
│   ├── stress_test.mli
│   ├── buffer_age_limit.mli
│   ├── print_and_run.sh
│   ├── remove_running_executable.mli
│   ├── raise_on_connection_state_init.mli
│   ├── shutdown_during_worker_init.mli
│   ├── spawn_in_foreground_zombie.mli
│   ├── dune
│   ├── setup.sh
│   ├── raise_on_connection_state_init.ml
│   ├── breaking_things.ml
│   ├── run_exn.ml
│   ├── krb_expert.ml
│   ├── wrap_test.ml
│   ├── spawn_in_foreground_zombie.ml
│   ├── test-rpc-settings.t
│   ├── shutdown_during_worker_init.ml
│   ├── remove_running_executable.ml
│   ├── stress_test.ml
│   ├── test-examples.t
│   ├── buffer_age_limit.ml
│   ├── env_test.ml
│   ├── qtest.ml
│   ├── timeouts.ml
│   ├── copy_executable.ml
│   ├── ssh_test_server.sh
│   └── fd.ml
├── example
│   ├── serve.mli
│   ├── add_numbers.mli
│   ├── async_log.mli
│   ├── number_stats.mli
│   ├── reverse_pipe.mli
│   ├── side_arg.mli
│   ├── abort_direct_pipe.mli
│   ├── alternative_init.mli
│   ├── rpc_direct_pipe.mli
│   ├── stream_workers.mli
│   ├── worker_binprot.mli
│   ├── reverse_direct_pipe.mli
│   ├── spawn_in_foreground.mli
│   ├── workers_as_masters.mli
│   ├── dune
│   ├── side_arg.ml
│   ├── add_numbers.ml
│   ├── alternative_init.ml
│   ├── spawn_in_foreground.ml
│   ├── async_log.ml
│   ├── serve.ml
│   ├── reverse_pipe.ml
│   ├── reverse_direct_pipe.ml
│   ├── number_stats.ml
│   ├── rpc_direct_pipe.ml
│   ├── worker_binprot.ml
│   ├── abort_direct_pipe.ml
│   ├── workers_as_masters.ml
│   └── stream_workers.ml
├── src
│   ├── parallel.mli
│   ├── prog_and_args.ml
│   ├── prog_and_args.mli
│   ├── fd_redirection.ml
│   ├── dune
│   ├── fd_redirection.mli
│   ├── for_testing_internal.ml
│   ├── how_to_run.ml
│   ├── how_to_run.mli
│   ├── rpc_parallel.ml
│   ├── for_testing_internal.mli
│   ├── rpc_settings.mli
│   ├── remote_executable.mli
│   ├── rpc_settings.ml
│   ├── utils.mli
│   ├── utils.ml
│   ├── remote_executable.ml
│   ├── managed.mli
│   ├── managed.ml
│   └── map_reduce.mli
├── expect_test
│   ├── workers.mli
│   ├── state_worker.mli
│   ├── managed_on_failure.mli
│   ├── master_pid_decoration.mli
│   ├── sum_worker.mli
│   ├── product_worker.mli
│   ├── dune
│   ├── rpc_parallel_expect_test.ml
│   ├── workers.ml
│   ├── product_worker.ml
│   ├── state_worker.ml
│   ├── sum_worker.ml
│   ├── managed_on_failure.ml
│   └── master_pid_decoration.ml
├── unauthenticated
│   ├── backend.mli
│   ├── rpc_parallel_unauthenticated.ml
│   ├── dune
│   ├── start_app.ml
│   ├── start_app.mli
│   └── backend.ml
├── krb
│   ├── rpc_parallel_krb_public.ml
│   ├── dune
│   ├── backend.mli
│   ├── mode.ml
│   ├── start_app.ml
│   ├── mode.mli
│   ├── start_app.mli
│   └── backend.ml
├── Makefile
├── rpc_parallel.opam
├── LICENSE.md
├── CONTRIBUTING.md
└── CHANGES.md
/doc/dune: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 3.17) 2 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | profile=janestreet 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | *.install 3 | *.merlin 4 | _opam 5 | 6 | -------------------------------------------------------------------------------- /test/fd.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/qtest.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. 
*) 2 | -------------------------------------------------------------------------------- /example/serve.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /src/parallel.mli: -------------------------------------------------------------------------------- 1 | include Parallel_intf.Parallel (** @inline *) 2 | -------------------------------------------------------------------------------- /test/env_test.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/krb_expert.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/run_exn.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/timeouts.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/wrap_test.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /example/add_numbers.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. 
*) 2 | -------------------------------------------------------------------------------- /example/async_log.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /example/number_stats.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /example/reverse_pipe.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /example/side_arg.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /expect_test/workers.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/breaking_things.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/copy_executable.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/stress_test.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. 
*) 2 | -------------------------------------------------------------------------------- /example/abort_direct_pipe.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /example/alternative_init.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /example/rpc_direct_pipe.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /example/stream_workers.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /example/worker_binprot.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /expect_test/state_worker.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/buffer_age_limit.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /example/reverse_direct_pipe.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. 
*) 2 | -------------------------------------------------------------------------------- /example/spawn_in_foreground.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /example/workers_as_masters.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /expect_test/managed_on_failure.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/print_and_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Ran with print_and_run!" 3 | "$@" 4 | -------------------------------------------------------------------------------- /test/remove_running_executable.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /expect_test/master_pid_decoration.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/raise_on_connection_state_init.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. 
*) 2 | -------------------------------------------------------------------------------- /test/shutdown_during_worker_init.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /test/spawn_in_foreground_zombie.mli: -------------------------------------------------------------------------------- 1 | (*_ This signature is deliberately empty. *) 2 | -------------------------------------------------------------------------------- /unauthenticated/backend.mli: -------------------------------------------------------------------------------- 1 | include Rpc_parallel.Backend with type Settings.t = unit 2 | -------------------------------------------------------------------------------- /unauthenticated/rpc_parallel_unauthenticated.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | include Start_app 3 | -------------------------------------------------------------------------------- /krb/rpc_parallel_krb_public.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | module Mode = Mode 3 | include Start_app 4 | -------------------------------------------------------------------------------- /expect_test/sum_worker.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | 4 | val main : int -> int Deferred.t 5 | -------------------------------------------------------------------------------- /expect_test/product_worker.mli: -------------------------------------------------------------------------------- 1 | open! 
Core 2 | open Async 3 | 4 | val main : int -> int Deferred.t 5 | -------------------------------------------------------------------------------- /src/prog_and_args.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | type t = 4 | { prog : string 5 | ; args : string list 6 | } 7 | [@@deriving sexp_of] 8 | -------------------------------------------------------------------------------- /src/prog_and_args.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | type t = 4 | { prog : string 5 | ; args : string list 6 | } 7 | [@@deriving sexp_of] 8 | -------------------------------------------------------------------------------- /krb/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name rpc_parallel_krb_public) 3 | (libraries async core core_unix krb rpc_parallel) 4 | (preprocess 5 | (pps ppx_jane))) 6 | -------------------------------------------------------------------------------- /src/fd_redirection.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | type t = 4 | [ `Dev_null 5 | | `File_append of string 6 | | `File_truncate of string 7 | ] 8 | [@@deriving sexp] 9 | -------------------------------------------------------------------------------- /krb/backend.mli: -------------------------------------------------------------------------------- 1 | open Core 2 | 3 | module Settings : sig 4 | type t = Mode.t [@@deriving bin_io, sexp] 5 | end 6 | 7 | include Rpc_parallel.Backend with module Settings := Settings 8 | -------------------------------------------------------------------------------- /unauthenticated/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name rpc_parallel_unauthenticated) 3 | (public_name rpc_parallel.unauthenticated) 4 | (libraries async core rpc_parallel) 5 | (preprocess 6 | 
(pps ppx_jane))) 7 | -------------------------------------------------------------------------------- /expect_test/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name rpc_parallel_expect_test) 3 | (libraries async core re rpc_parallel rpc_parallel_krb_public transaction 4 | core_kernel.uuid) 5 | (preprocess 6 | (pps ppx_jane))) 7 | -------------------------------------------------------------------------------- /expect_test/rpc_parallel_expect_test.ml: -------------------------------------------------------------------------------- 1 | module Managed_on_failure = Managed_on_failure 2 | module Master_pid_decoration = Master_pid_decoration 3 | module Product_worker = Product_worker 4 | module State_worker = State_worker 5 | module Sum_worker = Sum_worker 6 | module Workers = Workers 7 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name rpc_parallel) 3 | (public_name rpc_parallel) 4 | (libraries async core_unix.command_unix core core_unix current_exe 5 | core_unix.daemon core_unix.linux_ext core_kernel.pairing_heap sexplib 6 | core_kernel.uuid core_unix.uuid) 7 | (preprocess 8 | (pps ppx_jane))) 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | INSTALL_ARGS := $(if $(PREFIX),--prefix $(PREFIX),) 2 | 3 | default: 4 | dune build 5 | 6 | install: 7 | dune install $(INSTALL_ARGS) 8 | 9 | uninstall: 10 | dune uninstall $(INSTALL_ARGS) 11 | 12 | reinstall: uninstall install 13 | 14 | clean: 15 | dune clean 16 | 17 | .PHONY: default install uninstall reinstall clean 18 | -------------------------------------------------------------------------------- /expect_test/workers.ml: 
-------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | 4 | let () = Rpc_parallel_krb_public.For_testing.initialize [%here] 5 | 6 | let%expect_test "" = 7 | let%bind sum = Sum_worker.main 10 8 | and product = Product_worker.main 10 in 9 | printf "sum: %d\n" sum; 10 | printf "product: %d\n" product; 11 | [%expect 12 | {| 13 | sum: 45 14 | product: 3628800 15 | |}]; 16 | return () 17 | ;; 18 | -------------------------------------------------------------------------------- /src/fd_redirection.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** A specification for where to write outputs. *) 4 | type t = 5 | [ `Dev_null (** Do not save the output anywhere. *) 6 | | `File_append of string 7 | (** Absolute path of a file to write to, creating the file if it does not already 8 | exist. *) 9 | | `File_truncate of string 10 | (** Absolute path of a file to write to, overwriting any existing file. 
*) 11 | ] 12 | [@@deriving sexp] 13 | -------------------------------------------------------------------------------- /example/dune: -------------------------------------------------------------------------------- 1 | (executables 2 | (modes byte exe) 3 | (names abort_direct_pipe add_numbers alternative_init async_log number_stats 4 | reverse_direct_pipe reverse_pipe rpc_direct_pipe serve side_arg 5 | spawn_in_foreground stream_workers worker_binprot workers_as_masters) 6 | (libraries async core_unix.command_unix core jane rpc_parallel 7 | rpc_parallel_krb_public rpc_parallel_unauthenticated sexplib 8 | core_unix.time_float_unix) 9 | (preprocess 10 | (pps ppx_jane))) 11 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (executables 2 | (modes byte exe) 3 | (names breaking_things buffer_age_limit copy_executable env_test fd 4 | krb_expert qtest raise_on_connection_state_init remove_running_executable 5 | run_exn shutdown_during_worker_init spawn_in_foreground_zombie stress_test 6 | timeouts wrap_test) 7 | (libraries async core_unix.command_unix core core_unix procfs_async 8 | qtest_deprecated rpc_parallel rpc_parallel_krb_public sexplib) 9 | (preprocess 10 | (pps ppx_jane))) 11 | 12 | (rule 13 | (deps qtest.exe) 14 | (action 15 | (bash "TESTING_FRAMEWORK= ./qtest.exe")) 16 | (alias runtest)) 17 | -------------------------------------------------------------------------------- /src/for_testing_internal.ml: -------------------------------------------------------------------------------- 1 | open! 
Core 2 | open Async 3 | 4 | let env_var = "RPC_PARALLEL_EXPECT_TEST_INITIALIZE" 5 | let initialize_source_code_position = ref None 6 | 7 | let set_initialize_source_code_position here = 8 | initialize_source_code_position := Some (Source_code_position.to_string here) 9 | ;; 10 | 11 | let worker_environment () = 12 | match !initialize_source_code_position with 13 | | None -> [] 14 | | Some here -> [ env_var, here ] 15 | ;; 16 | 17 | let worker_should_initialize here = 18 | match Unix.getenv env_var with 19 | | None -> false 20 | | Some env -> String.equal (Source_code_position.to_string here) env 21 | ;; 22 | -------------------------------------------------------------------------------- /src/how_to_run.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | 4 | type t = 5 | env:(string * string) list 6 | -> worker_command_args:string list 7 | -> wrap:(Prog_and_args.t -> Prog_and_args.t) 8 | -> Process.t Or_error.t Deferred.t 9 | 10 | let local ~env ~worker_command_args ~wrap = 11 | let { Prog_and_args.prog; args } = 12 | wrap { Prog_and_args.prog = Current_exe.get_path (); args = worker_command_args } 13 | in 14 | Process.create ~prog ~argv0:(Sys.get_argv ()).(0) ~args ~env:(`Extend env) () 15 | ;; 16 | 17 | let remote ?assert_binary_hash exec ~env ~worker_command_args ~wrap = 18 | Remote_executable.run ?assert_binary_hash exec ~env ~args:worker_command_args ~wrap 19 | ;; 20 | 21 | let wrap t ~f ~env ~worker_command_args ~wrap = 22 | t ~env ~worker_command_args ~wrap:(fun prog_and_args -> f (wrap prog_and_args)) 23 | ;; 24 | 25 | let run t ~env ~worker_command_args = t ~env ~worker_command_args ~wrap:Fn.id 26 | -------------------------------------------------------------------------------- /krb/mode.ml: -------------------------------------------------------------------------------- 1 | open! 
Core 2 | open Async 3 | 4 | module Kerberized = struct 5 | type t = 6 | { conn_type : Krb.Conn_type.Stable.V1.t 7 | ; server_key_source : Krb.Server_key_source.Stable.V2.t 8 | } 9 | [@@deriving bin_io, sexp] 10 | 11 | let krb_server_mode t = 12 | let conn_type_preference = Krb.Conn_type_preference.accept_only t.conn_type in 13 | let key_source = t.server_key_source in 14 | Krb.Mode.Server.Expert.kerberized ~conn_type_preference ~key_source |> return 15 | ;; 16 | 17 | let krb_client_mode t = 18 | let conn_type_preference = Krb.Conn_type_preference.accept_only t.conn_type in 19 | Krb.Mode.Client.kerberized ~conn_type_preference () 20 | ;; 21 | end 22 | 23 | type t = 24 | | Kerberized of Kerberized.t 25 | | For_unit_test 26 | [@@deriving bin_io, sexp] 27 | 28 | let kerberized ~key_source ~conn_type () = 29 | Kerberized { conn_type; server_key_source = key_source } 30 | ;; 31 | 32 | let for_unit_test = For_unit_test 33 | -------------------------------------------------------------------------------- /src/how_to_run.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | 4 | (** Describes everything needed for Rpc_parallel to launch a worker. *) 5 | type t 6 | 7 | (** A local worker needs no special information. Rpc_parallel runs the same executable 8 | that the master is using. *) 9 | val local : t 10 | 11 | (** In order to run a worker remotely, you must ensure that the executable is available on 12 | the remote host. If [~assert_binary_hash:false] is provided, the check that the remote 13 | binary's hash matches the currently running binary's hash is skipped. *) 14 | val remote : ?assert_binary_hash:bool -> _ Remote_executable.t -> t 15 | 16 | (** [wrap] allows you to customize how the executable is launched. For example, you can 17 | run the worker via some wrapper command. 
*) 18 | val wrap : t -> f:(Prog_and_args.t -> Prog_and_args.t) -> t 19 | 20 | val run 21 | : t 22 | -> env:(string * string) list 23 | -> worker_command_args:string list 24 | -> Process.t Or_error.t Deferred.t 25 | -------------------------------------------------------------------------------- /rpc_parallel.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "Jane Street developers" 3 | authors: ["Jane Street Group, LLC"] 4 | homepage: "https://github.com/janestreet/rpc_parallel" 5 | bug-reports: "https://github.com/janestreet/rpc_parallel/issues" 6 | dev-repo: "git+https://github.com/janestreet/rpc_parallel.git" 7 | doc: "https://ocaml.janestreet.com/ocaml-core/latest/doc/rpc_parallel/index.html" 8 | license: "MIT" 9 | build: [ 10 | ["dune" "build" "-p" name "-j" jobs] 11 | ] 12 | depends: [ 13 | "ocaml" {>= "5.1.0"} 14 | "async" 15 | "core" 16 | "core_kernel" 17 | "core_unix" 18 | "current_exe" 19 | "ppx_jane" 20 | "sexplib" 21 | "dune" {>= "3.17.0"} 22 | ] 23 | available: arch != "arm32" & arch != "x86_32" 24 | synopsis: "Type-safe parallel library built on top of Async_rpc" 25 | description: " 26 | Rpc_parallel offers an API to define various workers and protocols, 27 | spawn workers as separate processes, and communicate with them using 28 | Async Rpc. 29 | 30 | " 31 | -------------------------------------------------------------------------------- /src/rpc_parallel.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** A type-safe parallel library built on top of Async_rpc. 4 | 5 | {[ 6 | module Worker = Rpc_parallel.Make ((T : Worker_spec)) 7 | ]} 8 | 9 | The [Worker] module can be used to spawn new workers, either locally or remotely, and 10 | run functions on these workers. [T] specifies which functions can be run on a 11 | [Worker.t] as well as the implementations for these functions. 
In addition, [T] 12 | specifies worker states and connection states. See README for more details *) 13 | 14 | module Fd_redirection = Fd_redirection 15 | module How_to_run = How_to_run 16 | module Map_reduce = Map_reduce 17 | module Prog_and_args = Prog_and_args 18 | module Remote_executable = Remote_executable 19 | module Rpc_settings = Rpc_settings 20 | module Utils = Utils 21 | 22 | module Managed = Managed 23 | [@@alert legacy "Prefer using the plain [Rpc_parallel] instead of [Rpc_parallel.Managed]"] 24 | 25 | include Parallel 26 | 27 | module Parallel = Parallel 28 | [@@deprecated "[since 2016-11] Use [Rpc_parallel] instead of [Rpc_parallel.Parallel]"] 29 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2014--2025 Jane Street Group, LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/for_testing_internal.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | 3 | (** In order for expect tests to work properly, we need the following to happen before any 4 | tests are run, but after all the definitions of any workers used in the tests: 5 | 6 | - Master initialization In the main process, we must start the master server. 7 | 8 | - Worker initialization In spawned processes, we must start the worker server and 9 | start the Async scheduler. 10 | 11 | We need to be especially careful in the following case: a.ml: defines worker A b.ml: 12 | defines worker B do.ml: uses worker A and worker B 13 | 14 | Assuming a.ml and b.ml each have their own expect tests, they will need worker 15 | initialization code at their top levels. However, do.ml needs the worker 16 | initialization code to run after the definitions of the workers in a.ml and b.ml. 17 | 18 | We use an environment variable to track the root of the dependency tree. We ensure 19 | that worker initialization only occurs at the root. *) 20 | 21 | val set_initialize_source_code_position : Source_code_position.t -> unit 22 | val worker_environment : unit -> (string * string) list 23 | val worker_should_initialize : Source_code_position.t -> bool 24 | -------------------------------------------------------------------------------- /krb/start_app.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! 
Async 3 | 4 | let backend = (module Backend : Rpc_parallel.Backend) 5 | 6 | let backend_and_settings krb_mode = 7 | Rpc_parallel.Backend_and_settings.T ((module Backend), krb_mode) 8 | ;; 9 | 10 | let start_app 11 | ?rpc_max_message_size 12 | ?rpc_buffer_age_limit 13 | ?rpc_handshake_timeout 14 | ?rpc_heartbeat_config 15 | ?when_parsing_succeeds 16 | ~krb_mode 17 | command 18 | = 19 | Rpc_parallel.start_app 20 | ?rpc_max_message_size 21 | ?rpc_buffer_age_limit 22 | ?rpc_handshake_timeout 23 | ?rpc_heartbeat_config 24 | ?when_parsing_succeeds 25 | (backend_and_settings krb_mode) 26 | command 27 | ;; 28 | 29 | module For_testing = struct 30 | let initialize here = 31 | Rpc_parallel.For_testing.initialize (backend_and_settings Mode.for_unit_test) here 32 | ;; 33 | end 34 | 35 | module Expert = struct 36 | let start_master_server_exn 37 | ?rpc_max_message_size 38 | ?rpc_buffer_age_limit 39 | ?rpc_handshake_timeout 40 | ?rpc_heartbeat_config 41 | ?pass_name 42 | ~krb_mode 43 | ~worker_command_args 44 | () 45 | = 46 | Rpc_parallel.Expert.start_master_server_exn 47 | ?rpc_max_message_size 48 | ?rpc_buffer_age_limit 49 | ?rpc_handshake_timeout 50 | ?rpc_heartbeat_config 51 | ?pass_name 52 | (backend_and_settings krb_mode) 53 | ~worker_command_args 54 | () 55 | ;; 56 | 57 | let worker_command = Rpc_parallel.Expert.worker_command backend 58 | end 59 | -------------------------------------------------------------------------------- /unauthenticated/start_app.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! 
Async 3 | 4 | let backend = (module Backend : Rpc_parallel.Backend) 5 | let backend_and_settings = Rpc_parallel.Backend_and_settings.T ((module Backend), ()) 6 | 7 | let start_app 8 | ?rpc_max_message_size 9 | ?rpc_buffer_age_limit 10 | ?rpc_handshake_timeout 11 | ?rpc_heartbeat_config 12 | ?when_parsing_succeeds 13 | ?complete_subcommands 14 | ?add_validate_parsing_flag 15 | ?argv 16 | command 17 | = 18 | Rpc_parallel.start_app 19 | ?rpc_max_message_size 20 | ?rpc_buffer_age_limit 21 | ?rpc_handshake_timeout 22 | ?rpc_heartbeat_config 23 | ?when_parsing_succeeds 24 | ?complete_subcommands 25 | ?add_validate_parsing_flag 26 | ?argv 27 | backend_and_settings 28 | command 29 | ;; 30 | 31 | module For_testing = struct 32 | let initialize = Rpc_parallel.For_testing.initialize backend_and_settings 33 | end 34 | 35 | module Expert = struct 36 | let start_master_server_exn 37 | ?rpc_max_message_size 38 | ?rpc_buffer_age_limit 39 | ?rpc_handshake_timeout 40 | ?rpc_heartbeat_config 41 | ?pass_name 42 | = 43 | Rpc_parallel.Expert.start_master_server_exn 44 | ?rpc_max_message_size 45 | ?rpc_buffer_age_limit 46 | ?rpc_handshake_timeout 47 | ?rpc_heartbeat_config 48 | ?pass_name 49 | backend_and_settings 50 | ;; 51 | 52 | let worker_command = Rpc_parallel.Expert.worker_command backend 53 | let start_worker_server_exn = Rpc_parallel.Expert.start_worker_server_exn backend 54 | end 55 | -------------------------------------------------------------------------------- /krb/mode.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | 4 | module Kerberized : sig 5 | type t 6 | 7 | val krb_server_mode : t -> Krb.Mode.Server.t Deferred.t 8 | val krb_client_mode : t -> Krb.Mode.Client.t 9 | end 10 | 11 | type t = 12 | | Kerberized of Kerberized.t 13 | | For_unit_test 14 | [@@deriving bin_io, sexp] 15 | 16 | (** In this mode, communication to and from workers will use Kerberos for authentication. 
17 | 18 | [key_source] must be a valid key source for all the hosts where a worker is running. 19 | For example, if your key source specifies a keytab path, that path must have a valid 20 | keytab on hosts where you spawn a worker. 21 | 22 | [conn_type] specifies the level of protection for the rpc communication to and from 23 | workers. If your application is extremely performance sensitive and sends a lot of 24 | data to and from your workers, you should use [Auth]. Otherwise, using [Priv] as a 25 | default is appropriate. See lib/krb/public/src/conn_type.mli for more documentation. 26 | 27 | Regardless of what [conn_type] you choose, connections will only be accepted from 28 | principals that match the principal associated with [key_source]. By default, this is 29 | the principal associated with the user that started the rpc_parallel process. *) 30 | val kerberized 31 | : key_source:Krb.Server_key_source.t 32 | -> conn_type:Krb.Conn_type.t 33 | -> unit 34 | -> t 35 | 36 | (** In this mode, communication to and from workers will not be kerberized. This is only 37 | appropriate if you are running a test. *) 38 | val for_unit_test : t 39 | -------------------------------------------------------------------------------- /test/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export rpc_parallel_base=$(jenga root)/lib/rpc_parallel/public 3 | 4 | function do_run() { 5 | local one_line=$1 6 | shift 7 | local exe=$1 8 | shift 9 | 10 | local output_file=$(mktemp) 11 | # Redirect stderr to stdout so we can "-one-line" stderr 12 | "$exe" "$@" > "$output_file" 2>&1 & 13 | local pid=$! 
14 | wait $pid 15 | 16 | if "$one_line"; then 17 | cat "$output_file" | tr -d '\n' 18 | echo "" 19 | else 20 | cat "$output_file" 21 | fi 22 | rm "$output_file" 23 | 24 | # Ensure all the workers shut down in some timely fashion 25 | local children="" 26 | for i in $(seq 50); do 27 | children=$(pgrep -f "/proc/$pid/exe") 28 | if test -z "$children"; then 29 | break 30 | fi 31 | sleep 0.1 32 | done 33 | 34 | if ! test -z "$children"; then 35 | echo "Didn't clean up worker processes" 36 | fi 37 | } 38 | export -f do_run 39 | 40 | function run_one_line() { 41 | local exe=$rpc_parallel_base/$1 42 | shift 43 | 44 | do_run true "$exe" "$@" 45 | } 46 | export -f run_one_line 47 | 48 | function run() { 49 | local exe=$rpc_parallel_base/$1 50 | shift 51 | 52 | do_run false "$exe" "$@" 53 | } 54 | export -f run 55 | 56 | function run_absolute_path() { 57 | local exe=$1 58 | shift 59 | 60 | do_run false "$exe" "$@" 61 | } 62 | export -f run_absolute_path 63 | 64 | # We must set the TESTING_FRAMEWORK environment variable so the am_running_test 65 | # function returns true. The rpc_parallel_krb library bypasses kerberos when 66 | # that function returns true. 67 | export TESTING_FRAMEWORK= 68 | -------------------------------------------------------------------------------- /unauthenticated/start_app.mli: -------------------------------------------------------------------------------- 1 | (** See lib/rpc_parallel/src/parallel_intf.ml for documentation. *) 2 | open Core 3 | 4 | open Async 5 | 6 | (** See lib/rpc_parallel/src/parallel_intf.ml for documentation. 
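As a usage sketch (where [command] stands for the application's own [Command.t]):

{[
  let () = Rpc_parallel_unauthenticated.start_app command
]}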
*) 7 | val start_app 8 | : ?rpc_max_message_size:int 9 | -> ?rpc_buffer_age_limit:Writer.buffer_age_limit 10 | -> ?rpc_handshake_timeout:Time_float.Span.t 11 | -> ?rpc_heartbeat_config:Rpc.Connection.Heartbeat_config.t 12 | -> ?when_parsing_succeeds:(unit -> unit) 13 | -> ?complete_subcommands: 14 | (path:string list -> part:string -> string list list -> string list option) 15 | -> ?add_validate_parsing_flag:bool 16 | -> ?argv:string list 17 | -> Command.t 18 | -> unit 19 | 20 | module For_testing : sig 21 | (** See lib/rpc_parallel/src/parallel_intf.ml for documentation. *) 22 | val initialize : Source_code_position.t -> unit 23 | end 24 | 25 | module Expert : sig 26 | (** See lib/rpc_parallel/src/parallel_intf.ml for documentation. *) 27 | val start_master_server_exn 28 | : ?rpc_max_message_size:int 29 | -> ?rpc_buffer_age_limit:Writer.buffer_age_limit 30 | -> ?rpc_handshake_timeout:Time_float.Span.t 31 | -> ?rpc_heartbeat_config:Rpc.Connection.Heartbeat_config.t 32 | -> ?pass_name:bool (** default: true *) 33 | -> worker_command_args:string list 34 | -> unit 35 | -> unit 36 | 37 | (** See lib/rpc_parallel/src/parallel_intf.ml for documentation. *) 38 | val worker_command : Command.t 39 | 40 | (** See lib/rpc_parallel/src/parallel_intf.ml for documentation. *) 41 | val start_worker_server_exn : Rpc_parallel.Expert.Worker_env.t -> unit 42 | end 43 | -------------------------------------------------------------------------------- /krb/start_app.mli: -------------------------------------------------------------------------------- 1 | (** See lib/rpc_parallel/src/parallel_intf.ml for documentation. *) 2 | 3 | open Core 4 | open Async 5 | 6 | (** See lib/rpc_parallel/src/parallel_intf.ml for documentation. Backend_and_settings.t is 7 | specialized to expose the krb server and client modes. 
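For example, as done throughout the tests in this repository, where [command] is the application's [Command.t]:

{[
  let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command
]}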
*) 8 | val start_app 9 | : ?rpc_max_message_size:int 10 | -> ?rpc_buffer_age_limit:Writer.buffer_age_limit 11 | -> ?rpc_handshake_timeout:Time_float.Span.t 12 | -> ?rpc_heartbeat_config:Rpc.Connection.Heartbeat_config.t 13 | -> ?when_parsing_succeeds:(unit -> unit) 14 | -> krb_mode:Mode.t 15 | -> Command.t 16 | -> unit 17 | 18 | module For_testing : sig 19 | (** See lib/rpc_parallel/src/parallel_intf.ml for documentation. The krb server and 20 | client modes are set for testing so that manually configuring a kerberos sandbox in 21 | your jbuild is unnecessary. *) 22 | val initialize : Source_code_position.t -> unit 23 | end 24 | 25 | module Expert : sig 26 | (** See lib/rpc_parallel/src/parallel_intf.ml for documentation. Backend_and_settings.t 27 | is specialized to expose the krb server and client modes. *) 28 | val start_master_server_exn 29 | : ?rpc_max_message_size:int 30 | -> ?rpc_buffer_age_limit:Writer.buffer_age_limit 31 | -> ?rpc_handshake_timeout:Time_float.Span.t 32 | -> ?rpc_heartbeat_config:Rpc.Connection.Heartbeat_config.t 33 | -> ?pass_name:bool (** default: true *) 34 | -> krb_mode:Mode.t 35 | -> worker_command_args:string list 36 | -> unit 37 | -> unit 38 | 39 | (** See lib/rpc_parallel/src/parallel_intf.ml for documentation. *) 40 | val worker_command : Command.t 41 | end 42 | -------------------------------------------------------------------------------- /src/rpc_settings.mli: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Async 3 | 4 | type t = 5 | { max_message_size : int option 6 | ; buffer_age_limit : Writer.buffer_age_limit option 7 | ; handshake_timeout : Time_float.Span.t option 8 | ; heartbeat_config : Rpc.Connection.Heartbeat_config.t option 9 | } 10 | [@@deriving bin_io, sexp] 11 | 12 | (** [env_var] is the name of the environment variable read by rpc-parallel on start-up to 13 | inject additional rpc-settings for the application. 
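For example, using the sexp format produced by [to_string_for_env_var] below (the binary name here is hypothetical):

{v
  RPC_PARALLEL_RPC_SETTINGS='((heartbeat_config((timeout 1h)(send_every 1m))))' ./main.exe
v}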
*) 14 | val env_var : string 15 | 16 | (** Use all the default rpc settings. This is the record with [None] in every field. *) 17 | val default : t 18 | 19 | (** [to_string_for_env_var] generates the expected string format from the arguments 20 | matching the [start_app] function to be used with the [env_var] above. *) 21 | val to_string_for_env_var 22 | : ?max_message_size:int 23 | -> ?buffer_age_limit:Writer.buffer_age_limit 24 | -> ?handshake_timeout:Time_float.Span.t 25 | -> ?heartbeat_config:Rpc.Connection.Heartbeat_config.t 26 | -> unit 27 | -> string 28 | 29 | val create_with_env_override 30 | : max_message_size:int option 31 | -> buffer_age_limit:Writer.buffer_age_limit option 32 | -> handshake_timeout:Time_float.Span.t option 33 | -> heartbeat_config:Rpc.Connection.Heartbeat_config.t option 34 | -> t 35 | 36 | module For_internal_testing : sig 37 | val create_with_env_override 38 | : env_var:string 39 | -> max_message_size:int option 40 | -> buffer_age_limit:Writer.buffer_age_limit option 41 | -> handshake_timeout:Time_float.Span.t option 42 | -> heartbeat_config:Rpc.Connection.Heartbeat_config.t option 43 | -> t 44 | end 45 | -------------------------------------------------------------------------------- /test/raise_on_connection_state_init.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Worker = struct 5 | module T = struct 6 | type 'worker functions = unit 7 | 8 | module Worker_state = struct 9 | type init_arg = unit [@@deriving bin_io] 10 | type t = unit 11 | end 12 | 13 | module Connection_state = struct 14 | type init_arg = unit [@@deriving bin_io] 15 | type t = unit 16 | end 17 | 18 | module Functions (_ : Rpc_parallel.Creator) = struct 19 | let init_connection_state ~connection:_ ~worker_state:_ () = 20 | failwith "text of expected failure" 21 | ;; 22 | 23 | let init_worker_state () = Deferred.unit 24 | let functions = () 25 | end 26 | end 27 | 28 | include 
Rpc_parallel.Make (T) 29 | end 30 | 31 | let command = 32 | let open Command.Let_syntax in 33 | Command.async 34 | ~summary:"testing worker shutdown" 35 | (let%map () = return () in 36 | fun () -> 37 | let open Deferred.Let_syntax in 38 | let%bind worker = 39 | Worker.spawn_exn 40 | ~redirect_stdout:`Dev_null 41 | ~redirect_stderr:`Dev_null 42 | ~shutdown_on:Called_shutdown_function 43 | ~on_failure:Error.raise 44 | () 45 | in 46 | match%bind Worker.Connection.client worker () with 47 | | Ok (_ : Worker.Connection.t) -> failwith "expected to fail but did not" 48 | | Error e -> 49 | printf !"expected failure:\n%{sexp:Error.t}\n" e; 50 | let%bind () = Worker.shutdown worker >>| ok_exn in 51 | Deferred.unit) 52 | ~behave_nicely_in_pipeline:false 53 | ;; 54 | 55 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 56 | -------------------------------------------------------------------------------- /example/side_arg.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Side_arg_map_function = Rpc_parallel.Map_reduce.Make_map_function_with_init (struct 5 | type state_type = string 6 | 7 | module Param = struct 8 | type t = string [@@deriving bin_io] 9 | end 10 | 11 | module Input = struct 12 | type t = unit [@@deriving bin_io] 13 | end 14 | 15 | module Output = struct 16 | type t = string [@@deriving bin_io] 17 | end 18 | 19 | let init param ~worker_index:(_ : int) = 20 | Random.self_init (); 21 | return (sprintf "[%i] %s" (Random.bits ()) param) 22 | ;; 23 | 24 | let map state () = return state 25 | end) 26 | 27 | let command = 28 | Command.async_spec 29 | ~summary:"Pass a side arg" 30 | Command.Spec.( 31 | empty 32 | +> flag "ntimes" (optional_with_default 100 int) ~doc:" Number of things to map" 33 | +> flag "nworkers" (optional_with_default 4 int) ~doc:" Number of workers") 34 | (fun ntimes nworkers () -> 35 | let list = Pipe.of_list (List.init ntimes ~f:(fun _i -> 
())) in 36 | let config = 37 | Rpc_parallel.Map_reduce.Config.create 38 | ~local:nworkers 39 | () 40 | ~redirect_stderr:`Dev_null 41 | ~redirect_stdout:`Dev_null 42 | in 43 | let%bind output_reader = 44 | Rpc_parallel.Map_reduce.map_unordered 45 | config 46 | list 47 | ~m:(module Side_arg_map_function) 48 | ~param:"Message from the master" 49 | in 50 | Pipe.iter output_reader ~f:(fun (message, index) -> 51 | printf "%i: %s\n" index message; 52 | Deferred.unit)) 53 | ~behave_nicely_in_pipeline:false 54 | ;; 55 | 56 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 57 | -------------------------------------------------------------------------------- /expect_test/product_worker.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module T = struct 5 | type 'worker functions = { product : ('worker, int, int) Rpc_parallel.Function.t } 6 | 7 | module Worker_state = struct 8 | type init_arg = unit [@@deriving bin_io] 9 | type t = unit 10 | end 11 | 12 | module Connection_state = struct 13 | type init_arg = unit [@@deriving bin_io] 14 | type t = unit 15 | end 16 | 17 | module Functions 18 | (C : Rpc_parallel.Creator 19 | with type worker_state := Worker_state.t 20 | and type connection_state := Connection_state.t) = 21 | struct 22 | let product_impl ~worker_state:() ~conn_state:() arg = 23 | let product = List.fold ~init:1 ~f:( * ) (List.init arg ~f:(( + ) 1)) in 24 | [%log.global.info_format "Prod_worker.product: %i\n" product]; 25 | return product 26 | ;; 27 | 28 | let product = 29 | C.create_rpc ~f:product_impl ~bin_input:Int.bin_t ~bin_output:Int.bin_t () 30 | ;; 31 | 32 | let functions = { product } 33 | let init_worker_state () = Deferred.unit 34 | let init_connection_state ~connection:_ ~worker_state:_ = return 35 | end 36 | end 37 | 38 | include Rpc_parallel.Make (T) 39 | 40 | let main max = 41 | let%bind conn = 42 | spawn 43 | ~on_failure:Error.raise 44 | 
~shutdown_on:Connection_closed 45 | ~redirect_stdout:`Dev_null 46 | ~redirect_stderr:`Dev_null 47 | ~connection_state_init_arg:() 48 | () 49 | >>| ok_exn 50 | in 51 | Connection.run_exn conn ~f:functions.product ~arg:max 52 | ;; 53 | 54 | let () = Rpc_parallel_krb_public.For_testing.initialize [%here] 55 | 56 | let%expect_test "" = 57 | let%bind res = main 10 in 58 | printf "%d\n" res; 59 | [%expect {| 3628800 |}]; 60 | return () 61 | ;; 62 | -------------------------------------------------------------------------------- /test/breaking_things.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Worker = struct 5 | module T = struct 6 | type 'worker functions = 7 | { add_one : unit -> ('worker, int, int) Rpc_parallel.Function.t } 8 | 9 | module Worker_state = struct 10 | type init_arg = unit [@@deriving bin_io] 11 | type t = unit 12 | end 13 | 14 | module Connection_state = struct 15 | type init_arg = unit [@@deriving bin_io] 16 | type t = unit 17 | end 18 | 19 | module Functions 20 | (C : Rpc_parallel.Creator 21 | with type worker_state := Worker_state.t 22 | and type connection_state := Connection_state.t) = 23 | struct 24 | let add_one () = 25 | C.create_rpc 26 | ~f:(fun ~worker_state:() ~conn_state:() n -> return (n + 1)) 27 | ~bin_input:Int.bin_t 28 | ~bin_output:Int.bin_t 29 | () 30 | ;; 31 | 32 | let functions = { add_one } 33 | let init_worker_state () = Deferred.unit 34 | let init_connection_state ~connection:_ ~worker_state:_ = return 35 | end 36 | end 37 | 38 | include Rpc_parallel.Make (T) 39 | end 40 | 41 | let main () = 42 | Worker.spawn 43 | ~on_failure:Error.raise 44 | ~shutdown_on:Connection_closed 45 | ~redirect_stdout:`Dev_null 46 | ~redirect_stderr:`Dev_null 47 | ~connection_state_init_arg:() 48 | () 49 | >>=? fun worker_conn -> 50 | Worker.Connection.run worker_conn ~f:(Worker.functions.add_one ()) ~arg:10 51 | >>=? fun res -> 52 | Core.Printf.printf "worker: %d\n%!" 
res; 53 | Deferred.Or_error.ok_unit 54 | ;; 55 | 56 | let command = 57 | Command.async_spec_or_error 58 | ~summary:"" 59 | Command.Spec.empty 60 | main 61 | ~behave_nicely_in_pipeline:false 62 | ;; 63 | 64 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 65 | -------------------------------------------------------------------------------- /unauthenticated/backend.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Async 3 | 4 | let name = "Unauthenticated Async RPC" 5 | 6 | module Settings = struct 7 | type t = unit [@@deriving bin_io, sexp] 8 | end 9 | 10 | let serve 11 | ?max_message_size 12 | ?buffer_age_limit 13 | ?handshake_timeout 14 | ?heartbeat_config 15 | ~implementations 16 | ~initial_connection_state 17 | ~where_to_listen 18 | () 19 | = 20 | let make_transport fd ~max_message_size = 21 | Rpc.Transport.of_fd ?buffer_age_limit fd ~max_message_size 22 | in 23 | Rpc.Connection.serve 24 | ?max_message_size 25 | ~make_transport 26 | ?handshake_timeout 27 | ?heartbeat_config 28 | ~implementations 29 | ~initial_connection_state 30 | ~where_to_listen 31 | () 32 | ;; 33 | 34 | let client 35 | ?implementations 36 | ?max_message_size 37 | ?buffer_age_limit 38 | ?handshake_timeout 39 | ?heartbeat_config 40 | ?description 41 | () 42 | where_to_connect 43 | = 44 | let make_transport fd ~max_message_size = 45 | Rpc.Transport.of_fd ?buffer_age_limit fd ~max_message_size 46 | in 47 | Rpc.Connection.client 48 | ?implementations 49 | ?max_message_size 50 | ~make_transport 51 | ?handshake_timeout 52 | ?heartbeat_config 53 | ?description 54 | where_to_connect 55 | |> Deferred.Or_error.of_exn_result 56 | ;; 57 | 58 | let with_client 59 | ?implementations 60 | ?max_message_size 61 | ?buffer_age_limit 62 | ?handshake_timeout 63 | ?heartbeat_config 64 | () 65 | where_to_connect 66 | f 67 | = 68 | let make_transport fd ~max_message_size = 69 | Rpc.Transport.of_fd ?buffer_age_limit fd 
~max_message_size 70 | in 71 | Rpc.Connection.with_client 72 | ?implementations 73 | ?max_message_size 74 | ~make_transport 75 | ?handshake_timeout 76 | ?heartbeat_config 77 | where_to_connect 78 | f 79 | |> Deferred.Or_error.of_exn_result 80 | ;; 81 | -------------------------------------------------------------------------------- /test/run_exn.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Failer_impl = struct 5 | type 'a functions = ('a, unit, unit) Rpc_parallel.Function.t 6 | 7 | module Worker_state = struct 8 | type init_arg = unit [@@deriving bin_io] 9 | type t = unit 10 | end 11 | 12 | module Connection_state = struct 13 | type init_arg = unit [@@deriving bin_io] 14 | type t = unit 15 | end 16 | 17 | module Functions 18 | (C : Rpc_parallel.Creator 19 | with type worker_state := Worker_state.t 20 | and type connection_state := Connection_state.t) = 21 | struct 22 | let functions = 23 | C.create_rpc 24 | ~bin_input:Unit.bin_t 25 | ~bin_output:Unit.bin_t 26 | ~f:(fun ~worker_state:() ~conn_state:() () -> failwith "text of expected failure") 27 | () 28 | ;; 29 | 30 | let init_worker_state () = Deferred.unit 31 | let init_connection_state ~connection:_ ~worker_state:_ = return 32 | end 33 | end 34 | 35 | module Failer = Rpc_parallel.Make (Failer_impl) 36 | 37 | let command = 38 | Command.async_spec_or_error 39 | ~summary:"ensure that raising in a worker function passes the exception to the master" 40 | Command.Spec.empty 41 | (fun () -> 42 | let%bind conn = 43 | Failer.spawn_exn 44 | ~on_failure:Error.raise 45 | ~shutdown_on:Connection_closed 46 | ~redirect_stdout:`Dev_null 47 | ~redirect_stderr:`Dev_null 48 | ~connection_state_init_arg:() 49 | () 50 | in 51 | match%bind Failer.Connection.run conn ~f:Failer.functions ~arg:() with 52 | | Ok () -> failwith "expected to fail but did not" 53 | | Error e -> 54 | printf !"expected failure:\n%{sexp:Error.t}\n" e; 55 | 
Deferred.Or_error.ok_unit) 56 | ~behave_nicely_in_pipeline:false 57 | ;; 58 | 59 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 60 | -------------------------------------------------------------------------------- /src/remote_executable.mli: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | (** This module is used to transfer the currently running executable to a remote server *) 5 | type 'a t 6 | 7 | (** [existing_on_host ~executable_path ?strict_host_key_checking host] will create a [t] 8 | from the supplied host and path. The executable MUST be the exact same executable that 9 | will be run in the master process. There will be a check for this in [spawn_worker]. 10 | Use [strict_host_key_checking] to change the StrictHostKeyChecking option used when 11 | sshing into this host *) 12 | val existing_on_host 13 | : executable_path:string 14 | -> ?strict_host_key_checking:[ `No | `Ask | `Yes ] 15 | -> string 16 | -> [ `Undeletable ] t 17 | 18 | (** [copy_to_host ~executable_dir ?strict_host_key_checking host] will copy the currently 19 | running executable to the desired host and path. It will keep the same name but add a 20 | suffix .XXXXXXXX. Use [strict_host_key_checking] to change the StrictHostKeyChecking 21 | option used when sshing into this host *) 22 | val copy_to_host 23 | : executable_dir:string 24 | -> ?strict_host_key_checking:[ `No | `Ask | `Yes ] 25 | -> string 26 | -> [ `Deletable ] t Or_error.t Deferred.t 27 | 28 | (** [delete t] will delete a remote executable that was copied over by a previous call to 29 | [copy_to_host] *) 30 | val delete : [ `Deletable ] t -> unit Or_error.t Deferred.t 31 | 32 | (** Get the underlying path, host, and host_key_checking *) 33 | val path : _ t -> string 34 | 35 | val host : _ t -> string 36 | val host_key_checking : _ t -> string list 37 | 38 | (** Run the executable remotely with the given environment and arguments. 
This checks to 39 | make sure [t] matches the currently running executable that [run] is called from. The 40 | check can be disabled by supplying [~assert_binary_hash:false]. *) 41 | val run 42 | : ?assert_binary_hash:bool 43 | -> _ t 44 | -> env:(string * string) list 45 | -> args:string list 46 | -> wrap:(Prog_and_args.t -> Prog_and_args.t) 47 | -> Process.t Or_error.t Deferred.t 48 | -------------------------------------------------------------------------------- /expect_test/state_worker.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module T = struct 5 | type 'worker functions = 6 | { string_and_updates : 7 | ('worker, int, string * char Pipe.Reader.t) Rpc_parallel.Function.t 8 | } 9 | 10 | module Worker_state = struct 11 | type init_arg = unit [@@deriving bin_io] 12 | type t = unit 13 | end 14 | 15 | module Connection_state = struct 16 | type init_arg = unit [@@deriving bin_io] 17 | type t = unit 18 | end 19 | 20 | module Functions 21 | (C : Rpc_parallel.Creator 22 | with type worker_state := Worker_state.t 23 | and type connection_state := Connection_state.t) = 24 | struct 25 | let string_and_updates_impl ~worker_state:() ~conn_state:() arg = 26 | let string = "hello world " in 27 | let chars = List.init arg ~f:(fun i -> Char.of_int_exn (Char.to_int 'a' + i)) in 28 | return (string, Pipe.of_list chars) 29 | ;; 30 | 31 | let string_and_updates = 32 | C.create_state 33 | ~f:string_and_updates_impl 34 | ~bin_query:Int.bin_t 35 | ~bin_state:String.bin_t 36 | ~bin_update:Char.bin_t 37 | () 38 | ;; 39 | 40 | let functions = { string_and_updates } 41 | let init_worker_state () = Deferred.unit 42 | let init_connection_state ~connection:_ ~worker_state:_ = return 43 | end 44 | end 45 | 46 | include Rpc_parallel.Make (T) 47 | 48 | let main num_chars = 49 | let%bind conn = 50 | spawn 51 | ~on_failure:Error.raise 52 | ~shutdown_on:Connection_closed 53 | ~redirect_stdout:`Dev_null 54 | 
~redirect_stderr:`Dev_null 55 | ~connection_state_init_arg:() 56 | () 57 | >>| ok_exn 58 | in 59 | Connection.run_exn conn ~f:functions.string_and_updates ~arg:num_chars 60 | ;; 61 | 62 | let () = Rpc_parallel_krb_public.For_testing.initialize [%here] 63 | 64 | let%expect_test "" = 65 | let%bind state, chars = main 10 in 66 | let%bind all_chars = Pipe.read_all chars >>| Queue.to_list in 67 | let result = state ^ String.of_char_list all_chars in 68 | printf "%s\n" result; 69 | [%expect {| hello world abcdefghij |}]; 70 | return () 71 | ;; 72 | -------------------------------------------------------------------------------- /test/krb_expert.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module T = struct 5 | type 'worker functions = { f : ('worker, unit, unit) Rpc_parallel.Function.t } 6 | 7 | module Worker_state = struct 8 | type init_arg = unit [@@deriving bin_io] 9 | type t = unit 10 | end 11 | 12 | module Connection_state = struct 13 | type init_arg = unit [@@deriving bin_io] 14 | type t = unit 15 | end 16 | 17 | module Functions 18 | (C : Rpc_parallel.Creator 19 | with type worker_state := Worker_state.t 20 | and type connection_state := Connection_state.t) = 21 | struct 22 | let f_impl ~worker_state:() ~conn_state:() () = return () 23 | 24 | let f = 25 | C.create_rpc 26 | ~f:f_impl 27 | ~bin_input:[%bin_type_class: unit] 28 | ~bin_output:[%bin_type_class: unit] 29 | () 30 | ;; 31 | 32 | let functions = { f } 33 | let init_worker_state () = Deferred.unit 34 | let init_connection_state ~connection:_ ~worker_state:_ = return 35 | end 36 | end 37 | 38 | include Rpc_parallel.Make (T) 39 | 40 | let main_command = 41 | let open Command.Let_syntax in 42 | Command.async 43 | ~summary:"start the master and spawn a worker" 44 | (let%map_open () = return () in 45 | fun () -> 46 | let open Deferred.Let_syntax in 47 | Rpc_parallel_krb_public.Expert.start_master_server_exn 48 | ~krb_mode:For_unit_test 
49 | ~worker_command_args:[ "worker" ] 50 | (); 51 | let%bind conn = 52 | spawn_exn 53 | ~on_failure:Error.raise 54 | ~shutdown_on:Connection_closed 55 | ~connection_state_init_arg:() 56 | ~redirect_stdout:`Dev_null 57 | ~redirect_stderr:`Dev_null 58 | () 59 | in 60 | let%map () = Connection.run_exn conn ~f:functions.f ~arg:() in 61 | print_endline "Success.") 62 | ~behave_nicely_in_pipeline:false 63 | ;; 64 | 65 | let command = 66 | Command.group 67 | ~summary:"Using Rpc_parallel.Expert" 68 | [ "worker", Rpc_parallel_krb_public.Expert.worker_command; "main", main_command ] 69 | ;; 70 | 71 | let () = Command_unix.run command 72 | -------------------------------------------------------------------------------- /src/rpc_settings.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Async 3 | 4 | type t = 5 | { max_message_size : int option [@sexp.option] 6 | ; buffer_age_limit : Writer.buffer_age_limit option [@sexp.option] 7 | ; handshake_timeout : Time_float.Span.t option [@sexp.option] 8 | ; heartbeat_config : Rpc.Connection.Heartbeat_config.t option [@sexp.option] 9 | } 10 | [@@deriving sexp, bin_io] 11 | 12 | let env_var = "RPC_PARALLEL_RPC_SETTINGS" 13 | 14 | let default = 15 | { max_message_size = None 16 | ; buffer_age_limit = None 17 | ; handshake_timeout = None 18 | ; heartbeat_config = None 19 | } 20 | ;; 21 | 22 | let to_string_for_env_var 23 | ?max_message_size 24 | ?buffer_age_limit 25 | ?handshake_timeout 26 | ?heartbeat_config 27 | () 28 | = 29 | let t = { max_message_size; buffer_age_limit; handshake_timeout; heartbeat_config } in 30 | Sexp.to_string (sexp_of_t t) 31 | ;; 32 | 33 | let%expect_test _ = 34 | let heartbeat_config = 35 | Rpc.Connection.Heartbeat_config.create 36 | ~timeout:Time_ns.Span.hour 37 | ~send_every:Time_ns.Span.minute 38 | () 39 | in 40 | let () = print_string (to_string_for_env_var ()) in 41 | [%expect {| () |}]; 42 | let () = print_string (to_string_for_env_var 
~heartbeat_config ()) in 43 | [%expect {| ((heartbeat_config((timeout 1h)(send_every 1m)))) |}]; 44 | return () 45 | ;; 46 | 47 | let create_with_env_override' 48 | ~env_var 49 | ~max_message_size 50 | ~buffer_age_limit 51 | ~handshake_timeout 52 | ~heartbeat_config 53 | = 54 | match Sys.getenv env_var with 55 | | None -> { max_message_size; buffer_age_limit; handshake_timeout; heartbeat_config } 56 | | Some value -> 57 | let from_env = [%of_sexp: t] (Sexp.of_string value) in 58 | { max_message_size = Option.first_some from_env.max_message_size max_message_size 59 | ; buffer_age_limit = Option.first_some from_env.buffer_age_limit buffer_age_limit 60 | ; handshake_timeout = Option.first_some from_env.handshake_timeout handshake_timeout 61 | ; heartbeat_config = Option.first_some from_env.heartbeat_config heartbeat_config 62 | } 63 | ;; 64 | 65 | let create_with_env_override = create_with_env_override' ~env_var 66 | 67 | module For_internal_testing = struct 68 | let create_with_env_override = create_with_env_override' 69 | end 70 | -------------------------------------------------------------------------------- /example/add_numbers.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Add_numbers_map_function = Rpc_parallel.Map_reduce.Make_map_function (struct 5 | module Input = struct 6 | type t = int * int [@@deriving bin_io] 7 | end 8 | 9 | module Output = struct 10 | type t = int * int [@@deriving bin_io] 11 | end 12 | 13 | let rec spin ntimes = 14 | match ntimes with 15 | | 0 -> () 16 | | _ -> spin (ntimes - 1) 17 | ;; 18 | 19 | let map (index, max) = 20 | spin 100000000; 21 | (* Waste some CPU time *) 22 | return (index, List.fold ~init:0 ~f:( + ) (List.init max ~f:Fn.id)) 23 | ;; 24 | end) 25 | 26 | let command = 27 | Command.async_spec 28 | ~summary:"Add numbers in parallel" 29 | Command.Spec.( 30 | empty 31 | +> flag "max" (required int) ~doc:" Number to add up to" 32 | +> flag 33 | 
"ntimes" 34 | (optional_with_default 1000 int) 35 | ~doc:" Number of times to repeat the operation" 36 | +> flag "nworkers" (optional_with_default 4 int) ~doc:" Number of workers" 37 | +> flag "ordered" (optional_with_default true bool) ~doc:" Ordered or unordered") 38 | (fun max ntimes nworkers ordered () -> 39 | let list = Pipe.of_list (List.init ntimes ~f:(fun i -> i, max)) in 40 | let config = 41 | Rpc_parallel.Map_reduce.Config.create 42 | ~local:nworkers 43 | ~redirect_stderr:`Dev_null 44 | ~redirect_stdout:`Dev_null 45 | () 46 | in 47 | if ordered 48 | then ( 49 | let%bind output_reader = 50 | Rpc_parallel.Map_reduce.map 51 | config 52 | list 53 | ~m:(module Add_numbers_map_function) 54 | ~param:() 55 | in 56 | Pipe.iter output_reader ~f:(fun (index, sum) -> 57 | printf "%i: %i\n" index sum; 58 | Deferred.unit)) 59 | else ( 60 | let%bind output_reader = 61 | Rpc_parallel.Map_reduce.map_unordered 62 | config 63 | list 64 | ~m:(module Add_numbers_map_function) 65 | ~param:() 66 | in 67 | Pipe.iter output_reader ~f:(fun ((index, sum), mf_index) -> 68 | assert (index = mf_index); 69 | printf "%i:%i: %i\n" mf_index index sum; 70 | Deferred.unit))) 71 | ~behave_nicely_in_pipeline:false 72 | ;; 73 | 74 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 75 | -------------------------------------------------------------------------------- /example/alternative_init.ml: -------------------------------------------------------------------------------- 1 | (* Note: this example uses deprecated initialization functions which are only available in 2 | the rpc_parallel_unauthenticated library, and not rpc_parallel_krb *) 3 | open Core 4 | open Async 5 | 6 | module Worker = struct 7 | module T = struct 8 | type 'worker functions = unit 9 | 10 | module Worker_state = struct 11 | type init_arg = unit [@@deriving bin_io] 12 | type t = unit 13 | end 14 | 15 | module Connection_state = struct 16 | type init_arg = unit [@@deriving bin_io] 17 | type t = 
unit 18 | end 19 | 20 | module Functions 21 | (_ : Rpc_parallel.Creator 22 | with type worker_state := Worker_state.t 23 | and type connection_state := Connection_state.t) = 24 | struct 25 | let functions = () 26 | let init_worker_state () = Deferred.unit 27 | let init_connection_state ~connection:_ ~worker_state:_ = return 28 | end 29 | end 30 | 31 | include Rpc_parallel.Make (T) 32 | end 33 | 34 | let worker_command = 35 | let open Command.Let_syntax in 36 | Command.Staged.async 37 | ~summary:"for internal use" 38 | (let%map_open () = return () in 39 | fun () -> 40 | let worker_env = Rpc_parallel.Expert.worker_init_before_async_exn () in 41 | stage (fun `Scheduler_started -> 42 | Rpc_parallel_unauthenticated.Expert.start_worker_server_exn worker_env; 43 | Deferred.never ())) 44 | ~behave_nicely_in_pipeline:false 45 | ;; 46 | 47 | let main_command = 48 | let open Command.Let_syntax in 49 | Command.async 50 | ~summary:"start the master and spawn a worker" 51 | (let%map_open () = return () in 52 | fun () -> 53 | let open Deferred.Let_syntax in 54 | Rpc_parallel_unauthenticated.Expert.start_master_server_exn 55 | ~worker_command_args:[ "worker" ] 56 | (); 57 | let%map (_connection : Worker.Connection.t) = 58 | Worker.spawn_exn 59 | ~on_failure:Error.raise 60 | ~shutdown_on:Connection_closed 61 | ~connection_state_init_arg:() 62 | ~redirect_stdout:`Dev_null 63 | ~redirect_stderr:`Dev_null 64 | () 65 | in 66 | printf "Success.\n") 67 | ~behave_nicely_in_pipeline:false 68 | ;; 69 | 70 | let command = 71 | Command.group 72 | ~summary:"Using Rpc_parallel.Expert" 73 | [ "worker", worker_command; "main", main_command ] 74 | ;; 75 | 76 | let () = Command_unix.run command 77 | -------------------------------------------------------------------------------- /src/utils.mli: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Worker_id : sig 5 | type t [@@deriving bin_io, sexp] 6 | 7 | val create : unit -> 
t 8 | 9 | include Identifiable.S with type t := t 10 | end 11 | 12 | module Worker_type_id : Unique_id 13 | 14 | (* The internal connection state we keep with every connection to a worker. 15 | 16 | [conn_state] is the user supplied connection state (well there is really a [Set_once.t] 17 | in there as well) 18 | 19 | [worker_state] is the worker state associated with the given worker. A reference is 20 | stored here to gracefully handle the cleanup needed when [close_server] is called when 21 | there are still open connections. 22 | 23 | [worker_id] is the id of the worker server that this connection is to. This is 24 | needed because there can be multiple instances of a given worker server in a single 25 | process. 26 | 27 | The [Rpc.Connection.t] is the underlying connection that has this state *) 28 | module Internal_connection_state : sig 29 | type ('worker_state, 'conn_state) t1 = 30 | { worker_state : 'worker_state 31 | ; conn_state : 'conn_state 32 | ; worker_id : Worker_id.t 33 | } 34 | 35 | type ('worker_state, 'conn_state) t = 36 | Rpc.Connection.t * ('worker_state, 'conn_state) t1 Set_once.t 37 | end 38 | 39 | (* Like [Monitor.try_with], but raise any additional exceptions (raised after [f ()] has 40 | been determined) to the specified monitor. *) 41 | 42 | val try_within : monitor:Monitor.t -> (unit -> 'a Deferred.t) -> 'a Or_error.t Deferred.t 43 | 44 | (* Any exceptions that are raised before [f ()] is determined will be raised to the 45 | current monitor. 
Exceptions raised after [f ()] is determined will be raised to the 46 | passed in monitor *) 47 | 48 | val try_within_exn : monitor:Monitor.t -> (unit -> 'a Deferred.t) -> 'a Deferred.t 49 | 50 | (* Get an md5 hash of the currently running binary *) 51 | 52 | val our_md5 : unit -> string Or_error.t Deferred.t 53 | 54 | (* Determine in what context the current executable is running *) 55 | 56 | val whoami : unit -> [ `Worker | `Master ] 57 | 58 | (* Clear any environment variables that this library has set *) 59 | 60 | val clear_env : unit -> unit 61 | 62 | (* Create an environment for a spawned worker to run in *) 63 | 64 | val create_worker_env : extra:(string * string) list -> (string * string) list Or_error.t 65 | 66 | val to_daemon_fd_redirection 67 | : [ `Dev_null | `File_append of string | `File_truncate of string ] 68 | -> Daemon.Fd_redirection.t 69 | -------------------------------------------------------------------------------- /example/spawn_in_foreground.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Worker = struct 5 | module T = struct 6 | type 'worker functions = { print : ('worker, string, unit) Rpc_parallel.Function.t } 7 | 8 | module Worker_state = struct 9 | type init_arg = unit [@@deriving bin_io] 10 | type t = unit 11 | end 12 | 13 | module Connection_state = struct 14 | type init_arg = unit [@@deriving bin_io] 15 | type t = unit 16 | end 17 | 18 | module Functions 19 | (C : Rpc_parallel.Creator 20 | with type worker_state := Worker_state.t 21 | and type connection_state := Connection_state.t) = 22 | struct 23 | let print_impl ~worker_state:() ~conn_state:() string = 24 | printf "%s\n" string; 25 | return () 26 | ;; 27 | 28 | let print = 29 | C.create_rpc ~f:print_impl ~bin_input:String.bin_t ~bin_output:Unit.bin_t () 30 | ;; 31 | 32 | let functions = { print } 33 | let init_worker_state () = Deferred.unit 34 | let init_connection_state ~connection:_ 
~worker_state:_ = return 35 | end 36 | end 37 | 38 | include Rpc_parallel.Make (T) 39 | end 40 | 41 | let main () = 42 | Worker.spawn_in_foreground 43 | ~shutdown_on:Connection_closed 44 | ~connection_state_init_arg:() 45 | ~on_failure:Error.raise 46 | () 47 | >>=? fun (conn, process) -> 48 | Worker.Connection.run conn ~f:Worker.functions.print ~arg:"HELLO" 49 | >>=? fun () -> 50 | Worker.Connection.run conn ~f:Worker.functions.print ~arg:"HELLO2" 51 | >>=? fun () -> 52 | let%bind () = Worker.Connection.close conn in 53 | let%bind (_ : Unix.Exit_or_signal.t) = Process.wait process in 54 | let worker_stderr = Reader.lines (Process.stderr process) in 55 | let worker_stdout = Reader.lines (Process.stdout process) in 56 | let%bind () = 57 | Pipe.iter worker_stderr ~f:(fun line -> 58 | let line' = sprintf "[WORKER STDERR]: %s\n" line in 59 | Writer.write (Lazy.force Writer.stdout) line' |> return) 60 | in 61 | let%bind () = 62 | Pipe.iter worker_stdout ~f:(fun line -> 63 | let line' = sprintf "[WORKER STDOUT]: %s\n" line in 64 | Writer.write (Lazy.force Writer.stdout) line' |> return) 65 | in 66 | Deferred.Or_error.ok_unit 67 | ;; 68 | 69 | let command = 70 | Command.async_spec_or_error 71 | ~summary:"Example of spawn_in_foreground" 72 | Command.Spec.empty 73 | main 74 | ~behave_nicely_in_pipeline:false 75 | ;; 76 | 77 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 78 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | This repository contains open source software that is developed and 2 | maintained by [Jane Street][js]. 3 | 4 | Contributions to this project are welcome and should be submitted via 5 | GitHub pull requests. 6 | 7 | Signing contributions 8 | --------------------- 9 | 10 | We require that you sign your contributions. 
Your signature certifies 11 | that you wrote the patch or otherwise have the right to pass it on as 12 | an open-source patch. The rules are pretty simple: if you can certify 13 | the below (from [developercertificate.org][dco]): 14 | 15 | ``` 16 | Developer Certificate of Origin 17 | Version 1.1 18 | 19 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 20 | 1 Letterman Drive 21 | Suite D4700 22 | San Francisco, CA, 94129 23 | 24 | Everyone is permitted to copy and distribute verbatim copies of this 25 | license document, but changing it is not allowed. 26 | 27 | 28 | Developer's Certificate of Origin 1.1 29 | 30 | By making a contribution to this project, I certify that: 31 | 32 | (a) The contribution was created in whole or in part by me and I 33 | have the right to submit it under the open source license 34 | indicated in the file; or 35 | 36 | (b) The contribution is based upon previous work that, to the best 37 | of my knowledge, is covered under an appropriate open source 38 | license and I have the right under that license to submit that 39 | work with modifications, whether created in whole or in part 40 | by me, under the same open source license (unless I am 41 | permitted to submit under a different license), as indicated 42 | in the file; or 43 | 44 | (c) The contribution was provided directly to me by some other 45 | person who certified (a), (b) or (c) and I have not modified 46 | it. 47 | 48 | (d) I understand and agree that this project and the contribution 49 | are public and that a record of the contribution (including all 50 | personal information I submit with it, including my sign-off) is 51 | maintained indefinitely and may be redistributed consistent with 52 | this project or the open source license(s) involved. 53 | ``` 54 | 55 | Then you just add a line to every git commit message: 56 | 57 | ``` 58 | Signed-off-by: Joe Smith 59 | ``` 60 | 61 | Use your real name (sorry, no pseudonyms or anonymous contributions).
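For instance, the whole flow can be exercised in a throwaway repository (the name and email below are placeholders):

```shell
# Demo in a throwaway repository; "Joe Smith" and the email are placeholders.
cd "$(mktemp -d)"
git init -q
git config user.name "Joe Smith"
git config user.email "joe.smith@example.com"
echo hello > file.txt
git add file.txt
# -s appends the Signed-off-by trailer built from user.name/user.email:
git commit -q -s -m "Add file"
# The commit message now ends with:
#   Signed-off-by: Joe Smith <joe.smith@example.com>
git log -1 --format=%B
```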
62 | 63 | If you set your `user.name` and `user.email` git configs, you can sign 64 | your commit automatically with git commit -s. 65 | 66 | [dco]: http://developercertificate.org/ 67 | [js]: https://opensource.janestreet.com/ 68 | -------------------------------------------------------------------------------- /expect_test/sum_worker.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | (* A bare bones use case of the [Rpc_parallel] library. This demonstrates 5 | how to define a simple worker type that implements some functions. The master then 6 | spawns a worker of this type and calls a function to run on this worker *) 7 | 8 | module T = struct 9 | (* A [Sum_worker.worker] implements a single function [sum : int -> int]. Because this 10 | function is parameterized on a ['worker], it can only be run on workers of the 11 | [Sum_worker.worker] type. *) 12 | type 'worker functions = { sum : ('worker, int, int) Rpc_parallel.Function.t } 13 | 14 | (* No initialization upon spawn *) 15 | module Worker_state = struct 16 | type init_arg = unit [@@deriving bin_io] 17 | type t = unit 18 | end 19 | 20 | module Connection_state = struct 21 | type init_arg = unit [@@deriving bin_io] 22 | type t = unit 23 | end 24 | 25 | module Functions 26 | (C : Rpc_parallel.Creator 27 | with type worker_state := Worker_state.t 28 | and type connection_state := Connection_state.t) = 29 | struct 30 | (* Define the implementation for the [sum] function *) 31 | let sum_impl ~worker_state:() ~conn_state:() arg = 32 | let sum = List.fold ~init:0 ~f:( + ) (List.init arg ~f:Fn.id) in 33 | [%log.global.info_format "Sum_worker.sum: %i\n" sum]; 34 | return sum 35 | ;; 36 | 37 | (* Create a [Rpc_parallel.Function.t] from the above implementation *) 38 | let sum = C.create_rpc ~f:sum_impl ~bin_input:Int.bin_t ~bin_output:Int.bin_t () 39 | 40 | (* This type must match the ['worker functions] type defined above *) 41 | let functions = { 
sum } 42 | let init_worker_state () = Deferred.unit 43 | let init_connection_state ~connection:_ ~worker_state:_ = return 44 | end 45 | end 46 | 47 | include Rpc_parallel.Make (T) 48 | 49 | let main max = 50 | let%bind conn = 51 | (* This is the main function called in the master. Spawn a local worker and run 52 | the [sum] function on this worker *) 53 | spawn 54 | ~on_failure:Error.raise 55 | ~shutdown_on:Connection_closed 56 | ~redirect_stdout:`Dev_null 57 | ~redirect_stderr:`Dev_null 58 | ~connection_state_init_arg:() 59 | () 60 | >>| ok_exn 61 | in 62 | Connection.run_exn conn ~f:functions.sum ~arg:max 63 | ;; 64 | 65 | let () = Rpc_parallel_krb_public.For_testing.initialize [%here] 66 | 67 | let%expect_test "" = 68 | let%bind res = main 42 in 69 | printf "%d\n" res; 70 | [%expect {| 861 |}]; 71 | return () 72 | ;; 73 | -------------------------------------------------------------------------------- /example/async_log.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Worker = struct 5 | module T = struct 6 | type 'worker functions = 7 | { write_to_log_global : ('worker, unit, unit) Rpc_parallel.Function.t } 8 | 9 | module Worker_state = struct 10 | type init_arg = unit [@@deriving bin_io] 11 | type t = unit 12 | end 13 | 14 | module Connection_state = struct 15 | type init_arg = unit [@@deriving bin_io] 16 | type t = unit 17 | end 18 | 19 | module Functions (C : Rpc_parallel.Creator) = struct 20 | let write_to_log_global = 21 | C.create_rpc 22 | ~bin_input:Unit.bin_t 23 | ~bin_output:Unit.bin_t 24 | ~f:(fun ~worker_state:_ ~conn_state:_ () -> 25 | [%log.global.info_string "worker log message"]; 26 | Log.Global.flushed ()) 27 | () 28 | ;; 29 | 30 | let functions = { write_to_log_global } 31 | let init_worker_state () = Deferred.unit 32 | let init_connection_state ~connection:_ ~worker_state:_ = return 33 | end 34 | end 35 | 36 | include Rpc_parallel.Make (T) 37 | end 38 | 39 | let 
expect_log_entry readers = 40 | Deferred.List.iter ~how:`Sequential readers ~f:(fun reader -> 41 | match%map Pipe.read reader with 42 | | `Eof -> failwith "Unexpected EOF" 43 | | `Ok entry -> printf !"%{sexp:Log.Message.Stable.V2.t}\n" entry) 44 | ;; 45 | 46 | let main () = 47 | let%bind worker = 48 | Worker.spawn_exn 49 | ~on_failure:Error.raise 50 | ~shutdown_on:Heartbeater_connection_timeout 51 | ~redirect_stdout:`Dev_null 52 | ~redirect_stderr:`Dev_null 53 | () 54 | in 55 | let%bind conn = Worker.Connection.client_exn worker () in 56 | let get_log_reader () = 57 | Worker.Connection.run_exn conn ~f:Rpc_parallel.Function.async_log ~arg:() 58 | in 59 | let worker_write_to_log_global () = 60 | Worker.Connection.run_exn conn ~f:Worker.functions.write_to_log_global ~arg:() 61 | in 62 | let%bind log_reader1 = get_log_reader () in 63 | let%bind log_reader2 = get_log_reader () in 64 | let%bind () = worker_write_to_log_global () in 65 | let%bind () = expect_log_entry [ log_reader1; log_reader2 ] in 66 | Pipe.close_read log_reader1; 67 | let%bind () = worker_write_to_log_global () in 68 | expect_log_entry [ log_reader2 ] 69 | ;; 70 | 71 | let command = 72 | Command.async 73 | ~summary:"Using the built in log redirection function" 74 | (Command.Param.return main) 75 | ~behave_nicely_in_pipeline:false 76 | ;; 77 | 78 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 79 | -------------------------------------------------------------------------------- /example/serve.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Worker = struct 5 | module T = struct 6 | type 'worker functions = { inc : ('worker, unit, int) Rpc_parallel.Function.t } 7 | 8 | module Worker_state = struct 9 | type init_arg = int [@@deriving bin_io] 10 | type t = int ref 11 | end 12 | 13 | module Connection_state = struct 14 | type init_arg = unit [@@deriving bin_io] 15 | type t = unit 16 | end 17 | 18 | 
module Functions 19 | (C : Rpc_parallel.Creator 20 | with type worker_state := Worker_state.t 21 | and type connection_state := Connection_state.t) = 22 | struct 23 | let inc = 24 | C.create_rpc 25 | ~f:(fun ~worker_state ~conn_state:() () -> 26 | incr worker_state; 27 | return !worker_state) 28 | ~bin_input:Unit.bin_t 29 | ~bin_output:Int.bin_t 30 | () 31 | ;; 32 | 33 | let functions = { inc } 34 | let init_worker_state arg = return (ref arg) 35 | let init_connection_state ~connection:_ ~worker_state:_ = return 36 | end 37 | end 38 | 39 | include Rpc_parallel.Make (T) 40 | end 41 | 42 | let main () = 43 | let%map () = 44 | Deferred.List.iter ~how:`Parallel (List.init 10 ~f:Fn.id) ~f:(fun i -> 45 | let%bind worker = Worker.serve i in 46 | let%bind connection1 = Worker.Connection.client_exn worker () in 47 | let%bind connection2 = Worker.Connection.client_exn worker () in 48 | let%bind i_plus_one = 49 | Worker.Connection.run_exn connection1 ~f:Worker.functions.inc ~arg:() 50 | in 51 | let%bind i_plus_two = 52 | Worker.Connection.run_exn connection2 ~f:Worker.functions.inc ~arg:() 53 | in 54 | assert (i + 1 = i_plus_one); 55 | assert (i + 2 = i_plus_two); 56 | let%bind () = 57 | Worker.Connection.run_exn 58 | connection1 59 | ~f:Rpc_parallel.Function.close_server 60 | ~arg:() 61 | in 62 | (* Ensure we can't connect to this server anymore *) 63 | match%bind Worker.Connection.client worker () with 64 | | Ok _ -> failwith "Should not have been able to connect" 65 | | Error _ -> 66 | (* Ensure existing connections still work *) 67 | let%map i_plus_three = 68 | Worker.Connection.run_exn connection1 ~f:Worker.functions.inc ~arg:() 69 | in 70 | assert (i + 3 = i_plus_three)) 71 | in 72 | printf "Success.\n" 73 | ;; 74 | 75 | let command = 76 | Command.async_spec 77 | ~summary:"Use of the in process [serve] functionality" 78 | Command.Spec.empty 79 | main 80 | ~behave_nicely_in_pipeline:false 81 | ;; 82 | 83 | let () = Rpc_parallel_krb_public.start_app 
~krb_mode:For_unit_test command 84 | -------------------------------------------------------------------------------- /doc/migrating-off-managed.org: -------------------------------------------------------------------------------- 1 | #+TITLE: Migrating off Rpc\_parallel.Managed 2 | 3 | * What is Rpc\_parallel.Managed? 4 | 5 | This module exposes a =Make= functor that is built on top of 6 | =Rpc_parallel.Make=. The primary difference is that the interface 7 | doesn't expose a =Connection.t=. Under the hood, =spawn= will spawn 8 | a worker and make a connection to the worker. This connection is 9 | stored in a hash table. When this connection is closed, the 10 | connection is removed from the table. =run= looks for a cached 11 | connection, reconnecting if there is none. 12 | 13 | * Why you shouldn't use this 14 | 15 | The semantics of the reconnect and error reporting are not 16 | well-defined. 17 | 18 | Regarding reconnect, the library will attempt to reconnect, but it 19 | won't attempt to respawn. Unless your worker is intentionally 20 | closing connections, it is most likely the case that a connection 21 | closure is indicative of a problem that would require a respawn 22 | (e.g. the worker actually exited). 23 | 24 | Regarding error reporting, there is an exposed =on_failure= callback 25 | that is passed through as an argument to =on_failure= for the 26 | unmanaged worker. In addition, there is an 27 | =on_connection_to_worker_closed= callback used to report when the 28 | first connection is closed. Subsequent connection closures don't 29 | trigger the callback. Some classes of errors might result in 30 | =on_failure= and =on_connection_to_worker_closed= both being called 31 | while others result in just one of them being called. 32 | 33 | This module was created primarily for backwards compatibility with 34 | code that used earlier versions of =Rpc_parallel=. New code should 35 | use =Rpc_parallel.Make=. 
36 | 37 | * How to migrate 38 | 39 | If your code is never reconnecting to a spawned worker, you can 40 | safely use =Rpc_parallel.Make= and pass through 41 | =~shutdown_on:Connection_closed= to =spawn=. This will give you back 42 | a =Connection.t=, so it isn't even possible for you to reconnect. 43 | 44 | If you don't know whether you are reconnecting, you can add some logging 45 | to the =on_connection_to_worker_closed= callback that you supply to the managed worker's 46 | =spawn= function. In general, if you aren't storing your =Worker.t= 47 | anywhere and are immediately calling =run= after =spawn=, you are 48 | almost certainly not reconnecting. 49 | 50 | If you are relying on reconnect, we don't have a great alternative 51 | right now. We recommend you think carefully about the semantics you 52 | want for your reconnect logic and code something up. If there is 53 | enough desire, we could add some library support. From looking 54 | through our code base, very few users (if any!) of 55 | =Rpc_parallel.Managed= actually reconnect.
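For the common spawn-once, never-reconnect case, the unmanaged equivalent looks roughly like this. This is a sketch only: =Worker= is assumed to be the module produced by =Rpc_parallel.Make (T)= for your worker spec (as in the examples directory), and =ping= stands in for whatever function your managed worker exposed.

```ocaml
open Core
open Async

let migrated_run arg =
  (* [spawn_exn] with [~shutdown_on:Connection_closed] hands back a
     [Connection.t] directly. The worker shuts itself down when that
     connection closes, so there is no cached-connection table and no
     implicit reconnect, unlike [Managed.run]. *)
  let%bind conn =
    Worker.spawn_exn
      ~on_failure:Error.raise
      ~shutdown_on:Connection_closed
      ~connection_state_init_arg:()
      ~redirect_stdout:`Dev_null
      ~redirect_stderr:`Dev_null
      ()
  in
  Worker.Connection.run_exn conn ~f:Worker.functions.ping ~arg
```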
56 | -------------------------------------------------------------------------------- /test/wrap_test.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Worker = struct 5 | module T = struct 6 | type 'worker functions = { ping : ('worker, unit, unit) Rpc_parallel.Function.t } 7 | 8 | module Worker_state = struct 9 | type init_arg = unit [@@deriving bin_io] 10 | type t = unit 11 | end 12 | 13 | module Connection_state = struct 14 | type init_arg = unit [@@deriving bin_io] 15 | type t = unit 16 | end 17 | 18 | module Functions 19 | (C : Rpc_parallel.Creator 20 | with type worker_state := Worker_state.t 21 | and type connection_state := Connection_state.t) = 22 | struct 23 | let ping = 24 | C.create_rpc 25 | ~f:(fun ~worker_state:() ~conn_state:() () -> return ()) 26 | ~bin_input:Unit.bin_t 27 | ~bin_output:Unit.bin_t 28 | () 29 | ;; 30 | 31 | let functions = { ping } 32 | let init_worker_state () = Deferred.unit 33 | let init_connection_state ~connection:_ ~worker_state:_ = return 34 | end 35 | end 36 | 37 | include Rpc_parallel.Make (T) 38 | end 39 | 40 | let main ~host ~wrapper = 41 | let open Deferred.Or_error.Let_syntax in 42 | let executable_dir = Filename.temp_dir_name in 43 | let%bind remote_exec = 44 | Rpc_parallel.Remote_executable.copy_to_host 45 | ~strict_host_key_checking:`No 46 | ~executable_dir 47 | host 48 | in 49 | let how = 50 | Rpc_parallel.How_to_run.remote remote_exec 51 | |> Rpc_parallel.How_to_run.wrap ~f:(fun { prog; args } -> 52 | { prog = wrapper; args = prog :: args }) 53 | in 54 | let%bind conn, process = 55 | Worker.spawn_in_foreground 56 | ~how 57 | ~shutdown_on:Connection_closed 58 | ~connection_state_init_arg:() 59 | ~on_failure:Error.raise 60 | () 61 | in 62 | let%bind () = Worker.Connection.run conn ~f:Worker.functions.ping ~arg:() in 63 | print_endline "Worker successfully started"; 64 | let%bind () = Deferred.ok (Worker.Connection.close conn) in 65 | 
let%bind (_ : Unix.Exit_or_signal.t) = Deferred.ok (Process.wait process) in 66 | let%bind () = Rpc_parallel.Remote_executable.delete remote_exec in 67 | let worker_stdout = Reader.lines (Process.stdout process) in 68 | Pipe.iter_without_pushback worker_stdout ~f:(fun line -> 69 | let line' = sprintf "[WORKER STDOUT]: %s\n" line in 70 | Writer.write (Lazy.force Writer.stdout) line') 71 | |> Deferred.ok 72 | ;; 73 | 74 | let () = 75 | Command.async_or_error 76 | ~summary:"test using custom run exec_policy" 77 | (let%map_open.Command host = flag "host" (required string) ~doc:" host to connect to" 78 | and wrapper = flag "wrapper" (required string) ~doc:" wrapper command" in 79 | fun () -> main ~host ~wrapper) 80 | ~behave_nicely_in_pipeline:false 81 | |> Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test 82 | ;; 83 | -------------------------------------------------------------------------------- /example/reverse_pipe.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Shard = struct 5 | module T = struct 6 | type 'worker functions = 7 | ('worker, int * int Pipe.Reader.t, string list) Rpc_parallel.Function.t 8 | 9 | module Worker_state = struct 10 | type t = int 11 | type init_arg = int [@@deriving bin_io] 12 | end 13 | 14 | module Connection_state = struct 15 | type t = unit 16 | type init_arg = unit [@@deriving bin_io] 17 | end 18 | 19 | module Functions 20 | (Creator : Rpc_parallel.Creator 21 | with type worker_state = Worker_state.t 22 | and type connection_state = Connection_state.t) = 23 | struct 24 | let functions = 25 | Creator.create_reverse_pipe 26 | ~bin_query:Int.bin_t 27 | ~bin_update:Int.bin_t 28 | ~bin_response:(List.bin_t String.bin_t) 29 | ~f:(fun ~worker_state:id ~conn_state:() prefix numbers -> 30 | Pipe.fold_without_pushback numbers ~init:[] ~f:(fun acc number -> 31 | sprintf "worker %d got %d:%d" id prefix number :: acc)) 32 | () 33 | ;; 34 | 35 | let 
init_worker_state = return 36 | let init_connection_state ~connection:_ ~worker_state:_ () = Deferred.unit 37 | end 38 | end 39 | 40 | include T 41 | include Rpc_parallel.Make (T) 42 | end 43 | 44 | let main () = 45 | let shards = 10 in 46 | let%bind connections = 47 | Array.init shards ~f:(fun id -> 48 | Shard.spawn_exn 49 | ~shutdown_on:Connection_closed 50 | ~redirect_stdout:`Dev_null 51 | ~redirect_stderr:`Dev_null 52 | ~on_failure:Error.raise 53 | ~connection_state_init_arg:() 54 | id) 55 | |> Deferred.Array.all 56 | in 57 | let readers = 58 | let readers, writers = 59 | Array.init shards ~f:(fun (_ : int) -> Pipe.create ()) |> Array.unzip 60 | in 61 | let write_everything = 62 | let%map () = 63 | Sequence.init 1_000 ~f:Fn.id 64 | |> Sequence.delayed_fold 65 | ~init:() 66 | ~f:(fun () i ~k -> Pipe.write writers.(i % shards) i >>= k) 67 | ~finish:return 68 | in 69 | Array.iter writers ~f:Pipe.close 70 | in 71 | don't_wait_for write_everything; 72 | readers 73 | in 74 | let%bind () = 75 | Array.mapi connections ~f:(fun i connection -> 76 | Shard.Connection.run_exn connection ~f:Shard.functions ~arg:(i, readers.(i))) 77 | |> Deferred.Array.all 78 | >>| printf !"%{sexp: string list array}\n" 79 | in 80 | Array.map connections ~f:Shard.Connection.close |> Deferred.Array.all_unit 81 | ;; 82 | 83 | let () = 84 | Rpc_parallel_krb_public.start_app 85 | ~krb_mode:For_unit_test 86 | (Command.async 87 | ~summary:"Demonstrate using Rpc_parallel with reverse pipes" 88 | (Command.Param.return main) 89 | ~behave_nicely_in_pipeline:false) 90 | ;; 91 | -------------------------------------------------------------------------------- /src/utils.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Poly 3 | open Async 4 | 5 | module Worker_id = struct 6 | let create = Uuid_unix.create 7 | 8 | (* If we do not use the stable sexp serialization, when running 9 | inline tests, we will create UUIDs that fail tests *) 10 | module T = 
Uuid.Stable.V1 11 | 12 | type t = T.t [@@deriving sexp, bin_io] 13 | 14 | include Comparable.Make_binable (T) 15 | include Hashable.Make_binable (T) 16 | include Sexpable.To_stringable (T) 17 | 18 | let pp fmt t = String.pp fmt (Sexp.to_string ([%sexp_of: t] t)) 19 | end 20 | 21 | module Worker_type_id = Unique_id.Int () 22 | 23 | module Internal_connection_state = struct 24 | type ('worker_state, 'conn_state) t1 = 25 | { worker_state : 'worker_state 26 | ; conn_state : 'conn_state 27 | ; worker_id : Worker_id.t 28 | } 29 | 30 | type ('worker_state, 'conn_state) t = 31 | Rpc.Connection.t * ('worker_state, 'conn_state) t1 Set_once.t 32 | end 33 | 34 | let try_within ~monitor f = 35 | let ivar = Ivar.create () in 36 | Scheduler.within ~monitor (fun () -> 37 | Monitor.try_with ~run:`Now ~rest:`Raise f 38 | >>> fun r -> Ivar.fill_exn ivar (Result.map_error r ~f:Error.of_exn)); 39 | Ivar.read ivar 40 | ;; 41 | 42 | let try_within_exn ~monitor f = 43 | match%map try_within ~monitor f with 44 | | Ok x -> x 45 | | Error e -> Error.raise e 46 | ;; 47 | 48 | let our_md5 = 49 | let our_md5_lazy = 50 | lazy 51 | (Process.run ~prog:"md5sum" ~args:[ Current_exe.get_path () ] () 52 | >>|? fun our_md5 -> 53 | let our_md5, _ = String.lsplit2_exn ~on:' ' our_md5 in 54 | our_md5) 55 | in 56 | fun () -> Lazy.force our_md5_lazy 57 | ;; 58 | 59 | let is_child_env_var = "ASYNC_PARALLEL_IS_CHILD_MACHINE" 60 | 61 | let whoami () = 62 | match Sys.getenv is_child_env_var with 63 | | Some _ -> `Worker 64 | | None -> `Master 65 | ;; 66 | 67 | let clear_env () = (Unix.unsetenv [@ocaml.alert "-unsafe_multidomain"]) is_child_env_var 68 | 69 | let validate_env env = 70 | match List.find env ~f:(fun (key, _) -> key = is_child_env_var) with 71 | | Some e -> 72 | Or_error.error 73 | "Environment variable conflicts with Rpc_parallel machinery" 74 | e 75 | [%sexp_of: string * string] 76 | | None -> Ok () 77 | ;; 78 | 79 | (* Don't run tests in the worker if we are running an expect test. 
A call to 80 | [Rpc_parallel.For_testing.initialize] will initialize the worker and start the 81 | Async scheduler. *) 82 | let force_drop_inline_test = 83 | if Core.am_running_test then [ "FORCE_DROP_INLINE_TEST", "" ] else [] 84 | ;; 85 | 86 | let create_worker_env ~extra = 87 | let open Or_error.Let_syntax in 88 | let%map () = validate_env extra in 89 | extra 90 | @ force_drop_inline_test 91 | @ For_testing_internal.worker_environment () 92 | @ [ is_child_env_var, "" ] 93 | ;; 94 | 95 | let to_daemon_fd_redirection = function 96 | | `Dev_null -> `Dev_null 97 | | `File_append s -> `File_append s 98 | | `File_truncate s -> `File_truncate s 99 | ;; 100 | -------------------------------------------------------------------------------- /src/remote_executable.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Poly 3 | open Async 4 | 5 | type 'a t = 6 | { host : string 7 | ; path : string 8 | ; host_key_checking : string list 9 | } 10 | [@@deriving fields ~getters] 11 | 12 | let hostkey_checking_options opt = 13 | match opt with 14 | | None -> [] 15 | (* Use ssh default *) 16 | | Some `Ask -> [ "-o"; "StrictHostKeyChecking=ask" ] 17 | | Some `No -> [ "-o"; "StrictHostKeyChecking=no" ] 18 | | Some `Yes -> [ "-o"; "StrictHostKeyChecking=yes" ] 19 | ;; 20 | 21 | let existing_on_host ~executable_path ?strict_host_key_checking host = 22 | { host 23 | ; path = executable_path 24 | ; host_key_checking = hostkey_checking_options strict_host_key_checking 25 | } 26 | ;; 27 | 28 | let copy_to_host ~executable_dir ?strict_host_key_checking host = 29 | let our_basename = Filename.basename Sys.executable_name in 30 | Process.run ~prog:"mktemp" ~args:[ "-u"; sprintf "%s.XXXXXXXX" our_basename ] () 31 | >>=? 
fun new_basename -> 32 | let options = hostkey_checking_options strict_host_key_checking in 33 | let path = String.strip (executable_dir ^/ new_basename) in 34 | Process.run 35 | ~prog:"scp" 36 | ~args:(options @ [ Current_exe.get_path (); sprintf "%s:%s" host path ]) 37 | () 38 | >>|? Fn.const { host; path; host_key_checking = options } 39 | ;; 40 | 41 | let delete executable = 42 | Process.run 43 | ~prog:"ssh" 44 | ~args:(executable.host_key_checking @ [ executable.host; "rm"; executable.path ]) 45 | () 46 | >>|? Fn.const () 47 | ;; 48 | 49 | let env_for_ssh env = 50 | let env = 51 | (* If we are running a test, we should propagate the relevant environment variable 52 | through ssh so the spawned workers know they are running a test. *) 53 | if am_running_test then ("TESTING_FRAMEWORK", "") :: env else env 54 | in 55 | let cheesy_escape str = Sexp.to_string (String.sexp_of_t str) in 56 | List.map env ~f:(fun (key, data) -> key ^ "=" ^ cheesy_escape data) 57 | ;; 58 | 59 | let run ?(assert_binary_hash = true) exec ~env ~args ~wrap = 60 | let%bind.Deferred.Or_error () = 61 | match assert_binary_hash with 62 | | false -> Deferred.Or_error.ok_unit 63 | | true -> 64 | Utils.our_md5 () 65 | >>=? fun md5 -> 66 | Process.run 67 | ~prog:"ssh" 68 | ~args:(exec.host_key_checking @ [ exec.host; "md5sum"; exec.path ]) 69 | () 70 | >>=? fun remote_md5 -> 71 | (match String.lsplit2 ~on:' ' remote_md5 with 72 | | None -> 73 | Deferred.Or_error.errorf 74 | "Failed to compute an md5 checksum for %s:%s. Perhaps ssh received a signal? 
\ 75 | Output: %s" 76 | exec.host 77 | exec.path 78 | remote_md5 79 | | Some (remote_md5, _) -> 80 | if md5 <> remote_md5 81 | then 82 | Deferred.Or_error.errorf 83 | "The remote executable %s:%s does not match the local executable" 84 | exec.host 85 | exec.path 86 | else Deferred.Or_error.ok_unit) 87 | in 88 | let { Prog_and_args.prog; args } = wrap { Prog_and_args.prog = exec.path; args } in 89 | Process.create 90 | ~prog:"ssh" 91 | ~args:(exec.host_key_checking @ [ exec.host ] @ env_for_ssh env @ [ prog ] @ args) 92 | () 93 | ;; 94 | -------------------------------------------------------------------------------- /src/managed.mli: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | (** This module is primarily meant for backwards compatibility with code that used earlier 5 | versions of [Rpc_parallel]. Please consider using the [Rpc_parallel.Make()] functor 6 | instead as the semantics are more transparent and intuitive. 7 | 8 | This functor keeps cached connections to workers and dispatches on these connections. 9 | The semantics of reconnect are not currently well-defined. If you expect connections 10 | to drop or want multiple connections to the same worker, using [Rpc_parallel.Make()] 11 | is probably the better choice. 
*) 12 | 13 | module type Worker = sig 14 | type t [@@deriving sexp_of] 15 | type unmanaged_t 16 | type 'a functions 17 | 18 | (** Accessor for the functions implemented by this worker type *) 19 | val functions : unmanaged_t functions 20 | 21 | type worker_state_init_arg 22 | type connection_state_init_arg 23 | 24 | module Id : Identifiable 25 | 26 | val id : t -> Id.t 27 | 28 | val spawn 29 | : ?how:How_to_run.t 30 | -> ?name:string 31 | -> ?env:(string * string) list 32 | -> ?connection_timeout:Time_float.Span.t 33 | -> ?cd:string (** default / *) 34 | -> ?umask:int (** defaults to use existing umask *) 35 | -> redirect_stdout:Fd_redirection.t 36 | -> redirect_stderr:Fd_redirection.t 37 | -> worker_state_init_arg 38 | -> connection_state_init_arg 39 | -> on_failure:(Error.t -> unit) (** See [on_failure] in parallel_intf.ml *) 40 | -> on_connection_to_worker_closed:(Error.t -> unit) 41 | (** Called when the connection to the spawned worker is closed. If a worker 42 | process terminates, both [on_failure] and [on_connection_to_worker_closed] 43 | might get called. *) 44 | -> t Or_error.t Deferred.t 45 | 46 | val spawn_exn 47 | : ?how:How_to_run.t 48 | -> ?name:string 49 | -> ?env:(string * string) list 50 | -> ?connection_timeout:Time_float.Span.t 51 | -> ?cd:string 52 | -> ?umask:int 53 | -> redirect_stdout:Fd_redirection.t 54 | -> redirect_stderr:Fd_redirection.t 55 | -> worker_state_init_arg 56 | -> connection_state_init_arg 57 | -> on_failure:(Error.t -> unit) 58 | -> on_connection_to_worker_closed:(Error.t -> unit) 59 | -> t Deferred.t 60 | 61 | (** [run t] and [run_exn t] will connect to [t] if there is not already a connection, 62 | but if there is currently a connection that has gone stale, they will fail with an 63 | error. Trying again will attempt a reconnection. 
*) 64 | val run 65 | : t 66 | -> f:(unmanaged_t, 'query, 'response) Parallel.Function.t 67 | -> arg:'query 68 | -> 'response Or_error.t Deferred.t 69 | 70 | val run_exn 71 | : t 72 | -> f:(unmanaged_t, 'query, 'response) Parallel.Function.t 73 | -> arg:'query 74 | -> 'response Deferred.t 75 | 76 | (** Using these functions will not result in [on_failure] reporting a closed connection, 77 | unlike running the [shutdown] function. *) 78 | val kill : t -> unit Or_error.t Deferred.t 79 | 80 | val kill_exn : t -> unit Deferred.t 81 | end 82 | 83 | module Make (S : Parallel.Worker_spec) : 84 | Worker 85 | with type 'a functions := 'a S.functions 86 | and type worker_state_init_arg := S.Worker_state.init_arg 87 | and type connection_state_init_arg := S.Connection_state.init_arg 88 | -------------------------------------------------------------------------------- /test/spawn_in_foreground_zombie.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Worker = struct 5 | module T = struct 6 | type 'worker functions = unit 7 | 8 | module Worker_state = struct 9 | type init_arg = unit [@@deriving bin_io] 10 | type t = unit 11 | end 12 | 13 | module Connection_state = struct 14 | type init_arg = unit [@@deriving bin_io] 15 | type t = unit 16 | end 17 | 18 | module Functions (_ : Rpc_parallel.Creator) = struct 19 | let functions = () 20 | let init_worker_state () = Deferred.unit 21 | 22 | let init_connection_state ~connection:_ ~worker_state:_ = 23 | raise_s [%message "[init_connection_state] failure"] 24 | ;; 25 | end 26 | end 27 | 28 | include Rpc_parallel.Make (T) 29 | end 30 | 31 | let spawn_in_foreground ?connection_timeout () = 32 | match%bind 33 | Worker.For_internal_testing.spawn_in_foreground 34 | ?connection_timeout 35 | ~connection_state_init_arg:() 36 | ~on_failure:Error.raise 37 | ~shutdown_on:Connection_closed 38 | () 39 | with 40 | | Ok (_, _) -> failwith "Unexpected success while spawning 
worker" 41 | | Error (_, `Worker_process worker_process) -> 42 | (match worker_process with 43 | | None -> Deferred.unit 44 | | Some exit_or_signal -> exit_or_signal >>| (ignore : Unix.Exit_or_signal.t -> unit)) 45 | ;; 46 | 47 | let pgrep_children ~pid (f : Procfs_async.Process.t -> bool) = 48 | let open Procfs_async in 49 | pgrep (fun process -> 50 | return ([%compare.equal: Pid.t option] process.stat.ppid (Some pid) && f process)) 51 | ;; 52 | 53 | let is_zombie process = 54 | process 55 | |> Procfs_async.Process.stat 56 | |> Procfs_async.Process.Stat.state 57 | |> Procfs_async.Process.Stat.State.equal Zombie 58 | ;; 59 | 60 | let wait_until_no_running_children ~pid = 61 | Deferred.repeat_until_finished () (fun () -> 62 | let%bind non_zombie_processes = 63 | pgrep_children ~pid (fun process -> not (is_zombie process)) 64 | in 65 | if List.is_empty non_zombie_processes 66 | then return (`Finished ()) 67 | else return (`Repeat ())) 68 | ;; 69 | 70 | let print_zombies ~pid = 71 | match%map pgrep_children ~pid is_zombie with 72 | | [] -> print_s [%message "No zombies"] 73 | | zombies -> print_s [%message "Zombies" (zombies : Procfs_async.Process.t list)] 74 | ;; 75 | 76 | let test ?connection_timeout () = 77 | let pid = Unix.getpid () in 78 | let%bind () = spawn_in_foreground ?connection_timeout () in 79 | let%bind () = wait_until_no_running_children ~pid in 80 | let%bind () = print_zombies ~pid in 81 | Deferred.unit 82 | ;; 83 | 84 | let main () = 85 | (* By setting [connection_timeout] to [Time.Span.zero], we force a failure in the part 86 | of the worker initialization where the master waits for the recently spawned worker 87 | to connect back. 
*) 88 | let%bind () = test ~connection_timeout:Time_float.Span.zero () in 89 | let%bind () = test () in 90 | Deferred.unit 91 | ;; 92 | 93 | let command = 94 | Command.async 95 | ~summary:"[spawn_in_foreground]: ensure no zombies" 96 | (Command.Param.return main) 97 | ~behave_nicely_in_pipeline:false 98 | ;; 99 | 100 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 101 | -------------------------------------------------------------------------------- /test/test-rpc-settings.t: -------------------------------------------------------------------------------- 1 | # we get a timeout because sleeping for 5 seconds exceeds the heartbeat timeout 2 | $ APP_RPC_SETTINGS_FOR_TEST="((heartbeat_config((timeout 3s)(send_every 1s))))" run_one_line test/timeouts.exe timeout -sleep-for 5 3 | ((rpc_error (Connection_closed (*"No heartbeats received*"* (glob) 4 | 5 | # we don't get a timeout because we are overriding the rpc settings with the RPC_PARALLEL_RPC_SETTINGS env var 6 | $ APP_RPC_SETTINGS_FOR_TEST="((heartbeat_config((timeout 3s)(send_every 1s))))" RPC_PARALLEL_RPC_SETTINGS="((heartbeat_config((timeout 8s)(send_every 2s))))" run_one_line test/timeouts.exe timeout -sleep-for 5 7 | unresponsive worker returned 8 | 9 | # In all the below tests, we expect the rpc settings to always align. The 10 | # APP_RPC_SETTINGS_FOR_TEST environment variable is used to supply arguments to 11 | # the top-level Rpc_parallel.start_app call. 
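The precedence the comment above describes — the `RPC_PARALLEL_RPC_SETTINGS` environment variable overrides the app-supplied settings field by field, with unset fields falling back to the app's values — can be sketched in plain OCaml. The record and `merge` function below are illustrative only, not the library's actual `Rpc_settings` implementation in src/rpc_settings.ml:

```ocaml
(* Illustrative sketch of per-field settings merging: a field set in the
   environment override wins; otherwise the app-supplied value is kept.
   Field names mirror the sexps in these tests but the type is hypothetical. *)
type settings =
  { max_message_size : int option
  ; handshake_timeout : string option
  }

let merge ~app ~env_override =
  (* prefer the env-supplied field when present *)
  let pick env_field app_field =
    match env_field with
    | Some _ -> env_field
    | None -> app_field
  in
  { max_message_size = pick env_override.max_message_size app.max_message_size
  ; handshake_timeout = pick env_override.handshake_timeout app.handshake_timeout
  }
```

For example, with an app setting of `handshake_timeout 1s, max_message_size 1500` and an env override of `handshake_timeout 2s`, the merged result keeps `max_message_size 1500` but takes the `2s` timeout — the same field-wise behavior the tests below check.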
12 | 13 | $ run test/timeouts.exe rpc-settings 14 | master : () 15 | spawned worker (client side) : () 16 | spawned worker (server side) : () 17 | served worker (client side) : () 18 | served worker (server side) : () 19 | 20 | $ APP_RPC_SETTINGS_FOR_TEST="((handshake_timeout 1s))" run test/timeouts.exe rpc-settings 21 | master : ((handshake_timeout 1s)) 22 | spawned worker (client side) : ((handshake_timeout 1s)) 23 | spawned worker (server side) : ((handshake_timeout 1s)) 24 | served worker (client side) : ((handshake_timeout 1s)) 25 | served worker (server side) : ((handshake_timeout 1s)) 26 | 27 | $ RPC_PARALLEL_RPC_SETTINGS="((handshake_timeout 2s))" run test/timeouts.exe rpc-settings 28 | master : ((handshake_timeout 2s)) 29 | spawned worker (client side) : ((handshake_timeout 2s)) 30 | spawned worker (server side) : ((handshake_timeout 2s)) 31 | served worker (client side) : ((handshake_timeout 2s)) 32 | served worker (server side) : ((handshake_timeout 2s)) 33 | 34 | $ APP_RPC_SETTINGS_FOR_TEST="((handshake_timeout 1s))" RPC_PARALLEL_RPC_SETTINGS="((handshake_timeout 2s))" run test/timeouts.exe rpc-settings 35 | master : ((handshake_timeout 2s)) 36 | spawned worker (client side) : ((handshake_timeout 2s)) 37 | spawned worker (server side) : ((handshake_timeout 2s)) 38 | served worker (client side) : ((handshake_timeout 2s)) 39 | served worker (server side) : ((handshake_timeout 2s)) 40 | 41 | $ APP_RPC_SETTINGS_FOR_TEST="((handshake_timeout 1s) (max_message_size 1500))" RPC_PARALLEL_RPC_SETTINGS="((handshake_timeout 2s))" run test/timeouts.exe rpc-settings 42 | master : ((max_message_size 1500) (handshake_timeout 2s)) 43 | spawned worker (client side) : ((max_message_size 1500) (handshake_timeout 2s)) 44 | spawned worker (server side) : ((max_message_size 1500) (handshake_timeout 2s)) 45 | served worker (client side) : ((max_message_size 1500) (handshake_timeout 2s)) 46 | served worker (server side) : ((max_message_size 1500) (handshake_timeout 2s)) 47 | 48 
| $ APP_RPC_SETTINGS_FOR_TEST="((handshake_timeout 1s))" RPC_PARALLEL_RPC_SETTINGS="((handshake_timeout 2s) (max_message_size 1500))" run test/timeouts.exe rpc-settings 49 | master : ((max_message_size 1500) (handshake_timeout 2s)) 50 | spawned worker (client side) : ((max_message_size 1500) (handshake_timeout 2s)) 51 | spawned worker (server side) : ((max_message_size 1500) (handshake_timeout 2s)) 52 | served worker (client side) : ((max_message_size 1500) (handshake_timeout 2s)) 53 | served worker (server side) : ((max_message_size 1500) (handshake_timeout 2s)) 54 | -------------------------------------------------------------------------------- /krb/backend.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Async 3 | 4 | let name = "Kerberized Async RPC" 5 | 6 | module Settings = struct 7 | type t = Mode.t [@@deriving bin_io, sexp] 8 | 9 | let test_principal = lazy (Krb.Principal.Name.User (Core_unix.getlogin ())) 10 | 11 | let server_mode = function 12 | | Mode.Kerberized kerberized -> Mode.Kerberized.krb_server_mode kerberized 13 | | For_unit_test -> 14 | return 15 | (Krb.Mode.Server.test_with_principal ~test_principal:(force test_principal) ()) 16 | ;; 17 | 18 | let client_mode = function 19 | | Mode.Kerberized kerberized -> Mode.Kerberized.krb_client_mode kerberized 20 | | For_unit_test -> 21 | Krb.Mode.Client.test_with_principal ~test_principal:(force test_principal) () 22 | ;; 23 | end 24 | 25 | let authorize_current_principal () = 26 | let%map principal_to_authorize = 27 | if am_running_test 28 | then 29 | (* There isn't a cred cache in the testing environment, so just use the current 30 | username. *) 31 | Unix.getlogin () >>| fun x -> Krb.Principal.Name.User x 32 | else 33 | Krb.Cred_cache.default_principal () 34 | (* This will raise if the default credential cache doesn't exist. If this 35 | is the case, we'd expect [Krb.Rpc.Connection.serve] to have already failed. 
*) 36 | >>| Or_error.ok_exn 37 | in 38 | Krb.Authorize.accept_single principal_to_authorize 39 | ;; 40 | 41 | let serve 42 | ?max_message_size 43 | ?buffer_age_limit 44 | ?handshake_timeout 45 | ?heartbeat_config 46 | ~implementations 47 | ~initial_connection_state 48 | ~where_to_listen 49 | settings 50 | = 51 | let%bind authorize = authorize_current_principal () in 52 | let%bind krb_mode = Settings.server_mode settings in 53 | Krb.Rpc.Connection.serve 54 | ~implementations 55 | ~initial_connection_state:(fun (_ : Krb.Client_identity.t) inet connection -> 56 | initial_connection_state inet connection) 57 | ~authorize 58 | ~krb_mode 59 | ?max_message_size 60 | ?buffer_age_limit 61 | ?handshake_timeout 62 | ?heartbeat_config 63 | ~where_to_listen 64 | () 65 | |> Deferred.Or_error.ok_exn 66 | ;; 67 | 68 | let with_client 69 | ?implementations 70 | ?max_message_size 71 | ?buffer_age_limit 72 | ?handshake_timeout 73 | ?heartbeat_config 74 | settings 75 | where_to_connect 76 | f 77 | = 78 | let%bind authorize = authorize_current_principal () in 79 | let krb_mode = Settings.client_mode settings in 80 | Krb.Rpc.Connection.with_client 81 | ?implementations:(Option.map ~f:Fn.const implementations) 82 | ?max_message_size 83 | ?buffer_age_limit 84 | ?handshake_timeout 85 | ?heartbeat_config 86 | ~krb_mode 87 | ~authorize 88 | where_to_connect 89 | f 90 | ;; 91 | 92 | let client 93 | ?implementations 94 | ?max_message_size 95 | ?buffer_age_limit 96 | ?handshake_timeout 97 | ?heartbeat_config 98 | ?description 99 | settings 100 | where_to_connect 101 | = 102 | let%bind authorize = authorize_current_principal () in 103 | let krb_mode = Settings.client_mode settings in 104 | Krb.Rpc.Connection.client 105 | ?implementations:(Option.map ~f:Fn.const implementations) 106 | ?max_message_size 107 | ?buffer_age_limit 108 | ?handshake_timeout 109 | ?heartbeat_config 110 | ?description 111 | ~krb_mode 112 | ~authorize 113 | where_to_connect 114 | ;; 115 | 
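`Settings.test_principal` above uses `lazy`/`force` so the login lookup runs at most once, on first use, and is cached for both the server and client modes. A minimal stand-alone sketch of that memoization pattern (the effectful lookup here is a stand-in for `Core_unix.getlogin ()`):

```ocaml
(* [lazy e] delays [e]; [Lazy.force] evaluates it on first use and caches
   the result, so the side effect below happens at most once. *)
let calls = ref 0

let current_user =
  lazy
    (incr calls;
     (* stand-in for an effectful lookup such as [Core_unix.getlogin ()] *)
     match Sys.getenv_opt "USER" with
     | Some user -> user
     | None -> "unknown")
```

Forcing `current_user` twice returns the same cached string while `!calls` stays at 1, which is why the two `force test_principal` sites in `Settings` share one lookup.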
-------------------------------------------------------------------------------- /test/shutdown_during_worker_init.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Slow_worker = struct 5 | module T = struct 6 | type 'worker functions = unit 7 | 8 | module Worker_state = struct 9 | type init_arg = unit [@@deriving bin_io] 10 | type t = unit 11 | end 12 | 13 | module Connection_state = struct 14 | type init_arg = unit [@@deriving bin_io] 15 | type t = unit 16 | end 17 | 18 | module Functions 19 | (_ : Rpc_parallel.Creator 20 | with type worker_state := Worker_state.t 21 | and type connection_state := Connection_state.t) = 22 | struct 23 | let functions = () 24 | let init_worker_state () = after (sec 60.) 25 | let init_connection_state ~connection:_ ~worker_state:_ = return 26 | end 27 | end 28 | 29 | include Rpc_parallel.Make (T) 30 | end 31 | 32 | let readme () = 33 | {| 34 | This is a manual rpc-parallel test. You should kill the master process a couple of seconds after 35 | starting it and then see whether the child immediately shuts itself down. 
36 | |} 37 | ;; 38 | 39 | let command = 40 | Command.async 41 | ~summary:"Exercise a worker who is in the middle of init when the master shuts down" 42 | ~readme 43 | (let%map_open.Command shutdown_on = 44 | choose_one 45 | ~if_nothing_chosen:Raise 46 | [ flag 47 | "-shutdown-on-heartbeater-connection-timeout" 48 | (no_arg_some `Heartbeater_connection_timeout) 49 | ~doc:"supply ~shutdown_on:Heartbeater_connection_timeout during spawn" 50 | ; flag 51 | "-shutdown-on-connection-closed" 52 | (no_arg_some `Connection_closed) 53 | ~doc:"supply ~shutdown_on:Connection_closed during spawn" 54 | ; flag 55 | "-shutdown-on-called-shutdown-function" 56 | (no_arg_some `Called_shutdown_function) 57 | ~doc:"supply ~shutdown_on:Called_shutdown_function during spawn" 58 | ] 59 | in 60 | fun () -> 61 | match shutdown_on with 62 | | `Heartbeater_connection_timeout -> 63 | let%bind (_ : Slow_worker.t) = 64 | Slow_worker.spawn_exn 65 | ~shutdown_on:Heartbeater_connection_timeout 66 | ~redirect_stdout:`Dev_null 67 | ~redirect_stderr:`Dev_null 68 | ~on_failure:(fun e -> Error.raise (Error.tag e ~tag:"spawn_exn")) 69 | () 70 | in 71 | raise_s [%message "Worker spawn finished"] 72 | | `Connection_closed -> 73 | let%bind (_ : Slow_worker.Connection.t) = 74 | Slow_worker.spawn_exn 75 | ~shutdown_on:Connection_closed 76 | ~redirect_stdout:`Dev_null 77 | ~redirect_stderr:`Dev_null 78 | ~on_failure:(fun e -> Error.raise (Error.tag e ~tag:"spawn_exn")) 79 | ~connection_state_init_arg:() 80 | () 81 | in 82 | raise_s [%message "Worker spawn finished"] 83 | | `Called_shutdown_function -> 84 | let%bind (_ : Slow_worker.t) = 85 | Slow_worker.spawn_exn 86 | ~shutdown_on:Called_shutdown_function 87 | ~redirect_stdout:`Dev_null 88 | ~redirect_stderr:`Dev_null 89 | ~on_failure:(fun e -> Error.raise (Error.tag e ~tag:"spawn_exn")) 90 | () 91 | in 92 | raise_s [%message "Worker spawn finished"]) 93 | ;; 94 | 95 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 96 | 
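Note how the `shutdown_on` argument above changes `spawn_exn`'s result type: `Heartbeater_connection_timeout` and `Called_shutdown_function` yield a `Slow_worker.t`, while `Connection_closed` (which also requires `~connection_state_init_arg`) yields a `Slow_worker.Connection.t`. That "argument determines the return type" shape is typically expressed with a GADT; the following is a minimal self-contained sketch, with hypothetical `worker`/`connection` types rather than rpc_parallel's real definitions:

```ocaml
(* Hypothetical stand-ins for a worker handle and a worker connection. *)
type worker = Worker of int
type connection = Connection of worker

(* The GADT index records what [spawn] returns for each shutdown mode. *)
type _ shutdown_on =
  | Connection_closed : connection shutdown_on
  | Heartbeater_connection_timeout : worker shutdown_on
  | Called_shutdown_function : worker shutdown_on

(* Matching on the GADT refines ['a], so each branch returns its own type. *)
let spawn : type a. shutdown_on:a shutdown_on -> a =
  fun ~shutdown_on ->
  let w = Worker 1 in
  match shutdown_on with
  | Connection_closed -> Connection w
  | Heartbeater_connection_timeout -> w
  | Called_shutdown_function -> w
```

With this encoding, `spawn ~shutdown_on:Connection_closed` is a `connection` and `spawn ~shutdown_on:Heartbeater_connection_timeout` is a `worker`, checked statically — mirroring the two type annotations in the test above.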
-------------------------------------------------------------------------------- /expect_test/managed_on_failure.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | let () = Dynamic.set_root Backtrace.elide true 5 | 6 | module T = struct 7 | type 'worker functions = { fail : ('worker, unit, unit) Rpc_parallel.Function.t } 8 | 9 | module Worker_state = struct 10 | type init_arg = unit [@@deriving bin_io] 11 | type t = unit 12 | end 13 | 14 | module Connection_state = struct 15 | type init_arg = unit [@@deriving bin_io] 16 | type t = unit 17 | end 18 | 19 | module Functions 20 | (C : Rpc_parallel.Creator 21 | with type worker_state := Worker_state.t 22 | and type connection_state := Connection_state.t) = 23 | struct 24 | let fail = 25 | C.create_one_way 26 | ~f:(fun ~worker_state:() ~conn_state:() () -> 27 | (* Make sure this exception is raised asynchronously. I'm not sure how to do 28 | this in a non-racy way, but hopefully 0.01 seconds strikes the right balance 29 | of not being racy but not introducing too much of a delay. 
*) 30 | upon (after (sec 0.01)) (fun () -> failwith "asynchronous exception")) 31 | ~bin_input:Unit.bin_t 32 | () 33 | ;; 34 | 35 | let functions = { fail } 36 | let init_worker_state () = Deferred.unit 37 | let init_connection_state ~connection:_ ~worker_state:_ = return 38 | end 39 | end 40 | 41 | include Rpc_parallel.Managed.Make [@alert "-legacy"] (T) 42 | 43 | let uuid_re = 44 | Re.Pcre.re "[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}" |> Re.compile 45 | ;; 46 | 47 | let uuid_replacement = Uuid.Stable.V1.for_testing |> Uuid.to_string 48 | 49 | let error_to_string_masking_uuid error = 50 | Re.replace_string uuid_re ~by:uuid_replacement (Error.to_string_hum error) 51 | ;; 52 | 53 | let main () = 54 | let errors = Transaction.Var.create [] in 55 | let add_error ~tag error = 56 | Transaction.Var.replace_now errors (fun errors -> Error.tag ~tag error :: errors) 57 | in 58 | let%bind worker = 59 | spawn 60 | ~on_failure:(add_error ~tag:"on_failure") 61 | ~on_connection_to_worker_closed:(add_error ~tag:"on_connection_to_worker_closed") 62 | ~redirect_stdout:`Dev_null 63 | ~redirect_stderr:`Dev_null 64 | () 65 | () 66 | >>| ok_exn 67 | in 68 | let%bind () = run_exn worker ~f:functions.fail ~arg:() in 69 | match%bind 70 | (let open Transaction.Let_syntax in 71 | match%bind Transaction.Var.get errors with 72 | | _ :: _ :: _ as errors -> return errors 73 | | _ -> Transaction.retry ()) 74 | |> Transaction.run_with_timeout (Time_ns.Span.of_sec 10.) 
75 | with 76 | | Result errors -> 77 | let errors = 78 | errors 79 | |> List.map ~f:error_to_string_masking_uuid 80 | |> List.sort ~compare:String.compare 81 | in 82 | print_s [%message (errors : string list)]; 83 | return () 84 | | Timeout () -> 85 | print_s [%message "Timeout"]; 86 | return () 87 | ;; 88 | 89 | let () = Rpc_parallel_krb_public.For_testing.initialize [%here] 90 | 91 | let%expect_test "" = 92 | let%bind () = main () in 93 | [%expect 94 | {| 95 | (errors 96 | ("(on_connection_to_worker_closed \"Lost connection with worker\")" 97 | "(on_failure\ 98 | \n (5a863fc1-67b7-3a0a-dc90-aca2995afbf9\ 99 | \n (monitor.ml.Error (Failure \"asynchronous exception\")\ 100 | \n (\"\"))))")) 101 | |}]; 102 | return () 103 | ;; 104 | -------------------------------------------------------------------------------- /test/remove_running_executable.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Worker_impl = struct 5 | type 'a functions = unit 6 | 7 | module Worker_state = struct 8 | type init_arg = unit [@@deriving bin_io] 9 | type t = unit 10 | end 11 | 12 | module Connection_state = struct 13 | type init_arg = unit [@@deriving bin_io] 14 | type t = unit 15 | end 16 | 17 | module Functions 18 | (_ : Rpc_parallel.Creator 19 | with type worker_state := Worker_state.t 20 | and type connection_state := Connection_state.t) = 21 | struct 22 | let functions = () 23 | let init_worker_state () = Deferred.unit 24 | let init_connection_state ~connection:_ ~worker_state:_ = return 25 | end 26 | end 27 | 28 | module Worker = Rpc_parallel.Make (Worker_impl) 29 | 30 | let with_rename f = 31 | (* Force the lazy because `readlink /proc/PID/exe` changes when you rename 32 | the executable *) 33 | let%bind _worker = 34 | Worker.spawn_exn 35 | ~on_failure:Error.raise 36 | ~shutdown_on:Heartbeater_connection_timeout 37 | ~redirect_stdout:`Dev_null 38 | ~redirect_stderr:`Dev_null 39 | () 40 | in 41 | (* move 
the currently running executable *) 42 | let%bind cwd = Unix.getcwd () in 43 | let old_path = 44 | if Filename.is_absolute Sys.executable_name 45 | then Sys.executable_name 46 | else cwd ^/ Sys.executable_name 47 | in 48 | let new_path = sprintf "%s.bak" old_path in 49 | let%bind () = Unix.rename ~src:old_path ~dst:new_path in 50 | (* run f *) 51 | match%map 52 | Monitor.protect 53 | ~run:`Schedule (* consider [~run:`Now] instead; see: https://wiki/x/ByVWF *) 54 | ~rest:`Log 55 | (* consider [`Raise] instead; see: https://wiki/x/Ux4xF *) f 56 | ~finally:(fun () -> Unix.rename ~src:new_path ~dst:old_path) 57 | with 58 | | Ok () -> printf "Ok.\n" 59 | | Error e -> Error.raise e 60 | ;; 61 | 62 | let spawn_local () = 63 | with_rename (fun () -> 64 | Worker.spawn 65 | ~on_failure:Error.raise 66 | ~shutdown_on:Heartbeater_connection_timeout 67 | ~redirect_stdout:`Dev_null 68 | ~redirect_stderr:`Dev_null 69 | () 70 | >>|? fun _worker -> ()) 71 | ;; 72 | 73 | let spawn_remote () = 74 | with_rename (fun () -> 75 | let%bind cwd = Unix.getcwd () in 76 | let host = Unix.gethostname () in 77 | Rpc_parallel.Remote_executable.copy_to_host 78 | ~executable_dir:cwd 79 | host 80 | ~strict_host_key_checking:`No 81 | >>=? fun executable -> 82 | Worker.spawn 83 | ~on_failure:Error.raise 84 | ~shutdown_on:Heartbeater_connection_timeout 85 | ~redirect_stdout:`Dev_null 86 | ~redirect_stderr:`Dev_null 87 | ~how:(Rpc_parallel.How_to_run.remote executable) 88 | () 89 | >>=? 
fun _worker -> Rpc_parallel.Remote_executable.delete executable) 90 | ;; 91 | 92 | let command = 93 | let open Command.Let_syntax in 94 | Command.async 95 | ~summary:"" 96 | (let%map_open local = flag "spawn-local" no_arg ~doc:" local spawn" 97 | and remote = flag "spawn-remote" no_arg ~doc:" remote spawn (on local machine)" in 98 | fun () -> 99 | match local, remote with 100 | | true, true | false, false -> failwith "specify -spawn-local or -spawn-remote" 101 | | true, false -> spawn_local () 102 | | false, true -> spawn_remote ()) 103 | ~behave_nicely_in_pipeline:false 104 | ;; 105 | 106 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 107 | -------------------------------------------------------------------------------- /example/reverse_direct_pipe.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Shard = struct 5 | module T = struct 6 | type 'worker functions = 7 | ( 'worker 8 | , int * (int Rpc.Pipe_rpc.Direct_stream_writer.t -> unit Or_error.t Deferred.t) 9 | , string list ) 10 | Rpc_parallel.Function.t 11 | 12 | module Worker_state = struct 13 | type t = int 14 | type init_arg = int [@@deriving bin_io] 15 | end 16 | 17 | module Connection_state = struct 18 | type t = unit 19 | type init_arg = unit [@@deriving bin_io] 20 | end 21 | 22 | module Functions 23 | (Creator : Rpc_parallel.Creator 24 | with type worker_state = Worker_state.t 25 | and type connection_state = Connection_state.t) = 26 | struct 27 | let functions = 28 | Creator.create_reverse_direct_pipe 29 | ~bin_query:Int.bin_t 30 | ~bin_update:Int.bin_t 31 | ~bin_response:(List.bin_t String.bin_t) 32 | ~f:(fun ~worker_state:id ~conn_state:() prefix numbers -> 33 | Pipe.fold_without_pushback numbers ~init:[] ~f:(fun acc number -> 34 | sprintf "worker %d got %d:%d" id prefix number :: acc)) 35 | () 36 | ;; 37 | 38 | let init_worker_state = return 39 | let init_connection_state ~connection:_ 
~worker_state:_ () = Deferred.unit 40 | end 41 | end 42 | 43 | include T 44 | include Rpc_parallel.Make (T) 45 | end 46 | 47 | let main () = 48 | let shards = 10 in 49 | let%bind connections = 50 | Array.init shards ~f:(fun id -> 51 | Shard.spawn_exn 52 | ~shutdown_on:Connection_closed 53 | ~redirect_stdout:`Dev_null 54 | ~redirect_stderr:`Dev_null 55 | ~on_failure:Error.raise 56 | ~connection_state_init_arg:() 57 | id) 58 | |> Deferred.Array.all 59 | in 60 | let readers = 61 | let readers, writers = 62 | Array.init shards ~f:(fun (_ : int) -> Pipe.create ()) |> Array.unzip 63 | in 64 | let write_everything = 65 | let%map () = 66 | Sequence.init 1_000 ~f:Fn.id 67 | |> Sequence.delayed_fold 68 | ~init:() 69 | ~f:(fun () i ~k -> Pipe.write writers.(i % shards) i >>= k) 70 | ~finish:return 71 | in 72 | Array.iter writers ~f:Pipe.close 73 | in 74 | don't_wait_for write_everything; 75 | readers 76 | in 77 | let%bind () = 78 | Array.mapi connections ~f:(fun i connection -> 79 | let write_fun direct_writer = 80 | (* This defeats the point of using a direct writer, of course, but the diff 81 | between this file and reverse_pipe.ml is small as a result. 
*) 82 | let%map () = 83 | Pipe.iter readers.(i) ~f:(fun n -> 84 | match Rpc.Pipe_rpc.Direct_stream_writer.write direct_writer n with 85 | | `Closed -> return () 86 | | `Flushed flushed -> flushed) 87 | in 88 | Rpc.Pipe_rpc.Direct_stream_writer.close direct_writer; 89 | Ok () 90 | in 91 | Shard.Connection.run_exn connection ~f:Shard.functions ~arg:(i, write_fun)) 92 | |> Deferred.Array.all 93 | >>| printf !"%{sexp: string list array}\n" 94 | in 95 | Array.map connections ~f:Shard.Connection.close |> Deferred.Array.all_unit 96 | ;; 97 | 98 | let () = 99 | Rpc_parallel_krb_public.start_app 100 | ~krb_mode:For_unit_test 101 | (Command.async 102 | ~summary:"Demonstrate using Rpc_parallel with reverse pipes" 103 | (Command.Param.return main) 104 | ~behave_nicely_in_pipeline:false) 105 | ;; 106 | -------------------------------------------------------------------------------- /test/stress_test.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Worker = struct 5 | module T = struct 6 | type 'worker functions = { ping : ('worker, unit, unit) Rpc_parallel.Function.t } 7 | 8 | module Worker_state = struct 9 | type init_arg = unit [@@deriving bin_io] 10 | type t = unit 11 | end 12 | 13 | module Connection_state = struct 14 | type init_arg = unit [@@deriving bin_io] 15 | type t = unit 16 | end 17 | 18 | module Functions 19 | (C : Rpc_parallel.Creator 20 | with type worker_state := Worker_state.t 21 | and type connection_state := Connection_state.t) = 22 | struct 23 | let ping = 24 | C.create_rpc 25 | ~f:(fun ~worker_state:() ~conn_state:() -> return) 26 | ~bin_input:Unit.bin_t 27 | ~bin_output:Unit.bin_t 28 | () 29 | ;; 30 | 31 | let functions = { ping } 32 | let init_worker_state () = Deferred.unit 33 | let init_connection_state ~connection:_ ~worker_state:_ = return 34 | end 35 | end 36 | 37 | include Rpc_parallel.Make (T) 38 | end 39 | 40 | let command = 41 | Command.async_spec 42 | 
~summary:"Stress testing Rpc parallel" 43 | Command.Spec.( 44 | empty 45 | +> flag "-worker" (optional string) ~doc:" worker to run test on" 46 | +> flag 47 | "-dir" 48 | (optional_with_default "~" string) 49 | ~doc:" directory to copy executable to" 50 | +> flag 51 | "-num-workers" 52 | (optional_with_default 20 int) 53 | ~doc:" number of workers to spawn" 54 | +> flag "-num-loops" (optional_with_default 30 int) ~doc:" number of loops to test") 55 | (fun worker dir num_workers num_loops () -> 56 | let setup = 57 | match worker with 58 | | None -> return (Rpc_parallel.How_to_run.local, fun () -> return ()) 59 | | Some w -> 60 | (match%map 61 | Rpc_parallel.Remote_executable.copy_to_host ~executable_dir:dir w 62 | with 63 | | Error e -> Error.raise e 64 | | Ok exec -> 65 | ( Rpc_parallel.How_to_run.remote exec 66 | , fun () -> Rpc_parallel.Remote_executable.delete exec >>| Or_error.ok_exn )) 67 | in 68 | let%bind executable, cleanup = setup in 69 | let rec loop remaining = 70 | if remaining = 0 71 | then return () 72 | else ( 73 | let start = 74 | Time_float.to_span_since_epoch (Time_float.now ()) |> Time_float.Span.to_sec 75 | in 76 | let%bind () = 77 | Deferred.all_unit 78 | (List.map (List.range 0 num_workers) ~f:(fun _i -> 79 | let%bind conn = 80 | Worker.spawn_exn 81 | ~how:executable 82 | ~redirect_stdout:`Dev_null 83 | ~shutdown_on:Connection_closed 84 | ~redirect_stderr:`Dev_null 85 | ~on_failure:Error.raise 86 | ~connection_state_init_arg:() 87 | () 88 | in 89 | Worker.Connection.run_exn conn ~f:Worker.functions.ping ~arg:())) 90 | in 91 | let end_ = 92 | Time_float.to_span_since_epoch (Time_float.now ()) |> Time_float.Span.to_sec 93 | in 94 | Core.Printf.printf "%f\n%!" (end_ -. 
start); 95 | loop (remaining - 1)) 96 | in 97 | let%bind () = loop num_loops in 98 | cleanup ()) 99 | ~behave_nicely_in_pipeline:false 100 | ;; 101 | 102 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 103 | -------------------------------------------------------------------------------- /test/test-examples.t: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # NOTE: 5 | # 6 | # if this test fails with the error "Permission denied 7 | # (publickey,gssapi-keyex,gssapi-with-mic,password)", you should run the 8 | # following: 9 | # 10 | # $ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys 11 | # $ chmod 600 ~/.ssh/authorized_keys 12 | 13 | $ $TESTDIR/ssh_test_server.sh with run test/copy_executable.exe -dir $(pwd) -worker $(hostname) 14 | Ok 15 | 16 | $ run example/serve.exe 17 | Success. 18 | 19 | $ $TESTDIR/ssh_test_server.sh with run test/wrap_test.exe -wrapper $TESTDIR/print_and_run.sh -host localhost 20 | Worker successfully started 21 | [WORKER STDOUT]: Ran with print_and_run! 22 | 23 | $ run test/env_test.exe basic-test 24 | WORKER: TEST_ENV_KEY=potentially "problematic" \"test\" string ()! 25 | WORKER: SHOULD_NOT_EXIST= 26 | 27 | $ run test/env_test.exe special-var 28 | WORKER: OCAMLRUNPARAM=foo=bar 29 | WORKER: OCAMLRUNPARAM=foo=user-supplied 30 | 31 | $ $TESTDIR/ssh_test_server.sh with run test/env_test.exe special-var -host localhost 32 | WORKER: OCAMLRUNPARAM=foo=bar 33 | WORKER: OCAMLRUNPARAM=foo=user-supplied 34 | 35 | $ run example/alternative_init.exe main 36 | Success. 
37 | 38 | $ run_one_line test/raise_on_connection_state_init.exe 39 | expected failure:*text of expected failure* (glob) 40 | 41 | $ run_one_line test/run_exn.exe 42 | expected failure:*text of expected failure* (glob) 43 | 44 | $ run example/number_stats.exe -nblocks 100 45 | Samples: * (glob) 46 | Mean: * (glob) 47 | Variance: * (glob) 48 | 49 | $ run example/add_numbers.exe -max 100 -ntimes 10 50 | 0: 4950 51 | 1: 4950 52 | 2: 4950 53 | 3: 4950 54 | 4: 4950 55 | 5: 4950 56 | 6: 4950 57 | 7: 4950 58 | 8: 4950 59 | 9: 4950 60 | 61 | $ run example/rpc_direct_pipe.exe -max 10 -delay 0.01s 62 | Sum_worker.sum: 0 63 | Sum_worker.sum: 1 64 | Sum_worker.sum: 3 65 | Sum_worker.sum: 6 66 | Sum_worker.sum: 10 67 | Sum_worker.sum: 15 68 | Sum_worker.sum: 21 69 | Sum_worker.sum: 28 70 | Sum_worker.sum: 36 71 | Sum_worker.sum: 45 72 | 73 | $ run example/abort_direct_pipe.exe 74 | Ping: 0 75 | Pong: 0 76 | Ping: 1 77 | Pong: 1 78 | Ping: 2 79 | Pong: 2 80 | Ping: 3 81 | Pong: 3 82 | Ping: 4 83 | Pong: 4 84 | Closed: By_remote_side 85 | Worker reports pongs closed 86 | 87 | $ run example/async_log.exe 88 | (V2 89 | ((time (*)) (level (Info)) (glob) 90 | (message (String "worker log message")) (tags ()))) 91 | (V2 92 | ((time (*)) (level (Info)) (glob) 93 | (message (String "worker log message")) (tags ()))) 94 | (V2 95 | ((time (*)) (level (Info)) (glob) 96 | (message (String "worker log message")) (tags ()))) 97 | 98 | $ run example/stream_workers.exe -num-elts 10 99 | Ok. 100 | 101 | $ run test/fd.exe 102 | Ok 103 | 104 | $ run test/krb_expert.exe main 105 | Success. 106 | 107 | $ run example/spawn_in_foreground.exe 108 | [WORKER STDERR]: *-*-* *:*:*.* Info ("Rpc_parallel: initial client connection closed... Shutting down."(reason* (glob) 109 | [WORKER STDOUT]: HELLO 110 | [WORKER STDOUT]: HELLO2 111 | 112 | $ run test/spawn_in_foreground_zombie.exe 113 | "No zombies" 114 | "No zombies" 115 | 116 | # Treat this test specially because it removes the exe momentarily. 
This causes 117 | # jenga to loop (rebuilding the exe then rerunning the test) 118 | $ cp $rpc_parallel_base/test/remove_running_executable.exe /tmp 119 | 120 | $ run_absolute_path /tmp/remove_running_executable.exe -spawn-local 121 | Ok. 122 | 123 | $ $TESTDIR/ssh_test_server.sh with run_absolute_path /tmp/remove_running_executable.exe -spawn-remote 124 | Ok. 125 | 126 | $ rm /tmp/remove_running_executable.exe 127 | -------------------------------------------------------------------------------- /test/buffer_age_limit.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Shard = struct 5 | module T = struct 6 | type 'worker functions = 7 | ( 'worker 8 | , unit 9 | * (Float.t Rpc.Pipe_rpc.Direct_stream_writer.t -> unit Or_error.t Deferred.t) 10 | , unit ) 11 | Rpc_parallel.Function.t 12 | 13 | module Worker_state = struct 14 | type t = unit 15 | type init_arg = unit [@@deriving bin_io] 16 | end 17 | 18 | module Connection_state = struct 19 | type t = unit 20 | type init_arg = unit [@@deriving bin_io] 21 | end 22 | 23 | module Functions 24 | (Creator : Rpc_parallel.Creator 25 | with type worker_state = Worker_state.t 26 | and type connection_state = Connection_state.t) = 27 | struct 28 | let do_work_for span = Core_unix.sleep (Float.to_int (Time_ns.Span.to_sec span)) 29 | 30 | let functions = 31 | Creator.create_reverse_direct_pipe 32 | ~bin_query:Unit.bin_t 33 | ~bin_update:Float.bin_t 34 | ~bin_response:Unit.bin_t 35 | ~f:(fun ~worker_state:() ~conn_state:() () pipe -> 36 | Pipe.iter_without_pushback pipe ~f:(fun update -> 37 | ignore update; 38 | do_work_for (Time_ns.Span.of_min 3.))) 39 | () 40 | ;; 41 | 42 | let init_worker_state () = Deferred.unit 43 | let init_connection_state ~connection:_ ~worker_state:_ () = Deferred.unit 44 | end 45 | end 46 | 47 | include T 48 | include Rpc_parallel.Make (T) 49 | end 50 | 51 | let main () = 52 | let%bind connection = 53 | Shard.spawn_exn 54 | 
~shutdown_on:Connection_closed 55 | ~redirect_stdout:`Dev_null 56 | ~redirect_stderr:`Dev_null 57 | ~on_failure:Error.raise 58 | ~connection_state_init_arg:() 59 | () 60 | in 61 | let () = 62 | let f direct_writer = 63 | Clock.every (Time_float.Span.of_sec 1.) (fun () -> 64 | for _ = 0 to 1_000_000 do 65 | match 66 | Rpc.Pipe_rpc.Direct_stream_writer.write_without_pushback 67 | direct_writer 68 | (Random.float 100.) 69 | with 70 | | `Closed -> () 71 | | `Ok -> () 72 | done); 73 | Ok () |> return 74 | in 75 | Shard.Connection.run_exn connection ~f:Shard.functions ~arg:((), f) |> don't_wait_for 76 | in 77 | Deferred.never () 78 | ;; 79 | 80 | let readme () = 81 | {| 82 | Running the command without setting any environment variables causes the following error 83 | after about 2 minutes: 84 | 85 | $ ./buffer_age_limit 86 | (monitor.ml.Error 87 | ((rpc_error 88 | (Uncaught_exn 89 | (monitor.ml.Error 90 | ("writer buffer has data older than" (maximum_age 2m) 91 | ... 92 | 93 | If you set the RPC_PARALLEL_RPC_SETTINGS environment variable to "((buffer_age_limit Unlimited))" 94 | then after 10 minutes you get the following error: 95 | 96 | $ RPC_PARALLEL_RPC_SETTINGS="((buffer_age_limit Unlimited))" ./buffer_age_limit.exe 97 | (monitor.ml.Error 98 | ((rpc_error (Connection_closed ("No heartbeats received for 10m."))) 99 | (connection_description 100 | ("Kerberized RPC client" (connected_to ( 39075)) 101 | (client_principal (User )) (server_principal (User )))) 102 | (rpc_name rpc_parallel_reverse_piped_0) (rpc_version 0)) 103 | ("Called from Base__Or_error.ok_exn in file \"or_error.ml\", line 118, characters 17-32")) 104 | |} 105 | ;; 106 | 107 | let command = 108 | Command.async 109 | ~summary:"test" 110 | ~readme 111 | (let%map_open.Command () = return () in 112 | fun () -> main ()) 113 | ;; 114 | 115 | let () = 116 | Rpc_parallel_krb_public.start_app 117 | ~krb_mode:For_unit_test 118 | ~rpc_heartbeat_config: 119 | (Rpc.Connection.Heartbeat_config.create 
~timeout:(Time_ns.Span.of_min 10.) ()) 120 | command 121 | ;; 122 | -------------------------------------------------------------------------------- /test/env_test.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Pre_worker = struct 5 | type 'w functions = { getenv : ('w, string, string option) Rpc_parallel.Function.t } 6 | 7 | module Worker_state = struct 8 | type init_arg = unit [@@deriving bin_io] 9 | type t = unit 10 | end 11 | 12 | module Connection_state = struct 13 | type init_arg = unit [@@deriving bin_io] 14 | type t = unit 15 | end 16 | 17 | module Functions 18 | (C : Rpc_parallel.Creator 19 | with type worker_state := Worker_state.t 20 | and type connection_state := Connection_state.t) = 21 | struct 22 | let getenv = 23 | C.create_rpc 24 | ~bin_input:String.bin_t 25 | ~bin_output:(Option.bin_t String.bin_t) 26 | ~f:(fun ~worker_state:() ~conn_state:() key -> return (Unix.getenv key)) 27 | () 28 | ;; 29 | 30 | let functions = { getenv } 31 | let init_worker_state () = Deferred.unit 32 | let init_connection_state ~connection:_ ~worker_state:_ = return 33 | end 34 | end 35 | 36 | module Worker = Rpc_parallel.Make (Pre_worker) 37 | 38 | let spawn ?env ~host () = 39 | let%bind how, cleanup_fn = 40 | match host with 41 | | None -> return (Rpc_parallel.How_to_run.local, fun () -> return ()) 42 | | Some host -> 43 | let executable_dir = Filename.temp_dir_name in 44 | let%map remote_executable = 45 | Rpc_parallel.Remote_executable.copy_to_host 46 | ~strict_host_key_checking:`No 47 | ~executable_dir 48 | host 49 | >>| ok_exn 50 | in 51 | ( Rpc_parallel.How_to_run.remote remote_executable 52 | , fun () -> Rpc_parallel.Remote_executable.delete remote_executable >>| ok_exn ) 53 | in 54 | let%bind conn = 55 | Worker.spawn 56 | ?env 57 | ~how 58 | ~shutdown_on:Connection_closed 59 | ~redirect_stdout:`Dev_null 60 | ~redirect_stderr:`Dev_null 61 | ~on_failure:Error.raise 62 | 
~connection_state_init_arg:() 63 | () 64 | in 65 | let%map () = cleanup_fn () in 66 | conn 67 | ;; 68 | 69 | let print_worker_env conn ~key = 70 | Worker.Connection.run conn ~f:Worker.functions.getenv ~arg:key 71 | >>=? fun result -> 72 | printf "WORKER: %s=%s\n" key (Option.value result ~default:""); 73 | Deferred.Or_error.ok_unit 74 | ;; 75 | 76 | let basic_test = 77 | Command.async_spec_or_error 78 | ~summary:"Using environment variables for great good" 79 | Command.Spec.(empty +> flag "host" (optional string) ~doc:"HOST run worker on HOST") 80 | (fun host () -> 81 | let key = "TEST_ENV_KEY" in 82 | let data = "potentially \"problematic\" \\\"test\\\" string ()!" in 83 | spawn ~env:[ key, data ] ~host () 84 | >>=? fun conn -> 85 | print_worker_env conn ~key 86 | >>=? fun () -> 87 | print_worker_env conn ~key:"SHOULD_NOT_EXIST" 88 | >>=? fun () -> Deferred.Or_error.ok_unit) 89 | ~behave_nicely_in_pipeline:false 90 | ;; 91 | 92 | let special_var = 93 | Command.async_spec_or_error 94 | ~summary:"Child inherits variables that influence process execution" 95 | Command.Spec.(empty +> flag "host" (optional string) ~doc:"HOST run worker on HOST") 96 | (fun host () -> 97 | let envvar = "OCAMLRUNPARAM" in 98 | let envval = "foo=bar" in 99 | (Unix.putenv [@ocaml.alert "-unsafe_multidomain"]) ~key:envvar ~data:envval; 100 | spawn ~host () 101 | >>=? fun conn -> 102 | print_worker_env conn ~key:envvar 103 | >>=? fun () -> 104 | spawn ~host ~env:[ envvar, "foo=user-supplied" ] () 105 | >>=? fun conn -> 106 | print_worker_env conn ~key:envvar >>=? 
fun () -> Deferred.Or_error.ok_unit) 107 | ~behave_nicely_in_pipeline:false 108 | ;; 109 | 110 | let () = 111 | Command.group 112 | ~summary:"Environment Variable Tests" 113 | [ "basic-test", basic_test; "special-var", special_var ] 114 | |> Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test 115 | ;; 116 | -------------------------------------------------------------------------------- /example/number_stats.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Jane 3 | open Async 4 | 5 | module Generate_random_map_function = 6 | Rpc_parallel.Map_reduce.Make_map_function_with_init (struct 7 | type state_type = unit 8 | 9 | module Param = struct 10 | type t = unit [@@deriving bin_io] 11 | end 12 | 13 | module Input = struct 14 | type t = unit [@@deriving bin_io] 15 | end 16 | 17 | module Output = struct 18 | type t = float array [@@deriving bin_io] 19 | end 20 | 21 | let init () ~worker_index:(_ : int) = 22 | Random.self_init (); 23 | Deferred.unit 24 | ;; 25 | 26 | let map () () = 27 | return 28 | (Array.init 50000 ~f:(fun _ -> 29 | Float.(atan (atan (atan (atan (atan (atan (Random.float 100.))))))))) 30 | ;; 31 | end) 32 | 33 | module Compute_stats_map_reduce_function = 34 | Rpc_parallel.Map_reduce.Make_map_reduce_function (struct 35 | module Accum = struct 36 | type t = immutable Rstats.t [@@deriving bin_io] 37 | end 38 | 39 | module Input = struct 40 | type t = float array [@@deriving bin_io] 41 | end 42 | 43 | let map input = 44 | return 45 | (Array.fold input ~init:(Rstats.empty ()) ~f:(fun acc x -> Rstats.update acc x)) 46 | ;; 47 | 48 | let combine acc1 acc2 = return (Rstats.merge acc1 acc2) 49 | end) 50 | 51 | let command = 52 | Command.async_spec 53 | ~summary:"Compute summary statistics in parallel" 54 | Command.Spec.( 55 | empty 56 | +> flag 57 | "nblocks" 58 | (optional_with_default 10000 int) 59 | ~doc:" Blocks to generate (total number of random numbers is 50000 * blocks)" 60 | +> flag 
"nworkers" (optional_with_default 4 int) ~doc:" Number of workers" 61 | +> flag "remote-host" (optional string) ~doc:" Remote host name" 62 | +> flag "remote-path" (optional string) ~doc:" Path to this exe on the remote host" 63 | +> flag 64 | "ordered" 65 | (optional_with_default true bool) 66 | ~doc:" Commutative or noncommutative fold (should not affect the result)") 67 | (fun nblocks nworkers remote_host remote_path ordered () -> 68 | let config = 69 | match remote_host with 70 | | Some remote_host -> 71 | (match remote_path with 72 | | Some remote_path -> 73 | Rpc_parallel.Map_reduce.Config.create 74 | ~remote: 75 | [ ( Rpc_parallel.Remote_executable.existing_on_host 76 | ~executable_path:remote_path 77 | remote_host 78 | , nworkers ) 79 | ] 80 | ~redirect_stderr:`Dev_null 81 | ~redirect_stdout:`Dev_null 82 | () 83 | | _ -> failwith "No remote path specified") 84 | | _ -> 85 | Rpc_parallel.Map_reduce.Config.create 86 | ~local:nworkers 87 | ~redirect_stderr:`Dev_null 88 | ~redirect_stdout:`Dev_null 89 | () 90 | in 91 | let%bind blocks = 92 | Rpc_parallel.Map_reduce.map_unordered 93 | config 94 | (Pipe.of_list (List.init nblocks ~f:(Fn.const ()))) 95 | ~m:(module Generate_random_map_function) 96 | ~param:() 97 | in 98 | match%bind 99 | (if ordered 100 | then Rpc_parallel.Map_reduce.map_reduce 101 | else Rpc_parallel.Map_reduce.map_reduce_commutative) 102 | config 103 | (Pipe.map blocks ~f:(fun (block, _index) -> block)) 104 | ~m:(module Compute_stats_map_reduce_function) 105 | ~param:() 106 | with 107 | | Some stats -> 108 | printf "Samples: %i\n" (Rstats.samples stats); 109 | printf "Mean: %f\n" (Rstats.mean stats); 110 | printf "Variance: %f\n" (Rstats.var stats); 111 | Deferred.unit 112 | | None -> Deferred.unit) 113 | ~behave_nicely_in_pipeline:false 114 | ;; 115 | 116 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 117 | -------------------------------------------------------------------------------- 
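The [Map_reduce] wiring in number_stats.ml above can be distilled to a much smaller pattern. Below is a minimal sketch for reference — the [Sum_function] and [sum] names are illustrative, not part of this repo — using only API calls that appear elsewhere in these examples ([Make_map_reduce_function], [Config.create], [map_reduce_commutative]):

```ocaml
open Core
open Async

(* Sum the integers 0..99 across 4 local worker processes.  [Accum] and
   [Input] both need [bin_io]; Core's [Int] already provides it. *)
module Sum_function = Rpc_parallel.Map_reduce.Make_map_reduce_function (struct
    module Accum = Int
    module Input = Int

    (* [map] lifts each input into the accumulator type. *)
    let map x = return x

    (* [combine] must be associative; it must also be commutative to use
       the [map_reduce_commutative] variant below. *)
    let combine a b = return (a + b)
  end)

let sum () =
  let config =
    Rpc_parallel.Map_reduce.Config.create
      ~local:4
      ~redirect_stderr:`Dev_null
      ~redirect_stdout:`Dev_null
      ()
  in
  match%map
    Rpc_parallel.Map_reduce.map_reduce_commutative
      config
      (Pipe.of_list (List.init 100 ~f:Fn.id))
      ~m:(module Sum_function)
      ~param:()
  with
  | Some total -> printf "total = %d\n" total
  | None -> printf "empty input\n"
```

As with the other examples, this only works inside an executable initialized with one of the backends, e.g. `Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test` wrapping a `Command.async` that calls `sum`.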
/expect_test/master_pid_decoration.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open Async 3 | 4 | module Pid_and_host = struct 5 | type t = 6 | { pid : Pid.t 7 | ; host : string 8 | } 9 | [@@deriving bin_io, compare, sexp_of] 10 | 11 | let get_mine () = { pid = Unix.getpid (); host = Unix.gethostname () } 12 | 13 | let get_master () = 14 | let open Option.Let_syntax in 15 | let%bind pid_and_host = 16 | Array.find_map (Sys.get_argv ()) ~f:(String.chop_prefix ~prefix:"child-of-") 17 | in 18 | let%map pid, host = String.lsplit2 pid_and_host ~on:'@' in 19 | { pid = Pid.of_string pid; host } 20 | ;; 21 | end 22 | 23 | module Master_and_worker = struct 24 | type t = 25 | { master : Pid_and_host.t option 26 | ; worker : Pid_and_host.t 27 | } 28 | [@@deriving bin_io, compare, sexp_of] 29 | 30 | let get () = { master = Pid_and_host.get_master (); worker = Pid_and_host.get_mine () } 31 | end 32 | 33 | module rec Worker : sig 34 | type t 35 | 36 | val spawn : children_to_spawn:int -> t Deferred.t 37 | val get_master_and_worker_chain : t -> Master_and_worker.t list Deferred.t 38 | end = struct 39 | module Impl = struct 40 | type 'w functions = 41 | { get_master_and_worker_chain : 42 | ('w, unit, Master_and_worker.t list) Rpc_parallel.Function.t 43 | } 44 | 45 | module Worker_state = struct 46 | type init_arg = { children_to_spawn : int } [@@deriving bin_io] 47 | type t = { child : Worker.t option } 48 | end 49 | 50 | module Connection_state = struct 51 | type init_arg = unit [@@deriving bin_io] 52 | type t = unit 53 | end 54 | 55 | module Functions 56 | (C : Rpc_parallel.Creator 57 | with type worker_state := Worker_state.t 58 | and type connection_state := Connection_state.t) = 59 | struct 60 | let functions = 61 | { get_master_and_worker_chain = 62 | C.create_rpc 63 | ~bin_input:[%bin_type_class: unit] 64 | ~bin_output:[%bin_type_class: Master_and_worker.t list] 65 | ~f:(fun ~worker_state:{ child } ~conn_state:() () 
-> 66 | let%map tl = 67 | match child with 68 | | None -> return [] 69 | | Some child -> Worker.get_master_and_worker_chain child 70 | in 71 | Master_and_worker.get () :: tl) 72 | () 73 | } 74 | ;; 75 | 76 | let init_worker_state { Worker_state.children_to_spawn } = 77 | if children_to_spawn > 0 78 | then ( 79 | let%map child = Worker.spawn ~children_to_spawn:(pred children_to_spawn) in 80 | { Worker_state.child = Some child }) 81 | else return { Worker_state.child = None } 82 | ;; 83 | 84 | let init_connection_state 85 | ~connection:(_ : Rpc.Connection.t) 86 | ~worker_state:(_ : Worker_state.t) 87 | () 88 | = 89 | return () 90 | ;; 91 | end 92 | end 93 | 94 | module Rpc_parallel_worker = Rpc_parallel.Make (Impl) 95 | 96 | type t = Rpc_parallel_worker.Connection.t 97 | 98 | let spawn ~children_to_spawn = 99 | Rpc_parallel_worker.spawn_exn 100 | ~on_failure:Error.raise 101 | ~shutdown_on:Connection_closed 102 | ~redirect_stdout:`Dev_null 103 | ~redirect_stderr:`Dev_null 104 | ~connection_state_init_arg:() 105 | { children_to_spawn } 106 | ;; 107 | 108 | let get_master_and_worker_chain t = 109 | Rpc_parallel_worker.Connection.run_exn 110 | t 111 | ~f:Rpc_parallel_worker.functions.get_master_and_worker_chain 112 | ~arg:() 113 | ;; 114 | end 115 | 116 | let () = Rpc_parallel_krb_public.For_testing.initialize [%here] 117 | 118 | let%expect_test _ = 119 | let%bind worker = Worker.spawn ~children_to_spawn:10 in 120 | let%map chain = Worker.get_master_and_worker_chain worker in 121 | let chain = Master_and_worker.get () :: chain in 122 | let masters, workers = 123 | List.unzip (List.map chain ~f:(fun { master; worker } -> master, worker)) 124 | in 125 | [%test_result: Pid_and_host.t option list] 126 | (None :: List.map (List.drop_last_exn workers) ~f:Option.some) 127 | ~expect:masters 128 | ;; 129 | -------------------------------------------------------------------------------- /test/qtest.ml: 
-------------------------------------------------------------------------------- 1 | open Core 2 | open Poly 3 | open Async 4 | open Qtest_deprecated.Std 5 | 6 | module Add_map_function = Rpc_parallel.Map_reduce.Make_map_function_with_init (struct 7 | module Param = Int 8 | module Input = Int 9 | module Output = Int 10 | 11 | type state_type = int 12 | 13 | let init param ~worker_index:(_ : int) = return param 14 | let map state x = return (x + state) 15 | end) 16 | 17 | module Count_map_reduce_function = 18 | Rpc_parallel.Map_reduce.Make_map_reduce_function_with_init (struct 19 | module Param = Int 20 | module Accum = Int 21 | 22 | module Input = struct 23 | type t = int list [@@deriving bin_io] 24 | end 25 | 26 | type state_type = int 27 | 28 | let init param ~worker_index:(_ : int) = return param 29 | let map state l = return (state * List.fold l ~init:0 ~f:( + )) 30 | let combine _state x y = return (x + y) 31 | end) 32 | 33 | module Concat_map_reduce_function = 34 | Rpc_parallel.Map_reduce.Make_map_reduce_function (struct 35 | module Accum = struct 36 | type t = int list [@@deriving bin_io] 37 | end 38 | 39 | module Input = Int 40 | 41 | let map x = return [ x ] 42 | let combine l1 l2 = return (l1 @ l2) 43 | end) 44 | 45 | let test_map_unordered () = 46 | let n = 1000 in 47 | let input = List.init n ~f:Fn.id in 48 | let config = 49 | Rpc_parallel.Map_reduce.Config.create 50 | ~local:5 51 | ~redirect_stderr:`Dev_null 52 | ~redirect_stdout:`Dev_null 53 | () 54 | in 55 | let%bind output = 56 | Rpc_parallel.Map_reduce.map_unordered 57 | config 58 | (Pipe.of_list input) 59 | ~m:(module Add_map_function) 60 | ~param:n 61 | >>= Pipe.to_list 62 | in 63 | let numbers = List.sort (List.map output ~f:fst) ~compare:Int.compare in 64 | let expected_numbers = List.init n ~f:(( + ) n) in 65 | let indices = List.sort (List.map output ~f:snd) ~compare:Int.compare in 66 | let expected_indices = input in 67 | assert (List.equal ( = ) indices expected_indices); 68 | assert 
(List.equal ( = ) numbers expected_numbers); 69 | Deferred.unit 70 | ;; 71 | 72 | let test_map () = 73 | let n = 1000 in 74 | let input = List.init n ~f:Fn.id in 75 | let config = 76 | Rpc_parallel.Map_reduce.Config.create 77 | ~local:5 78 | ~redirect_stderr:`Dev_null 79 | ~redirect_stdout:`Dev_null 80 | () 81 | in 82 | let%bind output = 83 | Rpc_parallel.Map_reduce.map 84 | config 85 | (Pipe.of_list input) 86 | ~m:(module Add_map_function) 87 | ~param:n 88 | >>= Pipe.to_list 89 | in 90 | let expected_output = List.init n ~f:(( + ) n) in 91 | assert (List.equal ( = ) output expected_output); 92 | Deferred.unit 93 | ;; 94 | 95 | let test_map_reduce_commutative () = 96 | let n = 1000 in 97 | let multiplier = 2 in 98 | let input = List.init n ~f:(fun m -> List.init m ~f:Fn.id) in 99 | let config = 100 | Rpc_parallel.Map_reduce.Config.create 101 | ~local:5 102 | ~redirect_stderr:`Dev_null 103 | ~redirect_stdout:`Dev_null 104 | () 105 | in 106 | let%bind sum = 107 | Rpc_parallel.Map_reduce.map_reduce_commutative 108 | config 109 | (Pipe.of_list input) 110 | ~m:(module Count_map_reduce_function) 111 | ~param:multiplier 112 | in 113 | assert ( 114 | Option.value_exn sum 115 | = multiplier 116 | * List.fold ~init:0 ~f:( + ) (List.map input ~f:(List.fold ~init:0 ~f:( + )))); 117 | Deferred.unit 118 | ;; 119 | 120 | let test_map_reduce () = 121 | let n = 1000 in 122 | let input = List.init n ~f:Fn.id in 123 | let config = 124 | Rpc_parallel.Map_reduce.Config.create 125 | ~local:5 126 | ~redirect_stderr:`Dev_null 127 | ~redirect_stdout:`Dev_null 128 | () 129 | in 130 | let%bind l = 131 | Rpc_parallel.Map_reduce.map_reduce 132 | config 133 | (Pipe.of_list input) 134 | ~m:(module Concat_map_reduce_function) 135 | ~param:() 136 | in 137 | assert (Option.value_exn l = input); 138 | Deferred.unit 139 | ;; 140 | 141 | let tests = 142 | [ "map_unordered", test_map_unordered 143 | ; "map", test_map 144 | ; "map_reduce_commutative", test_map_reduce_commutative 145 | ; "map_reduce", 
test_map_reduce 146 | ] 147 | ;; 148 | 149 | let () = 150 | Rpc_parallel_krb_public.start_app 151 | ~krb_mode:For_unit_test 152 | (Command.basic_spec Command.Spec.empty ~summary:"Run tests" (fun () -> 153 | Runner.main ~check_fds:false tests)) 154 | ;; 155 | -------------------------------------------------------------------------------- /example/rpc_direct_pipe.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Sum_worker = struct 5 | module Sum_arg = struct 6 | type t = 7 | { max : int 8 | ; delay : Time_float.Span.t 9 | } 10 | [@@deriving bin_io] 11 | end 12 | 13 | module T = struct 14 | type 'worker functions = 15 | { sum : ('worker, Sum_arg.t, string) Rpc_parallel.Function.Direct_pipe.t } 16 | 17 | module Worker_state = struct 18 | type init_arg = unit [@@deriving bin_io] 19 | type t = unit 20 | end 21 | 22 | module Connection_state = struct 23 | type init_arg = unit [@@deriving bin_io] 24 | type t = unit 25 | end 26 | 27 | module Functions 28 | (C : Rpc_parallel.Creator 29 | with type worker_state := Worker_state.t 30 | and type connection_state := Connection_state.t) = 31 | struct 32 | let sum_impl ~worker_state:() ~conn_state:() { Sum_arg.max; delay } writer = 33 | (* We make sure to write to the direct stream writer in a [don't_wait_for] because 34 | according to the docs at lib/async_rpc_kernel/src/rpc.mli: 35 | 36 | Though the implementation function is given a writer immediately, the result of the 37 | client's call to [dispatch] will not be determined until after the implementation 38 | function returns. Elements written before the function returns will be queued up to 39 | be written after the function returns. 
*) 40 | don't_wait_for 41 | (let%bind (_ : int) = 42 | Deferred.List.fold 43 | ~init:0 44 | ~f:(fun acc x -> 45 | let acc = acc + x in 46 | let output = sprintf "Sum_worker.sum: %i\n" acc in 47 | let (_ : [ `Closed | `Flushed of unit Deferred.t ]) = 48 | Rpc.Pipe_rpc.Direct_stream_writer.write writer output 49 | in 50 | let%bind () = after delay in 51 | return acc) 52 | (List.init max ~f:Fn.id) 53 | in 54 | Rpc.Pipe_rpc.Direct_stream_writer.close writer; 55 | return ()); 56 | Deferred.unit 57 | ;; 58 | 59 | let sum = 60 | C.create_direct_pipe 61 | ~f:sum_impl 62 | ~bin_input:Sum_arg.bin_t 63 | ~bin_output:String.bin_t 64 | () 65 | ;; 66 | 67 | let functions = { sum } 68 | let init_worker_state () = return () 69 | let init_connection_state ~connection:_ ~worker_state:_ = return 70 | end 71 | end 72 | 73 | include Rpc_parallel.Make (T) 74 | end 75 | 76 | let main ~max ~delay ~log_dir = 77 | let open Deferred.Or_error.Let_syntax in 78 | let redirect_stdout, redirect_stderr = 79 | match log_dir with 80 | | None -> `Dev_null, `Dev_null 81 | | Some _ -> `File_append "sum.out", `File_append "sum.err" 82 | in 83 | let%bind conn = 84 | Sum_worker.spawn 85 | ~on_failure:Error.raise 86 | ?cd:log_dir 87 | ~shutdown_on:Connection_closed 88 | ~redirect_stdout 89 | ~redirect_stderr 90 | ~connection_state_init_arg:() 91 | () 92 | in 93 | let closed_ivar = Ivar.create () in 94 | let on_write = function 95 | | Rpc.Pipe_rpc.Pipe_message.Closed _ -> 96 | Ivar.fill_exn closed_ivar (); 97 | Rpc.Pipe_rpc.Pipe_response.Continue 98 | | Update s -> 99 | (* Make sure to flush the output after each write so we can show we are receiving 100 | responses in a streaming fashion *) 101 | Core.printf "%s%!"
s; 102 | Rpc.Pipe_rpc.Pipe_response.Continue 103 | in 104 | let%bind (_ : Sum_worker.worker Rpc_parallel.Function.Direct_pipe.Id.t) = 105 | Sum_worker.Connection.run 106 | conn 107 | ~f:Sum_worker.functions.sum 108 | ~arg:({ max; delay }, on_write) 109 | in 110 | Ivar.read closed_ivar |> Deferred.ok 111 | ;; 112 | 113 | let command = 114 | Command.async_or_error 115 | ~summary:"Example using rpc_parallel with a direct pipe" 116 | (let%map_open.Command max = flag "max" (required int) ~doc:"NUM max number to sum up" 117 | and delay = 118 | flag 119 | "delay" 120 | (required Time_float_unix.Span.arg_type) 121 | ~doc:"SPAN delay between writes" 122 | and log_dir = 123 | flag "log-dir" (optional string) ~doc:"DIR Folder to write worker logs to" 124 | in 125 | fun () -> main ~max ~delay ~log_dir) 126 | ;; 127 | 128 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 129 | -------------------------------------------------------------------------------- /doc/main.org: -------------------------------------------------------------------------------- 1 | #+TITLE: Rpc_parallel 2 | 3 | =Rpc_parallel= is a library that uses processes to achieve 4 | parallelism. Because of the garbage collector and async locks, 5 | thread-level parallelism in OCaml is not achievable. 6 | 7 | The library works by spawning processes that start rpc servers. The 8 | spawned process is running /proc/self/exe (i.e. the same executable as 9 | the running process). Communication between a "master" and a "worker" 10 | involves sending rpc queries and receiving rpc responses. The "worker" 11 | already has the code to do computation because it is running the same 12 | binary! 13 | 14 | * Getting started 15 | 16 | If you are looking for an entry point into the library, take a look at the 17 | sum_worker.ml example in lib/rpc_parallel/public/expect_test. This is a test that 18 | demonstrates how to spawn a worker and run a function on it. 
A lot of the other 19 | examples are used to test specific aspects of the library, so they might not be 20 | as useful as a source of documentation. 21 | 22 | * Backends 23 | 24 | There are two backends for Rpc_parallel: the unauthenticated and kerberized 25 | backends, available in the rpc_parallel_unauthenticated and rpc_parallel_krb 26 | libraries respectively. They provide identical functionality, except that the 27 | unauthenticated library uses unencrypted RPCs and the kerberized library uses 28 | kerberized RPCs. The kerberized library only allows connections between masters 29 | and workers that have the same kerberos principal, ensuring that other users 30 | cannot connect to your cluster. 31 | 32 | When using Rpc_parallel, you will mostly use functions and modules from the 33 | rpc_parallel library. However, to initialize the library, you will need to 34 | either implement your own Backend module (not recommended unless you have 35 | special needs) or use the initialization functions in rpc_parallel_krb 36 | (recommended) or rpc_parallel_unauthenticated (unkerberized, "legacy") which 37 | handle producing a Backend module for you. 38 | 39 | * Mental Model 40 | 41 | - =Worker.t= identifies a worker rpc server 42 | - =spawn= (=serve=) starts a worker rpc server in another process (the same 43 | process) 44 | - =client= connects to a worker rpc server 45 | - =run= dispatches on a connection to a worker rpc server 46 | 47 | * Top-level 48 | 49 | It is highly recommended for =Rpc_parallel.start_app= and =Rpc_parallel.Make= 50 | calls to be top-level. But the real requirements are: 51 | 52 | 1) The master's state is initialized before any calls to =spawn=. This will be 53 | achieved either by =Rpc_parallel.start_app= or 54 | =Rpc_parallel.Expert.start_master_server_exn=. 55 | 56 | 2) Spawned workers (runs of your executable with a certain environment variable 57 | set) must start running as a worker.
This will be achieved either by 58 | =Rpc_parallel.start_app= or =Rpc_parallel.Expert.worker_command=. 59 | 60 | 3) Spawned workers must be able to find their function implementations when they 61 | start running as a worker. These implementations are gathered on the 62 | application of the =Rpc_parallel.Make= functor. So that masters and workers 63 | agree on certain generated ids, all of the worker implementations must be 64 | defined in all code paths leading up to =start_app=, 65 | =start_master_server_exn=, or =worker_command=, and they should be defined in 66 | the same order. 67 | 68 | * Monitoring your workers 69 | 70 | Uncaught exceptions in workers will always result in the worker 71 | calling =Shutdown.shutdown=. The master can be notified of these 72 | exceptions in multiple ways: 73 | 74 | - If the exception occurred in a function implementation =f= before =f= is 75 | determined, the exception will be returned to the caller. E.g. the caller 76 | of =spawn= or =run= will get an =Error.t= describing the exception. 77 | 78 | - If the exception occurred after =f= is determined, =on_failure exn= will be 79 | called (in =Monitor.current ()= at the time of =spawn=) in the spawning 80 | process. 81 | 82 | - If =redirect_stderr= specifies a file, the worker will also write its 83 | exception to that file before shutting down. 84 | 85 | * Dealing with long async cycles 86 | 87 | Long async cycles can cause the connections to your workers to close. 88 | If you are using =~shutdown_on:Connection_closed= (which is recommended!), 89 | then this connection closing will result in your worker shutting down. 90 | 91 | You can bump the =max_message_size=, =heartbeat_config=, and 92 | =handshake_timeout= settings that are used for all rpc communication.
93 | These settings are determined by (in descending order of preference): 94 | 95 | 1) The environment variable =RPC_PARALLEL_RPC_SETTINGS= (see 96 | =Rpc_settings= in =lib/rpc_parallel/src/parallel.ml= for how to 97 | construct a value) 98 | 2) Arguments supplied to =start_app= or =Expert.start_master_server_exn= 99 | 3) The defaults supplied by the =Rpc= library 100 | -------------------------------------------------------------------------------- /example/worker_binprot.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | (* This demonstrates how to take advantage of the feature in the 5 | [Rpc_parallel] library that a [Worker.t] type is defined [with bin_io]. 6 | *) 7 | 8 | module Worker = struct 9 | module T = struct 10 | (* A [Worker.t] implements a single function, [ping : unit -> int]. The 11 | [Dispatcher] worker below implements [dispatch : Worker.t -> int] *) 12 | type 'worker functions = { ping : ('worker, unit, int) Rpc_parallel.Function.t } 13 | 14 | (* Internal state for each [Worker.t].
Every [Worker.t] has a counter that gets 15 | incremented anytime it gets pinged *) 16 | module Worker_state = struct 17 | type init_arg = unit [@@deriving bin_io] 18 | type t = int ref 19 | end 20 | 21 | module Connection_state = struct 22 | type init_arg = unit [@@deriving bin_io] 23 | type t = unit 24 | end 25 | 26 | module Functions 27 | (C : Rpc_parallel.Creator 28 | with type worker_state := Worker_state.t 29 | and type connection_state := Connection_state.t) = 30 | struct 31 | (* When a worker gets a [ping ()] call, increment its counter and return the current 32 | value *) 33 | let ping_impl ~worker_state:counter ~conn_state:() () = 34 | incr counter; 35 | return !counter 36 | ;; 37 | 38 | let ping = C.create_rpc ~f:ping_impl ~bin_input:Unit.bin_t ~bin_output:Int.bin_t () 39 | let functions = { ping } 40 | let init_worker_state () = return (ref 0) 41 | let init_connection_state ~connection:_ ~worker_state:_ = return 42 | end 43 | end 44 | 45 | include Rpc_parallel.Make (T) 46 | end 47 | 48 | module Dispatcher = struct 49 | module T = struct 50 | type 'worker functions = 51 | { dispatch : ('worker, Worker.t, int) Rpc_parallel.Function.t } 52 | 53 | module Worker_state = struct 54 | type init_arg = unit [@@deriving bin_io] 55 | type t = unit 56 | end 57 | 58 | module Connection_state = struct 59 | type init_arg = unit [@@deriving bin_io] 60 | type t = unit 61 | end 62 | 63 | module Functions 64 | (C : Rpc_parallel.Creator 65 | with type worker_state := Worker_state.t 66 | and type connection_state := Connection_state.t) = 67 | struct 68 | (* When a worker gets a [dispatch worker] call, call [ping] on the supplied worker 69 | and return the same result. 
*) 70 | let dispatch_impl ~worker_state:_ ~conn_state:() worker = 71 | Worker.Connection.with_client worker () ~f:(fun conn -> 72 | Worker.Connection.run_exn conn ~f:Worker.functions.ping ~arg:()) 73 | >>| Or_error.ok_exn 74 | ;; 75 | 76 | let dispatch = 77 | C.create_rpc ~f:dispatch_impl ~bin_input:Worker.bin_t ~bin_output:Int.bin_t () 78 | ;; 79 | 80 | let functions = { dispatch } 81 | let init_worker_state () = Deferred.unit 82 | let init_connection_state ~connection:_ ~worker_state:_ = return 83 | end 84 | end 85 | 86 | include Rpc_parallel.Make (T) 87 | end 88 | 89 | let command = 90 | Command.async_spec_or_error 91 | ~summary: 92 | "Example of a worker taking in another worker as an argument to one of its \ 93 | functions" 94 | Command.Spec.(empty) 95 | (fun () -> 96 | Worker.spawn 97 | ~shutdown_on:Heartbeater_connection_timeout 98 | ~redirect_stdout:`Dev_null 99 | ~redirect_stderr:`Dev_null 100 | ~on_failure:Error.raise 101 | () 102 | >>=? fun worker -> 103 | Worker.Connection.client worker () 104 | >>=? fun worker_conn -> 105 | Dispatcher.spawn 106 | ~shutdown_on:Connection_closed 107 | ~redirect_stdout:`Dev_null 108 | ~redirect_stderr:`Dev_null 109 | ~on_failure:Error.raise 110 | ~connection_state_init_arg:() 111 | () 112 | >>=? fun dispatcher_conn -> 113 | let repeat job n = 114 | Deferred.List.iter ~how:`Sequential (List.range 0 n) ~f:(fun _i -> job ()) 115 | in 116 | let%bind () = 117 | Deferred.all_unit 118 | [ repeat 119 | (fun () -> 120 | let%bind count = 121 | Dispatcher.Connection.run_exn 122 | dispatcher_conn 123 | ~f:Dispatcher.functions.dispatch 124 | ~arg:worker 125 | in 126 | Core.Printf.printf "worker pinged from dispatcher: %d\n%!" count; 127 | return ()) 128 | 10 129 | ; repeat 130 | (fun () -> 131 | let%bind count = 132 | Worker.Connection.run_exn worker_conn ~f:Worker.functions.ping ~arg:() 133 | in 134 | Core.Printf.printf "worker pinged from master: %d\n%!" 
count; 135 | return ()) 136 | 10 137 | ] 138 | in 139 | Deferred.Or_error.ok_unit) 140 | ~behave_nicely_in_pipeline:false 141 | ;; 142 | 143 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 144 | -------------------------------------------------------------------------------- /example/abort_direct_pipe.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Async 3 | 4 | module Worker = struct 5 | module T = struct 6 | type 'worker functions = 7 | { ping : ('worker, int, unit) Rpc_parallel.Function.t 8 | ; pongs : ('worker, unit, int) Rpc_parallel.Function.Direct_pipe.t 9 | ; pongs_closed : ('worker, unit, unit) Rpc_parallel.Function.t 10 | } 11 | 12 | module Worker_state = struct 13 | type t = 14 | { ping : (int, read_write) Mvar.t 15 | ; pongs_closed : unit Ivar.t 16 | } 17 | 18 | type init_arg = unit [@@deriving bin_io] 19 | end 20 | 21 | module Connection_state = struct 22 | type t = unit 23 | type init_arg = unit [@@deriving bin_io] 24 | end 25 | 26 | module Functions 27 | (C : Rpc_parallel.Creator 28 | with type worker_state := Worker_state.t 29 | and type connection_state := Connection_state.t) = 30 | struct 31 | let init_worker_state () = 32 | return { Worker_state.ping = Mvar.create (); pongs_closed = Ivar.create () } 33 | ;; 34 | 35 | let init_connection_state 36 | ~connection:(_ : Rpc.Connection.t) 37 | ~worker_state:(_ : Worker_state.t) 38 | () 39 | = 40 | return () 41 | ;; 42 | 43 | let functions = 44 | { ping = 45 | C.create_rpc 46 | ~f:(fun ~worker_state:{ ping; pongs_closed = _ } ~conn_state:() i -> 47 | Mvar.put ping i) 48 | ~bin_input:[%bin_type_class: int] 49 | ~bin_output:[%bin_type_class: unit] 50 | () 51 | ; pongs = 52 | C.create_direct_pipe 53 | ~f:(fun ~worker_state:{ ping; pongs_closed } ~conn_state:() () writer -> 54 | Deferred.repeat_until_finished () (fun () -> 55 | let module Writer = Rpc.Pipe_rpc.Direct_stream_writer in 56 | match%map 57 | choose 58 | [ 
choice (Mvar.take ping) (Writer.write_without_pushback writer) 59 | ; choice (Writer.closed writer) (fun () -> `Closed) 60 | ] 61 | with 62 | | `Ok -> `Repeat () 63 | | `Closed -> `Finished ()) 64 | >>> Ivar.fill_exn pongs_closed; 65 | return ()) 66 | ~bin_input:[%bin_type_class: unit] 67 | ~bin_output:[%bin_type_class: int] 68 | () 69 | ; pongs_closed = 70 | C.create_rpc 71 | ~f:(fun ~worker_state:{ ping = _; pongs_closed } ~conn_state:() () -> 72 | Ivar.read pongs_closed) 73 | ~bin_input:[%bin_type_class: unit] 74 | ~bin_output:[%bin_type_class: unit] 75 | () 76 | } 77 | ;; 78 | end 79 | end 80 | 81 | include T 82 | include Rpc_parallel.Make (T) 83 | end 84 | 85 | let command = 86 | Command.async 87 | ~summary:"Abort a direct pipe rpc" 88 | (let%map_open.Command n = 89 | flag_optional_with_default_doc 90 | "n" 91 | int 92 | [%sexp_of: int] 93 | ~default:5 94 | ~doc:"INT number of pings to send" 95 | in 96 | fun () -> 97 | assert (n > 0); 98 | let%bind connection = 99 | Worker.spawn_exn 100 | ~on_failure:Error.raise 101 | ~shutdown_on:Connection_closed 102 | ~redirect_stdout:`Dev_null 103 | ~redirect_stderr:`Dev_null 104 | () 105 | ~connection_state_init_arg:() 106 | in 107 | let ping i = 108 | printf "Ping: %d\n" i; 109 | Worker.Connection.run_exn connection ~f:Worker.functions.ping ~arg:i 110 | in 111 | let the_id = Set_once.create () in 112 | let closed = Ivar.create () in 113 | let%bind id = 114 | Worker.Connection.run_exn 115 | connection 116 | ~f:Worker.functions.pongs 117 | ~arg: 118 | ( () 119 | , fun message -> 120 | (match message with 121 | | Update i -> 122 | printf "Pong: %d\n" i; 123 | let i = succ i in 124 | if i < n 125 | then don't_wait_for (ping i) 126 | else Worker.Connection.abort connection ~id:(Set_once.get_exn the_id) 127 | | Closed reason -> Ivar.fill_exn closed reason); 128 | Continue ) 129 | in 130 | Set_once.set_exn the_id id; 131 | let%bind () = ping 0 in 132 | let%bind reason = Ivar.read closed in 133 | printf !"Closed: %{sexp: [ 
`By_remote_side | `Error of Error.t ]}\n" reason; 134 | let%bind () = 135 | Worker.Connection.run_exn connection ~f:Worker.functions.pongs_closed ~arg:() 136 | in 137 | printf "Worker reports pongs closed\n"; 138 | Worker.Connection.close connection) 139 | ~behave_nicely_in_pipeline:false 140 | ;; 141 | 142 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 143 | -------------------------------------------------------------------------------- /test/timeouts.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | module Rpc_settings = Rpc_parallel.Rpc_settings 4 | 5 | module Unresponsive_worker = struct 6 | module T = struct 7 | (* An [Unresponsive_worker.t] implements a single function [wait : int -> 8 | unit]. It waits for the specified number of seconds (blocking Async) and then 9 | returns (). *) 10 | type 'worker functions = { wait : ('worker, int, unit) Rpc_parallel.Function.t } 11 | 12 | module Worker_state = struct 13 | type init_arg = unit [@@deriving bin_io] 14 | type t = unit 15 | end 16 | 17 | module Connection_state = struct 18 | type init_arg = unit [@@deriving bin_io] 19 | type t = unit 20 | end 21 | 22 | module Functions 23 | (C : Rpc_parallel.Creator 24 | with type worker_state := Worker_state.t 25 | and type connection_state := Connection_state.t) = 26 | struct 27 | let wait_impl ~worker_state:() ~conn_state:() seconds = 28 | Core_unix.sleep seconds; 29 | return () 30 | ;; 31 | 32 | let wait = C.create_rpc ~f:wait_impl ~bin_input:Int.bin_t ~bin_output:Unit.bin_t () 33 | let functions = { wait } 34 | let init_worker_state () = Deferred.unit 35 | let init_connection_state ~connection:_ ~worker_state:_ = return 36 | end 37 | end 38 | 39 | include Rpc_parallel.Make (T) 40 | end 41 | 42 | let timeout_command = 43 | Command.async_spec 44 | ~summary:"Exercise timeouts in Rpc parallel" 45 | Command.Spec.(empty +> flag "sleep-for" (required int) ~doc:"") 46 | (fun 
sleep_for () -> 47 | let%bind conn = 48 | Unresponsive_worker.spawn_exn 49 | ~shutdown_on:Connection_closed 50 | ~redirect_stdout:`Dev_null 51 | ~redirect_stderr:`Dev_null 52 | ~on_failure:(fun e -> Error.raise (Error.tag e ~tag:"spawn_exn")) 53 | ~connection_state_init_arg:() 54 | () 55 | in 56 | match%map 57 | Unresponsive_worker.Connection.run 58 | conn 59 | ~f:Unresponsive_worker.functions.wait 60 | ~arg:sleep_for 61 | with 62 | | Error e -> printf !"%{sexp:Error.t}\n" e 63 | | Ok () -> printf "unresponsive worker returned\n") 64 | ~behave_nicely_in_pipeline:false 65 | ;; 66 | 67 | let report_rpc_settings_command = 68 | Command.async 69 | ~summary:"Test for rpc_settings alignment" 70 | (let%map_open.Command () = return () in 71 | fun () -> 72 | let%bind spawned_worker = 73 | Unresponsive_worker.spawn_exn 74 | ~shutdown_on:Heartbeater_connection_timeout 75 | ~redirect_stdout:`Dev_null 76 | ~redirect_stderr:`Dev_null 77 | ~on_failure:(fun e -> Error.raise (Error.tag e ~tag:"spawn_exn")) 78 | () 79 | in 80 | let%bind served_worker = Unresponsive_worker.serve () in 81 | let master_rpc_settings = 82 | Unresponsive_worker.For_internal_testing.master_app_rpc_settings () 83 | in 84 | printf !"master : %{sexp:Rpc_settings.t}\n" master_rpc_settings; 85 | let print_worker_rpc_settings ~which worker = 86 | let%bind worker_conn = Unresponsive_worker.Connection.client_exn worker () in 87 | let worker_client_side_rpc_settings = Unresponsive_worker.rpc_settings worker in 88 | let%bind worker_server_side_rpc_settings = 89 | Unresponsive_worker.Connection.run_exn 90 | worker_conn 91 | ~f:Rpc_parallel.Function.For_internal_testing.worker_server_rpc_settings 92 | ~arg:() 93 | in 94 | printf 95 | !"%s worker (client side) : %{sexp:Rpc_settings.t}\n" 96 | which 97 | worker_client_side_rpc_settings; 98 | printf 99 | !"%s worker (server side) : %{sexp:Rpc_settings.t}\n" 100 | which 101 | worker_server_side_rpc_settings; 102 | return () 103 | in 104 | let%bind () = 
print_worker_rpc_settings ~which:"spawned" spawned_worker in 105 | let%bind () = print_worker_rpc_settings ~which:"served" served_worker in 106 | return ()) 107 | ~behave_nicely_in_pipeline:false 108 | ;; 109 | 110 | let app_rpc_settings = 111 | Rpc_settings.For_internal_testing.create_with_env_override 112 | ~env_var:"APP_RPC_SETTINGS_FOR_TEST" 113 | ~max_message_size:None 114 | ~buffer_age_limit:None 115 | ~handshake_timeout:None 116 | ~heartbeat_config:None 117 | ;; 118 | 119 | let () = 120 | let { Rpc_settings.max_message_size = rpc_max_message_size 121 | ; buffer_age_limit = rpc_buffer_age_limit 122 | ; handshake_timeout = rpc_handshake_timeout 123 | ; heartbeat_config = rpc_heartbeat_config 124 | } 125 | = 126 | app_rpc_settings 127 | in 128 | Command.group 129 | ~summary:"timeout and rpc-heartbeat testing commands" 130 | [ "timeout", timeout_command; "rpc-settings", report_rpc_settings_command ] 131 | |> Rpc_parallel_krb_public.start_app 132 | ?rpc_max_message_size 133 | ?rpc_buffer_age_limit 134 | ?rpc_handshake_timeout 135 | ?rpc_heartbeat_config 136 | ~krb_mode:For_unit_test 137 | ;; 138 | -------------------------------------------------------------------------------- /test/copy_executable.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Poly 3 | open Async 4 | 5 | (* Tests for the [Remote_executable] module *) 6 | 7 | module Worker = struct 8 | module T = struct 9 | type 'worker functions = { ping : ('worker, unit, unit) Rpc_parallel.Function.t } 10 | 11 | module Worker_state = struct 12 | type init_arg = unit [@@deriving bin_io] 13 | type t = unit 14 | end 15 | 16 | module Connection_state = struct 17 | type init_arg = unit [@@deriving bin_io] 18 | type t = unit 19 | end 20 | 21 | module Functions 22 | (C : Rpc_parallel.Creator 23 | with type worker_state := Worker_state.t 24 | and type connection_state := Connection_state.t) = 25 | struct 26 | let ping = 27 | C.create_rpc 28 | ~f:(fun 
~worker_state:() ~conn_state:() -> return) 29 | ~bin_input:Unit.bin_t 30 | ~bin_output:Unit.bin_t 31 | () 32 | ;; 33 | 34 | let functions = { ping } 35 | let init_worker_state () = Deferred.unit 36 | let init_connection_state ~connection:_ ~worker_state:_ = return 37 | end 38 | end 39 | 40 | include Rpc_parallel.Make (T) 41 | end 42 | 43 | (* Copy an executable to a remote host *) 44 | let copy_to_host_test worker dir = 45 | Rpc_parallel.Remote_executable.copy_to_host ~executable_dir:dir worker 46 | >>=? fun executable -> 47 | Worker.spawn 48 | ~how:(Rpc_parallel.How_to_run.remote executable) 49 | ~shutdown_on:Connection_closed 50 | ~redirect_stdout:`Dev_null 51 | ~redirect_stderr:`Dev_null 52 | ~on_failure:Error.raise 53 | ~connection_state_init_arg:() 54 | () 55 | >>=? fun conn -> 56 | Worker.Connection.run conn ~f:Worker.functions.ping ~arg:() >>|? fun () -> executable 57 | ;; 58 | 59 | (* Spawn a worker using an existing remote executable *) 60 | let existing_on_host_test worker path = 61 | let existing_executable = 62 | Rpc_parallel.Remote_executable.existing_on_host ~executable_path:path worker 63 | in 64 | Worker.spawn 65 | ~how:(Rpc_parallel.How_to_run.remote existing_executable) 66 | ~shutdown_on:Connection_closed 67 | ~redirect_stdout:`Dev_null 68 | ~redirect_stderr:`Dev_null 69 | ~on_failure:Error.raise 70 | ~connection_state_init_arg:() 71 | () 72 | >>=? fun conn -> Worker.Connection.run conn ~f:Worker.functions.ping ~arg:() 73 | ;; 74 | 75 | (* Make sure the library appropriately fails when trying to spawn a worker from a 76 | mismatching executable *) 77 | let mismatching_executable_test worker dir = 78 | Rpc_parallel.Remote_executable.copy_to_host ~executable_dir:dir worker 79 | >>=? fun executable -> 80 | let path = Rpc_parallel.Remote_executable.path executable in 81 | (* When building with shared-cache, build artifacts in general and executables in 82 | particular are read-only. 
[cp] and [scp] preserve such permissions, which would 83 | cause the ">>" below to fail if we didn't chmod. *) 84 | Process.run 85 | ~prog:"ssh" 86 | ~args: 87 | [ "-o" 88 | ; "StrictHostKeyChecking=no" 89 | ; worker 90 | ; sprintf "chmod +w %s; echo 0 >> %s" path path 91 | ] 92 | () 93 | >>=? fun _ -> 94 | match%bind 95 | Worker.spawn 96 | ~how:(Rpc_parallel.How_to_run.remote executable) 97 | ~shutdown_on:Heartbeater_connection_timeout 98 | ~redirect_stdout:`Dev_null 99 | ~redirect_stderr:`Dev_null 100 | ~on_failure:Error.raise 101 | () 102 | with 103 | | Error e -> 104 | let error = Error.to_string_hum e in 105 | let expected = 106 | sprintf 107 | "The remote executable %s:%s does not match the local executable" 108 | worker 109 | path 110 | in 111 | assert (error = expected); 112 | Rpc_parallel.Remote_executable.delete executable 113 | | Ok (_ : Worker.worker) -> assert false 114 | ;; 115 | 116 | let delete_test executable = Rpc_parallel.Remote_executable.delete executable 117 | 118 | let command = 119 | Command.async_spec_or_error 120 | ~summary:"Simple use of Async Rpc_parallel V2" 121 | Command.Spec.( 122 | empty 123 | +> flag "-worker" (required string) ~doc:"worker to run copy test on" 124 | +> flag "-dir" (required string) ~doc:"directory to copy executable to") 125 | (fun worker dir () -> 126 | let get_count path = 127 | let%map res_or_err = 128 | Process.run 129 | ~prog:"ssh" 130 | ~args: 131 | [ "-o" 132 | ; "StrictHostKeyChecking=no" 133 | ; worker 134 | ; sprintf "find %s* | wc -l" path 135 | ] 136 | () 137 | in 138 | Or_error.ok_exn res_or_err |> String.strip |> Int.of_string 139 | in 140 | let%bind our_binary = 141 | Unix.readlink (sprintf "/proc/%d/exe" (Pid.to_int (Unix.getpid ()))) 142 | in 143 | let filename = Filename.basename our_binary in 144 | let%bind old_count = get_count (dir ^/ filename) in 145 | copy_to_host_test worker dir 146 | >>=? 
fun executable -> 147 | existing_on_host_test worker (Rpc_parallel.Remote_executable.path executable) 148 | >>=? fun () -> 149 | mismatching_executable_test worker dir 150 | >>=? fun () -> 151 | delete_test executable 152 | >>=? fun () -> 153 | let%map new_count = get_count (dir ^/ filename) in 154 | assert (old_count = new_count); 155 | Ok (printf "Ok\n")) 156 | ~behave_nicely_in_pipeline:false 157 | ;; 158 | 159 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 160 | -------------------------------------------------------------------------------- /example/workers_as_masters.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | (* An example demonstrating how workers can themselves act as masters and spawn more 5 | workers. We have two layers of workers, where the first layer spawns the workers of the 6 | second layer. *) 7 | 8 | module Secondary_worker = struct 9 | module T = struct 10 | type 'worker functions = { ping : ('worker, unit, string) Rpc_parallel.Function.t } 11 | 12 | module Worker_state = struct 13 | type init_arg = unit [@@deriving bin_io] 14 | type t = unit 15 | end 16 | 17 | module Connection_state = struct 18 | type init_arg = unit [@@deriving bin_io] 19 | type t = unit 20 | end 21 | 22 | module Functions 23 | (C : Rpc_parallel.Creator 24 | with type worker_state := Worker_state.t 25 | and type connection_state := Connection_state.t) = 26 | struct 27 | let ping_impl ~worker_state:() ~conn_state:() () = return "pong" 28 | 29 | let ping = 30 | C.create_rpc ~f:ping_impl ~bin_input:Unit.bin_t ~bin_output:String.bin_t () 31 | ;; 32 | 33 | let functions = { ping } 34 | let init_worker_state () = Deferred.unit 35 | let init_connection_state ~connection:_ ~worker_state:_ = return 36 | end 37 | end 38 | 39 | include Rpc_parallel.Make (T) 40 | end 41 | 42 | module Primary_worker = struct 43 | module T = struct 44 | type ping_result = string list [@@deriving 
bin_io] 45 | 46 | type 'worker functions = 47 | { run : ('worker, int, unit) Rpc_parallel.Function.t 48 | ; ping : ('worker, unit, ping_result) Rpc_parallel.Function.t 49 | } 50 | 51 | let workers = Bag.create () 52 | let next_worker_name () = sprintf "Secondary worker #%i" (Bag.length workers) 53 | 54 | module Worker_state = struct 55 | type init_arg = unit [@@deriving bin_io] 56 | type t = unit 57 | end 58 | 59 | module Connection_state = struct 60 | type init_arg = unit [@@deriving bin_io] 61 | type t = unit 62 | end 63 | 64 | module Functions 65 | (C : Rpc_parallel.Creator 66 | with type worker_state := Worker_state.t 67 | and type connection_state := Connection_state.t) = 68 | struct 69 | let run_impl ~worker_state:() ~conn_state:() num_workers = 70 | Deferred.List.init ~how:`Parallel num_workers ~f:(fun _i -> 71 | let%map secondary_worker = 72 | Secondary_worker.spawn_exn 73 | ~shutdown_on:Heartbeater_connection_timeout 74 | ~redirect_stdout:`Dev_null 75 | ~redirect_stderr:`Dev_null 76 | ~on_failure:Error.raise 77 | () 78 | in 79 | ignore 80 | (Bag.add workers (next_worker_name (), secondary_worker) 81 | : (string * Secondary_worker.worker) Bag.Elt.t)) 82 | >>| ignore 83 | ;; 84 | 85 | let run = C.create_rpc ~f:run_impl ~bin_input:Int.bin_t ~bin_output:Unit.bin_t () 86 | 87 | let ping_impl ~worker_state:() ~conn_state:() () = 88 | Deferred.List.map ~how:`Parallel (Bag.to_list workers) ~f:(fun (name, worker) -> 89 | match%bind Secondary_worker.Connection.client worker () with 90 | | Error e -> failwiths "failed connecting to worker" e [%sexp_of: Error.t] 91 | | Ok conn -> 92 | (match%map 93 | Secondary_worker.Connection.run 94 | conn 95 | ~arg:() 96 | ~f:Secondary_worker.functions.ping 97 | with 98 | | Error e -> sprintf "%s: failed (%s)" name (Error.to_string_hum e) 99 | | Ok s -> sprintf "%s: %s" name s)) 100 | ;; 101 | 102 | let ping = 103 | C.create_rpc ~f:ping_impl ~bin_input:Unit.bin_t ~bin_output:bin_ping_result () 104 | ;; 105 | 106 | let functions = { 
run; ping } 107 | 108 | let init_worker_state () = 109 | Bag.clear workers; 110 | Deferred.unit 111 | ;; 112 | 113 | let init_connection_state ~connection:_ ~worker_state:_ = return 114 | end 115 | end 116 | 117 | include Rpc_parallel.Make (T) 118 | end 119 | 120 | let command = 121 | (* Make sure to always use [Command.async] *) 122 | Command.async_spec_or_error 123 | ~summary:"Simple use of Async Rpc_parallel V2" 124 | Command.Spec.( 125 | empty 126 | +> flag "primary" (required int) ~doc:" Number of primary workers to spawn" 127 | +> flag 128 | "secondary" 129 | (required int) 130 | ~doc:" Number of secondary workers each primary worker should spawn") 131 | (fun primary secondary () -> 132 | Deferred.Or_error.List.init ~how:`Parallel primary ~f:(fun worker_id -> 133 | Primary_worker.spawn 134 | ~shutdown_on:Connection_closed 135 | ~redirect_stdout:`Dev_null 136 | ~redirect_stderr:`Dev_null 137 | ~on_failure:Error.raise 138 | ~connection_state_init_arg:() 139 | () 140 | >>=? fun conn -> 141 | Primary_worker.Connection.run conn ~f:Primary_worker.functions.run ~arg:secondary 142 | >>=? fun () -> 143 | Primary_worker.Connection.run conn ~f:Primary_worker.functions.ping ~arg:() 144 | >>|? fun ping_results -> 145 | List.map ping_results ~f:(fun s -> sprintf "Primary worker #%i: %s" worker_id s)) 146 | >>|? 
fun l -> List.iter (List.join l) ~f:(printf "%s\n%!")) 147 | ~behave_nicely_in_pipeline:false 148 | ;; 149 | 150 | (* This call to [Rpc_parallel.start_app] must be top level *) 151 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 152 | -------------------------------------------------------------------------------- /src/managed.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module type Worker = sig 5 | type t [@@deriving sexp_of] 6 | type unmanaged_t 7 | type 'a functions 8 | 9 | val functions : unmanaged_t functions 10 | 11 | type worker_state_init_arg 12 | type connection_state_init_arg 13 | 14 | module Id : Identifiable 15 | 16 | val id : t -> Id.t 17 | 18 | val spawn 19 | : ?how:How_to_run.t 20 | -> ?name:string 21 | -> ?env:(string * string) list 22 | -> ?connection_timeout:Time_float.Span.t 23 | -> ?cd:string 24 | -> ?umask:int 25 | -> redirect_stdout:Fd_redirection.t 26 | -> redirect_stderr:Fd_redirection.t 27 | -> worker_state_init_arg 28 | -> connection_state_init_arg 29 | -> on_failure:(Error.t -> unit) 30 | -> on_connection_to_worker_closed:(Error.t -> unit) 31 | -> t Or_error.t Deferred.t 32 | 33 | val spawn_exn 34 | : ?how:How_to_run.t 35 | -> ?name:string 36 | -> ?env:(string * string) list 37 | -> ?connection_timeout:Time_float.Span.t 38 | -> ?cd:string 39 | -> ?umask:int 40 | -> redirect_stdout:Fd_redirection.t 41 | -> redirect_stderr:Fd_redirection.t 42 | -> worker_state_init_arg 43 | -> connection_state_init_arg 44 | -> on_failure:(Error.t -> unit) 45 | -> on_connection_to_worker_closed:(Error.t -> unit) 46 | -> t Deferred.t 47 | 48 | val run 49 | : t 50 | -> f:(unmanaged_t, 'query, 'response) Parallel.Function.t 51 | -> arg:'query 52 | -> 'response Or_error.t Deferred.t 53 | 54 | val run_exn 55 | : t 56 | -> f:(unmanaged_t, 'query, 'response) Parallel.Function.t 57 | -> arg:'query 58 | -> 'response Deferred.t 59 | 60 | val kill : t -> unit 
Or_error.t Deferred.t 61 | val kill_exn : t -> unit Deferred.t 62 | end 63 | 64 | module Make (S : Parallel.Worker_spec) = struct 65 | module Unmanaged = Parallel.Make (S) 66 | module Id = Utils.Worker_id 67 | 68 | type nonrec t = 69 | { unmanaged : Unmanaged.t 70 | ; connection_state_init_arg : S.Connection_state.init_arg 71 | ; id : Id.t 72 | } 73 | 74 | type unmanaged_t = Unmanaged.t 75 | 76 | type conn = 77 | [ `Pending of Unmanaged.Connection.t Or_error.t Ivar.t 78 | | `Connected of Unmanaged.Connection.t 79 | ] 80 | 81 | let sexp_of_t t = [%sexp_of: Unmanaged.t] t.unmanaged 82 | let id t = t.id 83 | let functions = Unmanaged.functions 84 | let workers : conn Id.Table.t = Id.Table.create () 85 | 86 | let get_connection { unmanaged = t; connection_state_init_arg; id } = 87 | match Hashtbl.find workers id with 88 | | Some (`Pending ivar) -> Ivar.read ivar 89 | | Some (`Connected conn) -> Deferred.Or_error.return conn 90 | | None -> 91 | let ivar = Ivar.create () in 92 | Hashtbl.add_exn workers ~key:id ~data:(`Pending ivar); 93 | (match%map Unmanaged.Connection.client t connection_state_init_arg with 94 | | Error e -> 95 | Ivar.fill_exn ivar (Error e); 96 | Hashtbl.remove workers id; 97 | Error e 98 | | Ok conn -> 99 | Ivar.fill_exn ivar (Ok conn); 100 | Hashtbl.set workers ~key:id ~data:(`Connected conn); 101 | (Unmanaged.Connection.close_finished conn >>> fun () -> Hashtbl.remove workers id); 102 | Ok conn) 103 | ;; 104 | 105 | let with_shutdown_on_error worker ~f = 106 | match%bind f () with 107 | | Ok _ as ret -> return ret 108 | | Error _ as ret -> 109 | let%bind (_ : unit Or_error.t) = Unmanaged.shutdown worker in 110 | return ret 111 | ;; 112 | 113 | let spawn 114 | ?how 115 | ?name 116 | ?env 117 | ?connection_timeout 118 | ?cd 119 | ?umask 120 | ~redirect_stdout 121 | ~redirect_stderr 122 | worker_state_init_arg 123 | connection_state_init_arg 124 | ~on_failure 125 | ~on_connection_to_worker_closed 126 | = 127 | Unmanaged.spawn 128 | ?how 129 | ?env 130 
| ?name 131 | ?connection_timeout 132 | ?cd 133 | ?umask 134 | ~shutdown_on:Heartbeater_connection_timeout 135 | ~redirect_stdout 136 | ~redirect_stderr 137 | worker_state_init_arg 138 | ~on_failure 139 | >>=? fun worker -> 140 | with_shutdown_on_error worker ~f:(fun () -> 141 | Unmanaged.Connection.client worker connection_state_init_arg) 142 | >>|? fun connection -> 143 | let id = Id.create () in 144 | Hashtbl.add_exn workers ~key:id ~data:(`Connected connection); 145 | (Unmanaged.Connection.close_finished connection 146 | >>> fun () -> 147 | match Hashtbl.find workers id with 148 | | None -> 149 | (* [kill] was called, don't report closed connection *) 150 | () 151 | | Some _ -> 152 | Hashtbl.remove workers id; 153 | let error = Error.createf !"Lost connection with worker" in 154 | on_connection_to_worker_closed error); 155 | { unmanaged = worker; connection_state_init_arg; id } 156 | ;; 157 | 158 | let spawn_exn 159 | ?how 160 | ?name 161 | ?env 162 | ?connection_timeout 163 | ?cd 164 | ?umask 165 | ~redirect_stdout 166 | ~redirect_stderr 167 | worker_state_init_arg 168 | connection_init_arg 169 | ~on_failure 170 | ~on_connection_to_worker_closed 171 | = 172 | spawn 173 | ?how 174 | ?name 175 | ?env 176 | ?connection_timeout 177 | ?cd 178 | ?umask 179 | ~redirect_stdout 180 | ~redirect_stderr 181 | worker_state_init_arg 182 | connection_init_arg 183 | ~on_failure 184 | ~on_connection_to_worker_closed 185 | >>| Or_error.ok_exn 186 | ;; 187 | 188 | let kill t = 189 | Hashtbl.remove workers t.id; 190 | Unmanaged.shutdown t.unmanaged 191 | ;; 192 | 193 | let kill_exn t = kill t >>| Or_error.ok_exn 194 | 195 | let run t ~f ~arg = 196 | get_connection t >>=? 
fun conn -> Unmanaged.Connection.run conn ~f ~arg 197 | ;; 198 | 199 | let run_exn t ~f ~arg = run t ~f ~arg >>| Or_error.ok_exn 200 | end 201 | -------------------------------------------------------------------------------- /test/ssh_test_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # NOTE: This file was written by AI. 3 | 4 | set -uo pipefail 5 | 6 | TEMP_DIR="" 7 | SSH_PORT_FILE="" 8 | SSHD_PID_FILE="" 9 | 10 | find_free_port() { 11 | python3 -c " 12 | import socket 13 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 14 | s.bind(('127.0.0.1', 0)) 15 | port = s.getsockname()[1] 16 | s.close() 17 | print(port) 18 | " 19 | } 20 | 21 | cleanup() { 22 | if [[ -f "$SSHD_PID_FILE" ]]; then 23 | local pid 24 | pid=$(cat "$SSHD_PID_FILE" 2> /dev/null || true) 25 | if [[ -n "$pid" ]] && kill -0 "$pid" 2> /dev/null; then 26 | kill "$pid" 2> /dev/null || true 27 | local count=0 28 | while kill -0 "$pid" 2> /dev/null && [[ $count -lt 10 ]]; do 29 | sleep 0.1 30 | count=$((count + 1)) 31 | done 32 | fi 33 | fi 34 | 35 | if [[ -n "$TEMP_DIR" ]] && [[ -d "$TEMP_DIR" ]]; then 36 | rm -rf "$TEMP_DIR" 37 | fi 38 | } 39 | 40 | start_server() { 41 | TEMP_DIR=$(mktemp -d -t ssh_test_server.XXXXXX) 42 | SSH_PORT_FILE="$TEMP_DIR/port" 43 | SSHD_PID_FILE="$TEMP_DIR/sshd.pid" 44 | 45 | ssh-keygen -t rsa -f "$TEMP_DIR/ssh_host_rsa_key" -N "" -q 46 | ssh-keygen -t ed25519 -f "$TEMP_DIR/ssh_host_ed25519_key" -N "" -q 47 | 48 | ssh-keygen -t rsa -f "$TEMP_DIR/id_rsa" -N "" -q 49 | 50 | mkdir -p "$TEMP_DIR/.ssh" 51 | cp "$TEMP_DIR/id_rsa.pub" "$TEMP_DIR/.ssh/authorized_keys" 52 | chmod 700 "$TEMP_DIR/.ssh" 53 | chmod 600 "$TEMP_DIR/.ssh/authorized_keys" 54 | 55 | local port 56 | port=$(find_free_port) 57 | echo "$port" > "$SSH_PORT_FILE" 58 | 59 | cat > "$TEMP_DIR/sshd_config" << EOF 60 | Port $port 61 | ListenAddress 0.0.0.0 62 | ListenAddress :: 63 | HostKey $TEMP_DIR/ssh_host_rsa_key 64 | HostKey 
$TEMP_DIR/ssh_host_ed25519_key 65 | PidFile $SSHD_PID_FILE 66 | AuthorizedKeysFile $TEMP_DIR/.ssh/authorized_keys 67 | PasswordAuthentication no 68 | PubkeyAuthentication yes 69 | ChallengeResponseAuthentication no 70 | UsePAM no 71 | PermitRootLogin no 72 | StrictModes no 73 | Subsystem sftp /usr/lib/openssh/sftp-server 74 | AcceptEnv LANG LC_* 75 | AcceptEnv RPC_PARALLEL_* 76 | AcceptEnv OCAMLRUNPARAM 77 | AcceptEnv TEST_* 78 | AcceptEnv APP_* 79 | AcceptEnv SHOULD_* 80 | AcceptEnv SSH_TEST_SERVER_RUNNING 81 | EOF 82 | 83 | /usr/sbin/sshd -f "$TEMP_DIR/sshd_config" -D > "$TEMP_DIR/sshd.log" 2>&1 & 84 | local sshd_pid=$! 85 | echo "$sshd_pid" > "$SSHD_PID_FILE" 86 | 87 | local count=0 88 | while ! nc -z 127.0.0.1 "$port" 2> /dev/null && [[ $count -lt 50 ]]; do 89 | sleep 0.1 90 | count=$((count + 1)) 91 | done 92 | 93 | if ! nc -z 127.0.0.1 "$port" 2> /dev/null; then 94 | echo "Error: SSH server failed to start on port $port" >&2 95 | cleanup 96 | exit 1 97 | fi 98 | } 99 | 100 | stop_server() { 101 | cleanup 102 | } 103 | 104 | with_server() { 105 | start_server 106 | 107 | local port 108 | port=$(cat "$SSH_PORT_FILE") 109 | 110 | export SSH_TEST_SERVER_RUNNING=1 111 | export SSH_AUTH_SOCK="" 112 | 113 | cat > "$TEMP_DIR/ssh" << EOF 114 | #!/bin/bash 115 | # SSH wrapper that ensures we only connect to localhost or current host during tests 116 | port="$port" 117 | identity="$TEMP_DIR/id_rsa" 118 | current_hostname="\$(hostname)" 119 | 120 | found_host=false 121 | hostname="" 122 | new_args=() 123 | skip_next=false 124 | 125 | for arg in "\$@"; do 126 | if [[ "\$skip_next" == "true" ]]; then 127 | new_args+=("\$arg") 128 | skip_next=false 129 | continue 130 | fi 131 | 132 | case "\$arg" in 133 | -*) 134 | new_args+=("\$arg") 135 | if [[ "\$arg" =~ ^-(o|p|l|i|F|c|D|e|E|I|J|L|m|O|Q|R|S|W|w)$ ]]; then 136 | skip_next=true 137 | fi 138 | ;; 139 | *@*) 140 | hostname="\${arg#*@}" 141 | found_host=true 142 | new_args+=("\$arg") 143 | ;; 144 | *) 145 | if [[ 
"\$found_host" == "false" ]] && [[ ! "\$arg" =~ ^- ]]; then 146 | hostname="\$arg" 147 | found_host=true 148 | fi 149 | new_args+=("\$arg") 150 | ;; 151 | esac 152 | done 153 | 154 | if [[ "\$found_host" == "true" ]] && [[ "\$hostname" != "localhost" ]] && [[ "\$hostname" != "127.0.0.1" ]] && [[ "\$hostname" != "\$current_hostname" ]]; then 155 | echo "ERROR: SSH test wrapper called with unexpected host: \$hostname" >&2 156 | echo "Tests must only connect to localhost or the current host (\$current_hostname) when using the test SSH server" >&2 157 | exit 1 158 | fi 159 | 160 | extra_args=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o IdentityFile="\$identity" -o IdentitiesOnly=yes -o LogLevel=ERROR -p "\$port") 161 | 162 | exec /usr/bin/ssh "\${extra_args[@]}" "\${new_args[@]}" 163 | EOF 164 | chmod +x "$TEMP_DIR/ssh" 165 | 166 | cat > "$TEMP_DIR/scp" << EOF 167 | #!/bin/bash 168 | port="$port" 169 | identity="$TEMP_DIR/id_rsa" 170 | current_hostname="\$(hostname)" 171 | 172 | for arg in "\$@"; do 173 | case "\$arg" in 174 | *:*) 175 | if [[ "\$arg" =~ ^([^@:]+@)?([^:]+): ]]; then 176 | hostname="\${BASH_REMATCH[2]}" 177 | if [[ "\$hostname" != "localhost" ]] && [[ "\$hostname" != "127.0.0.1" ]] && [[ "\$hostname" != "\$current_hostname" ]]; then 178 | echo "ERROR: SCP test wrapper called with unexpected host: \$hostname" >&2 179 | echo "Tests must only connect to localhost or the current host (\$current_hostname) when using the test SSH server" >&2 180 | exit 1 181 | fi 182 | fi 183 | ;; 184 | esac 185 | done 186 | 187 | extra_args=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o IdentityFile="\$identity" -o IdentitiesOnly=yes -o LogLevel=ERROR -P "\$port") 188 | 189 | exec /usr/bin/scp "\${extra_args[@]}" "\$@" 190 | EOF 191 | chmod +x "$TEMP_DIR/scp" 192 | 193 | export PATH="$TEMP_DIR:$PATH" 194 | 195 | "${@:2}" 196 | local exit_code=$? 
197 | 198 | stop_server 199 | 200 | return $exit_code 201 | } 202 | 203 | case "${1:-}" in 204 | start) 205 | start_server 206 | ;; 207 | stop) 208 | stop_server 209 | ;; 210 | with) 211 | with_server "$@" 212 | ;; 213 | *) 214 | echo "Usage: $0 {start|stop|with <command>}" >&2 215 | exit 1 216 | ;; 217 | esac 218 | -------------------------------------------------------------------------------- /test/fd.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | module Spec = struct 5 | type 'worker functions = { ping : ('worker, unit, unit) Rpc_parallel.Function.t } 6 | 7 | module Worker_state = struct 8 | type init_arg = unit [@@deriving bin_io] 9 | type t = unit 10 | end 11 | 12 | module Connection_state = struct 13 | type init_arg = unit [@@deriving bin_io] 14 | type t = unit 15 | end 16 | 17 | module Functions 18 | (C : Rpc_parallel.Creator 19 | with type worker_state := Worker_state.t 20 | and type connection_state := Connection_state.t) = 21 | struct 22 | let ping = 23 | C.create_rpc 24 | ~f:(fun ~worker_state:() ~conn_state:() -> return) 25 | ~bin_input:Unit.bin_t 26 | ~bin_output:Unit.bin_t 27 | () 28 | ;; 29 | 30 | let functions = { ping } 31 | let init_worker_state () = Deferred.unit 32 | let init_connection_state ~connection:_ ~worker_state:_ = return 33 | end 34 | end 35 | 36 | module Worker = Rpc_parallel.Make (Spec) 37 | module Worker_managed = Rpc_parallel.Managed.Make [@alert "-legacy"] (Spec) 38 | 39 | let assert_fds here ~listen ~established = 40 | let here = 41 | sprintf 42 | "%s:%d:%d" 43 | here.Lexing.pos_fname 44 | here.Lexing.pos_lnum 45 | (here.Lexing.pos_cnum - here.Lexing.pos_bol) 46 | in 47 | let pid = Unix.getpid () in 48 | (* [lsof] has a return code of 1 for all errors (including finding no matching file 49 | descriptors for the query).
Unfortunately we never quite know when [lsof] might 50 | return no file descriptors (see comment below), so we have to allow an exit code of 51 | 1. *) 52 | let accept_nonzero_exit = [ 1 ] in 53 | (* There is a bug somewhere (lsof? linux kernel?) that causes lsof to be 54 | nondeterministic. I believe it has something to do with atomicity of /proc/net. 55 | we retry multiple times. Once in a while lsof will drop a tcp connection 56 | (and display "can't identify protocol"), then it will reappear on the next 57 | invocation. I have never seen it fail twice in a row. *) 58 | let rec try_lsof retries = 59 | match%bind 60 | Process.run_lines 61 | ~accept_nonzero_exit 62 | ~prog:"/usr/bin/lsof" 63 | ~args:[ "-ap"; Pid.to_string pid; "-iTCP" ] 64 | () 65 | with 66 | | Error e -> failwiths "Unable to lsof" e [%sexp_of: Error.t] 67 | | Ok lines -> 68 | let filter to_match = 69 | List.filter lines ~f:(fun line -> 70 | Option.is_some (String.substr_index line ~pattern:to_match)) 71 | in 72 | let cur_listen = List.length (filter "(LISTEN)") in 73 | let cur_established = List.length (filter "(ESTABLISHED)") in 74 | (match 75 | [%test_result: string * int] (here, cur_listen) ~expect:(here, listen); 76 | [%test_result: string * int] (here, cur_established) ~expect:(here, established) 77 | with 78 | | () -> Deferred.unit 79 | | exception e -> if retries > 0 then try_lsof (retries - 1) else raise e) 80 | in 81 | try_lsof 10 82 | ;; 83 | 84 | let test_unmanaged () = 85 | let%bind worker = 86 | Worker.spawn_exn 87 | ~shutdown_on:Heartbeater_connection_timeout 88 | ~redirect_stderr:`Dev_null 89 | ~redirect_stdout:`Dev_null 90 | ~on_failure:Error.raise 91 | () 92 | in 93 | (* The only connection we have is from 94 | [Rpc_parallel.Heartbeater.connect_and_shutdown_on_disconnect_exn]. And now finally the 95 | master rpc server has started. 
*) 96 | let%bind () = assert_fds [%here] ~listen:1 ~established:1 in 97 | let%bind connection = Worker.Connection.client_exn worker () in 98 | (* Now we have another connection *) 99 | let%bind () = assert_fds [%here] ~listen:1 ~established:2 in 100 | let%bind () = Worker.Connection.run_exn connection ~f:Worker.functions.ping ~arg:() in 101 | (* Still just these connections *) 102 | let%bind () = assert_fds [%here] ~listen:1 ~established:2 in 103 | let%bind () = Worker.Connection.close connection in 104 | let%bind () = Worker.Connection.close_finished connection in 105 | (* Now just the [Rpc_parallel.Heartbeater] connection *) 106 | let%bind () = assert_fds [%here] ~listen:1 ~established:1 in 107 | let%bind () = Worker.shutdown worker >>| ok_exn in 108 | (* Now no established connections *) 109 | assert_fds [%here] ~listen:1 ~established:0 110 | ;; 111 | 112 | let test_serve () = 113 | let%bind worker = Worker.serve () in 114 | (* We are listening on one port now, no connections because the 115 | [Rpc_parallel.Heartbeater] only connects if [spawn] was called. 
*) 116 | let%bind () = assert_fds [%here] ~listen:1 ~established:0 in 117 | let%bind connection = Worker.Connection.client_exn worker () in 118 | (* Just the single connection we just made, but double counted because both ends 119 | are in this process *) 120 | let%bind () = assert_fds [%here] ~listen:1 ~established:2 in 121 | let%bind () = 122 | Worker.Connection.run_exn connection ~f:Rpc_parallel.Function.close_server ~arg:() 123 | in 124 | (* Existing connections to the in-process worker server remain open *) 125 | let%bind () = assert_fds [%here] ~listen:0 ~established:2 in 126 | let%bind () = Worker.Connection.close connection in 127 | let%bind () = Worker.Connection.close_finished connection in 128 | (* This closed both counts of that connection *) 129 | assert_fds [%here] ~listen:0 ~established:0 130 | ;; 131 | 132 | let test_managed () = 133 | let%bind worker = 134 | Worker_managed.spawn_exn 135 | ~redirect_stderr:`Dev_null 136 | ~redirect_stdout:`Dev_null 137 | ~on_failure:Error.raise 138 | ~on_connection_to_worker_closed:Error.raise 139 | () 140 | () 141 | in 142 | (* We have the [Rpc_parallel.Heartbeater] connection along with the initial connection 143 | (that we keep because it is a managed [Worker.t]) *) 144 | let%bind () = assert_fds [%here] ~listen:1 ~established:2 in 145 | let%bind () = Worker_managed.run_exn worker ~f:Worker_managed.functions.ping ~arg:() in 146 | (* We should be reusing the same connection *) 147 | let%bind () = assert_fds [%here] ~listen:1 ~established:2 in 148 | let%bind () = Worker_managed.kill_exn worker in 149 | (* Now we should have no connections again *) 150 | assert_fds [%here] ~listen:1 ~established:0 151 | ;; 152 | 153 | let command = 154 | Command.async_spec 155 | ~summary:"Fd testing Rpc parallel" 156 | Command.Spec.empty 157 | (fun () -> 158 | (* make sure we are not starting servers until we need them *) 159 | let%bind () = assert_fds [%here] ~listen:0 ~established:0 in 160 | let%bind () = test_serve () in 161 | 
let%bind () = test_unmanaged () in 162 | let%map () = test_managed () in 163 | printf "Ok\n") 164 | ~behave_nicely_in_pipeline:false 165 | ;; 166 | 167 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 168 | -------------------------------------------------------------------------------- /example/stream_workers.ml: -------------------------------------------------------------------------------- 1 | open Core 2 | open Async 3 | 4 | (* This example involves a [Stream_worker.t] that generates a stream of elements. 5 | Each element of the stream is sent to a random [Worker.t] that has registered itself 6 | with that stream. Each [Worker.t] processes the elements (in this example by sending it 7 | back to the main process for printing) *) 8 | 9 | module Stream_worker = struct 10 | module T = struct 11 | type 'worker functions = 12 | { subscribe : ('worker, unit, int Pipe.Reader.t) Rpc_parallel.Function.t 13 | ; start : ('worker, unit, unit) Rpc_parallel.Function.t 14 | } 15 | 16 | module Worker_state = struct 17 | type init_arg = int [@@deriving bin_io] 18 | 19 | type t = 20 | { num_elts : int 21 | ; mutable workers : int Pipe.Writer.t list 22 | } 23 | end 24 | 25 | module Connection_state = struct 26 | type init_arg = unit [@@deriving bin_io] 27 | type t = unit 28 | end 29 | 30 | module Functions 31 | (C : Rpc_parallel.Creator 32 | with type connection_state := unit 33 | and type worker_state := Worker_state.t) = 34 | struct 35 | let init_connection_state ~connection:_ ~worker_state:_ = return 36 | let init_worker_state num_elts = return { Worker_state.num_elts; workers = [] } 37 | 38 | let subscribe_impl ~worker_state ~conn_state:() () = 39 | let r, w = Pipe.create () in 40 | (Pipe.closed w 41 | >>> fun () -> 42 | worker_state.Worker_state.workers 43 | <- List.filter worker_state.Worker_state.workers ~f:(fun worker -> 44 | not (Pipe.equal worker w))); 45 | worker_state.Worker_state.workers <- w :: worker_state.Worker_state.workers; 46 | 
return r 47 | ;; 48 | 49 | let start_impl ~worker_state ~conn_state:() () = 50 | let next_elt = ref 0 in 51 | let get_element () = 52 | let elt = !next_elt in 53 | incr next_elt; 54 | elt 55 | in 56 | don't_wait_for 57 | (let%map () = 58 | Deferred.repeat_until_finished 59 | worker_state.Worker_state.num_elts 60 | (fun count -> 61 | let%bind () = Clock.after (sec 0.05) in 62 | if count = 0 63 | then return (`Finished ()) 64 | else ( 65 | let elt = get_element () in 66 | let to_worker = 67 | List.nth_exn 68 | worker_state.workers 69 | (Random.int (List.length worker_state.workers)) 70 | in 71 | let%map () = Pipe.write to_worker elt in 72 | `Repeat (count - 1))) 73 | in 74 | List.iter worker_state.workers ~f:(fun writer -> Pipe.close writer)); 75 | return () 76 | ;; 77 | 78 | let subscribe = 79 | C.create_pipe ~f:subscribe_impl ~bin_input:Unit.bin_t ~bin_output:Int.bin_t () 80 | ;; 81 | 82 | let start = 83 | C.create_rpc ~f:start_impl ~bin_input:Unit.bin_t ~bin_output:Unit.bin_t () 84 | ;; 85 | 86 | let functions = { start; subscribe } 87 | end 88 | end 89 | 90 | include Rpc_parallel.Make (T) 91 | end 92 | 93 | module Worker = struct 94 | module T = struct 95 | type 'worker functions = 96 | { process_elts : 97 | ('worker, Stream_worker.t, int Pipe.Reader.t) Rpc_parallel.Function.t 98 | } 99 | 100 | module Worker_state = struct 101 | type t = unit 102 | type init_arg = unit [@@deriving bin_io] 103 | end 104 | 105 | module Connection_state = struct 106 | type init_arg = unit [@@deriving bin_io] 107 | type t = unit 108 | end 109 | 110 | module Functions 111 | (C : Rpc_parallel.Creator 112 | with type worker_state := Worker_state.t 113 | and type connection_state := Connection_state.t) = 114 | struct 115 | let process_elts_impl ~worker_state:() ~conn_state:() stream_worker = 116 | let check_r, check_w = Pipe.create () in 117 | let%bind conn = Stream_worker.Connection.client_exn stream_worker () in 118 | let%bind reader = 119 | Stream_worker.Connection.run_exn 120 | conn 
121 | ~f:Stream_worker.functions.subscribe 122 | ~arg:() 123 | in 124 | (Pipe.iter reader ~f:(fun i -> Pipe.write check_w i) 125 | >>> fun () -> Pipe.close check_w); 126 | return check_r 127 | ;; 128 | 129 | let process_elts = 130 | C.create_pipe 131 | ~f:process_elts_impl 132 | ~bin_input:Stream_worker.bin_t 133 | ~bin_output:Int.bin_t 134 | () 135 | ;; 136 | 137 | let functions = { process_elts } 138 | let init_worker_state () = Deferred.unit 139 | let init_connection_state ~connection:_ ~worker_state:_ = return 140 | end 141 | end 142 | 143 | include Rpc_parallel.Make (T) 144 | end 145 | 146 | let handle_error worker err = failwiths (sprintf "error in %s" worker) err Error.sexp_of_t 147 | 148 | let command = 149 | Command.async_spec_or_error 150 | ~summary:"foo" 151 | Command.Spec.( 152 | empty 153 | +> flag "-num-workers" (optional_with_default 4 int) ~doc:" number of workers" 154 | +> flag 155 | "-num-elts" 156 | (optional_with_default 50 int) 157 | ~doc:" number of elements to process") 158 | (fun num_workers num_elements () -> 159 | (* Spawn a stream worker *) 160 | Stream_worker.spawn 161 | ~shutdown_on:Heartbeater_connection_timeout 162 | ~redirect_stdout:`Dev_null 163 | ~redirect_stderr:`Dev_null 164 | num_elements 165 | ~on_failure:(handle_error "stream worker") 166 | >>=? fun stream_worker -> 167 | (* Spawn workers and tell them about the stream worker *) 168 | Deferred.Or_error.List.init ~how:`Sequential num_workers ~f:(fun i -> 169 | Worker.spawn 170 | ~shutdown_on:Connection_closed 171 | ~connection_state_init_arg:() 172 | ~redirect_stdout:`Dev_null 173 | ~redirect_stderr:`Dev_null 174 | () 175 | ~on_failure:(handle_error (sprintf "worker %d" i)) 176 | >>=? fun worker_conn -> 177 | Worker.Connection.run 178 | worker_conn 179 | ~f:Worker.functions.process_elts 180 | ~arg:stream_worker) 181 | >>=? fun workers -> 182 | (* Start the stream *) 183 | Stream_worker.Connection.client stream_worker () 184 | >>=? 
fun stream_conn -> 185 | Stream_worker.Connection.run stream_conn ~f:Stream_worker.functions.start ~arg:() 186 | >>=? fun () -> 187 | (* Collect the results *) 188 | let elements = List.init num_elements ~f:(fun _i -> Ivar.create ()) in 189 | don't_wait_for 190 | (Deferred.List.iter ~how:`Parallel workers ~f:(fun worker -> 191 | Pipe.iter worker ~f:(fun num -> 192 | Ivar.fill_exn (List.nth_exn elements num) () |> return))); 193 | let%map () = Deferred.all_unit (List.map elements ~f:Ivar.read) in 194 | printf "Ok.\n"; 195 | Or_error.return ()) 196 | ~behave_nicely_in_pipeline:false 197 | ;; 198 | 199 | let () = Rpc_parallel_krb_public.start_app ~krb_mode:For_unit_test command 200 | -------------------------------------------------------------------------------- /src/map_reduce.mli: -------------------------------------------------------------------------------- 1 | (** A parallel map/reduce library. See examples/add_numbers.ml and 2 | examples/number_stats.ml for examples. *) 3 | 4 | open! Core 5 | open! Async 6 | 7 | module Config : sig 8 | type t 9 | 10 | (** Default is to create the same number of local workers as the cores in local machine. 
*) 11 | val create 12 | : ?local:int 13 | -> ?remote:(_ Remote_executable.t * int) list 14 | -> ?cd:string (** default / *) 15 | -> ?connection_timeout:Time_float.Span.t 16 | (** see [with_spawn_args] in parallel_intf.ml *) 17 | -> redirect_stderr:[ `Dev_null | `File_append of worker_index:int -> string ] 18 | -> redirect_stdout:[ `Dev_null | `File_append of worker_index:int -> string ] 19 | -> unit 20 | -> t 21 | end 22 | 23 | module type Worker = sig 24 | type t 25 | type param_type 26 | type run_input_type 27 | type run_output_type 28 | 29 | val spawn_config_exn 30 | : ?how_to_spawn:Monad_sequence.how (** Default: [`Sequential] *) 31 | -> Config.t 32 | -> param_type 33 | -> t list Deferred.t 34 | 35 | val run_exn : t -> run_input_type -> run_output_type Deferred.t 36 | val shutdown_exn : t -> unit Deferred.t 37 | end 38 | 39 | (** {4 Map functions} *) 40 | 41 | (** [Map_function] modules must be created using the [Make_map_function] or 42 | [Make_map_function_with_init] functors. The init variety allows you to specify an 43 | [init] function that takes a "param" argument. The non-init variety is equivalent to 44 | the init variety with [init] equal to [return] and a [unit] "param" argument. 45 | Similarly, [Map_reduce_function] modules must be created using the 46 | [Make_map_combine_function] or [Make_map_combine_function_with_init] functors. 
*) 47 | 48 | module type Map_function = sig 49 | module Param : Binable 50 | module Input : Binable 51 | module Output : Binable 52 | 53 | module Worker : 54 | Worker 55 | with type param_type = Param.t 56 | and type run_input_type = Input.t 57 | and type run_output_type = Output.t 58 | end 59 | 60 | module type Map_function_with_init_spec = sig 61 | type state_type 62 | 63 | module Param : Binable 64 | module Input : Binable 65 | module Output : Binable 66 | 67 | val init : Param.t -> worker_index:int -> state_type Deferred.t 68 | val map : state_type -> Input.t -> Output.t Deferred.t 69 | end 70 | 71 | module Make_map_function_with_init (S : Map_function_with_init_spec) : 72 | Map_function 73 | with type Param.t = S.Param.t 74 | and type Input.t = S.Input.t 75 | and type Output.t = S.Output.t 76 | 77 | module type Map_function_spec = sig 78 | module Input : Binable 79 | module Output : Binable 80 | 81 | val map : Input.t -> Output.t Deferred.t 82 | end 83 | 84 | module Make_map_function (S : Map_function_spec) : 85 | Map_function 86 | with type Param.t = unit 87 | and type Input.t = S.Input.t 88 | and type Output.t = S.Output.t 89 | 90 | (** The [map_unordered] operation takes ['a Pipe.Reader.t] along with a [Map_function] and 91 | sends the ['a] values to workers for mapping. Each pair in the resulting 92 | [('b * int) Pipe.Reader.t] contains the mapped value and the index of the value in the 93 | input pipe. *) 94 | val map_unordered 95 | : ?how_to_spawn:Monad_sequence.how (** Default: [`Sequential] *) 96 | -> Config.t 97 | -> 'a Pipe.Reader.t 98 | -> m: 99 | (module Map_function 100 | with type Param.t = 'param 101 | and type Input.t = 'a 102 | and type Output.t = 'b) 103 | -> param:'param 104 | -> ('b * int) Pipe.Reader.t Deferred.t 105 | 106 | (** The [map] operation is similar to [map_unordered], but the result is a 107 | ['b Pipe.Reader.t] where the mapped values are guaranteed to be in the same order as 108 | the input values. 
*) 109 | val map 110 | : ?how_to_spawn:Monad_sequence.how (** Default: [`Sequential] *) 111 | -> Config.t 112 | -> 'a Pipe.Reader.t 113 | -> m: 114 | (module Map_function 115 | with type Param.t = 'param 116 | and type Input.t = 'a 117 | and type Output.t = 'b) 118 | -> param:'param 119 | -> 'b Pipe.Reader.t Deferred.t 120 | 121 | (** The [find_map] operation takes ['a Pipe.Reader.t] along with a [Map_function] that 122 | returns ['b option] values. As soon as [map] returns [Some value], all workers are 123 | stopped and [Some value] is returned. If [map] never returns [Some value] then [None] 124 | is returned. If more than one worker returns [Some value], one value is chosen 125 | arbitrarily and returned. *) 126 | val find_map 127 | : ?how_to_spawn:Monad_sequence.how (** Default: [`Sequential] *) 128 | -> Config.t 129 | -> 'a Pipe.Reader.t 130 | -> m: 131 | (module Map_function 132 | with type Param.t = 'param 133 | and type Input.t = 'a 134 | and type Output.t = 'b option) 135 | -> param:'param 136 | -> 'b option Deferred.t 137 | 138 | (** {4 Map-reduce} 139 | functions *) 140 | 141 | module type Map_reduce_function = sig 142 | module Param : Binable 143 | module Accum : Binable 144 | module Input : Binable 145 | 146 | module Worker : 147 | Worker 148 | with type param_type = Param.t 149 | and type run_input_type = 150 | [ `Map of Input.t 151 | | `Combine of Accum.t * Accum.t 152 | | `Map_right_combine of Accum.t * Input.t (* combine accum (map input) *) 153 | ] 154 | and type run_output_type = Accum.t 155 | end 156 | 157 | module type Map_reduce_function_with_init_spec = sig 158 | type state_type 159 | 160 | module Param : Binable 161 | module Accum : Binable 162 | module Input : Binable 163 | 164 | val init : Param.t -> worker_index:int -> state_type Deferred.t 165 | val map : state_type -> Input.t -> Accum.t Deferred.t 166 | val combine : state_type -> Accum.t -> Accum.t -> Accum.t Deferred.t 167 | end 168 | 169 | module Make_map_reduce_function_with_init (S 
: Map_reduce_function_with_init_spec) : 170 | Map_reduce_function 171 | with type Param.t = S.Param.t 172 | and type Accum.t = S.Accum.t 173 | and type Input.t = S.Input.t 174 | 175 | module type Map_reduce_function_spec = sig 176 | module Accum : Binable 177 | module Input : Binable 178 | 179 | val map : Input.t -> Accum.t Deferred.t 180 | val combine : Accum.t -> Accum.t -> Accum.t Deferred.t 181 | end 182 | 183 | module Make_map_reduce_function (S : Map_reduce_function_spec) : 184 | Map_reduce_function 185 | with type Param.t = unit 186 | and type Accum.t = S.Accum.t 187 | and type Input.t = S.Input.t 188 | 189 | (** The [map_reduce_commutative] operation takes ['a Pipe.Reader.t] along with a 190 | [Map_reduce_function] and applies the [map] function to ['a] values (in an unspecified 191 | order), resulting in ['accum] values. The [combine] function is then called to combine 192 | the ['accum] values (in an unspecified order) into a single ['accum] value. 193 | Commutative map-reduce assumes that [combine] is associative and commutative. *) 194 | val map_reduce_commutative 195 | : ?how_to_spawn:Monad_sequence.how (** Default: [`Sequential] **) 196 | -> Config.t 197 | -> 'a Pipe.Reader.t 198 | -> m: 199 | (module Map_reduce_function 200 | with type Param.t = 'param 201 | and type Accum.t = 'accum 202 | and type Input.t = 'a) 203 | -> param:'param 204 | -> 'accum option Deferred.t 205 | 206 | (** The [map_reduce] operation makes strong guarantees about the order in which the values 207 | are processed by [combine]. For a list a_0, a_1, a_2, ..., a_n of ['a] values, the 208 | noncommutative map-reduce operation applies the [map] function to produce 209 | [acc_{i,i+1}] from each [a_i]. 
The [combine] function is used to compute 210 | [combine acc_{i,j} acc_{j,k}] for i < j < k, so the final result is [acc_{0,n+1}] 211 | ([None] is returned if the input pipe is empty). Noncommutative map-reduce assumes 212 | that [combine] is associative. *) 213 | val map_reduce 214 | : ?how_to_spawn:Monad_sequence.how (** Default: [`Sequential] **) 215 | -> Config.t 216 | -> 'a Pipe.Reader.t 217 | -> m: 218 | (module Map_reduce_function 219 | with type Param.t = 'param 220 | and type Accum.t = 'accum 221 | and type Input.t = 'a) 222 | -> param:'param 223 | -> 'accum option Deferred.t 224 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ## Release v0.17.0 2 | 3 | - Add support for `Rpc.State_rpc.t` 4 | 5 | - Fix a bug when using `~shutdown_on:Connection_closed` and the master shuts down during 6 | `init_worker_state` 7 | 8 | - Add parameters to control the rpc max buffer age limit 9 | 10 | - Expand documentation for `Parallel_intf.create_direct_pipe` to detail behavior of queued 11 | elements written before the function returns. 12 | 13 | ## Release v0.16.0 14 | 15 | - `How_to_run.remote` now accepts an optional `assert_binary_hash` parameter 16 | * If set to false, skips the remote binary's hash check against the currently running 17 | binary's hash 18 | 19 | - Add `connection_timeout` parameter in `Map_reduce.Config.create`, with the same 20 | semantics as in `with_spawn_args` 21 | 22 | - Add support for `Async_rpc`'s direct pipe rpcs 23 | 24 | - Add support for client pushback on pipe rpcs 25 | 26 | - Add `complete_subcommands` pass-through argument to `start_app` for `Command.run` 27 | 28 | - `Remote_executable.run` now accepts an optional `assert_binary_hash` parameter 29 | * Allows disabling the check that the currently running executable matches the provided 30 | `Remote_executable.t` by setting `assert_binary_hash` to false 31 | 32 | ## Old pre-v0.15 changelogs (very likely stale and incomplete) 33 | 34 | ## git version 35 | 36 | - Added `create_reverse_direct_pipe` to `Parallel_intf`.
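The [Map_reduce] interface documented in src/map_reduce.mli above (and referenced by the `Map_reduce.Config.create` changelog entry) composes roughly as follows. This is a minimal sketch, not code from this repo: it assumes the module is exposed as `Rpc_parallel.Map_reduce` and that the program is launched via one of the `start_app` entry points, as in the examples.

```ocaml
open Core
open Async

(* A no-init map function; [Param.t] is [unit], per [Make_map_function]. *)
module Double = Rpc_parallel.Map_reduce.Make_map_function (struct
    module Input = Int
    module Output = Int

    let map x = return (x * 2)
  end)

let double_all () =
  (* Both redirections are mandatory arguments of [Config.create]. *)
  let config =
    Rpc_parallel.Map_reduce.Config.create
      ~local:2
      ~redirect_stdout:`Dev_null
      ~redirect_stderr:`Dev_null
      ()
  in
  (* [map] preserves input order; [map_unordered] would also yield each
     result's index in the input pipe. *)
  let%bind doubled =
    Rpc_parallel.Map_reduce.map
      config
      (Pipe.of_list [ 1; 2; 3 ])
      ~m:(module Double)
      ~param:()
  in
  Pipe.to_list doubled
```

`map_reduce_commutative` follows the same shape, swapping the map-function module for a `Map_reduce_function` and returning an `'accum option`.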
37 | 38 | ## v0.10 39 | 40 | - Changed the API to make the shutdown behavior of workers more explicit 41 | 42 | ## v0.9 43 | 44 | ## 113.43.00 45 | 46 | - Take away `@@deriving bin_io` for managed workers. 47 | 48 | - Introduce a `spawn_in_foreground` function that returns a `Process.t` along with the worker. 49 | 50 | Also use this opportunity to clean up the handling of file descriptors in the `spawn` case as well. 51 | 52 | - Add a couple features to Rpc\_parallel to make debugging connection issues 53 | easier. 54 | 55 | * Add `Worker.Connection.close_reason` 56 | * Add `Worker.Connection.sexp_of_t` 57 | 58 | 59 | - Add some extra security around making Rpc calls to workers. 60 | 61 | Because we do not expose the port of a worker, unless you do really 62 | hacky things, you are going to go through Rpc\_parallel when running 63 | Rpcs. When Rpc\_parallel connects to a worker, it initializes a 64 | connection state (that includes the worker state). This 65 | initialization would raise if the worker did not have a server 66 | listening on the port that the client was talking to. Add some 67 | security by enforcing unification of worker\_ids instead of ports 68 | (which will be reused by the OS). 69 | 70 | - Make an Rpc_parallel test case deterministic 71 | 72 | ## 113.33.00 73 | 74 | - Adding the mandatory arguments `redirect_stderr` and `redirect_stdout` to `Map_reduce` so it is easier to debug your workers. 75 | Also removed `spawn_exn` in favor of only exposing `spawn_config_exn`. If you want to spawn a single worker, make a config of one worker. 76 | 77 | - Cleans up the implementation-side interface for aborting `Pipe_rpc`s. 78 | 79 | Summary 80 | 81 | The `aborted` `Deferred.t` that got passed to `Pipe_rpc` implementations is 82 | gone. The situations where it would have been determined now close the reading 83 | end of the user-supplied pipe instead. 
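Concretely, under the new interface a `Pipe_rpc` implementation learns about aborts by watching its own pipe, in the same style as `subscribe_impl` in example/stream_workers.ml above. A hedged sketch (the surrounding `Functions (C : Rpc_parallel.Creator ...)` functor and the `C.create_pipe` wiring are elided, and `counter_impl` is a hypothetical name):

```ocaml
open Core
open Async

(* Stream increasing integers until the consumer goes away. There is no
   [aborted] deferred anymore: a client abort (or disconnect) surfaces as
   our writer end being closed. *)
let counter_impl ~worker_state:_ ~conn_state:_ () =
  let r, w = Pipe.create () in
  let rec loop i =
    if Pipe.is_closed w
    then return () (* aborted: stop producing *)
    else (
      let%bind () = Pipe.write w i in
      loop (i + 1))
  in
  don't_wait_for (loop 0);
  return r
```

Checking `Pipe.is_closed` after each (pushback-respecting) `Pipe.write` is what replaces the old `aborted`-deferred handshake.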
84 | 85 | Details 86 | 87 | Previously, when an RPC dispatcher decided to abort a query, the RPC 88 | implementation would get its `aborted` `Deferred.t` filled in, but would remain 89 | free to write some final elements to the pipe. 90 | 91 | This is a little bit more flexible than the new interface, but it's also 92 | problematic in that the implementer could easily just not pay attention to 93 | `aborted`. (They're not obligated to pay attention to when the pipe is closed, 94 | either, but at least they can't keep writing to it.) We don't think the extra 95 | flexibility was used at all. 96 | 97 | In the future, we may also simplify the client side to remove the `abort` 98 | function on the dispatch side (at least when not using `dispatch_iter`). For the 99 | time being it remains, but closing the received pipe is the preferred way of 100 | aborting the query. 101 | 102 | There are a couple of known ways this might have changed behavior from before. 103 | Both of these appear not to cause problems in the jane repo. 104 | 105 | - In the past, an implementation could write to a pipe as long as the client 106 | didn't disconnect, even if it closed its pipe. Now writes will raise after 107 | a client closes its pipe (or calls `abort`), since the implementor's pipe will 108 | also be closed in this case. Doing this was already unsafe, though, since the 109 | pipe *was* closed if the RPC connection was closed. 110 | 111 | - `aborted` was only determined if a client aborted the query or the connection 112 | was closed. The new alternative, `Pipe.closed` called on the returned pipe, 113 | will also be determined if the implementation closes the pipe itself. This is 114 | unlikely to cause serious issues but could potentially cause some confusing 115 | logging. 116 | 117 | - Deal with an fd leak in Managed worker connections 118 | 119 | - This feature completely restructured `rpc_parallel` to make it more 120 | transparent in what it does and doesn't do. 
I offloaded all the 121 | connection management and cleanup responsibilities to the user. 122 | 123 | What `Rpc_parallel` used to do for you: 124 | ------------------------------------ 125 | - Internally there was a special (behind the scenes) Rpc connection 126 | maintained between the worker and its master upon spawn. Upon this 127 | connection closing, the worker would shut itself down and the 128 | master would be notified of the lost connection. 129 | - We would keep a connection table for you. `run` was called on the 130 | worker type (i.e. a host and port) because its implementation 131 | would get a connection (using an existing one or doing a 132 | `Rpc.Connection.client`). 133 | 134 | What it now does: 135 | ----------------- 136 | - No special Rpc connection is maintained, so all cleanup is the job 137 | of the user 138 | - No internal connection table. `run` is called on a connection type, 139 | so you have to manage connections yourself 140 | - Exposes connection states (unique to each connection) along with 141 | worker states (shared between connections) 142 | - There is an explicit `Managed` module that does connection 143 | management for you 144 | - There is an explicit `Heartbeater` type that can be used in spawned 145 | workers to connect to their master and handle cleanup 146 | 147 | - There was a race condition where `spawn` would return a `Worker.t` even though the worker had not daemonized yet. 148 | This manifested itself in a spurious test case failure. 149 | 150 | This also allowed for running the worker initialization code before we daemonize. 151 | 152 | - Reuse the master rpc settings when the worker connects using the `Heartbeater` module 153 | 154 | - Make all global state and rpc servers lazy.
155 | 156 | Namely: 157 | 1) Toplevel hashtables are initialized with size 1 to reduce overhead due to linking with `Rpc_parallel` 158 | 2) The master rpc server is only started upon the first call to spawn 159 | 160 | 161 | - Fixed a `Rpc_parallel` bug that caused a Master to Worker call to never return. 162 | 163 | - Add new functions `spawn_and_connect` and `spawn_and_connect_exn` which return the worker and a connection to the worker. 164 | 165 | Also, move all the example code off of the managed module 166 | 167 | - Some refactoring: 168 | 169 | 1) Take `serve` out of the `Connection` module. It shouldn't be in there because it does nothing with the `Connection.t` type 170 | 2) Remove some unnecessary functions defined on `Connection.t`'s. E.g. no longer expose `close_reason` because all the exception handling will be done with the registered exn handlers. 171 | 3) Move `Rpc_settings` module 172 | 4) Separate `global_state` into `as_worker` and `as_master` 173 | 5) Some cleanup with how we are managing worker ids 174 | 6) Create a record type for `Init_connection_state_rpc.query` 175 | 176 | ## 113.24.00 177 | 178 | - Switched to PPX. 179 | 180 | - Expose the `connection_timeout` argument in rpc\_parallel. This argument 181 | exists in `Rpc_parallel_core.Parallel`, but it is not exposed in 182 | `Rpc_parallel.Parallel`. 183 | 184 | - Allow custom handling of missed async\_rpc heartbeats. 185 | 186 | - Give a better error message when redirecting output on a remote box to a file 187 | path that does not exist.
188 | 189 | - Remove unnecessary chmod 700 call on the remote executable 190 | 191 | - Give a clear error message for the common mistake of not making the 192 | `Parallel.Make_worker()` functor application top-level 193 | 194 | - Make errors/exceptions in `Rpc_parallel` more observable 195 | 196 | - Make stderr and stdout redirection mandatory in order to encourage logging stderr 197 | - Clean up the use of monitors across `Rpc_parallel` 198 | - Fix bug with exceptions that are sent directly to `Monitor.main` 199 | (e.g. `Async_log` does this) 200 | 201 | - Add the ability to explicitly initialize as a master and use some subcommand for the 202 | worker. This would allow writing programs with complex command structures that don't have 203 | to invoke a bunch of `Rpc_parallel` logic and start RPC servers for every command. 204 | 205 | 206 | - Add the ability to get log messages from a worker sent back to the master. 207 | In fact, any worker can register for the log messages of any other workers. 208 | 209 | ## 113.00.00 210 | 211 | - Fixed a file-descriptor leak 212 | 213 | There was a file descriptor leak when killing workers. Their stdin, 214 | stdout, and stderr remained open. We now close them after the worker process 215 | exits. 216 | 217 | ## 112.24.00 218 | 219 | - Added `Parallel.State.get` function, to check whether `Rpc_parallel` has been 220 | initialized correctly. 221 | - Added `Map_reduce` module, which is an easy-to-use parallel map/reduce library. 222 | 223 | It can be used to map/fold over a list while utilizing multiple cores on multiple machines. 224 | 225 | Also added support for workers to keep their own inner state. 226 | 227 | - Fixed a bug in which a zombie process was created per spawned worker. 228 | 229 | Also fixed shutdown on remote workers 230 | 231 | - Made it possible for workers to spawn other workers, i.e. act as masters. 232 | 233 | - Made the connection timeout configurable and bumped the default to 10s.
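The configurable connection timeout mentioned above surfaces as an optional argument on the spawn functions. A hedged, illustrative sketch reusing the [Worker] module defined in test/fd.ml earlier in this dump; the exact optional-argument set is the one in src/parallel.mli, and `spawn_patiently` is a hypothetical name:

```ocaml
open Core
open Async

(* Spawn with an explicit connection timeout instead of the 10s default.
   [Worker] is assumed to be the module from test/fd.ml above. *)
let spawn_patiently () =
  Worker.spawn_exn
    ~connection_timeout:(Time_float.Span.of_sec 30.)
    ~shutdown_on:Heartbeater_connection_timeout
    ~redirect_stdout:`Dev_null
    ~redirect_stderr:`Dev_null
    ~on_failure:Error.raise
    ()
```

If the worker's rpc server is not reachable within the span, the spawn resolves to an error rather than hanging.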
234 | 235 | ## 112.17.00 236 | 237 | - Follow changes in Async RPC 238 | 239 | ## 112.01.00 240 | 241 | Initial import. 242 | --------------------------------------------------------------------------------