├── .formatter.exs ├── .github └── workflows │ └── elixir.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── lib ├── application.ex ├── flame.ex └── flame │ ├── backend.ex │ ├── code_sync.ex │ ├── fly_backend.ex │ ├── local_backend.ex │ ├── parent.ex │ ├── parser │ └── json.ex │ ├── pool.ex │ ├── pool │ ├── cleaner.ex │ └── supervisor.ex │ ├── queue.ex │ ├── runner.ex │ ├── terminator.ex │ ├── terminator │ └── supervisor.ex │ └── trackable.ex ├── mix.exs ├── mix.lock └── test ├── code_sync_test.exs ├── flame_test.exs ├── fly_backend_test.exs ├── parser └── json_test.exs ├── queue_test.exs ├── runner_test.exs ├── support ├── code_sync_mock.ex └── trackable.ex └── test_helper.exs /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.github/workflows/elixir.yml: -------------------------------------------------------------------------------- 1 | name: Elixir CI 2 | on: 3 | pull_request: 4 | push: 5 | 6 | jobs: 7 | main: 8 | runs-on: ubuntu-latest 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | include: 13 | - elixir_version: 1.15.7 14 | otp_version: 26.1.2 15 | lint: true 16 | steps: 17 | - uses: actions/checkout@v4 18 | - uses: erlef/setup-beam@v1 19 | with: 20 | otp-version: ${{matrix.otp_version}} 21 | elixir-version: ${{matrix.elixir_version}} 22 | - run: mix deps.get 23 | - run: mix format --check-formatted 24 | if: ${{ matrix.lint }} 25 | - run: mix deps.unlock --check-unused 26 | if: ${{ matrix.lint }} 27 | - run: mix deps.compile 28 | - run: mix compile --warnings-as-errors 29 | if: ${{ matrix.lint }} 30 | - run: mix test 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | flame-*.tar 24 | 25 | # Temporary files, for example, from tests. 26 | /tmp/ 27 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.5.2 (2024-12-06) 4 | 5 | ### Enhancements 6 | - Clean up parent tmp code_sync artifacts on pool shutdown 7 | 8 | ## 0.5.1 (2024-09-19) 9 | 10 | ### Enhancements 11 | - Add basic rate limiting retries to the FlyBackend to abide by Fly's rate limits of 1 request per second, with 3 requests per second burst. 12 | - Add basic retries for `no capacity` errors in the FlyBackend 13 | 14 | ## 0.5.0 (2024-09-11) 15 | 16 | ### Enhancements 17 | - Add `copy_apps` option to `code_sync` to copy all apps in the code path, which is 18 | set to true when `start_apps` is true. 
19 | - Support `copy_paths` for arbitrary paths unrelated to beams apps for copying arbitrary 20 | files on boot to the runner. 21 | 22 | ### Deprecations 23 | - `copy_paths: true` has been deprecated in favor of `start_apps: true`, to copy 24 | all apps and start them. You can also pass `copy_paths: true` to copy all apps 25 | without starting them. Now `copy_paths` is reserved for copying arbitrary paths 26 | unrelated to beams apps. 27 | 28 | ## 0.4.4 (2024-09-03) 29 | 30 | ### Bug Fixes 31 | - Fix idle shutdown running before code sync, causing long code syncs to shut runners down prematurely 32 | 33 | ## 0.4.3 (2024-09-02) 34 | 35 | ### Bug Fixes 36 | - Fix `:compress` to `:code_sync` raise invalid option error 37 | 38 | ## 0.4.2 (2024-08-27) 39 | 40 | ### Enhancements 41 | - Support `:compress` option to `code_sync` to control compression of `:copy_paths` and `:sync_beams`. 42 | 43 | ## 0.4.1 (2024-08-27) 44 | 45 | ### Bug Fixes 46 | - Fix beam files not being copied on first sync 47 | 48 | ## 0.4.0 (2024-08-27) 49 | 50 | ### Bug Fixes 51 | - Forward `:boot_timeout` to backend options 52 | 53 | ### Enhancements 54 | - Optimize concurrent runner booting 55 | 56 | ## 0.3.0 (2024-07-26) 57 | 58 | ### Bug Fixes 59 | - Copy sym links in `:copy_paths` and `:sync_beams` 60 | - Fix function error caused by anonymous functions in `:copy_paths` and `:sync_beams` 61 | 62 | ### Enhancements 63 | - Use OTP 27's `:json` if available 64 | - Introduce `FLAME.Trackable` protocol for tracking resources 65 | - Introduce `FLAME.track_resources/3` to recursively track resources 66 | on a given node 67 | 68 | ## 0.2.0 (2024-06-17) 69 | 70 | ### Backwards incompatible changes 71 | - For backend implementations, the `FLAME.Parent` encoded format has changed to include more information about the parent and child. See `FLAME.Parent` moduledoc for more information. 72 | 73 | ### Enhancements 74 | - Add `:code_sync` pool configuration for syncing beam files and code paths to flames 75 | 76 | ## 0.1.12 (2024-03-14) 77 | - Support `link: false` on `FLAME.call/3`, `FLAME.cast/3`, and `FLAME.place_child/3` for opt-in allowance of long-running FLAME operations (up to `:shutdown_timeout`) regardless of what happens to the caller process or caller node. 
78 | 79 | ## 0.1.11 (2024-02-22) 80 | - Add ability to configure custom metadata for launch FlyBackend machine 81 | 82 | ## 0.1.10 (2024-02-21) 83 | - Fix `FLAME.cast/2` defaulting to boot timeout for executions 84 | 85 | ## 0.1.9 (2024-02-20) 86 | - Fix `FLAME.cast/2` allowing more than allowed max_concurrency operations 87 | - Explicitly prefer local region in `FlyBackend` 88 | 89 | ## 0.1.8 (2024-01-02) 90 | - Fix Pool supervisor name collisions 91 | 92 | ## 0.1.7 (2023-12-15) 93 | - Fix error on concurrent calls when runners are pending 94 | 95 | ## 0.1.6 (2023-12-11) 96 | - Fix references to incorrectly named FLAME_PARENT export 97 | 98 | ## 0.1.5 (2023-12-07) 99 | - Allow passing fly guest options to configure cpus, cpu_kind, gpu_kind, and memory_mb 100 | 101 | ## 0.1.4 (2023-12-06) 102 | 103 | Public release 🔥 104 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 Chris McCord 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/phoenixframework/flame/elixir.yml)](https://github.com/phoenixframework/flame/actions/workflows/elixir.yml) [![Hex.pm](https://img.shields.io/hexpm/v/flame.svg)](https://hex.pm/packages/flame) [![Documentation](https://img.shields.io/badge/documentation-gray)](https://hexdocs.pm/flame) 2 | 3 | Imagine if we could auto scale simply by wrapping any existing app code in a function and have that block of code run in a temporary copy of the app. 4 | 5 | Enter the FLAME pattern. 6 | 7 | > FLAME - Fleeting Lambda Application for Modular Execution 8 | 9 | With FLAME, you treat your *entire application* as a lambda, where modular parts can be executed on short-lived infrastructure. 
10 | 11 | Check the screencast to see it in action: 12 | 13 | [![Video](https://img.youtube.com/vi/l1xt_rkWdic/maxresdefault.jpg)](https://www.youtube.com/watch?v=l1xt_rkWdic) 14 | 15 | ## Setup 16 | 17 | First add `:flame` as a dependency: 18 | 19 | ```elixir 20 | defp deps do 21 | [ 22 | # For Erlang/OTP 26 and earlier, you also need Jason 23 | # {:jason, ">= 0.0.0"}, 24 | {:flame, "~> 0.5"} 25 | ] 26 | end 27 | ``` 28 | 29 | Then start a FLAME pool in your supervision tree, typically on `application.ex`. The example below uses [Fly.io](https://fly.io/)'s backend: 30 | 31 | ```elixir 32 | children = [ 33 | {FLAME.Pool, 34 | name: MyApp.SamplePool, 35 | backend: FLAME.FlyBackend, 36 | min: 0, 37 | max: 10, 38 | max_concurrency: 5, 39 | idle_shutdown_after: 30_000, 40 | log: :debug} 41 | ] 42 | ``` 43 | 44 | Now you can wrap any block of code in a `FLAME.call` and it will find or boot a copy of the app, execute the work there, and return the results: 45 | 46 | ```elixir 47 | def generate_thumbnails(%Video{} = vid, interval) do 48 | FLAME.call(MyApp.FFMpegRunner, fn -> 49 | # I'm runner on a short-lived, temporary server 50 | tmp_dir = Path.join(System.tmp_dir!(), Ecto.UUID.generate()) 51 | File.mkdir!(tmp_dir) 52 | System.cmd("ffmpeg", ~w(-i #{vid.url} -vf fps=1/#{interval} #{tmp_dir}/%02d.png)) 53 | urls = VideoStore.put_thumbnails(vid, Path.wildcard(tmp_dir <> "/*.png")) 54 | Repo.insert_all(Thumbnail, Enum.map(urls, &%{video_id: vid.id, url: &1})) 55 | end) 56 | end 57 | ``` 58 | 59 | Here we wrapped up our CPU expensive `ffmpeg` operation in a `FLAME.call/2`. FLAME accepts a function and any variables that the function closes over. In this example, the `%Video{}` struct and `interval` are passed along automatically. The work happens in a temporary copy of the app. We can do any work inside the FLAME call because we are running the *entire application*, database connection(s) and all. 60 | 61 | `FLAME` provides the following interfaces for elastically scaled operations: 62 | 63 | * `FLAME.call/3` - used for synchronous calls 64 | * `FLAME.cast/3` - used for async casts where you don't need to wait on the results 65 | * `FLAME.place_child/3` – used for placing a child spec somewhere to run, in place of `DynamicSupervisor.start_child`, `Task.Supervisor.start_child`, etc 66 | 67 | The `FLAME.Pool` handles elastically scaling runners up and down, as well as remote monitoring of resources. Check the moduledoc for example usage. 68 | -------------------------------------------------------------------------------- /lib/application.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Application do 2 | @moduledoc false 3 | use Application 4 | 5 | @impl true 6 | def start(_type, _args) do 7 | opts = Application.get_env(:flame, :terminator, []) 8 | shutdown = Keyword.get(opts, :shutdown_timeout, 30_000) 9 | 10 | opts = Keyword.put(opts, :name, FLAME.Terminator) 11 | 12 | children = [ 13 | Supervisor.child_spec({FLAME.Terminator, opts}, shutdown: shutdown) 14 | ] 15 | 16 | opts = [strategy: :one_for_one, name: FLAME.Supervisor] 17 | Supervisor.start_link(children, opts) 18 | end 19 | end 20 | -------------------------------------------------------------------------------- /lib/flame.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME do 2 | @moduledoc ~S""" 3 | FLAME remotely executes your application code on ephemeral nodes. 
4 | 5 | FLAME allows you to scale your application operations on a granular 6 | level **without rewriting your code**. For example, imagine the following function 7 | in your application that transcodes a video, saves the result to video storage, 8 | and updates the database: 9 | 10 | def resize_video_quality(%Video{} = vid) do 11 | path = "#{vid.id}_720p.mp4" 12 | System.cmd("ffmpeg", ~w(-i #{vid.url} -s 720x480 -c:a copy #{path})) 13 | VideoStore.put_file!("videos/#{path}", path) 14 | {1, _} = Repo.update_all(from v in Video, where v.id == ^vid.id, set: [file_720p: path]) 15 | {:ok, path} 16 | end 17 | 18 | This works great locally and in production under no load, but video transcoding 19 | is necessarily an expensive CPU bound operation. In production, only a 20 | few concurrent users can saturate your CPU and cause your entire application, 21 | web requests, etc, to come to crawl. This is where folks typically reach for 22 | FaaS or external service solutions, but FLAME gives you a better way. 23 | 24 | Simply wrap your existing code in a FLAME function and it will be executed 25 | on a newly spawned, ephemeral node. Using Elixir and Erlang's built-in distribution 26 | features, entire function closures, including any state they close over, can be sent 27 | and executed on a remote node: 28 | 29 | def resize_video_quality(%Video{} = vid) do 30 | FLAME.call(MyApp.FFMpegRunner, fn -> 31 | path = "#{vid.id}_720p.mp4" 32 | System.cmd("ffmpeg", ~w(-i #{vid.url} -s 720x480 -c:a copy #{path})) 33 | VideoStore.put_file!("videos/#{path}", path) 34 | {1, _} = Repo.update_all(from v in Video, where v.id == ^vid.id, set: [file_720p: path]) 35 | {:ok, path} 36 | end) 37 | end 38 | 39 | That's it! The `%Video{}` struct in this example is captured inside the function 40 | and everything executes on the remotely spawned node, returning the result back to the 41 | parent node when it completes. Repo calls Just Work because the new node booted 42 | your entire application, including the database Repo. As soon as the function is done 43 | executing, the ephemeral node is terminated. This means you can elastically scale 44 | your app as load increases, and only pay for the resources you need at the time. 45 | 46 | To support your FLAME calls, you'll need to add a named `FLAME.Pool` to your 47 | application's supervision tree, which we'll discuss next. 48 | 49 | ## Pools 50 | 51 | A `FLAME.Pool` provides elastic runner scaling, allowing a minimum and 52 | maximum number of runners to be configured, and idled down as load decreases. 53 | 54 | Pools give you elastic scale that maximizes the newly spawned hardware. 55 | At the same time, you also want to avoid spawning unbound resources. You also 56 | want to keep spawned nodes alive for a period of time to avoid the overhead 57 | of booting new ones before idling them down. The following pool configuration 58 | takes care of all of this for you: 59 | 60 | children = [ 61 | ..., 62 | {FLAME.Pool, 63 | name: MyApp.FFMpegRunner, 64 | min: 0, 65 | max: 10, 66 | max_concurrency: 5, 67 | idle_shutdown_after: 30_000}, 68 | ] 69 | 70 | Here we add a `FLAME.Pool` to our application supervision tree, configuring 71 | a minimum of 0 and maximum of 10 runners. This achieves "scale to zero" behavior 72 | while also allowing the pool to scale up to 10 runners when load increases. 73 | Each runner in the case will be able to execute up to 5 concurrent functions. 74 | The runners will shut down after 30 seconds of inactivity. 
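Pools use the globally configured backend by default (see the Backends section below), and each pool may also pass its own `:backend` option. A minimal sketch, reusing the pool above with an illustrative per-pool Fly backend and env value:

    children = [
      ...,
      {FLAME.Pool,
       name: MyApp.FFMpegRunner,
       backend: {FLAME.FlyBackend, env: %{"POOL_SIZE" => "1"}},
       min: 0,
       max: 10,
       max_concurrency: 5,
       idle_shutdown_after: 30_000},
    ]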
75 | 76 | Calling a pool is as simple as passing its name to the FLAME functions: 77 | 78 | FLAME.call(MyApp.FFMpegRunner, fn -> :operation1 end) 79 | 80 | You'll also often want to enable or disable other application services based on whether 81 | your application is being started as child FLAME runner or being run directly. 82 | See the next `Deployment Considerations` section below for details. 83 | 84 | ## Deployment Considerations 85 | 86 | FLAME nodes effectively clone and start your entire application. This is great 87 | because all application services and dependencies are ready to go and be used to 88 | support your FLAME calls; however, You'll also often want to enable or disable 89 | services based on whether your node is running as a FLAME child or not. 90 | For example, there's usually no need to serve your Phoenix endpoint within a FLAME. 91 | You also likely only need a single or small number of database connections instead of 92 | your existing pool size. 93 | 94 | To accomplish these you can use `FLAME.Parent.get/0` to conditionally enable or 95 | disable processes in your `application.ex` file: 96 | 97 | def start(_type, _args) do 98 | flame_parent = FLAME.Parent.get() 99 | 100 | children = [ 101 | ..., 102 | {FLAME.Pool, 103 | name: Thumbs.FFMpegRunner, 104 | min: 0, 105 | max: 10, 106 | max_concurrency: 5, 107 | idle_shutdown_after: 30_000}, 108 | !flame_parent && ThumbsWeb.Endpoint 109 | ] 110 | |> Enum.filter(& &1) 111 | 112 | opts = [strategy: :one_for_one, name: Thumbs.Supervisor] 113 | Supervisor.start_link(children, opts) 114 | end 115 | 116 | Here we filter the Phoenix endpoint from being started when running as a FLAME 117 | child because we have no need to handle web requests in this case. 118 | 119 | Or you can use `FLAME.Parent.get/0` to configure your database pool size: 120 | 121 | pool_size = 122 | if FLAME.Parent.get() do 123 | 1 124 | else 125 | String.to_integer(System.get_env("POOL_SIZE") || "10") 126 | end 127 | 128 | config :thumbs, Thumbs.Repo, 129 | ..., 130 | pool_size: pool_size 131 | 132 | ## Backends 133 | 134 | The `FLAME.Backend` behavior defines an interface for spawning remote 135 | application nodes and sending functions to them. By default, the 136 | `FLAME.LocalBackend` is used, which is great for development and test 137 | environments, as you can have your code simply execute locally in most cases 138 | and worry about scaling the operation only in production. 139 | 140 | For production, FLAME provides the `FLAME.FlyBackend`, which uses 141 | [Fly.io](https://fly.io). Because Fly deploys a containerized machine of 142 | your application, a single Fly API call can boot a machine running your 143 | exact Docker deployment image, allowing closures to be executed across 144 | distributed nodes. 145 | 146 | Default backends can be configured in your `config/runtime.exs`: 147 | 148 | if config_env() == :prod do 149 | config :flame, :backend, FLAME.FlyBackend 150 | config :flame, FLAME.FlyBackend, token: System.fetch_env!("FLY_API_TOKEN") 151 | ... 152 | end 153 | 154 | ## Termination and remote links 155 | 156 | FLAME runs a termination process to allow remotely spawned functions time to 157 | complete before the node is terminated. This process is started automatically 158 | with the library. 
The shutdown timeout by default is 30s, but can be configured 159 | in your application configuration, such as `config/runtime.exs`: 160 | 161 | config :flame, :terminator, shutdown_timeout: :timer.seconds(10) 162 | 163 | *Note*: By default `call/3`, `cast/3`, and `place_child/3` will link the caller 164 | to the remote process to prevent orphaned resources when the caller or the caller's node 165 | is terminated. This can be disabled by passing `link: false` to the options, which is 166 | useful for cases where you want to allow long-running work to complete within the 167 | `:shutdown_timeout` of the remote runner, regardless of what happens to the parent caller 168 | process and/or the parent caller node, such as a new cold deploy, a caller crash, etc. 169 | """ 170 | require Logger 171 | 172 | @doc """ 173 | Calls a function in a remote runner for the given `FLAME.Pool`. 174 | 175 | ## Options 176 | 177 | * `:timeout` - The timeout the caller is willing to wait for a response before an 178 | exit with `:timeout`. Defaults to the configured timeout of the pool. 179 | The executed function will also be terminated on the remote flame if 180 | the timeout is reached. 181 | 182 | * `:link` – Whether the caller should be linked to the remote call process 183 | to prevent long-running orphaned resources. Defaults to `true`. Set to `false` to 184 | support long-running work that you want to complete within the `:shutdown_timeout` 185 | of the remote runner, even when the parent process or node is terminated. 186 | *Note*: even when `link: false` is used, an exit in the remote process will raise 187 | an error on the caller. The caller will need to try/catch the call if they wish 188 | to handle the error. 189 | 190 | * `:track_resources` - When true, traverses the returned result looking for 191 | resources that implement the `FLAME.Trackable` protocol and make sure the 192 | FLAME node does not terminate until the tracked resources are removed. 193 | 194 | ## Examples 195 | 196 | def my_expensive_thing(arg) do 197 | FLAME.call(MyApp.Runner, fn -> 198 | # I'm now doing expensive work inside a new node 199 | # pubsub and repo access all just work 200 | Phoenix.PubSub.broadcast(MyApp.PubSub, "topic", result) 201 | 202 | # can return awaitable results back to caller 203 | result 204 | end) 205 | end 206 | 207 | When the caller exits, the remote runner will be terminated. 208 | """ 209 | def call(pool, func, opts \\ []) 210 | when is_atom(pool) and is_function(func, 0) and is_list(opts) do 211 | FLAME.Pool.call(pool, func, opts) 212 | end 213 | 214 | @doc """ 215 | Casts a function to a remote runner for the given `FLAME.Pool`. 216 | 217 | ## Options 218 | 219 | * `:link` – Whether the caller should be linked to the remote cast process 220 | to prevent long-running orphaned resources. Defaults to `true`. Set to `false` to 221 | support long-running work that you want to complete within the `:shutdown_timeout` 222 | of the remote runner, even when the parent process or node is terminated. 223 | """ 224 | def cast(pool, func, opts \\ []) 225 | when is_atom(pool) and is_function(func, 0) and is_list(opts) do 226 | FLAME.Pool.cast(pool, func, opts) 227 | end 228 | 229 | @doc """ 230 | Places a child process on a remote runner for the given `FLAME.Pool`. 231 | 232 | Any child process can be placed on the remote node and it will occupy a space 233 | in the runner's `max_concurrency` allowance. This is useful for long running 234 | workloads that you want to run asynchronously from the parent caller. 
235 | 236 | *Note*: The placed child process is linked to the caller and will only survive 237 | as long as the caller does. This is to ensure that the child process is never 238 | orphaned permanently on the remote node. 239 | 240 | *Note*: The child spec will be rewritten to use a temporary restart strategy 241 | to ensure that the child process is never restarted on the remote node when it 242 | exits. If you want restart behavior, you need to monitor on the parent node and 243 | replace the child yourself. 244 | 245 | ## Options 246 | 247 | * `:timeout` - The timeout the caller is willing to wait for a response before an 248 | exit with `:timeout`. Defaults to the configured timeout of the pool. 249 | The executed function will also be terminated on the remote flame if 250 | the timeout is reached. 251 | 252 | * `:link` – Whether the caller should be linked to the remote child process 253 | to prevent long-running orphaned resources. Defaults to `true`. Set to `false` to 254 | support long-running work that you want to complete within the `:shutdown_timeout` 255 | of the remote runner, even when the parent process or node is terminated. 256 | 257 | Accepts any child spec. 258 | 259 | ## Examples 260 | 261 | {:ok, pid} = FLAME.place_child(MyRunner, {MyWorker, []}) 262 | """ 263 | def place_child(pool, child_spec, opts \\ []) when is_atom(pool) and is_list(opts) do 264 | FLAME.Pool.place_child(pool, child_spec, opts) 265 | end 266 | 267 | @doc """ 268 | Callback invoked to recursively track resources 269 | on a given node. 270 | 271 | Sometimes we may want to allocate long lived resources 272 | in a FLAME but, because FLAME nodes are temporary, the 273 | node would terminate shortly after. The `:track_resources` 274 | option tells `FLAME` to look for resources which implement 275 | the `FLAME.Trackable` protocol. Those resources can then 276 | spawn PIDs in the remote node and tell FLAME to track them. 277 | Once all PIDs terminate, the FLAME node will terminate too. 278 | 279 | The `data` is any data type, `acc` is a list of PIDs 280 | (typicalling starts as an empty list), and the `node` 281 | we have received the resources from. See `FLAME.Trackable` 282 | for customization. 283 | """ 284 | def track_resources(data, acc, node) 285 | 286 | def track_resources(tuple, acc, node) when is_tuple(tuple) do 287 | {list, acc} = tuple |> Tuple.to_list() |> track_resources(acc, node) 288 | {List.to_tuple(list), acc} 289 | end 290 | 291 | def track_resources(list, acc, node) when is_list(list) do 292 | Enum.map_reduce(list, acc, &track_resources(&1, &2, node)) 293 | end 294 | 295 | def track_resources(%_{} = other, acc, node) do 296 | FLAME.Trackable.track(other, acc, node) 297 | end 298 | 299 | def track_resources(%{} = map, acc, node) do 300 | {pairs, acc} = 301 | Enum.map_reduce(map, acc, fn {k, v}, acc -> 302 | {k, acc} = track_resources(k, acc, node) 303 | {v, acc} = track_resources(v, acc, node) 304 | {{k, v}, acc} 305 | end) 306 | 307 | {Map.new(pairs), acc} 308 | end 309 | 310 | def track_resources(other, acc, _node) do 311 | {other, acc} 312 | end 313 | end 314 | -------------------------------------------------------------------------------- /lib/flame/backend.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Backend do 2 | @moduledoc """ 3 | Defines a behavior for a FLAME backend. 
4 | 5 | A FLAME backend is responsible for booting remote compute resources, 6 | connecting them back to the parent node, and executing functions on them. 7 | 8 | The default `FLAME.LocalBackend` simply runs your code locally, allowing 9 | you to develop and test your application using `FLAME.call/3` without 10 | running an external backend. 11 | 12 | ## Messaging 13 | 14 | The `FLAME.Terminator` process runs on remote nodes automatically and is 15 | responsible for connecting back to the parent node, notifying the parent, and 16 | handling termination of remote processes started via `FLAME.call/3`, `FLAME.cast/3`, 17 | and `FLAME.place_child/3`. When the terminator starts on a newly booted remote 18 | node, it sends the following message to the parent runner process: 19 | 20 | {ref, {:remote_up, remote_terminator_pid}} 21 | 22 | Where ref is the reference generated by the backend and encoded into the 23 | `FLAME.Parent.encode/1` string. 24 | 25 | When the remote terminator is going away gracefully, it sends the following message: 26 | 27 | {ref, {:remote_shutdown, :idle}} 28 | 29 | Backend implementations can react to these messages to handle the remotely 30 | provisioned instance booting up or shutting down. 31 | 32 | See `FLAME.FlyBackend` for an example implementation of this behavior. 33 | """ 34 | @callback init(opts :: Keyword.t()) :: {:ok, state :: term()} | {:error, term()} 35 | @callback remote_spawn_monitor(state :: term, func :: function() | term) :: 36 | {:ok, {pid, reference()}} | {:error, reason :: term} 37 | @callback system_shutdown() :: no_return() 38 | @callback remote_boot(state :: term) :: 39 | {:ok, remote_terminator_pid :: pid(), new_state :: term} | {:error, term} 40 | @callback handle_info(msg :: term, state :: term) :: 41 | {:noreply, new_state :: term} | {:stop, term, new_state :: term} 42 | 43 | @optional_callbacks handle_info: 2 44 | 45 | def init(opts), do: impl().init(opts) 46 | 47 | def remote_spawn_monitor(state, func) do 48 | impl().remote_spawn_monitor(state, func) 49 | end 50 | 51 | def system_shutdown do 52 | impl().system_shutdown() 53 | end 54 | 55 | def remote_boot(state) do 56 | impl().remote_boot(state) 57 | end 58 | 59 | def handle_info(msg, state) do 60 | impl().handle_info(msg, state) 61 | end 62 | 63 | def impl, do: Application.get_env(:flame, :backend, FLAME.LocalBackend) 64 | end 65 | -------------------------------------------------------------------------------- /lib/flame/code_sync.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.CodeSync.PackagedStream do 2 | @moduledoc false 3 | defstruct stream: nil, 4 | id: nil, 5 | extract_dir: nil, 6 | tmp_dir: nil, 7 | apps_to_start: [], 8 | changed_paths: [], 9 | sync_beam_hashes: %{}, 10 | deleted_paths: [], 11 | purge_modules: [], 12 | verbose: false, 13 | compress: false, 14 | chunk_size: 64_000 15 | end 16 | 17 | defmodule FLAME.CodeSync do 18 | @moduledoc false 19 | require Logger 20 | 21 | alias FLAME.CodeSync 22 | alias FLAME.CodeSync.PackagedStream 23 | 24 | defstruct id: nil, 25 | get_path: nil, 26 | sync_beam_hashes: %{}, 27 | copy_apps: nil, 28 | copy_paths: nil, 29 | sync_beams: nil, 30 | extract_dir: nil, 31 | tmp_dir: nil, 32 | start_apps: true, 33 | apps_to_start: [], 34 | changed_paths: [], 35 | deleted_paths: [], 36 | purge_modules: [], 37 | verbose: false, 38 | compress: false, 39 | chunk_size: 64_000 40 | 41 | def new(opts \\ []) do 42 | Keyword.validate!(opts, [ 43 | :get_path, 44 | :tmp_dir, 45 | :extract_dir, 46 | 
:copy_apps, 47 | :copy_paths, 48 | :sync_beams, 49 | :start_apps, 50 | :verbose, 51 | :compress, 52 | :chunk_size 53 | ]) 54 | 55 | start_apps = Keyword.get(opts, :start_apps, true) 56 | 57 | compute_start_apps(%CodeSync{ 58 | id: System.unique_integer([:positive]), 59 | get_path: Keyword.get(opts, :get_path, &:code.get_path/0), 60 | start_apps: start_apps, 61 | copy_apps: Keyword.get(opts, :copy_apps, start_apps), 62 | copy_paths: Keyword.get(opts, :copy_paths, false), 63 | sync_beams: Keyword.get(opts, :sync_beams, []), 64 | tmp_dir: Keyword.get(opts, :tmp_dir, {System, :tmp_dir!, []}), 65 | extract_dir: Keyword.get(opts, :extract_dir, {Function, :identity, ["/"]}), 66 | verbose: Keyword.get(opts, :verbose, false), 67 | compress: Keyword.get(opts, :compress, true), 68 | chunk_size: Keyword.get(opts, :chunk_size, 64_000) 69 | }) 70 | end 71 | 72 | defp compute_start_apps(%CodeSync{} = code) do 73 | apps_to_start = 74 | case code.start_apps do 75 | true -> 76 | Enum.map(Application.started_applications(), fn {app, _desc, _vsn} -> app end) 77 | 78 | false -> 79 | [] 80 | 81 | apps when is_list(apps) -> 82 | apps 83 | end 84 | 85 | %{code | apps_to_start: apps_to_start} 86 | end 87 | 88 | def compute_sync_beams(%CodeSync{} = code) do 89 | sync_beams_files = lookup_sync_beams_files(code.sync_beams) 90 | 91 | beam_hashes = 92 | for path <- sync_beams_files, 93 | into: %{}, 94 | do: {path, :erlang.md5(File.read!(path))} 95 | 96 | %{ 97 | code 98 | | sync_beam_hashes: beam_hashes, 99 | changed_paths: Enum.uniq(code.changed_paths ++ sync_beams_files) 100 | } 101 | end 102 | 103 | def compute_changed_paths(%CodeSync{} = code) do 104 | copy_apps = 105 | case code.copy_apps do 106 | true -> lookup_apps_files(code) 107 | false -> [] 108 | end 109 | 110 | changed_paths = 111 | case code.copy_paths do 112 | paths when is_list(paths) -> 113 | Enum.uniq(lookup_copy_paths_files(paths) ++ copy_apps) 114 | 115 | false -> 116 | copy_apps 117 | 118 | true -> 119 | IO.warn( 120 | "copy_paths: true is deprecated. Passing start_apps: true, now automatically copies all apps. \n" <> 121 | "You can also pass copy_apps: true to copy all apps without starting them." 
122 | ) 123 | 124 | lookup_apps_files(code) 125 | end 126 | 127 | %{code | changed_paths: Enum.uniq(code.changed_paths ++ changed_paths)} 128 | end 129 | 130 | def changed?(%CodeSync{} = code) do 131 | code.changed_paths != [] or code.deleted_paths != [] or code.purge_modules != [] 132 | end 133 | 134 | def diff(%CodeSync{sync_beam_hashes: prev_hashes} = prev) do 135 | current = 136 | prev 137 | |> compute_start_apps() 138 | |> compute_sync_beams() 139 | 140 | changed = 141 | for kv <- current.sync_beam_hashes, 142 | {path, current_hash} = kv, 143 | current_hash != prev_hashes[path], 144 | do: path 145 | 146 | deleted_paths = 147 | for kv <- prev.sync_beam_hashes, 148 | {path, _prev_hash} = kv, 149 | not Map.has_key?(current.sync_beam_hashes, path), 150 | do: path 151 | 152 | module_to_purge = 153 | for path <- deleted_paths, 154 | do: path |> Path.basename(".beam") |> String.to_atom() 155 | 156 | %{ 157 | current 158 | | changed_paths: changed, 159 | deleted_paths: deleted_paths, 160 | purge_modules: module_to_purge, 161 | apps_to_start: [] 162 | } 163 | end 164 | 165 | def package_to_stream(%CodeSync{} = code) do 166 | compressed = if code.compress, do: [:compressed], else: [] 167 | 168 | verbose = 169 | if code.verbose do 170 | if !Enum.empty?(code.changed_paths), 171 | do: log_verbose("packaging changed_paths: #{inspect(code.changed_paths)}") 172 | 173 | if !Enum.empty?(code.apps_to_start), 174 | do: log_verbose("sending apps_to_start: #{inspect(code.apps_to_start)}") 175 | 176 | [:verbose] 177 | else 178 | [] 179 | end 180 | 181 | out_stream = 182 | if code.changed_paths != [] do 183 | out_path = Path.join([mfa(code.tmp_dir), "flame_parent_code_sync_#{code.id}.tar.gz"]) 184 | dirs = for path <- code.changed_paths, uniq: true, do: String.to_charlist(path) 185 | {:ok, tar} = :erl_tar.open(out_path, [:write] ++ compressed) 186 | 187 | for dir <- dirs, 188 | do: :erl_tar.add(tar, dir, trim_leading_slash(dir), [:dereference | verbose]) 189 | 190 | :ok = :erl_tar.close(tar) 191 | 192 | if code.verbose do 193 | log_verbose("packaged size: #{File.stat!(out_path).size / (1024 * 1024)}mb") 194 | end 195 | 196 | # TODO: Change to File.stream!(out_path, code.chunk_size) once we require Elixir v1.16+ 197 | File.stream!(out_path, [], code.chunk_size) 198 | end 199 | 200 | %PackagedStream{ 201 | id: code.id, 202 | tmp_dir: code.tmp_dir, 203 | extract_dir: code.extract_dir, 204 | sync_beam_hashes: code.sync_beam_hashes, 205 | changed_paths: code.changed_paths, 206 | deleted_paths: code.deleted_paths, 207 | purge_modules: code.purge_modules, 208 | apps_to_start: code.apps_to_start, 209 | stream: out_stream, 210 | verbose: code.verbose, 211 | compress: code.compress, 212 | chunk_size: code.chunk_size 213 | } 214 | end 215 | 216 | defp trim_leading_slash([?/ | path]), do: path 217 | defp trim_leading_slash([_ | _] = path), do: path 218 | 219 | def extract_packaged_stream(%PackagedStream{} = pkg) do 220 | extract_dir = 221 | if pkg.stream do 222 | verbose = if pkg.verbose, do: [:verbose], else: [] 223 | compressed = if pkg.compress, do: [:compressed], else: [] 224 | extract_dir = mfa(pkg.extract_dir) 225 | target_tmp_path = Path.join([mfa(pkg.tmp_dir), "flame_child_code_sync_#{pkg.id}.tar.gz"]) 226 | 227 | flame_stream = File.stream!(target_tmp_path) 228 | Enum.into(pkg.stream, flame_stream) 229 | 230 | :ok = :erl_tar.extract(target_tmp_path, [{:cwd, extract_dir}] ++ compressed ++ verbose) 231 | :ok = add_code_paths_from_tar(pkg, extract_dir) 232 | 233 | File.rm(target_tmp_path) 234 | 235 | # purge any 
deleted modules 236 | for mod <- pkg.purge_modules do 237 | if pkg.verbose && !Enum.empty?(pkg.purge_modules), 238 | do: log_verbose("purging #{inspect(pkg.purge_modules)}") 239 | 240 | :code.purge(mod) 241 | :code.delete(mod) 242 | end 243 | 244 | # delete any deleted code paths, and prune empty dirs 245 | for del_path <- pkg.deleted_paths do 246 | File.rm(del_path) 247 | ebin_dir = Path.dirname(del_path) 248 | 249 | if File.ls!(ebin_dir) == [] do 250 | if pkg.verbose, do: log_verbose("deleting path #{ebin_dir}") 251 | File.rm_rf(ebin_dir) 252 | :code.del_path(String.to_charlist(ebin_dir)) 253 | end 254 | end 255 | 256 | extract_dir 257 | end 258 | 259 | # start any synced apps 260 | if !Enum.empty?(pkg.apps_to_start) do 261 | {:ok, started} = Application.ensure_all_started(pkg.apps_to_start) 262 | if pkg.verbose, do: log_verbose("started #{inspect(started)}") 263 | end 264 | 265 | extract_dir 266 | end 267 | 268 | def rm_packaged_stream(%PackagedStream{} = pkg) do 269 | if pkg.stream, do: File.rm(pkg.stream.path) 270 | :ok 271 | end 272 | 273 | defp lookup_sync_beams_files(paths) do 274 | paths 275 | |> Enum.flat_map(&Path.wildcard(Path.join(&1, "**/*.beam"))) 276 | |> Enum.uniq() 277 | end 278 | 279 | defp lookup_apps_files(%CodeSync{get_path: get_path}) do 280 | otp_lib = to_string(:code.lib_dir()) 281 | 282 | reject_apps = 283 | for app <- [:flame, :eex, :elixir, :ex_unit, :iex, :logger, :mix], 284 | lib_dir = :code.lib_dir(app), 285 | is_list(lib_dir), 286 | do: to_string(:filename.join(lib_dir, ~c"ebin")) 287 | 288 | get_path.() 289 | |> Enum.map(&to_string/1) 290 | |> Kernel.--(["." | reject_apps]) 291 | |> Stream.reject(fn path -> String.starts_with?(path, otp_lib) end) 292 | |> Stream.map(fn parent_dir -> 293 | # include ebin's parent if basename is ebin (will include priv) 294 | case Path.basename(parent_dir) do 295 | "ebin" -> Path.join(Path.dirname(parent_dir), "**/*") 296 | _ -> Path.join(parent_dir, "*") 297 | end 298 | end) 299 | |> Stream.uniq() 300 | |> Stream.flat_map(fn glob -> Path.wildcard(glob) end) 301 | |> Stream.uniq() 302 | |> Enum.filter(fn path -> File.regular?(path, [:raw]) end) 303 | end 304 | 305 | defp lookup_copy_paths_files(paths) do 306 | paths 307 | |> Stream.map(fn parent_dir -> 308 | if File.regular?(parent_dir, [:raw]) do 309 | parent_dir 310 | else 311 | Path.join(parent_dir, "*") 312 | end 313 | end) 314 | |> Stream.uniq() 315 | |> Stream.flat_map(fn glob -> Path.wildcard(glob) end) 316 | |> Stream.uniq() 317 | |> Enum.filter(fn path -> File.regular?(path, [:raw]) end) 318 | end 319 | 320 | defp add_code_paths_from_tar(%PackagedStream{} = pkg, extract_dir) do 321 | init = {_consolidated = [], _regular = [], _beams = [], _reload = [], _seen = MapSet.new()} 322 | 323 | Enum.reduce(pkg.changed_paths, init, fn rel_path, {cons, reg, beams, reload, seen} -> 324 | new_seen = MapSet.put(seen, rel_path) 325 | dir = extract_dir |> Path.join(rel_path) |> Path.dirname() 326 | 327 | new_reload = 328 | case rel_path |> Path.basename() |> String.split(".beam") do 329 | [mod_str, ""] -> 330 | mod = Module.concat([mod_str]) 331 | :code.purge(mod) 332 | :code.delete(mod) 333 | [mod | reload] 334 | 335 | _ -> 336 | reload 337 | end 338 | 339 | cond do 340 | # purge consolidated protocols 341 | # we only need to track new reloads for protocols as other module 342 | # references will reload on demand 343 | MapSet.member?(seen, rel_path) -> 344 | {cons, reg, beams, new_reload, seen} 345 | 346 | Path.basename(dir) == "consolidated" -> 347 | {[dir | cons], reg, beams, 
new_reload, new_seen} 348 | 349 | pkg.sync_beam_hashes[rel_path] -> 350 | {cons, reg, [dir | beams], reload, new_seen} 351 | 352 | true -> 353 | {cons, [dir | reg], beams, reload, new_seen} 354 | end 355 | end) 356 | |> then(fn {consolidated, regular, sync_beams, reload, _seen} -> 357 | # paths already in reverse order, which is what we want for prepend 358 | if pkg.verbose do 359 | if !Enum.empty?(consolidated), 360 | do: log_verbose("prepending consolidated paths: #{inspect(consolidated)}") 361 | 362 | if !Enum.empty?(regular), 363 | do: log_verbose("appending code paths: #{inspect(regular)}") 364 | 365 | if !Enum.empty?(sync_beams), 366 | do: log_verbose("reloading code paths: #{inspect(sync_beams)}") 367 | end 368 | 369 | Code.prepend_paths(regular, cache: true) 370 | Code.prepend_paths(consolidated, cache: true) 371 | # don't cache for sync_beams 372 | Code.prepend_paths(sync_beams) 373 | 374 | if pkg.verbose && !Enum.empty?(reload), do: log_verbose("reloading #{inspect(reload)}") 375 | for mod <- reload, do: :code.load_file(mod) 376 | 377 | :ok 378 | end) 379 | end 380 | 381 | defp log_verbose(msg) do 382 | Logger.info("[CodeSync #{inspect(node())}] #{msg}") 383 | end 384 | 385 | defp mfa({mod, func, args}), do: apply(mod, func, args) 386 | end 387 | -------------------------------------------------------------------------------- /lib/flame/fly_backend.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.FlyBackend do 2 | @moduledoc """ 3 | A `FLAME.Backend` using [Fly.io](https://fly.io) machines. 4 | 5 | The only required configuration is telling FLAME to use the 6 | `FLAME.FlyBackend` by default and the `:token` which is your Fly.io API 7 | token. These can be set via application configuration in your `config/runtime.exs` 8 | withing a `:prod` block: 9 | 10 | if config_env() == :prod do 11 | config :flame, :backend, FLAME.FlyBackend 12 | config :flame, FLAME.FlyBackend, token: System.fetch_env!("FLY_API_TOKEN") 13 | ... 14 | end 15 | 16 | To set your `FLY_API_TOKEN` secret, you can run the following commands locally: 17 | 18 | ```bash 19 | $ fly secrets set FLY_API_TOKEN="$(fly auth token)" 20 | ``` 21 | 22 | The following backend options are supported, and mirror the 23 | [Fly.io machines create API](https://fly.io/docs/machines/api/machines-resource/#machine-config-object-properties): 24 | 25 | * `:cpu_kind` - The size of the runner CPU. Defaults to `"performance"`. 26 | 27 | * `:cpus` - The number of runner CPUs. Defaults to `System.schedulers_online()` 28 | for the number of cores of the running parent app. 29 | 30 | * `:memory_mb` - The memory of the runner. Must be a 1024 multiple. Defaults to `4096`. 31 | 32 | * `:gpu_kind` - The type of GPU reservation to make. 33 | 34 | * `:gpus` - The number of runner GPUs. Defaults to `1` if `:gpu_kind` is set. 35 | 36 | * `:boot_timeout` - The boot timeout. Defaults to `30_000`. 37 | 38 | * `:app` – The name of the otp app. Defaults to `System.get_env("FLY_APP_NAME")`, 39 | 40 | * `:image` – The URL of the docker image to pass to the machines create endpoint. 41 | Defaults to `System.get_env("FLY_IMAGE_REF")` which is the image of your running app. 42 | 43 | * `:token` – The Fly API token. Defaults to `System.get_env("FLY_API_TOKEN")`. 44 | 45 | * `:host` – The host of the Fly API. Defaults to `"https://api.machines.dev"`. 46 | 47 | * `:init` – The init object to pass to the machines create endpoint. Defaults to `%{}`. 
48 | Possible values include: 49 | 50 | * `:cmd` – list of strings for the command 51 | * `:entrypoint` – list strings for the entrypoint command 52 | * `:exec` – list of strings for the exec command 53 | * `:kernel_args` - list of strings 54 | * `:swap_size_mb` – integer value in megabytes for the swap size 55 | * `:tty` – boolean 56 | 57 | * `:services` - The optional services to run on the machine. Defaults to `[]`. 58 | 59 | * `:metadata` - The optional map of metadata to set for the machine. Defaults to `%{}`. 60 | 61 | ## Environment Variables 62 | 63 | The FLAME Fly machines *do not* inherit the environment variables of the parent. 64 | You must explicit provide the environment that you would like to forward to the 65 | machine. For example, if your FLAME's are starting your Ecto repos, you can copy 66 | the env from the parent: 67 | 68 | ```elixir 69 | config :flame, FLAME.FlyBackend, 70 | token: System.fetch_env!("FLY_API_TOKEN"), 71 | env: %{ 72 | "DATABASE_URL" => System.fetch_env!("DATABASE_URL"), 73 | "POOL_SIZE" => "1" 74 | } 75 | ``` 76 | 77 | Or pass the env to each pool: 78 | 79 | ```elixir 80 | {FLAME.Pool, 81 | name: MyRunner, 82 | backend: {FLAME.FlyBackend, env: %{"DATABASE_URL" => System.fetch_env!("DATABASE_URL")}} 83 | } 84 | ``` 85 | """ 86 | @behaviour FLAME.Backend 87 | 88 | alias FLAME.FlyBackend 89 | alias FLAME.Parser.JSON 90 | 91 | require Logger 92 | 93 | @derive {Inspect, 94 | only: [ 95 | :host, 96 | :init, 97 | :cpu_kind, 98 | :cpus, 99 | :memory_mb, 100 | :gpu_kind, 101 | :gpus, 102 | :image, 103 | :app, 104 | :runner_id, 105 | :local_ip, 106 | :remote_terminator_pid, 107 | :runner_instance_id, 108 | :runner_private_ip, 109 | :runner_node_base, 110 | :runner_node_name, 111 | :boot_timeout 112 | ]} 113 | defstruct host: nil, 114 | init: %{}, 115 | local_ip: nil, 116 | env: %{}, 117 | region: nil, 118 | cpu_kind: nil, 119 | cpus: nil, 120 | memory_mb: nil, 121 | gpu_kind: nil, 122 | gpus: nil, 123 | image: nil, 124 | services: [], 125 | metadata: %{}, 126 | app: nil, 127 | token: nil, 128 | boot_timeout: nil, 129 | runner_id: nil, 130 | remote_terminator_pid: nil, 131 | parent_ref: nil, 132 | runner_instance_id: nil, 133 | runner_private_ip: nil, 134 | runner_node_base: nil, 135 | runner_node_name: nil, 136 | log: nil 137 | 138 | @retry 10 139 | 140 | @valid_opts [ 141 | :app, 142 | :region, 143 | :image, 144 | :token, 145 | :host, 146 | :init, 147 | :cpu_kind, 148 | :cpus, 149 | :memory_mb, 150 | :gpu_kind, 151 | :gpus, 152 | :boot_timeout, 153 | :env, 154 | :terminator_sup, 155 | :log, 156 | :services, 157 | :metadata 158 | ] 159 | 160 | @impl true 161 | def init(opts) do 162 | conf = Application.get_env(:flame, __MODULE__) || [] 163 | [_node_base, ip] = node() |> to_string() |> String.split("@") 164 | 165 | default = %FlyBackend{ 166 | app: System.get_env("FLY_APP_NAME"), 167 | region: System.get_env("FLY_REGION"), 168 | image: System.get_env("FLY_IMAGE_REF"), 169 | token: System.get_env("FLY_API_TOKEN"), 170 | host: "https://api.machines.dev", 171 | cpu_kind: "performance", 172 | cpus: System.schedulers_online(), 173 | memory_mb: 4096, 174 | boot_timeout: 30_000, 175 | services: [], 176 | metadata: %{}, 177 | init: %{}, 178 | log: Keyword.get(conf, :log, false) 179 | } 180 | 181 | provided_opts = 182 | conf 183 | |> Keyword.merge(opts) 184 | |> Keyword.validate!(@valid_opts) 185 | 186 | %FlyBackend{} = state = Map.merge(default, Map.new(provided_opts)) 187 | 188 | for key <- [:token, :image, :host, :app] do 189 | unless Map.get(state, key) do 190 | 
raise ArgumentError, "missing :#{key} config for #{inspect(__MODULE__)}" 191 | end 192 | end 193 | 194 | state = %{state | runner_node_base: "#{state.app}-flame-#{rand_id(20)}"} 195 | parent_ref = make_ref() 196 | 197 | encoded_parent = 198 | parent_ref 199 | |> FLAME.Parent.new(self(), __MODULE__, state.runner_node_base, "FLY_PRIVATE_IP") 200 | |> FLAME.Parent.encode() 201 | 202 | new_env = 203 | %{"PHX_SERVER" => "false", "FLAME_PARENT" => encoded_parent} 204 | |> Map.merge(state.env) 205 | |> then(fn env -> 206 | if flags = System.get_env("ERL_AFLAGS") do 207 | Map.put_new(env, "ERL_AFLAGS", flags) 208 | else 209 | env 210 | end 211 | end) 212 | |> then(fn env -> 213 | if flags = System.get_env("ERL_ZFLAGS") do 214 | Map.put_new(env, "ERL_ZFLAGS", flags) 215 | else 216 | env 217 | end 218 | end) 219 | 220 | new_state = %{state | env: new_env, parent_ref: parent_ref, local_ip: ip} 221 | 222 | {:ok, new_state} 223 | end 224 | 225 | @impl true 226 | # TODO explore spawn_request 227 | def remote_spawn_monitor(%FlyBackend{} = state, term) do 228 | case term do 229 | func when is_function(func, 0) -> 230 | {pid, ref} = Node.spawn_monitor(state.runner_node_name, func) 231 | {:ok, {pid, ref}} 232 | 233 | {mod, fun, args} when is_atom(mod) and is_atom(fun) and is_list(args) -> 234 | {pid, ref} = Node.spawn_monitor(state.runner_node_name, mod, fun, args) 235 | {:ok, {pid, ref}} 236 | 237 | other -> 238 | raise ArgumentError, 239 | "expected a null arity function or {mod, func, args}. Got: #{inspect(other)}" 240 | end 241 | end 242 | 243 | @impl true 244 | def system_shutdown do 245 | System.stop() 246 | end 247 | 248 | def with_elapsed_ms(func) when is_function(func, 0) do 249 | {micro, result} = :timer.tc(func) 250 | {result, div(micro, 1000)} 251 | end 252 | 253 | @impl true 254 | def remote_boot(%FlyBackend{parent_ref: parent_ref} = state) do 255 | {resp, req_connect_time} = 256 | with_elapsed_ms(fn -> 257 | http_post!("#{state.host}/v1/apps/#{state.app}/machines", @retry, 258 | content_type: "application/json", 259 | headers: [ 260 | {"Content-Type", "application/json"}, 261 | {"Authorization", "Bearer #{state.token}"} 262 | ], 263 | connect_timeout: state.boot_timeout, 264 | body: 265 | JSON.encode!(%{ 266 | name: state.runner_node_base, 267 | region: state.region, 268 | config: %{ 269 | image: state.image, 270 | init: state.init, 271 | guest: %{ 272 | cpu_kind: state.cpu_kind, 273 | cpus: state.cpus, 274 | memory_mb: state.memory_mb, 275 | gpu_kind: state.gpu_kind, 276 | gpus: if(state.gpu_kind, do: state.gpus || 1) 277 | }, 278 | auto_destroy: true, 279 | restart: %{policy: "no"}, 280 | env: state.env, 281 | services: state.services, 282 | metadata: Map.put(state.metadata, :flame_parent_ip, state.local_ip) 283 | } 284 | }) 285 | ) 286 | end) 287 | 288 | if state.log do 289 | Logger.log( 290 | state.log, 291 | "#{inspect(__MODULE__)} #{inspect(node())} machine create #{req_connect_time}ms" 292 | ) 293 | end 294 | 295 | remaining_connect_window = state.boot_timeout - req_connect_time 296 | 297 | case resp do 298 | %{"id" => id, "instance_id" => instance_id, "private_ip" => ip} -> 299 | new_state = 300 | %{ 301 | state 302 | | runner_id: id, 303 | runner_instance_id: instance_id, 304 | runner_private_ip: ip 305 | } 306 | 307 | remote_terminator_pid = 308 | receive do 309 | {^parent_ref, {:remote_up, remote_terminator_pid}} -> 310 | remote_terminator_pid 311 | after 312 | remaining_connect_window -> 313 | Logger.error("failed to connect to fly machine within #{state.boot_timeout}ms") 314 | 
exit(:timeout) 315 | end 316 | 317 | new_state = %{ 318 | new_state 319 | | remote_terminator_pid: remote_terminator_pid, 320 | runner_node_name: node(remote_terminator_pid) 321 | } 322 | 323 | {:ok, remote_terminator_pid, new_state} 324 | 325 | other -> 326 | {:error, other} 327 | end 328 | end 329 | 330 | defp rand_id(len) do 331 | len 332 | |> :crypto.strong_rand_bytes() 333 | |> Base.encode16(case: :lower) 334 | |> binary_part(0, len) 335 | end 336 | 337 | defp http_post!(url, remaining_tries, opts) do 338 | Keyword.validate!(opts, [:headers, :body, :connect_timeout, :content_type]) 339 | 340 | headers = 341 | for {field, val} <- Keyword.fetch!(opts, :headers), 342 | do: {String.to_charlist(field), val} 343 | 344 | body = Keyword.fetch!(opts, :body) 345 | connect_timeout = Keyword.fetch!(opts, :connect_timeout) 346 | content_type = Keyword.fetch!(opts, :content_type) 347 | 348 | http_opts = [ 349 | ssl: 350 | [ 351 | verify: :verify_peer, 352 | depth: 2, 353 | customize_hostname_check: [ 354 | match_fun: :public_key.pkix_verify_hostname_match_fun(:https) 355 | ] 356 | ] ++ cacerts_options(), 357 | connect_timeout: connect_timeout 358 | ] 359 | 360 | case :httpc.request(:post, {url, headers, ~c"#{content_type}", body}, http_opts, 361 | body_format: :binary 362 | ) do 363 | {:ok, {{_, 200, _}, _, response_body}} -> 364 | JSON.decode!(response_body) 365 | 366 | # 429 Too Many Requests (rate limited) 367 | # 412 Precondition Failed (can't find capacity) 368 | # 409 Conflict (the flyd tried ending up not having capacity) 369 | # 422 Unprocessable Entity (could not find capcity for volume workloads) 370 | {:ok, {{_, status, _}, _, _response_body}} 371 | when status in [429, 412, 409, 422] and remaining_tries > 0 -> 372 | Process.sleep(1000) 373 | http_post!(url, remaining_tries - 1, opts) 374 | 375 | {:ok, {{_, status, reason}, _, resp_body}} -> 376 | raise "failed POST #{url} with #{inspect(status)} (#{inspect(reason)}): #{inspect(resp_body)} #{inspect(headers)}" 377 | 378 | {:error, reason} -> 379 | raise "failed POST #{url} with #{inspect(reason)} #{inspect(headers)}" 380 | end 381 | end 382 | 383 | defp cacerts_options do 384 | cond do 385 | certs = otp_cacerts() -> 386 | [cacerts: certs] 387 | 388 | Application.spec(:castore, :vsn) -> 389 | [cacertfile: Application.app_dir(:castore, "priv/cacerts.pem")] 390 | 391 | true -> 392 | IO.warn(""" 393 | No certificate trust store was found. 394 | 395 | A certificate trust store is required in 396 | order to download locales for your configuration. 397 | Since elixir_make could not detect a system 398 | installed certificate trust store one of the 399 | following actions may be taken: 400 | 401 | 1. Use OTP 25+ on an OS that has built-in certificate 402 | trust store. 403 | 404 | 2. Install the hex package `castore`. It will 405 | be automatically detected after recompilation. 406 | 407 | """) 408 | 409 | [] 410 | end 411 | end 412 | 413 | if System.otp_release() >= "25" do 414 | defp otp_cacerts do 415 | :public_key.cacerts_get() 416 | rescue 417 | _ -> nil 418 | end 419 | else 420 | defp otp_cacerts, do: nil 421 | end 422 | end 423 | -------------------------------------------------------------------------------- /lib/flame/local_backend.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.LocalBackend do 2 | @moduledoc """ 3 | A `FLAME.Backend` useful for development and testing. 
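It is the default backend when no `:flame, :backend` is configured, so no setup is usually required for dev and test. To be explicit, a minimal configuration sketch:

    # config/dev.exs and config/test.exs
    config :flame, :backend, FLAME.LocalBackend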
4 | """ 5 | 6 | @behaviour FLAME.Backend 7 | 8 | @impl true 9 | def init(opts) do 10 | defaults = 11 | Application.get_env(:flame, __MODULE__) || [] 12 | 13 | _terminator_sup = Keyword.fetch!(opts, :terminator_sup) 14 | 15 | {:ok, 16 | defaults 17 | |> Keyword.merge(opts) 18 | |> Enum.into(%{})} 19 | end 20 | 21 | @impl true 22 | def remote_spawn_monitor(_state, term) do 23 | case term do 24 | func when is_function(func, 0) -> 25 | {pid, ref} = spawn_monitor(func) 26 | {:ok, {pid, ref}} 27 | 28 | {mod, fun, args} when is_atom(mod) and is_atom(fun) and is_list(args) -> 29 | {pid, ref} = spawn_monitor(mod, fun, args) 30 | {:ok, {pid, ref}} 31 | 32 | other -> 33 | raise ArgumentError, 34 | "expected a null arity function or {mod, func, args}. Got: #{inspect(other)}" 35 | end 36 | end 37 | 38 | @impl true 39 | def system_shutdown, do: :noop 40 | 41 | @impl true 42 | def remote_boot(state) do 43 | parent = FLAME.Parent.new(make_ref(), self(), __MODULE__, "nonode", nil) 44 | name = Module.concat(state.terminator_sup, to_string(System.unique_integer([:positive]))) 45 | opts = [name: name, parent: parent, log: state.log] 46 | 47 | spec = Supervisor.child_spec({FLAME.Terminator, opts}, restart: :temporary) 48 | {:ok, _sup_pid} = DynamicSupervisor.start_child(state.terminator_sup, spec) 49 | 50 | case Process.whereis(name) do 51 | terminator_pid when is_pid(terminator_pid) -> {:ok, terminator_pid, state} 52 | end 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /lib/flame/parent.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Parent do 2 | @moduledoc """ 3 | Conveniences for looking up FLAME parent information. 4 | 5 | ## Parent Information 6 | 7 | When a FLAME child is started, it contains the `FLAME_PARENT` environment 8 | variable that holds the parent node's information base 64 encoded into a 9 | map, with the following keys: 10 | 11 | * `:ref` - The parent node's reference. 12 | * `:pid` - The parent node's Pid. 13 | * `:backend` - The FLAME backend in use. 14 | * `:flame_vsn` - The FLAME version running on the parent. 15 | * `:backend_app` - The FLAME backend application running on the parent. 16 | * `:backend_vsn` - The FLAME backend version running on the parent. 17 | * `:node_base` - The node basename the parent generated for the runner. 18 | * `:host_env` - The environment variable name on the runner to use to 19 | lookup the runner's hostname for the runner's longname. 20 | """ 21 | 22 | @flame_vsn Keyword.fetch!(Mix.Project.config(), :version) 23 | 24 | defstruct pid: nil, 25 | ref: nil, 26 | backend: nil, 27 | node_base: nil, 28 | flame_vsn: nil, 29 | backend_vsn: nil, 30 | backend_app: nil, 31 | host_env: nil 32 | 33 | @doc """ 34 | Gets the `%FLAME.Parent{}` struct from the system environment. 35 | 36 | Returns `nil` if no parent is set. 37 | 38 | When booting a FLAME node, the `FLAME.Backend` is required to 39 | export the `FLAME_PARENT` environment variable for the provisioned 40 | instance. This value holds required information about the parent node 41 | and can be set using the `encode/1` function. 
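The parent lookup is commonly used to branch in `application.ex` when the app boots. A sketch, assuming illustrative `MyApp.Repo` and `MyAppWeb.Endpoint` modules:

    flame_parent = FLAME.Parent.get()

    children =
      [
        MyApp.Repo,
        # only start the endpoint when running as the parent
        !flame_parent && MyAppWeb.Endpoint
      ]
      |> Enum.filter(& &1)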
42 | """ 43 | def get do 44 | with {:ok, encoded} <- System.fetch_env("FLAME_PARENT"), 45 | %{ref: ref, pid: pid, backend: backend, host_env: host_env, node_base: node_base} = 46 | encoded |> Base.decode64!() |> :erlang.binary_to_term() do 47 | new(ref, pid, backend, node_base, host_env) 48 | else 49 | _ -> nil 50 | end 51 | end 52 | 53 | @doc """ 54 | Returns a new `%FLAME.Parent{}` struct. 55 | 56 | The `pid` is the parent node's `FLAME.Runner` process started by 57 | the `FLAME.Pool`. 58 | """ 59 | def new(ref, pid, backend, node_base, host_env) 60 | when is_reference(ref) and is_pid(pid) and is_atom(backend) do 61 | {backend_app, backend_vsn} = 62 | case :application.get_application(backend) do 63 | {:ok, app} -> {app, to_string(Application.spec(app, :vsn))} 64 | :undefined -> {nil, nil} 65 | end 66 | 67 | %__MODULE__{ 68 | pid: pid, 69 | ref: ref, 70 | backend: backend, 71 | node_base: node_base, 72 | host_env: host_env, 73 | flame_vsn: @flame_vsn, 74 | backend_app: backend_app, 75 | backend_vsn: backend_vsn 76 | } 77 | end 78 | 79 | @doc """ 80 | Encodes a `%FLAME.Parent{}` struct into string. 81 | """ 82 | def encode(%__MODULE__{} = parent) do 83 | info = 84 | parent 85 | |> Map.from_struct() 86 | |> Map.take([ 87 | :ref, 88 | :pid, 89 | :backend, 90 | :flame_vsn, 91 | :backend_app, 92 | :backend_vsn, 93 | :node_base, 94 | :host_env 95 | ]) 96 | 97 | info |> :erlang.term_to_binary() |> Base.encode64() 98 | end 99 | end 100 | -------------------------------------------------------------------------------- /lib/flame/parser/json.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Parser.JSON do 2 | @moduledoc false 3 | if Code.ensure_loaded?(:json) do 4 | def encode!(data) do 5 | data 6 | |> :json.encode(&encoder/2) 7 | |> IO.iodata_to_binary() 8 | end 9 | 10 | def decode!(data) do 11 | data 12 | |> :json.decode(:ok, %{null: nil}) 13 | |> handle_decode() 14 | end 15 | 16 | def json_parser, do: :json 17 | 18 | defp encoder(nil, _encoder), do: "null" 19 | defp encoder(term, encoder), do: :json.encode_value(term, encoder) 20 | 21 | defp handle_decode({data, :ok, ""}), do: data 22 | else 23 | def encode!(data), do: Jason.encode!(data) 24 | def decode!(data), do: Jason.decode!(data) 25 | 26 | def json_parser, do: Jason 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/flame/pool.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Pool.RunnerState do 2 | @moduledoc false 3 | 4 | defstruct count: nil, pid: nil, monitor_ref: nil 5 | end 6 | 7 | defmodule FLAME.Pool.WaitingState do 8 | @moduledoc false 9 | 10 | defstruct from: nil, monitor_ref: nil, deadline: nil 11 | end 12 | 13 | defmodule FLAME.Pool.Caller do 14 | @moduledoc false 15 | 16 | defstruct checkout_ref: nil, monitor_ref: nil, runner_ref: nil 17 | end 18 | 19 | defmodule FLAME.Pool do 20 | @moduledoc """ 21 | Manages a pool of `FLAME.Runner` processes. 22 | 23 | Pools support elastic growth and shrinking of the number of runners. 24 | 25 | ## Examples 26 | 27 | children = [ 28 | ..., 29 | {FLAME.Pool, name: MyRunner, min: 1, max: 10, max_concurrency: 100} 30 | ] 31 | 32 | See `start_link/1` for supported options. 
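Once the pool is started, pass its name to the `FLAME` functions to run work on it, for example (the return values here are placeholders):

    FLAME.call(MyRunner, fn -> :work end)
    FLAME.cast(MyRunner, fn -> :fire_and_forget end)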
33 | 34 | ## TODO 35 | [ ] interface to configure min/max at runtime 36 | 37 | """ 38 | use GenServer 39 | 40 | alias FLAME.{Pool, Runner, Queue, CodeSync} 41 | alias FLAME.Pool.{RunnerState, WaitingState, Caller} 42 | 43 | @default_max_concurrency 100 44 | @boot_timeout 30_000 45 | @idle_shutdown_after 30_000 46 | @async_boot_debounce 1_000 47 | 48 | defstruct name: nil, 49 | runner_sup: nil, 50 | task_sup: nil, 51 | terminator_sup: nil, 52 | child_placement_sup: nil, 53 | boot_timeout: nil, 54 | idle_shutdown_after: nil, 55 | min_idle_shutdown_after: nil, 56 | min: nil, 57 | max: nil, 58 | max_concurrency: nil, 59 | callers: %{}, 60 | waiting: Queue.new(), 61 | runners: %{}, 62 | pending_runners: %{}, 63 | runner_opts: [], 64 | on_grow_start: nil, 65 | on_grow_end: nil, 66 | on_shrink: nil, 67 | async_boot_timer: nil, 68 | track_resources: false, 69 | base_sync_stream: nil 70 | 71 | def child_spec(opts) do 72 | %{ 73 | id: {__MODULE__, Keyword.fetch!(opts, :name)}, 74 | start: {FLAME.Pool.Supervisor, :start_link, [opts]}, 75 | type: :supervisor 76 | } 77 | end 78 | 79 | @doc """ 80 | Starts a pool of runners. 81 | 82 | ## Options 83 | 84 | * `:name` - The name of the pool, for example: `MyApp.FFMPegRunner` 85 | 86 | * `:min` - The minimum number of runners to keep in the pool at all times. 87 | For "scale to zero" behavior you may pass `0`. When starting as a flame child, 88 | the `:min` will be forced to zero to avoid recursively starting backend resources. 89 | 90 | * `:max` - The maximum number of runners to elastically grow to in the pool. 91 | 92 | * `:max_concurrency` - The maximum number of concurrent executions per runner before 93 | booting new runners or queueing calls. Defaults to `100`. 94 | 95 | * `:single_use` - if `true`, runners will be terminated after each call completes. 96 | Defaults `false`. 97 | 98 | * `:backend` - The backend to use. Defaults to the configured `:flame, :backend` or 99 | `FLAME.LocalBackend` if not configured. 100 | 101 | * `:log` - The log level to use for verbose logging. Defaults to `false`. 102 | 103 | * `:timeout` - The time to allow functions to execute on a remote node. Defaults to 30 seconds. 104 | This value is also used as the default `FLAME.call/3` timeout for the caller. 105 | 106 | * `:boot_timeout` - The time to allow for booting and connecting to a remote node. 107 | Defaults to 30 seconds. 108 | 109 | * `:shutdown_timeout` - The time to allow for graceful shutdown on the remote node. 110 | Defaults to 30 seconds. 111 | 112 | * `:idle_shutdown_after` - The amount of time after which to idle a remote node 113 | down following a period of inactivity. Defaults to 30 seconds. A `{timeout, check_function}` 114 | tuple may also be passed to check a specific condition, for example: 115 | 116 | {10_000, fn -> Supervisor.which_children(MySup) == [] end} 117 | 118 | * `:min_idle_shutdown_after` - The same behavior as `:idle_shutdown_after`, but applied 119 | to the `:min` pool runners. Defaults to `:infinity`. 120 | 121 | * `:on_grow_start` - The optional function to be called when the pool starts booting a new 122 | runner beyond the configured `:min`. The function receives a map with the following metadata: 123 | 124 | * `:name` - The name of the pool 125 | * `:count` - The number of runners the pool is attempting to grow to 126 | * `:pid` - The pid of the async process that is booting the new runner 127 | 128 | * `:on_grow_end` - The optional 2-arity function to be called when the pool growth process completes. 
129 | The 2-arity function receives either `:ok` or `{:exit, reason}`, and a map with the following metadata: 130 | 131 | * `:name` - The name of the pool 132 | * `:count` - The number of runners the pool is now at 133 | * `:pid` - The pid of the async process that attempted to boot the new runner 134 | 135 | * `:on_shrink` - The optional function to be called when the pool shrinks. 136 | The function receives a map with the following metadata: 137 | 138 | * `:name` - The name of the pool 139 | * `:count` - The number of runners the pool is attempting to shrink to 140 | 141 | * `:track_resources` - When `true`, traverses the returned results from FLAME 142 | operations looking for resources that implement the `FLAME.Trackable` protocol 143 | and makes sure the FLAME node does not terminate until the tracked resources are removed. 144 | Defaults `false`. 145 | 146 | * `:code_sync` – The optional list of options to enable copying and syncing code paths 147 | from the parent node to the runner node. Disabled by default. The options are: 148 | 149 | * `:start_apps` – Either a boolean or a list of specific OTP application names to start 150 | when the runner boots. When `true`, all applications currently running on the parent node 151 | are sent to the runner node to be started. Defaults to `false`. When set to `true`, 152 | `copy_apps` will also be set to `true` if not explicitly set to `false`. 153 | 154 | * `:copy_apps` – The boolean flag to copy all the application artifacts and their beam 155 | files from the parent node to the runner node on boot. Defaults `false`. 156 | When passing `start_apps: true`, automatically sets `copy_apps: true`. 157 | 158 | * `:copy_paths` – The list of arbitrary paths to copy from the parent node to the runner 159 | node on boot. Defaults to `[]`. 160 | 161 | * `:sync_beams` – A list of specific beam code paths to sync to the runner node. Useful 162 | when you want to sync specific beam code paths from the parent after sending all code 163 | paths from `:copy_apps` on initial boot. For example, with `copy_apps: true`, 164 | and `sync_beams: ["/home/app/.cache/.../ebin"]`, all the code from the parent will be 165 | copied on boot, but only the specific beam files will be synced on subsequent calls. 166 | With `copy_apps: false`, and `sync_beams: ["/home/app/.cache/.../ebin"]`, 167 | only the specific beam files will be synced on boot and for subsequent calls. 168 | Defaults to `[]`. 169 | 170 | * `:verbose` – If `true`, the pool will log verbose information about the code sync process. 171 | Defaults to `false`. 172 | 173 | * `:compress` – If `true`, the `:copy_apps`, `:copy_paths`, and `:sync_beams` artifacts will be compressed 174 | before sending. Provides savings in network payload size at the cost of CPU time. 175 | Defaults to `true`. 
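In an ordinary application supervision tree, a code-synced pool can be configured by passing `:code_sync` alongside the usual pool options (a minimal sketch; the pool name and sizes are illustrative):

    children = [
      {FLAME.Pool,
       name: MyApp.SyncedRunner,
       min: 0,
       max: 4,
       max_concurrency: 20,
       code_sync: [start_apps: true]}
    ]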
176 | 177 | For example, in [Livebook](https://livebook.dev/), to start a pool with code sync enabled: 178 | 179 | Mix.install([:kino, :flame]) 180 | 181 | Kino.start_child!( 182 | {FLAME.Pool, 183 | name: :my_flame, 184 | code_sync: [ 185 | start_apps: true, 186 | sync_beams: [Path.join(System.tmp_dir!(), "livebook_runtime")] 187 | ], 188 | min: 1, 189 | max: 1, 190 | max_concurrency: 10, 191 | backend: {FLAME.FlyBackend, 192 | cpu_kind: "performance", cpus: 4, memory_mb: 8192, 193 | token: System.fetch_env!("FLY_API_TOKEN"), 194 | env: Map.take(System.get_env(), ["LIVEBOOK_COOKIE"]), 195 | }, 196 | idle_shutdown_after: :timer.minutes(5)} 197 | ) 198 | """ 199 | def start_link(opts) do 200 | Keyword.validate!(opts, [ 201 | :name, 202 | :runner_sup, 203 | :task_sup, 204 | :cleaner, 205 | :terminator_sup, 206 | :child_placement_sup, 207 | :idle_shutdown_after, 208 | :min_idle_shutdown_after, 209 | :min, 210 | :max, 211 | :max_concurrency, 212 | :backend, 213 | :log, 214 | :single_use, 215 | :timeout, 216 | :boot_timeout, 217 | :shutdown_timeout, 218 | :on_grow_start, 219 | :on_grow_end, 220 | :on_shrink, 221 | :code_sync, 222 | :track_resources 223 | ]) 224 | 225 | Keyword.validate!(opts[:code_sync] || [], [ 226 | :get_path, 227 | :extract_dir, 228 | :tmp_dir, 229 | :copy_apps, 230 | :copy_paths, 231 | :sync_beams, 232 | :start_apps, 233 | :verbose, 234 | :compress, 235 | :chunk_size 236 | ]) 237 | 238 | GenServer.start_link(__MODULE__, opts, name: Keyword.fetch!(opts, :name)) 239 | end 240 | 241 | @doc """ 242 | Calls a function in a remote runner for the given `FLAME.Pool`. 243 | 244 | See `FLAME.call/3` for more information. 245 | """ 246 | def call(name, func, opts \\ []) when is_function(func, 0) and is_list(opts) do 247 | caller_pid = self() 248 | do_call(name, func, caller_pid, opts) 249 | end 250 | 251 | defp do_call(name, func, caller_pid, opts) when is_pid(caller_pid) do 252 | caller_checkout!(name, opts, :call, [name, func, opts], fn runner_pid, 253 | remaining_timeout, 254 | track_resources -> 255 | opts = 256 | opts 257 | |> Keyword.put_new(:timeout, remaining_timeout) 258 | |> Keyword.put_new(:track_resources, track_resources) 259 | 260 | {:cancel, :ok, Runner.call(runner_pid, caller_pid, func, opts)} 261 | end) 262 | end 263 | 264 | @doc """ 265 | Casts a function to a remote runner for the given `FLAME.Pool`. 266 | 267 | See `FLAME.cast/3` for more information. 268 | """ 269 | def cast(name, func, opts) when is_function(func, 0) and is_list(opts) do 270 | %{task_sup: task_sup} = lookup_meta(name) 271 | 272 | caller_pid = self() 273 | opts = Keyword.put_new(opts, :timeout, :infinity) 274 | 275 | # we don't care about the result so don't copy it back to the caller 276 | wrapped = fn -> 277 | func.() 278 | :ok 279 | end 280 | 281 | {:ok, _pid} = 282 | Task.Supervisor.start_child(task_sup, fn -> do_call(name, wrapped, caller_pid, opts) end) 283 | 284 | :ok 285 | end 286 | 287 | @doc """ 288 | See `FLAME.place_child/3` for more information. 
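For example (a sketch; the pool name and the `MyApp.Worker` child spec are illustrative):

    # starts MyApp.Worker under a runner in the pool; the returned pid lives on the remote node
    {:ok, pid} = FLAME.place_child(MyRunner, {MyApp.Worker, :some_arg}, link: true)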
289 | """ 290 | def place_child(name, child_spec, opts) do 291 | caller_checkout!(name, opts, :place_child, [name, child_spec, opts], fn runner_pid, 292 | remaining_timeout, 293 | track_resources -> 294 | place_opts = 295 | opts 296 | |> Keyword.put(:track_resources, track_resources) 297 | |> Keyword.put_new(:timeout, remaining_timeout) 298 | |> Keyword.put_new(:link, true) 299 | 300 | case Runner.place_child(runner_pid, child_spec, place_opts) do 301 | {{:ok, child_pid}, _trackable_pids = []} = result -> 302 | # we are placing the link back on the parent node, but we are protected 303 | # from racing the link on the child FLAME because the terminator on 304 | # the remote flame is monitoring the caller and will terminator the child 305 | # if we go away 306 | if Keyword.fetch!(place_opts, :link), do: Process.link(child_pid) 307 | {:cancel, {:replace, [child_pid]}, result} 308 | 309 | {:error, _reason} = result -> 310 | {:cancel, :ok, result} 311 | end 312 | end) 313 | end 314 | 315 | defp caller_checkout!(name, opts, fun_name, args, func) do 316 | %{boot_timeout: boot_timeout, track_resources: track_resources} = lookup_meta(name) 317 | timeout = opts[:timeout] || boot_timeout 318 | track_resources = Keyword.get(opts, :track_resources, track_resources) 319 | pid = Process.whereis(name) || exit({:noproc, {__MODULE__, fun_name, args}}) 320 | ref = Process.monitor(pid) 321 | {start_time, deadline} = deadline(timeout) 322 | 323 | # Manually implement call to avoid double monitor. 324 | # Auto-connect is asynchronous. But we still use :noconnect to make sure 325 | # we send on the monitored connection, and not trigger a new auto-connect. 326 | Process.send(pid, {:"$gen_call", {self(), ref}, {:checkout, deadline}}, [:noconnect]) 327 | 328 | receive do 329 | {^ref, runner_pid} -> 330 | try do 331 | Process.demonitor(ref, [:flush]) 332 | remaining_timeout = remaining_timeout(opts, start_time) 333 | func.(runner_pid, remaining_timeout, track_resources) 334 | catch 335 | kind, reason -> 336 | send_cancel(pid, ref, :catch) 337 | :erlang.raise(kind, reason, __STACKTRACE__) 338 | else 339 | {:cancel, :ok, {result, [_ | _] = trackable_pids}} -> 340 | send_cancel(pid, ref, {:replace, trackable_pids}) 341 | result 342 | 343 | {:cancel, reason, {result, [] = _trackable_pids}} -> 344 | send_cancel(pid, ref, reason) 345 | result 346 | end 347 | 348 | {:DOWN, ^ref, _, _, reason} -> 349 | exit({reason, {__MODULE__, fun_name, args}}) 350 | after 351 | timeout -> 352 | send_cancel(pid, ref, :timeout) 353 | Process.demonitor(ref, [:flush]) 354 | exit({:timeout, {__MODULE__, fun_name, args}}) 355 | end 356 | end 357 | 358 | defp send_cancel(pid, ref, reason) when is_pid(pid) and is_reference(ref) do 359 | send(pid, {:cancel, ref, self(), reason}) 360 | end 361 | 362 | defp remaining_timeout(opts, mono_start) do 363 | case Keyword.fetch(opts, :timeout) do 364 | {:ok, :infinity = inf} -> 365 | inf 366 | 367 | {:ok, nil} -> 368 | nil 369 | 370 | {:ok, ms} when is_integer(ms) -> 371 | elapsed_ms = 372 | System.convert_time_unit(System.monotonic_time() - mono_start, :native, :millisecond) 373 | 374 | ms - elapsed_ms 375 | 376 | :error -> 377 | nil 378 | end 379 | end 380 | 381 | defp lookup_meta(name) do 382 | :ets.lookup_element(name, :meta, 2) 383 | end 384 | 385 | @impl true 386 | def init(opts) do 387 | name = Keyword.fetch!(opts, :name) 388 | task_sup = Keyword.fetch!(opts, :task_sup) 389 | boot_timeout = Keyword.get(opts, :boot_timeout, @boot_timeout) 390 | track_resources = Keyword.get(opts, :track_resources, 
false) 391 | :ets.new(name, [:set, :public, :named_table, read_concurrency: true]) 392 | 393 | :ets.insert( 394 | name, 395 | {:meta, %{boot_timeout: boot_timeout, task_sup: task_sup, track_resources: track_resources}} 396 | ) 397 | 398 | terminator_sup = Keyword.fetch!(opts, :terminator_sup) 399 | cleaner = Keyword.fetch!(opts, :cleaner) 400 | child_placement_sup = Keyword.fetch!(opts, :child_placement_sup) 401 | runner_opts = runner_opts(opts, terminator_sup) 402 | min = Keyword.fetch!(opts, :min) 403 | 404 | # we must avoid recursively booting remote runners if we are a child 405 | min = 406 | if FLAME.Parent.get() do 407 | 0 408 | else 409 | min 410 | end 411 | 412 | base_sync_stream = 413 | if code_sync_opts = opts[:code_sync] do 414 | code_sync = 415 | code_sync_opts 416 | |> CodeSync.new() 417 | |> CodeSync.compute_changed_paths() 418 | 419 | %CodeSync.PackagedStream{} = parent_stream = CodeSync.package_to_stream(code_sync) 420 | 421 | :ok = FLAME.Pool.Cleaner.watch_path(cleaner, parent_stream.stream.path) 422 | 423 | parent_stream 424 | end 425 | 426 | state = %Pool{ 427 | runner_sup: Keyword.fetch!(opts, :runner_sup), 428 | task_sup: task_sup, 429 | terminator_sup: terminator_sup, 430 | child_placement_sup: child_placement_sup, 431 | name: name, 432 | min: min, 433 | max: Keyword.fetch!(opts, :max), 434 | boot_timeout: boot_timeout, 435 | idle_shutdown_after: Keyword.get(opts, :idle_shutdown_after, @idle_shutdown_after), 436 | min_idle_shutdown_after: Keyword.get(opts, :min_idle_shutdown_after, :infinity), 437 | max_concurrency: Keyword.get(opts, :max_concurrency, @default_max_concurrency), 438 | on_grow_start: opts[:on_grow_start], 439 | on_grow_end: opts[:on_grow_end], 440 | on_shrink: opts[:on_shrink], 441 | track_resources: track_resources, 442 | runner_opts: runner_opts, 443 | base_sync_stream: base_sync_stream 444 | } 445 | 446 | {:ok, boot_runners(state)} 447 | end 448 | 449 | defp runner_opts(opts, terminator_sup) do 450 | defaults = [terminator_sup: terminator_sup, log: Keyword.get(opts, :log, false)] 451 | 452 | runner_opts = 453 | Keyword.take( 454 | opts, 455 | [ 456 | :backend, 457 | :log, 458 | :single_use, 459 | :timeout, 460 | :boot_timeout, 461 | :shutdown_timeout, 462 | :idle_shutdown_after, 463 | :code_sync 464 | ] 465 | ) 466 | 467 | case Keyword.fetch(opts, :backend) do 468 | {:ok, {backend, opts}} -> 469 | Keyword.put(runner_opts, :backend, {backend, Keyword.merge(opts, defaults)}) 470 | 471 | {:ok, backend} -> 472 | Keyword.put(runner_opts, :backend, {backend, defaults}) 473 | 474 | :error -> 475 | backend = FLAME.Backend.impl() 476 | backend_opts = Application.get_env(:flame, backend) || [] 477 | Keyword.put(runner_opts, :backend, {backend, Keyword.merge(backend_opts, defaults)}) 478 | end 479 | end 480 | 481 | @impl true 482 | def handle_info({:DOWN, _ref, :process, _pid, _reason} = msg, %Pool{} = state) do 483 | {:noreply, handle_down(state, msg)} 484 | end 485 | 486 | def handle_info({ref, {:ok, pid}}, %Pool{} = state) when is_reference(ref) do 487 | {:noreply, handle_runner_async_up(state, pid, ref)} 488 | end 489 | 490 | def handle_info(:async_boot_continue, %Pool{} = state) do 491 | {:noreply, async_boot_runner(%{state | async_boot_timer: nil})} 492 | end 493 | 494 | def handle_info({:cancel, ref, caller_pid, reason}, state) do 495 | case reason do 496 | {:replace, child_pids} -> 497 | {:noreply, replace_caller(state, ref, caller_pid, child_pids)} 498 | 499 | reason when reason in [:ok, :timeout, :catch] -> 500 | {:noreply, checkin_runner(state, ref, 
caller_pid, reason)} 501 | end 502 | end 503 | 504 | @impl true 505 | def handle_call({:checkout, deadline}, from, state) do 506 | {:noreply, checkout_runner(state, deadline, from)} 507 | end 508 | 509 | defp runner_count(state) do 510 | map_size(state.runners) + map_size(state.pending_runners) 511 | end 512 | 513 | defp waiting_count(%Pool{waiting: %Queue{} = waiting}) do 514 | Queue.size(waiting) 515 | end 516 | 517 | defp min_runner(state) do 518 | if map_size(state.runners) == 0 do 519 | nil 520 | else 521 | {_ref, min} = Enum.min_by(state.runners, fn {_, %RunnerState{count: count}} -> count end) 522 | min 523 | end 524 | end 525 | 526 | defp await_downs(child_pids) do 527 | if MapSet.size(child_pids) == 0 do 528 | :ok 529 | else 530 | receive do 531 | {:DOWN, _ref, :process, pid, _reason} -> await_downs(MapSet.delete(child_pids, pid)) 532 | end 533 | end 534 | end 535 | 536 | defp replace_caller(%Pool{} = state, checkout_ref, caller_pid, [_ | _] = child_pids) do 537 | # replace caller with child pid and do not inc concurrency counts since we are replacing 538 | %{^caller_pid => %Caller{checkout_ref: ^checkout_ref} = caller} = state.callers 539 | Process.demonitor(caller.monitor_ref, [:flush]) 540 | 541 | # if we have more than 1 child pid, such as for multiple trackables returned for a single 542 | # call, we monitor all of them under a new process and the new process takes the slot in the 543 | # pool. When all trackables are finished, the new process goes down and frees the slot. 544 | child_pid = 545 | case child_pids do 546 | [child_pid] -> 547 | child_pid 548 | 549 | [_ | _] -> 550 | {:ok, child_pid} = 551 | Task.Supervisor.start_child(state.task_sup, fn -> 552 | Enum.each(child_pids, &Process.monitor(&1)) 553 | await_downs(MapSet.new(child_pids)) 554 | end) 555 | 556 | child_pid 557 | end 558 | 559 | new_caller = %Caller{ 560 | checkout_ref: checkout_ref, 561 | monitor_ref: Process.monitor(child_pid), 562 | runner_ref: caller.runner_ref 563 | } 564 | 565 | new_callers = 566 | state.callers 567 | |> Map.delete(caller_pid) 568 | |> Map.put(child_pid, new_caller) 569 | 570 | %{state | callers: new_callers} 571 | end 572 | 573 | defp checkin_runner(state, ref, caller_pid, reason) 574 | when is_reference(ref) and is_pid(caller_pid) do 575 | case state.callers do 576 | %{^caller_pid => %Caller{checkout_ref: ^ref} = caller} -> 577 | Process.demonitor(caller.monitor_ref, [:flush]) 578 | drop_caller(state, caller_pid, caller) 579 | 580 | # the only way to race a checkin is if the caller has expired while still in the 581 | # waiting state and checks in on the timeout before we lease it a runner. 
582 | %{} when reason == :timeout -> 583 | maybe_drop_waiting(state, caller_pid) 584 | 585 | %{} -> 586 | raise ArgumentError, 587 | "expected to checkin runner for #{inspect(caller_pid)} that does not exist" 588 | end 589 | end 590 | 591 | defp checkout_runner(%Pool{} = state, deadline, from, monitor_ref \\ nil) do 592 | min_runner = min_runner(state) 593 | runner_count = runner_count(state) 594 | 595 | cond do 596 | min_runner && min_runner.count < state.max_concurrency -> 597 | reply_runner_checkout(state, min_runner, from, monitor_ref) 598 | 599 | runner_count < state.max -> 600 | if state.async_boot_timer || 601 | map_size(state.pending_runners) * state.max_concurrency > waiting_count(state) do 602 | waiting_in(state, deadline, from) 603 | else 604 | state 605 | |> async_boot_runner() 606 | |> waiting_in(deadline, from) 607 | end 608 | 609 | true -> 610 | waiting_in(state, deadline, from) 611 | end 612 | end 613 | 614 | defp reply_runner_checkout(state, %RunnerState{} = runner, from, monitor_ref) do 615 | # we pass monitor_ref down from waiting so we don't need to remonitor if already monitoring 616 | {from_pid, checkout_ref} = from 617 | 618 | caller_monitor_ref = 619 | if monitor_ref do 620 | monitor_ref 621 | else 622 | Process.monitor(from_pid) 623 | end 624 | 625 | GenServer.reply(from, runner.pid) 626 | 627 | new_caller = %Caller{ 628 | checkout_ref: checkout_ref, 629 | monitor_ref: caller_monitor_ref, 630 | runner_ref: runner.monitor_ref 631 | } 632 | 633 | new_state = %{state | callers: Map.put(state.callers, from_pid, new_caller)} 634 | 635 | inc_runner_count(new_state, runner.monitor_ref) 636 | end 637 | 638 | defp waiting_in(%Pool{} = state, deadline, {pid, _tag} = from) do 639 | ref = Process.monitor(pid) 640 | waiting = %WaitingState{from: from, monitor_ref: ref, deadline: deadline} 641 | %{state | waiting: Queue.insert(state.waiting, waiting, pid)} 642 | end 643 | 644 | defp boot_runners(%Pool{} = state) do 645 | if state.min > 0 do 646 | # start min runners, and do not idle them down regardless of idle configuration 647 | # unless `:min_idle_shutdown_after` not infinity 648 | # TODO: allow % threshold of failed min's to continue startup? 
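      # Note: the `:min` runners below boot concurrently (up to 10 at a time) within
      # `:boot_timeout`; any boot failure raises and aborts pool startup entirely.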
649 | 0..(state.min - 1) 650 | |> Task.async_stream( 651 | fn _ -> start_child_runner(state, idle_shutdown_after: state.min_idle_shutdown_after) end, 652 | max_concurrency: 10, 653 | timeout: state.boot_timeout 654 | ) 655 | |> Enum.reduce(state, fn 656 | {:ok, {:ok, pid}}, acc -> 657 | {_runner, new_acc} = put_runner(acc, pid) 658 | new_acc 659 | 660 | {:exit, reason}, _acc -> 661 | raise "failed to boot runner: #{inspect(reason)}" 662 | end) 663 | else 664 | state 665 | end 666 | end 667 | 668 | defp schedule_async_boot_runner(%Pool{} = state) do 669 | if state.async_boot_timer, do: Process.cancel_timer(state.async_boot_timer) 670 | timer = Process.send_after(self(), :async_boot_continue, @async_boot_debounce) 671 | %{state | async_boot_timer: timer} 672 | end 673 | 674 | defp async_boot_runner(%Pool{on_grow_start: on_grow_start, name: name} = state) do 675 | new_count = runner_count(state) + 1 676 | 677 | task = 678 | Task.Supervisor.async_nolink(state.task_sup, fn -> 679 | if on_grow_start, do: on_grow_start.(%{count: new_count, name: name, pid: self()}) 680 | 681 | start_child_runner(state) 682 | end) 683 | 684 | new_pending = Map.put(state.pending_runners, task.ref, task.pid) 685 | %{state | pending_runners: new_pending} 686 | end 687 | 688 | defp start_child_runner(%Pool{} = state, runner_opts \\ []) do 689 | opts = Keyword.merge(state.runner_opts, runner_opts) 690 | name = Module.concat(state.name, "Runner#{map_size(state.runners) + 1}") 691 | 692 | spec = %{ 693 | id: name, 694 | start: {FLAME.Runner, :start_link, [opts]}, 695 | restart: :temporary 696 | } 697 | 698 | {:ok, pid} = DynamicSupervisor.start_child(state.runner_sup, spec) 699 | 700 | try do 701 | case Runner.remote_boot(pid, state.base_sync_stream) do 702 | :ok -> {:ok, pid} 703 | {:error, reason} -> {:error, reason} 704 | end 705 | catch 706 | {:exit, reason} -> {:error, {:exit, reason}} 707 | end 708 | end 709 | 710 | defp put_runner(%Pool{} = state, pid) when is_pid(pid) do 711 | ref = Process.monitor(pid) 712 | runner = %RunnerState{count: 0, pid: pid, monitor_ref: ref} 713 | new_state = %{state | runners: Map.put(state.runners, runner.monitor_ref, runner)} 714 | {runner, new_state} 715 | end 716 | 717 | defp inc_runner_count(%Pool{} = state, ref) do 718 | new_runners = 719 | Map.update!(state.runners, ref, fn %RunnerState{} = runner -> 720 | %{runner | count: runner.count + 1} 721 | end) 722 | 723 | %{state | runners: new_runners} 724 | end 725 | 726 | defp dec_runner_count(%Pool{} = state, ref) do 727 | new_runners = 728 | Map.update!(state.runners, ref, fn %RunnerState{} = runner -> 729 | %{runner | count: runner.count - 1} 730 | end) 731 | 732 | %{state | runners: new_runners} 733 | end 734 | 735 | defp drop_child_runner(%Pool{} = state, runner_ref) when is_reference(runner_ref) do 736 | %{^runner_ref => %RunnerState{}} = state.runners 737 | Process.demonitor(runner_ref, [:flush]) 738 | 739 | # kill all callers that still had a checkout for this runner 740 | new_state = 741 | Enum.reduce(state.callers, state, fn 742 | {caller_pid, %Caller{monitor_ref: ref, runner_ref: ^runner_ref}}, acc -> 743 | Process.demonitor(ref, [:flush]) 744 | Process.exit(caller_pid, :kill) 745 | %{acc | callers: Map.delete(acc.callers, caller_pid)} 746 | 747 | {_caller_pid, %Caller{}}, acc -> 748 | acc 749 | end) 750 | 751 | maybe_on_shrink(%{new_state | runners: Map.delete(new_state.runners, runner_ref)}) 752 | end 753 | 754 | defp drop_caller(%Pool{} = state, caller_pid, %Caller{} = caller) when is_pid(caller_pid) do 755 | new_state 
= %{state | callers: Map.delete(state.callers, caller_pid)} 756 | 757 | new_state 758 | |> dec_runner_count(caller.runner_ref) 759 | |> call_next_waiting_caller() 760 | end 761 | 762 | defp maybe_drop_waiting(%Pool{} = state, caller_pid) when is_pid(caller_pid) do 763 | %{state | waiting: Queue.delete_by_key(state.waiting, caller_pid)} 764 | end 765 | 766 | defp pop_next_waiting_caller(%Pool{} = state) do 767 | result = 768 | Queue.pop_until(state.waiting, fn _pid, %WaitingState{} = waiting -> 769 | %WaitingState{from: {pid, _}, monitor_ref: ref, deadline: deadline} = waiting 770 | # we don't need to reply to waiting callers because they will either have died 771 | # or execeeded their own deadline handled by receive + after 772 | if Process.alive?(pid) and not deadline_expired?(deadline) do 773 | true 774 | else 775 | Process.demonitor(ref, [:flush]) 776 | false 777 | end 778 | end) 779 | 780 | case result do 781 | {nil, %Queue{} = new_waiting} -> {nil, %{state | waiting: new_waiting}} 782 | {{_pid, %WaitingState{} = first}, %Queue{} = rest} -> {first, %{state | waiting: rest}} 783 | end 784 | end 785 | 786 | defp call_next_waiting_caller(%Pool{} = state) do 787 | case pop_next_waiting_caller(state) do 788 | {nil, new_state} -> 789 | new_state 790 | 791 | {%WaitingState{} = first, new_state} -> 792 | # checkout_runner will borrow already running monitor 793 | checkout_runner(new_state, first.deadline, first.from, first.monitor_ref) 794 | end 795 | end 796 | 797 | defp handle_down(%Pool{} = state, {:DOWN, ref, :process, pid, reason}) do 798 | state = maybe_drop_waiting(state, pid) 799 | 800 | state = 801 | case state.callers do 802 | %{^pid => %Caller{monitor_ref: ^ref} = caller} -> 803 | drop_caller(state, pid, caller) 804 | 805 | %{} -> 806 | state 807 | end 808 | 809 | state = 810 | case state.runners do 811 | %{^ref => _} -> drop_child_runner(state, ref) 812 | %{} -> state 813 | end 814 | 815 | case state.pending_runners do 816 | %{^ref => _} -> 817 | state = %{state | pending_runners: Map.delete(state.pending_runners, ref)} 818 | # we rate limit this to avoid many failed async boot attempts 819 | if has_unmet_servicable_demand?(state) do 820 | state 821 | |> maybe_on_grow_end(pid, {:exit, reason}) 822 | |> schedule_async_boot_runner() 823 | else 824 | maybe_on_grow_end(state, pid, {:exit, reason}) 825 | end 826 | 827 | %{} -> 828 | state 829 | end 830 | end 831 | 832 | defp maybe_on_grow_end(%Pool{on_grow_end: on_grow_end} = state, pid, result) do 833 | new_count = runner_count(state) 834 | meta = %{count: new_count, name: state.name, pid: pid} 835 | 836 | case result do 837 | :ok -> if on_grow_end, do: on_grow_end.(:ok, meta) 838 | {:exit, reason} -> if on_grow_end, do: on_grow_end.({:exit, reason}, meta) 839 | end 840 | 841 | state 842 | end 843 | 844 | defp maybe_on_shrink(%Pool{} = state) do 845 | new_count = runner_count(state) 846 | if state.on_shrink, do: state.on_shrink.(%{count: new_count, name: state.name}) 847 | 848 | state 849 | end 850 | 851 | defp has_unmet_servicable_demand?(%Pool{} = state) do 852 | waiting_count(state) > map_size(state.pending_runners) * state.max_concurrency and 853 | runner_count(state) < state.max 854 | end 855 | 856 | defp handle_runner_async_up(%Pool{} = state, pid, ref) when is_pid(pid) and is_reference(ref) do 857 | %{^ref => task_pid} = state.pending_runners 858 | Process.demonitor(ref, [:flush]) 859 | 860 | new_state = %{state | pending_runners: Map.delete(state.pending_runners, ref)} 861 | {runner, new_state} = put_runner(new_state, pid) 862 
| new_state = maybe_on_grow_end(new_state, task_pid, :ok) 863 | 864 | # pop waiting callers up to max_concurrency, but we must handle: 865 | # 1. the case where we have no waiting callers 866 | # 2. the case where we process a DOWN for the new runner as we pop DOWNs 867 | # looking for fresh waiting 868 | # 3. if we still have waiting callers at the end, boot more runners if we have capacity 869 | Enum.reduce_while(1..state.max_concurrency, new_state, fn i, acc -> 870 | with {:ok, %RunnerState{} = runner} <- Map.fetch(acc.runners, runner.monitor_ref), 871 | true <- i <= acc.max_concurrency do 872 | case pop_next_waiting_caller(acc) do 873 | {%WaitingState{} = next, acc} -> 874 | {:cont, reply_runner_checkout(acc, runner, next.from, next.monitor_ref)} 875 | 876 | {nil, acc} -> 877 | {:halt, acc} 878 | end 879 | else 880 | _ -> {:halt, acc} 881 | end 882 | end) 883 | end 884 | 885 | defp deadline(timeout) when is_integer(timeout) do 886 | t1 = System.monotonic_time() 887 | {t1, t1 + System.convert_time_unit(timeout, :millisecond, :native)} 888 | end 889 | 890 | defp deadline(:infinity) do 891 | {System.monotonic_time(), :infinity} 892 | end 893 | 894 | defp deadline_expired?(deadline) when is_integer(deadline) do 895 | System.monotonic_time() >= deadline 896 | end 897 | 898 | defp deadline_expired?(:infinity), do: false 899 | end 900 | -------------------------------------------------------------------------------- /lib/flame/pool/cleaner.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Pool.Cleaner do 2 | @moduledoc false 3 | use GenServer 4 | 5 | def start_link(opts) do 6 | GenServer.start_link(__MODULE__, opts, name: Keyword.fetch!(opts, :name)) 7 | end 8 | 9 | def watch_path(server, path) do 10 | GenServer.call(server, {:watch, path}) 11 | end 12 | 13 | def list_paths(server) do 14 | GenServer.call(server, :list) 15 | end 16 | 17 | def init(_opts) do 18 | Process.flag(:trap_exit, true) 19 | {:ok, %{paths: []}} 20 | end 21 | 22 | def handle_call({:watch, path}, _from, state) do 23 | {:reply, :ok, %{state | paths: [path | state.paths]}} 24 | end 25 | 26 | def handle_call(:list, _from, state) do 27 | {:reply, state.paths, state} 28 | end 29 | 30 | def terminate(_reason, state) do 31 | for path <- state.paths, do: File.rm!(path) 32 | 33 | :ok 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/flame/pool/supervisor.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Pool.Supervisor do 2 | @moduledoc false 3 | 4 | use Supervisor 5 | 6 | def start_link(opts) do 7 | name = Keyword.fetch!(opts, :name) 8 | pool_sup = Module.concat(name, "PoolSup") 9 | 10 | Supervisor.start_link(__MODULE__, opts, name: pool_sup) 11 | end 12 | 13 | def init(opts) do 14 | name = Keyword.fetch!(opts, :name) 15 | runner_sup = Module.concat(name, "RunnerSup") 16 | cleaner = Module.concat(name, "Cleaner") 17 | terminator_sup = Module.concat(name, "TerminatorSup") 18 | task_sup = Module.concat(name, "TaskSup") 19 | 20 | child_placement_sup = 21 | Keyword.get(opts, :child_placement_sup, FLAME.ChildPlacementSup) 22 | 23 | pool_opts = 24 | Keyword.merge(opts, 25 | task_sup: task_sup, 26 | cleaner: cleaner, 27 | runner_sup: runner_sup, 28 | terminator_sup: terminator_sup, 29 | child_placement_sup: child_placement_sup 30 | ) 31 | 32 | children = 33 | [ 34 | {FLAME.Pool.Cleaner, name: cleaner}, 35 | {Task.Supervisor, name: task_sup, strategy: :one_for_one}, 36 | 
{DynamicSupervisor, name: runner_sup, strategy: :one_for_one}, 37 | {DynamicSupervisor, name: terminator_sup, strategy: :one_for_one}, 38 | %{ 39 | id: {FLAME.Pool, Keyword.fetch!(opts, :name)}, 40 | start: {FLAME.Pool, :start_link, [pool_opts]}, 41 | type: :worker 42 | } 43 | ] 44 | 45 | Supervisor.init(children, strategy: :one_for_all) 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /lib/flame/queue.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Queue do 2 | @moduledoc false 3 | # Provides a FIFO queue with secondary key lookup/delete support. 4 | 5 | defstruct tree: :gb_trees.empty(), keys: %{}, idx: 0 6 | 7 | alias FLAME.Queue 8 | 9 | @doc """ 10 | Builds a new queue. 11 | """ 12 | def new, do: %FLAME.Queue{} 13 | 14 | @doc """ 15 | Returns the size of the queue. 16 | """ 17 | def size(%Queue{} = queue), do: :gb_trees.size(queue.tree) 18 | 19 | @doc """ 20 | Inserts a new item into the queue with a secondary key. 21 | """ 22 | def insert(%Queue{idx: idx} = queue, item, key) do 23 | new_tree = :gb_trees.insert(idx, {key, item}, queue.tree) 24 | new_keys = Map.put(queue.keys, key, idx) 25 | %{queue | tree: new_tree, keys: new_keys, idx: idx + 1} 26 | end 27 | 28 | @doc """ 29 | Pops an item from the queue returning the key/item pair. 30 | 31 | Returns `{nil, new_queue}` when the queue is empty. 32 | 33 | ## Examples 34 | 35 | iex> queue = Queue.insert(Queue.new(), "item1", :key1) 36 | iex> {{:key1, "item1"}, %Queue{} = new_queue} = Queue.pop(queue) 37 | iex> {nil, %Queue{} = new_queue} = Queue.pop(queue) 38 | """ 39 | def pop(%Queue{tree: tree, keys: keys, idx: idx} = queue) do 40 | if size(queue) > 0 do 41 | {_smallest_idx, {key, val}, new_tree} = :gb_trees.take_smallest(tree) 42 | new_keys = Map.delete(keys, key) 43 | new_idx = if :gb_trees.is_empty(new_tree), do: 0, else: idx 44 | {{key, val}, %{queue | tree: new_tree, keys: new_keys, idx: new_idx}} 45 | else 46 | {nil, queue} 47 | end 48 | end 49 | 50 | @doc """ 51 | Pops items from the queue until the function returns true. 52 | 53 | Returns the first key/item pair for which the function returns true, and the new queue. 54 | """ 55 | def pop_until(%Queue{} = queue, func) when is_function(func, 2) do 56 | case pop(queue) do 57 | {nil, %Queue{} = new_queue} -> 58 | {nil, new_queue} 59 | 60 | {{key, item}, %Queue{} = new_queue} -> 61 | if func.(key, item) do 62 | {{key, item}, new_queue} 63 | else 64 | pop_until(new_queue, func) 65 | end 66 | end 67 | end 68 | 69 | @doc """ 70 | Looks up an item by key. 71 | 72 | Returns `nil` for unknown keys. 73 | 74 | ## Examples 75 | 76 | queue = Queue.insert(Queue.new(), "item1", :key1) 77 | "item1" = Queue.get_by_key(queue, :key1) 78 | """ 79 | def get_by_key(%Queue{} = queue, key) do 80 | case queue.keys do 81 | %{^key => idx} -> 82 | {:value, {^key, item}} = :gb_trees.lookup(idx, queue.tree) 83 | item 84 | 85 | %{} -> 86 | nil 87 | end 88 | end 89 | 90 | @doc """ 91 | Deletes an item by key. 92 | 93 | Unknown keys are ignored. 
94 | 95 | ## Examples 96 | 97 | queue = Queue.insert(Queue.new(), "item1", :key1) 98 | new_queue = Queue.delete_by_key(queue, :key1) 99 | """ 100 | def delete_by_key(%Queue{tree: tree, keys: keys} = queue, key) do 101 | case keys do 102 | %{^key => index} -> 103 | new_tree = :gb_trees.delete_any(index, tree) 104 | new_keys = Map.delete(keys, key) 105 | new_idx = if :gb_trees.is_empty(new_tree), do: 0, else: queue.idx 106 | %{queue | tree: new_tree, keys: new_keys, idx: new_idx} 107 | 108 | %{} -> 109 | queue 110 | end 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /lib/flame/runner.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Runner do 2 | @moduledoc false 3 | # ## Runners 4 | 5 | # In practice, users utilize the `FLAME.call/3` and `FLAME.cast/3` functions 6 | # to accomplish their work. These functions are backed by a `FLAME.Pool` of 7 | # `FLAME.Runner`'s 8 | # 9 | # A `FLAME.Runner` is responsible for booting a new node, and executing concurrent 10 | # functions on it. For example: 11 | # 12 | # {:ok, runner} = Runner.start_link(backend: FLAME.FlyBackend) 13 | # :ok = Runner.remote_boot(runner) 14 | # Runner.call(runner, fn -> :operation1 end) 15 | # Runner.shutdown(runner) 16 | # 17 | # When a caller exits or crashes, the remote node will automatically be terminated. 18 | # For distributed erlang backends, like `FLAME.FlyBackend`, this will be 19 | # accomplished automatically by the `FLAME.Terminator`, but other methods 20 | # are possible. 21 | 22 | use GenServer 23 | require Logger 24 | 25 | alias FLAME.{Runner, Terminator, CodeSync} 26 | 27 | @derive {Inspect, 28 | only: [ 29 | :id, 30 | :backend, 31 | :terminator, 32 | :instance_id, 33 | :private_ip, 34 | :node_name, 35 | :single_use, 36 | :timeout, 37 | :status, 38 | :log, 39 | :boot_timeout, 40 | :idle_shutdown_after, 41 | :idle_shutdown_check 42 | ]} 43 | 44 | defstruct id: nil, 45 | instance_id: nil, 46 | private_ip: nil, 47 | backend: nil, 48 | terminator: nil, 49 | backend_init: nil, 50 | node_name: nil, 51 | single_use: false, 52 | timeout: 30_000, 53 | status: nil, 54 | log: :info, 55 | boot_timeout: 10_000, 56 | shutdown_timeout: 5_000, 57 | idle_shutdown_after: nil, 58 | idle_shutdown_check: nil, 59 | code_sync_opts: false, 60 | code_sync: nil 61 | 62 | @doc """ 63 | Starts a runner. 64 | 65 | ## Options 66 | 67 | `:backend` - The `Flame.Backend` implementation to use 68 | `:log` - The log level, or `false` 69 | `:single_use` - The boolean on whether to terminate the runner after it's first call 70 | `:timeout` - The execution timeout of calls 71 | `:boot_timeout` - The boot timeout of the runner 72 | `:shutdown_timeout` - The shutdown timeout 73 | `:idle_shutdown_after` - The idle shutdown time 74 | `:code_sync` - The code sync options. See the `FLAME.Pool` module for more information. 75 | """ 76 | def start_link(opts \\ []) do 77 | GenServer.start_link(__MODULE__, opts) 78 | end 79 | 80 | def shutdown(runner, timeout \\ nil) when is_pid(runner) do 81 | GenServer.call(runner, {:runner_shutdown, timeout}) 82 | end 83 | 84 | @doc """ 85 | Boots the remote runner using the `FLAME.Backend`. 86 | """ 87 | def remote_boot(pid, base_sync_stream, timeout \\ nil) when is_pid(pid) do 88 | GenServer.call(pid, {:remote_boot, base_sync_stream, timeout}, timeout || :infinity) 89 | end 90 | 91 | @doc """ 92 | Places a child process on the remote node. 
93 | 94 | The started child spec will be rewritten to use the `:temporary` restart strategy 95 | to ensure that the child is not restarted if it exits. If you want restart 96 | behavior, you must monitor the process yourself on the parent node and replace it. 97 | """ 98 | def place_child(runner_pid, child_spec, opts) 99 | when is_pid(runner_pid) and is_list(opts) do 100 | # we must rewrite :temporary restart strategy for the spec to avoid restarting placed children 101 | new_spec = Supervisor.child_spec(child_spec, restart: :temporary) 102 | caller_pid = self() 103 | link? = Keyword.get(opts, :link, true) 104 | 105 | call( 106 | runner_pid, 107 | caller_pid, 108 | fn terminator -> 109 | Terminator.place_child(terminator, caller_pid, link?, new_spec) 110 | end, 111 | opts 112 | ) 113 | end 114 | 115 | @doc """ 116 | Calls a function on the remote node. 117 | """ 118 | def call(runner_pid, caller_pid, func, opts \\ []) 119 | when is_pid(runner_pid) and is_pid(caller_pid) and is_function(func) and is_list(opts) do 120 | link? = Keyword.get(opts, :link, true) 121 | track_resources? = Keyword.get(opts, :track_resources, false) 122 | {ref, %Runner{} = runner, backend_state} = checkout(runner_pid) 123 | %Runner{terminator: terminator} = runner 124 | call_timeout = opts[:timeout] || runner.timeout 125 | 126 | result = 127 | remote_call(runner, backend_state, call_timeout, track_resources?, fn -> 128 | if link?, do: Process.link(caller_pid) 129 | :ok = Terminator.deadline_me(terminator, call_timeout) 130 | if is_function(func, 1), do: func.(terminator), else: func.() 131 | end) 132 | 133 | case result do 134 | {:ok, {value, trackable_pids}} -> 135 | :ok = checkin(runner_pid, ref, trackable_pids) 136 | {value, trackable_pids} 137 | 138 | {:exit, reason} -> 139 | :ok = checkin(runner_pid, ref, []) 140 | exit(reason) 141 | end 142 | end 143 | 144 | defp checkout(runner_pid) do 145 | GenServer.call(runner_pid, :checkout) 146 | end 147 | 148 | defp checkin(runner_pid, ref, trackable_pids) do 149 | GenServer.call(runner_pid, {:checkin, ref, trackable_pids}) 150 | end 151 | 152 | @impl true 153 | def init(opts) do 154 | runner = new(opts) 155 | 156 | case runner.backend_init do 157 | {:ok, backend_state} -> 158 | state = %{ 159 | runner: runner, 160 | checkouts: %{}, 161 | backend_state: backend_state, 162 | otp_app: if(otp_app = System.get_env("RELEASE_NAME"), do: String.to_atom(otp_app)) 163 | } 164 | 165 | {:ok, state} 166 | 167 | {:error, reason} -> 168 | {:stop, reason} 169 | end 170 | end 171 | 172 | @impl true 173 | def handle_info({:DOWN, ref, :process, pid, reason} = msg, state) do 174 | %{runner: %Runner{} = runner} = state 175 | 176 | case runner do 177 | %Runner{terminator: ^pid} -> 178 | {:stop, reason, state} 179 | 180 | %Runner{terminator: _} -> 181 | case state.checkouts do 182 | %{^ref => _from_pid} -> 183 | new_state = drop_checkout(state, ref) 184 | 185 | if runner.single_use do 186 | {:stop, reason, new_state} 187 | else 188 | {:noreply, new_state} 189 | end 190 | 191 | %{} -> 192 | {:noreply, maybe_backend_handle_info(state, msg)} 193 | end 194 | end 195 | end 196 | 197 | def handle_info({_ref, {:remote_shutdown, reason}}, state) do 198 | {:stop, {:shutdown, reason}, state} 199 | end 200 | 201 | def handle_info(msg, state) do 202 | {:noreply, maybe_backend_handle_info(state, msg)} 203 | end 204 | 205 | defp maybe_backend_handle_info(state, msg) do 206 | %Runner{backend: backend} = state.runner 207 | 208 | if function_exported?(backend, :handle_info, 2) do 209 | case 
backend.handle_info(msg, state.backend_state) do 210 | {:noreply, new_backend_state} -> 211 | %{state | backend_state: new_backend_state} 212 | 213 | other -> 214 | raise ArgumentError, 215 | "expected #{inspect(backend)}.handle_info/2 to return {:noreply, state}, got: #{inspect(other)}" 216 | end 217 | else 218 | state 219 | end 220 | end 221 | 222 | @impl true 223 | def handle_call({:runner_shutdown, timeout}, _from, state) do 224 | %{runner: runner} = state 225 | timeout = timeout || runner.shutdown_timeout 226 | ref = make_ref() 227 | parent = self() 228 | %Runner{terminator: terminator} = runner 229 | 230 | state = drain_checkouts(state, timeout) 231 | 232 | {:ok, {remote_pid, remote_monitor_ref}} = 233 | runner.backend.remote_spawn_monitor(state.backend_state, fn -> 234 | :ok = Terminator.system_shutdown(terminator) 235 | send(parent, {ref, :ok}) 236 | end) 237 | 238 | receive do 239 | {^ref, :ok} -> 240 | {:stop, :normal, :ok, state} 241 | 242 | {:DOWN, ^remote_monitor_ref, :process, ^remote_pid, reason} -> 243 | {:stop, {:shutdown, reason}, {:error, reason}, state} 244 | after 245 | timeout -> exit(:timeout) 246 | end 247 | end 248 | 249 | def handle_call(:checkout, {from_pid, _tag}, state) do 250 | state = 251 | case maybe_diff_code_paths(state) do 252 | {new_state, nil} -> 253 | new_state 254 | 255 | {new_state, %CodeSync.PackagedStream{} = parent_pkg} -> 256 | terminator = state.runner.terminator 257 | 258 | remote_call!(state.runner, state.backend_state, state.runner.boot_timeout, false, fn -> 259 | if extract_dir = CodeSync.extract_packaged_stream(parent_pkg) do 260 | FLAME.Terminator.watch_path(terminator, extract_dir) 261 | end 262 | end) 263 | 264 | CodeSync.rm_packaged_stream(parent_pkg) 265 | 266 | new_state 267 | end 268 | 269 | {new_state, ref} = put_checkout(state, from_pid) 270 | {:reply, {ref, new_state.runner, new_state.backend_state}, new_state} 271 | end 272 | 273 | def handle_call({:checkin, ref, trackable_pids}, _from, state) do 274 | Process.demonitor(ref, [:flush]) 275 | 276 | new_state = 277 | Enum.reduce(trackable_pids, state, fn pid, acc -> 278 | {acc, _ref} = put_checkout(acc, pid) 279 | acc 280 | end) 281 | 282 | {:reply, :ok, drop_checkout(new_state, ref)} 283 | end 284 | 285 | def handle_call({:remote_boot, base_sync_stream, _timeout}, _from, state) do 286 | %{runner: runner, backend_state: backend_state, otp_app: otp_app} = state 287 | 288 | case runner.status do 289 | :booted -> 290 | {:reply, {:error, :already_booted}, state} 291 | 292 | :awaiting_boot -> 293 | time(runner, "runner connect", fn -> 294 | case runner.backend.remote_boot(backend_state) do 295 | {:ok, remote_terminator_pid, new_backend_state} when is_pid(remote_terminator_pid) -> 296 | Process.monitor(remote_terminator_pid) 297 | new_runner = %{runner | terminator: remote_terminator_pid, status: :booted} 298 | new_state = %{state | runner: new_runner, backend_state: new_backend_state} 299 | {new_state, beams_stream} = maybe_stream_code_paths(new_state) 300 | 301 | %Runner{ 302 | single_use: single_use, 303 | idle_shutdown_after: idle_after, 304 | idle_shutdown_check: idle_check, 305 | terminator: term 306 | } = new_runner 307 | 308 | {:ok, _} = 309 | remote_call!(runner, new_backend_state, runner.boot_timeout, false, fn -> 310 | # ensure app is fully started if parent connects before up 311 | if otp_app, do: {:ok, _} = Application.ensure_all_started(otp_app) 312 | 313 | if extract_dir = 314 | base_sync_stream && CodeSync.extract_packaged_stream(base_sync_stream) do 315 | 
FLAME.Terminator.watch_path(term, extract_dir) 316 | end 317 | 318 | if extract_dir = beams_stream && CodeSync.extract_packaged_stream(beams_stream) do 319 | FLAME.Terminator.watch_path(term, extract_dir) 320 | end 321 | 322 | :ok = 323 | Terminator.schedule_idle_shutdown(term, idle_after, idle_check, single_use) 324 | 325 | :ok 326 | end) 327 | 328 | {:reply, :ok, new_state} 329 | 330 | {:error, reason} -> 331 | {:stop, {:shutdown, reason}, state} 332 | 333 | other -> 334 | raise ArgumentError, 335 | "expected #{inspect(runner.backend)}.remote_boot/1 to return {:ok, remote_terminator_pid, new_state} | {:error, reason}, got: #{inspect(other)}" 336 | end 337 | end) 338 | end 339 | end 340 | 341 | @doc false 342 | def new(opts) when is_list(opts) do 343 | opts = 344 | Keyword.validate!(opts, [ 345 | :backend, 346 | :log, 347 | :single_use, 348 | :timeout, 349 | :boot_timeout, 350 | :shutdown_timeout, 351 | :idle_shutdown_after, 352 | :code_sync 353 | ]) 354 | 355 | Keyword.validate!(opts[:code_sync] || [], [ 356 | :get_path, 357 | :copy_apps, 358 | :copy_paths, 359 | :sync_beams, 360 | :start_apps, 361 | :tmp_dir, 362 | :extract_dir, 363 | :verbose, 364 | :compress, 365 | :chunk_size 366 | ]) 367 | 368 | {idle_shutdown_after_ms, idle_check} = 369 | case Keyword.fetch(opts, :idle_shutdown_after) do 370 | {:ok, :infinity} -> {:infinity, fn -> false end} 371 | {:ok, ms} when is_integer(ms) -> {ms, fn -> true end} 372 | {:ok, {ms, func}} when is_integer(ms) and is_function(func, 0) -> {ms, func} 373 | other when other in [{:ok, nil}, :error] -> {30_000, fn -> true end} 374 | end 375 | 376 | runner = 377 | %Runner{ 378 | status: :awaiting_boot, 379 | backend: :pending, 380 | backend_init: :pending, 381 | log: Keyword.get(opts, :log, false), 382 | single_use: Keyword.get(opts, :single_use, false), 383 | timeout: opts[:timeout] || 30_000, 384 | boot_timeout: opts[:boot_timeout] || 30_000, 385 | shutdown_timeout: opts[:shutdown_timeout] || 30_000, 386 | idle_shutdown_after: idle_shutdown_after_ms, 387 | idle_shutdown_check: idle_check, 388 | terminator: nil, 389 | code_sync_opts: Keyword.get(opts, :code_sync, false) 390 | } 391 | 392 | base_backend_opts = Keyword.take(opts, [:boot_timeout]) 393 | 394 | {backend, backend_init} = 395 | case Keyword.fetch!(opts, :backend) do 396 | backend when is_atom(backend) -> 397 | backend_opts = 398 | Keyword.merge(base_backend_opts, Application.get_env(:flame, backend) || []) 399 | 400 | {backend, backend.init(backend_opts)} 401 | 402 | {backend, backend_opts} when is_atom(backend) and is_list(backend_opts) -> 403 | {backend, backend.init(Keyword.merge(base_backend_opts, backend_opts))} 404 | end 405 | 406 | %{runner | backend: backend, backend_init: backend_init} 407 | end 408 | 409 | defp time(%Runner{log: false} = _runner, _label, func) do 410 | func.() 411 | end 412 | 413 | # TODO move this to telemetry 414 | defp time(%Runner{log: level}, label, func) do 415 | Logger.log(level, "#{label}: start") 416 | {elapsed_micro, result} = :timer.tc(func) 417 | millisec = System.convert_time_unit(elapsed_micro, :microsecond, :millisecond) 418 | Logger.log(level, "#{label}: completed in #{millisec}ms") 419 | result 420 | end 421 | 422 | defp put_checkout(state, from_pid) when is_pid(from_pid) do 423 | ref = Process.monitor(from_pid) 424 | {%{state | checkouts: Map.put(state.checkouts, ref, from_pid)}, ref} 425 | end 426 | 427 | defp drop_checkout(state, ref) when is_reference(ref) do 428 | %{^ref => _from_pid} = state.checkouts 429 | %{state | checkouts: 
Map.delete(state.checkouts, ref)} 430 | end 431 | 432 | defp remote_call!(%Runner{} = runner, backend_state, timeout, track_resources?, func) do 433 | case remote_call(runner, backend_state, timeout, track_resources?, func) do 434 | {:ok, value} -> value 435 | {:exit, reason} -> exit(reason) 436 | end 437 | end 438 | 439 | defp remote_call(%Runner{} = runner, backend_state, timeout, track_resources?, func) do 440 | %{terminator: terminator} = runner 441 | parent_ref = make_ref() 442 | parent = self() 443 | 444 | {:ok, {remote_pid, remote_monitor_ref}} = 445 | runner.backend.remote_spawn_monitor(backend_state, fn -> 446 | # This runs on the remote node 447 | result = func.() 448 | send(parent, {parent_ref, result}) 449 | 450 | if track_resources? do 451 | monitor_ref = Process.monitor(parent) 452 | 453 | receive do 454 | {^parent_ref, [_ | _] = to_watch} -> 455 | Terminator.watch(terminator, to_watch) 456 | # Hold the result until here so they are not premature garbage collected 457 | __MODULE__.identity(result) 458 | 459 | {^parent_ref, []} -> 460 | :ok 461 | 462 | {:DOWN, ^monitor_ref, _, _, _} -> 463 | :ok 464 | end 465 | end 466 | 467 | :ok 468 | end) 469 | 470 | receive do 471 | {^parent_ref, result} -> 472 | Process.demonitor(remote_monitor_ref, [:flush]) 473 | 474 | if track_resources? do 475 | {result, pids} = FLAME.track_resources(result, [], node(remote_pid)) 476 | send(remote_pid, {parent_ref, pids}) 477 | {:ok, {result, pids}} 478 | else 479 | {:ok, {result, []}} 480 | end 481 | 482 | {:DOWN, ^remote_monitor_ref, :process, ^remote_pid, reason} -> 483 | case reason do 484 | :killed -> {:exit, :timeout} 485 | other -> {:exit, other} 486 | end 487 | 488 | {:EXIT, ^remote_pid, reason} -> 489 | {:exit, reason} 490 | after 491 | timeout -> 492 | {:exit, :timeout} 493 | end 494 | end 495 | 496 | @doc """ 497 | Used to avoid garbage collection of remote terms. 
498 | """ 499 | def identity(term), do: term 500 | 501 | @drain_timeout :drain_timeout 502 | defp drain_checkouts(state, timeout) do 503 | case state.checkouts do 504 | checkouts when checkouts == %{} -> 505 | state 506 | 507 | checkouts -> 508 | Process.send_after(self(), @drain_timeout, timeout) 509 | 510 | Enum.reduce(checkouts, state, fn {ref, _from_pid}, acc -> 511 | receive do 512 | {:checkin, ^ref} -> drop_checkout(acc, ref) 513 | {:DOWN, ^ref, :process, _pid, _reason} -> drop_checkout(acc, ref) 514 | @drain_timeout -> exit(:timeout) 515 | end 516 | end) 517 | end 518 | end 519 | 520 | defp maybe_stream_code_paths(%{runner: %Runner{} = runner} = state) do 521 | if code_sync_opts = runner.code_sync_opts do 522 | code_sync = 523 | code_sync_opts 524 | |> CodeSync.new() 525 | |> CodeSync.compute_sync_beams() 526 | 527 | %CodeSync.PackagedStream{} = parent_stream = CodeSync.package_to_stream(code_sync) 528 | new_runner = %{runner | code_sync: code_sync} 529 | {%{state | runner: new_runner}, parent_stream} 530 | else 531 | {state, nil} 532 | end 533 | end 534 | 535 | defp maybe_diff_code_paths(%{runner: %Runner{} = runner} = state) do 536 | if runner.code_sync do 537 | diffed_code = CodeSync.diff(runner.code_sync) 538 | new_runner = %{runner | code_sync: diffed_code} 539 | new_state = %{state | runner: new_runner} 540 | 541 | if CodeSync.changed?(diffed_code) do 542 | %CodeSync.PackagedStream{} = parent_stream = CodeSync.package_to_stream(diffed_code) 543 | {new_state, parent_stream} 544 | else 545 | {new_state, nil} 546 | end 547 | else 548 | {state, nil} 549 | end 550 | end 551 | end 552 | -------------------------------------------------------------------------------- /lib/flame/terminator.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Terminator.Caller do 2 | @moduledoc false 3 | 4 | defstruct from_pid: nil, timer: nil, placed_child_ref: nil, placed_caller_ref: nil, link?: false 5 | end 6 | 7 | defmodule FLAME.Terminator do 8 | @moduledoc false 9 | # The terminator is responsible for ensuring RPC deadlines and parent monitoring. 10 | 11 | # All FLAME calls are deadlined with a timeout. The runners will spawn a 12 | # function on a remote node, check in with the terminator and ask to be deadlined 13 | # with a timeout, and then perform their work. If the process exists beyond the 14 | # deadline, it is forcefully killed by the terminator. The termintor also ensures 15 | # a configured shutdown timeout to give existing RPC calls time to finish when 16 | # the system is shutting down. 17 | 18 | # The Terminator also handles connecting back to the parent node and monitoring 19 | # it when the node is started as FLAME child. If the connection is not 20 | # established with a failsafe time, or connection is lost, the system is shut 21 | # down by the terminator. 
22 | use GenServer 23 | 24 | require Logger 25 | 26 | alias FLAME.{Terminator, Parent} 27 | alias FLAME.Terminator.Caller 28 | 29 | defstruct parent: nil, 30 | parent_monitor_ref: nil, 31 | child_placement_sup: nil, 32 | single_use: false, 33 | calls: %{}, 34 | watchers: %{}, 35 | paths: [], 36 | log: false, 37 | status: nil, 38 | failsafe_timer: nil, 39 | connect_timer: nil, 40 | connect_attempts: 0, 41 | idle_shutdown_after: nil, 42 | idle_shutdown_check: nil, 43 | idle_shutdown_timer: nil 44 | 45 | def child_spec(opts) do 46 | %{ 47 | id: {FLAME.Terminator.Supervisor, Keyword.fetch!(opts, :name)}, 48 | start: {FLAME.Terminator.Supervisor, :start_link, [opts]}, 49 | type: :supervisor 50 | } 51 | end 52 | 53 | @doc """ 54 | Starts the Terminator. 55 | 56 | ## Options 57 | 58 | * `:name` – The optional name of the GenServer. 59 | 60 | * `:parent` – The `%FLAME.Parent{}` of the parent runner. 61 | Defaults to lookup from `FLAME.Parent.get/0`. 62 | 63 | * `:failsafe_timeout` - The time to wait for a connection to the parent node 64 | before shutting down the system. Defaults to 2 seconds. 65 | 66 | * `:log` - The optional logging level. Defaults `false`. 67 | """ 68 | def start_link(opts) do 69 | Keyword.validate!(opts, [:name, :parent, :child_placement_sup, :failsafe_timeout, :log]) 70 | GenServer.start_link(__MODULE__, opts, name: opts[:name]) 71 | end 72 | 73 | def watch(terminator, pids) do 74 | GenServer.call(terminator, {:watch, pids}) 75 | end 76 | 77 | def watch_path(terminator, path) do 78 | GenServer.call(terminator, {:watch_path, path}) 79 | end 80 | 81 | def deadline_me(terminator, timeout) do 82 | GenServer.call(terminator, {:deadline, timeout}) 83 | end 84 | 85 | def schedule_idle_shutdown(terminator, idle_shutdown, idle_check, single_use?) do 86 | GenServer.call(terminator, {:schedule_idle_shutdown, idle_shutdown, idle_check, single_use?}) 87 | end 88 | 89 | def system_shutdown(terminator) when is_pid(terminator) do 90 | GenServer.call(terminator, :system_shutdown) 91 | end 92 | 93 | def place_child(terminator, caller, link?, child_spec) 94 | when is_pid(caller) and is_boolean(link?) do 95 | dynamic_sup = FLAME.Terminator.Supervisor.child_placement_sup_name(terminator) 96 | %{start: start} = child_spec = Supervisor.child_spec(child_spec, []) 97 | gl = Process.group_leader() 98 | 99 | rewritten_start = 100 | {__MODULE__, :start_child_inside_sup, [start, terminator, caller, link?, gl]} 101 | 102 | wrapped_child_spec = %{child_spec | start: rewritten_start} 103 | DynamicSupervisor.start_child(dynamic_sup, wrapped_child_spec) 104 | end 105 | 106 | # This runs inside the supervisor 107 | # We rewrite the child spec in place_child/3 to call this function which starts 108 | # the DynamicSupervisor child inside the child placement supervisor, and notifies the 109 | # terminator via the {:placed_child, caller, child_pid} message. 110 | # This approach allows the caller to place the child outside of terminator, safely. 
111 | def start_child_inside_sup({mod, fun, args}, terminator, caller, link?, gl) do 112 | # We switch the group leader, so that the newly started 113 | # process gets the same group leader as the caller 114 | initial_gl = Process.group_leader() 115 | Process.group_leader(self(), gl) 116 | 117 | try do 118 | {resp, pid} = 119 | case apply(mod, fun, args) do 120 | {:ok, pid} = resp -> {resp, pid} 121 | {:ok, pid, _info} = resp -> {resp, pid} 122 | resp -> {resp, nil} 123 | end 124 | 125 | if pid, do: GenServer.call(terminator, {:placed_child, caller, pid, link?}) 126 | 127 | resp 128 | after 129 | Process.group_leader(self(), initial_gl) 130 | end 131 | end 132 | 133 | @impl true 134 | def init(opts) do 135 | Process.flag(:trap_exit, true) 136 | failsafe_timeout = Keyword.get(opts, :failsafe_timeout, 20_000) 137 | log = Keyword.get(opts, :log, false) 138 | 139 | case opts[:parent] || FLAME.Parent.get() do 140 | nil -> 141 | if log, do: Logger.log(log, "no parent found, :ignore") 142 | :ignore 143 | 144 | %FLAME.Parent{} = parent -> 145 | :global_group.monitor_nodes(true) 146 | failsafe_timer = Process.send_after(self(), :failsafe_shutdown, failsafe_timeout) 147 | 148 | child_placement_sup = 149 | case Keyword.fetch!(opts, :child_placement_sup) do 150 | pid when is_pid(pid) -> pid 151 | name when is_atom(name) -> Process.whereis(name) 152 | end 153 | 154 | state = %Terminator{ 155 | status: :connecting, 156 | child_placement_sup: child_placement_sup, 157 | parent: parent, 158 | calls: %{}, 159 | log: log, 160 | failsafe_timer: failsafe_timer, 161 | idle_shutdown_timer: {nil, nil} 162 | } 163 | 164 | log(state, "starting with parent #{inspect(parent)}") 165 | 166 | {:ok, state, {:continue, :connect}} 167 | end 168 | end 169 | 170 | @impl true 171 | def handle_continue(:connect, %Terminator{} = state) do 172 | {:noreply, connect(state)} 173 | end 174 | 175 | @impl true 176 | def handle_info(:connect, state) do 177 | if state.parent_monitor_ref do 178 | {:noreply, state} 179 | else 180 | {:noreply, connect(state)} 181 | end 182 | end 183 | 184 | def handle_info({:timeout, ref}, state) do 185 | # we can't rely on the ref to be there as Process.cancel_timer may still have delivered 186 | case state.calls do 187 | %{^ref => %Caller{} = caller} -> 188 | Process.demonitor(ref, []) 189 | Process.exit(caller.from_pid, :kill) 190 | {:noreply, drop_caller(state, ref)} 191 | 192 | %{} -> 193 | {:noreply, state} 194 | end 195 | end 196 | 197 | def handle_info({:DOWN, ref, :process, pid, reason}, %Terminator{} = state) do 198 | case state do 199 | %{parent: %{pid: ^pid}} -> 200 | message = "parent pid #{inspect(pid)} went away #{inspect(reason)}. 
Going down"
201 |         {:noreply, system_stop(state, message)}
202 | 
203 |       %{watchers: %{^ref => _} = watchers} ->
204 |         state = %{state | watchers: Map.delete(watchers, ref)}
205 |         {:noreply, maybe_schedule_shutdown(state)}
206 | 
207 |       %{} ->
208 |         {:noreply, drop_caller(state, ref)}
209 |     end
210 |   end
211 | 
212 |   def handle_info({:nodeup, who}, %Terminator{parent: parent} = state) do
213 |     if !state.parent_monitor_ref && who === node(parent.pid) do
214 |       {:noreply, connect(state)}
215 |     else
216 |       {:noreply, state}
217 |     end
218 |   end
219 | 
220 |   def handle_info({:nodedown, who}, %Terminator{parent: parent} = state) do
221 |     if who === node(parent.pid) do
222 |       new_state = system_stop(state, "nodedown #{inspect(who)}")
223 |       {:noreply, new_state}
224 |     else
225 |       {:noreply, state}
226 |     end
227 |   end
228 | 
229 |   def handle_info(:failsafe_shutdown, %Terminator{} = state) do
230 |     new_state = system_stop(state, "failsafe connect timeout")
231 |     {:noreply, new_state}
232 |   end
233 | 
234 |   def handle_info({:idle_shutdown, timer_ref}, %Terminator{parent: parent} = state) do
235 |     {_current_timer, current_timer_ref} = state.idle_shutdown_timer
236 | 
237 |     if timer_ref == current_timer_ref && state.idle_shutdown_check.() do
238 |       send_parent(parent, {:remote_shutdown, :idle})
239 |       new_state = system_stop(state, "idle shutdown")
240 |       {:noreply, new_state}
241 |     else
242 |       {:noreply, schedule_idle_shutdown(state)}
243 |     end
244 |   end
245 | 
246 |   @impl true
247 |   def handle_call({:placed_child, caller, child_pid, link?}, _from, %Terminator{} = state) do
248 |     {child_ref, new_state} = deadline_caller(state, child_pid, :infinity)
249 |     {caller_ref, new_state} = deadline_caller(new_state, caller, :infinity)
250 | 
251 |     new_state =
252 |       new_state
253 |       |> update_caller(child_ref, fn child ->
254 |         %{child | placed_caller_ref: caller_ref, link?: link?}
255 |       end)
256 |       |> update_caller(caller_ref, fn caller ->
257 |         %{caller | placed_child_ref: child_ref, link?: link?}
258 |       end)
259 | 
260 |     {:reply, {:ok, child_pid}, new_state}
261 |   end
262 | 
263 |   def handle_call({:watch, pids}, _from, %Terminator{watchers: watchers} = state) do
264 |     watchers =
265 |       Enum.reduce(pids, watchers, fn pid, acc -> Map.put(acc, Process.monitor(pid), []) end)
266 | 
267 |     state = %{state | watchers: watchers}
268 |     {:reply, :ok, cancel_idle_shutdown(state)}
269 |   end
270 | 
271 |   def handle_call({:watch_path, path}, _from, %Terminator{paths: paths} = state) do
272 |     {:reply, :ok, %{state | paths: [path | paths]}}
273 |   end
274 | 
275 |   def handle_call(:system_shutdown, _from, %Terminator{} = state) do
276 |     {:reply, :ok,
277 |      system_stop(state, "system shutdown instructed from parent #{inspect(state.parent.pid)}")}
278 |   end
279 | 
280 |   def handle_call({:deadline, timeout}, {from_pid, _tag}, %Terminator{} = state) do
281 |     {_ref, new_state} = deadline_caller(state, from_pid, timeout)
282 |     {:reply, :ok, new_state}
283 |   end
284 | 
285 |   def handle_call(
286 |         {:schedule_idle_shutdown, idle_after, idle_check, single_use?},
287 |         _from,
288 |         %Terminator{} = state
289 |       ) do
290 |     new_state = %{
291 |       state
292 |       | single_use: single_use?,
293 |         idle_shutdown_after: idle_after,
294 |         idle_shutdown_check: idle_check
295 |     }
296 | 
297 |     {:reply, :ok, schedule_idle_shutdown(new_state)}
298 |   end
299 | 
300 |   defp clean_up_paths(paths) do
301 |     for path <- paths do
302 |       File.rm_rf(path)
303 |     end
304 |   end
305 | 
306 |   @impl true
307 |   def terminate(_reason, %Terminator{} = state) do
308 |     state =
309 |       state
310 |       |> 
cancel_idle_shutdown() 311 | |> system_stop("terminating") 312 | 313 | # clean up any paths that were watched before waiting to not be killed 314 | clean_up_paths(state.paths) 315 | 316 | # supervisor will force kill us if we take longer than configured shutdown_timeout 317 | Enum.each(state.calls, fn 318 | # skip callers that placed a child since they are on the remote node 319 | {_ref, %Caller{placed_child_ref: ref}} when not is_nil(ref) -> 320 | :ok 321 | 322 | {ref, %Caller{}} -> 323 | receive do 324 | {:DOWN, ^ref, :process, _pid, _reason} -> :ok 325 | end 326 | end) 327 | end 328 | 329 | defp update_caller(%Terminator{} = state, ref, func) 330 | when is_reference(ref) and is_function(func, 1) do 331 | %{state | calls: Map.update!(state.calls, ref, func)} 332 | end 333 | 334 | defp deadline_caller(%Terminator{} = state, from_pid, timeout) 335 | when is_pid(from_pid) and 336 | (is_integer(timeout) or timeout == :infinity) do 337 | ref = Process.monitor(from_pid) 338 | 339 | timer = 340 | case timeout do 341 | :infinity -> nil 342 | ms when is_integer(ms) -> Process.send_after(self(), {:timeout, ref}, ms) 343 | end 344 | 345 | caller = %Caller{from_pid: from_pid, timer: timer} 346 | new_state = %{state | calls: Map.put(state.calls, ref, caller)} 347 | {ref, cancel_idle_shutdown(new_state)} 348 | end 349 | 350 | defp drop_caller(%Terminator{} = state, ref) when is_reference(ref) do 351 | %{^ref => %Caller{} = caller} = state.calls 352 | if caller.timer, do: Process.cancel_timer(caller.timer) 353 | state = %{state | calls: Map.delete(state.calls, ref)} 354 | 355 | # if the caller going down was one that placed a child, and the child is still tracked: 356 | # - if the child is not linked (link: false), do nothing 357 | # - if the child is linked, terminate the child. there is no need to notify the og caller, 358 | # as they linked themselves. 359 | # 360 | # Note: there is also a race where we can't rely on the link to have happened to so we 361 | # must monitor in the terminator even with the remote link 362 | state = 363 | with placed_child_ref <- caller.placed_child_ref, 364 | true <- is_reference(placed_child_ref), 365 | %{^placed_child_ref => %Caller{} = placed_child} <- state.calls, 366 | true <- placed_child.link? do 367 | if placed_child.timer, do: Process.cancel_timer(placed_child.timer) 368 | Process.demonitor(placed_child_ref, [:flush]) 369 | DynamicSupervisor.terminate_child(state.child_placement_sup, placed_child.from_pid) 370 | %{state | calls: Map.delete(state.calls, placed_child_ref)} 371 | else 372 | _ -> state 373 | end 374 | 375 | # if the caller going down was a placed child, clean up the placed caller ref 376 | state = 377 | with placed_caller_ref <- caller.placed_caller_ref, 378 | true <- is_reference(placed_caller_ref), 379 | %{^placed_caller_ref => %Caller{} = placed_caller} <- state.calls do 380 | if placed_caller.timer, do: Process.cancel_timer(placed_caller.timer) 381 | Process.demonitor(placed_caller_ref, [:flush]) 382 | %{state | calls: Map.delete(state.calls, placed_caller_ref)} 383 | else 384 | _ -> state 385 | end 386 | 387 | state = 388 | if state.single_use do 389 | system_stop(state, "single use completed. 
Going down") 390 | else 391 | state 392 | end 393 | 394 | maybe_schedule_shutdown(state) 395 | end 396 | 397 | defp maybe_schedule_shutdown(%{calls: calls, watchers: watchers} = state) do 398 | if map_size(calls) == 0 and map_size(watchers) == 0 do 399 | schedule_idle_shutdown(state) 400 | else 401 | state 402 | end 403 | end 404 | 405 | defp schedule_idle_shutdown(%Terminator{} = state) do 406 | state = cancel_idle_shutdown(state) 407 | 408 | case state.idle_shutdown_after do 409 | time when time in [nil, :infinity] -> 410 | %{state | idle_shutdown_timer: {nil, make_ref()}} 411 | 412 | time when is_integer(time) -> 413 | timer_ref = make_ref() 414 | timer = Process.send_after(self(), {:idle_shutdown, timer_ref}, time) 415 | %{state | idle_shutdown_timer: {timer, timer_ref}} 416 | end 417 | end 418 | 419 | defp cancel_idle_shutdown(%Terminator{} = state) do 420 | {timer, _ref} = state.idle_shutdown_timer 421 | if timer, do: Process.cancel_timer(timer) 422 | %{state | idle_shutdown_timer: {nil, make_ref()}} 423 | end 424 | 425 | defp connect(%Terminator{parent: %Parent{} = parent} = state) do 426 | new_attempts = state.connect_attempts + 1 427 | state.connect_timer && Process.cancel_timer(state.connect_timer) 428 | connected? = Node.connect(node(parent.pid)) 429 | 430 | log(state, "connect (#{new_attempts}) #{inspect(node(parent.pid))}: #{inspect(connected?)}") 431 | 432 | if connected? do 433 | state.failsafe_timer && Process.cancel_timer(state.failsafe_timer) 434 | ref = Process.monitor(parent.pid) 435 | 436 | send_parent(parent, {:remote_up, self()}) 437 | 438 | %{ 439 | state 440 | | status: :connected, 441 | parent_monitor_ref: ref, 442 | failsafe_timer: nil, 443 | connect_timer: nil, 444 | connect_attempts: new_attempts 445 | } 446 | else 447 | %{ 448 | state 449 | | connect_timer: Process.send_after(self(), :connect, 100), 450 | connect_attempts: new_attempts 451 | } 452 | end 453 | end 454 | 455 | defp system_stop(%Terminator{parent: parent} = state, log) do 456 | if state.status != :stopping do 457 | log(state, "#{inspect(__MODULE__)}.system_stop: #{log}") 458 | parent.backend.system_shutdown() 459 | end 460 | 461 | %{state | status: :stopping} 462 | end 463 | 464 | defp log(%Terminator{log: level}, message) do 465 | if level do 466 | Logger.log(level, message) 467 | end 468 | end 469 | 470 | defp send_parent(%Parent{} = parent, msg) do 471 | send(parent.pid, {parent.ref, msg}) 472 | end 473 | end 474 | -------------------------------------------------------------------------------- /lib/flame/terminator/supervisor.ex: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Terminator.Supervisor do 2 | @moduledoc false 3 | 4 | use Supervisor 5 | 6 | def start_link(opts) do 7 | sup_name = opts |> Keyword.fetch!(:name) |> Module.concat("Supervisor") 8 | Supervisor.start_link(__MODULE__, opts, name: sup_name) 9 | end 10 | 11 | def child_placement_sup_name(terminator_pid) when is_pid(terminator_pid) do 12 | {:registered_name, name} = Process.info(terminator_pid, :registered_name) 13 | child_placement_sup_name(name) 14 | end 15 | 16 | def child_placement_sup_name(terminator_name) when is_atom(terminator_name) do 17 | Module.concat(terminator_name, "ChildPlacementSup") 18 | end 19 | 20 | def init(opts) do 21 | {shutdown_timeout, opts} = Keyword.pop(opts, :shutdown_timeout, 30_000) 22 | name = Keyword.fetch!(opts, :name) 23 | child_placement_sup = child_placement_sup_name(name) 24 | terminator_opts = Keyword.merge(opts, child_placement_sup: 
child_placement_sup) 25 | 26 | children = 27 | [ 28 | {DynamicSupervisor, name: child_placement_sup, strategy: :one_for_one}, 29 | %{ 30 | id: FLAME.Terminator, 31 | start: {FLAME.Terminator, :start_link, [terminator_opts]}, 32 | type: :worker, 33 | shutdown: shutdown_timeout 34 | } 35 | ] 36 | 37 | Supervisor.init(children, strategy: :one_for_all, max_restarts: 0) 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /lib/flame/trackable.ex: -------------------------------------------------------------------------------- 1 | defprotocol FLAME.Trackable do 2 | @moduledoc """ 3 | A protocol called to track resources. 4 | 5 | This is invoked by FLAME from `FLAME.track_resources/3`, 6 | which is invoked when the `:track_resources` option is 7 | set to true. 8 | 9 | Sometimes we may want to allocate long lived resources 10 | in a FLAME but, because FLAME nodes are temporary, the 11 | node would terminate shortly after. The `:track_resources` 12 | option tells `FLAME` to look for resources which implement 13 | the `FLAME.Trackable` protocol. Those resources can then 14 | spawn PIDs in the remote node and tell FLAME to track them. 15 | Once all PIDs terminate, the FLAME will terminate too. 16 | 17 | Implementations of the protocol will receive the data type, 18 | a list of pids as `acc`, and the `node`. It must return the 19 | updated data type and an updated list of pids. If you need 20 | to traverse recursively, you may call `FLAME.track_resources/3`. 21 | """ 22 | 23 | @fallback_to_any true 24 | 25 | @doc """ 26 | The entry point for tracking. 27 | 28 | See the module docs. 29 | """ 30 | def track(data, acc, node) 31 | end 32 | 33 | defimpl FLAME.Trackable, for: Any do 34 | def track(data, acc, _node), do: {data, acc} 35 | end 36 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Runner.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :flame, 7 | version: "0.5.2", 8 | elixir: "~> 1.15", 9 | elixirc_paths: elixirc_paths(Mix.env()), 10 | start_permanent: Mix.env() == :prod, 11 | deps: deps(), 12 | package: package(), 13 | source_url: "https://github.com/phoenixframework/flame", 14 | homepage_url: "http://www.phoenixframework.org", 15 | description: """ 16 | Treat your entire application as a lambda, where modular parts can be executed on short-lived infrastructure. 17 | """ 18 | ] 19 | end 20 | 21 | defp package do 22 | [ 23 | maintainers: ["Chris McCord", "Jason Stiebs"], 24 | licenses: ["MIT"], 25 | links: %{ 26 | GitHub: "https://github.com/phoenixframework/flame" 27 | }, 28 | files: ~w(lib CHANGELOG.md LICENSE.md mix.exs README.md) 29 | ] 30 | end 31 | 32 | # Run "mix help compile.app" to learn about applications. 33 | def application do 34 | [ 35 | mod: {FLAME.Application, []}, 36 | extra_applications: [:logger, inets: :optional, ssl: :optional] 37 | ] 38 | end 39 | 40 | defp elixirc_paths(:test), do: ["lib", "test/support"] 41 | defp elixirc_paths(_), do: ["lib"] 42 | 43 | # Run "mix help deps" to learn about dependencies. 
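  # Illustrative only (not part of this file): a consuming application depends
  # on this package from its own deps/0 rather than using the list below, e.g.:
  #
  #     {:flame, "~> 0.5.2"}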
44 | defp deps do 45 | [ 46 | {:jason, ">= 0.0.0", optional: true}, 47 | {:castore, ">= 0.0.0", optional: true}, 48 | {:mox, "~> 1.1.0", only: :test}, 49 | {:ex_doc, ">= 0.0.0", only: :dev, runtime: false} 50 | ] 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "castore": {:hex, :castore, "1.0.7", "b651241514e5f6956028147fe6637f7ac13802537e895a724f90bf3e36ddd1dd", [:mix], [], "hexpm", "da7785a4b0d2a021cd1292a60875a784b6caef71e76bf4917bdee1f390455cf5"}, 3 | "earmark_parser": {:hex, :earmark_parser, "1.4.43", "34b2f401fe473080e39ff2b90feb8ddfeef7639f8ee0bbf71bb41911831d77c5", [:mix], [], "hexpm", "970a3cd19503f5e8e527a190662be2cee5d98eed1ff72ed9b3d1a3d466692de8"}, 4 | "ex_doc": {:hex, :ex_doc, "0.37.0", "970f92b39e62c460aa8a367508e938f5e4da6e2ff3eaed3f8530b25870f45471", [:mix], [{:earmark_parser, "~> 1.4.42", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "b0ee7f17373948e0cf471e59c3a0ee42f3bd1171c67d91eb3626456ef9c6202c"}, 5 | "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, 6 | "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, 7 | "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"}, 8 | "makeup_erlang": {:hex, :makeup_erlang, "1.0.2", "03e1804074b3aa64d5fad7aa64601ed0fb395337b982d9bcf04029d68d51b6a7", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "af33ff7ef368d5893e4a267933e7744e46ce3cf1f61e2dccf53a111ed3aa3727"}, 9 | "mox": {:hex, :mox, "1.1.0", "0f5e399649ce9ab7602f72e718305c0f9cdc351190f72844599545e4996af73c", [:mix], [], "hexpm", "d44474c50be02d5b72131070281a5d3895c0e7a95c780e90bc0cfe712f633a13"}, 10 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, 11 | } 12 | -------------------------------------------------------------------------------- /test/code_sync_test.exs: -------------------------------------------------------------------------------- 1 | defmodule FLAME.CodeSyncTest do 2 | use ExUnit.Case, async: false 3 | alias FLAME.CodeSync 4 | alias FLAME.Test.CodeSyncMock 5 | 6 | def rel(%CodeSyncMock{} = mock, paths) do 7 | Enum.map(paths, &Path.relative_to(&1, Path.join([File.cwd!(), "tmp", "#{mock.id}"]))) 8 | end 9 | 10 | def 
started_apps do 11 | Enum.map(Application.started_applications(), fn {app, _desc, _vsn} -> app end) 12 | end 13 | 14 | setup do 15 | Application.ensure_started(:logger) 16 | end 17 | 18 | describe "new/0" do 19 | test "creates a new struct with change tracking" do 20 | mock = CodeSyncMock.new() 21 | 22 | code_sync = 23 | mock.opts 24 | |> CodeSync.new() 25 | |> CodeSync.compute_changed_paths() 26 | 27 | assert %CodeSync{ 28 | sync_beam_hashes: %{}, 29 | changed_paths: changed_paths, 30 | deleted_paths: [], 31 | purge_modules: [] 32 | } = code_sync 33 | 34 | assert code_sync.apps_to_start == started_apps() 35 | 36 | assert rel(mock, changed_paths) == [ 37 | "one/ebin/Elixir.FLAME.Test.CodeSyncMock.Mod1.beam", 38 | "two/ebin/Elixir.FLAME.Test.CodeSyncMock.Mod2.beam" 39 | ] 40 | end 41 | end 42 | 43 | test "identifies changed, added, and deleted beams" do 44 | mock = CodeSyncMock.new() 45 | 46 | previous = 47 | mock.opts 48 | |> CodeSync.new() 49 | |> CodeSync.compute_sync_beams() 50 | 51 | # simulate change to mod1, new mod3, and deleted mod2 52 | :ok = CodeSyncMock.simulate_changes(mock) 53 | 54 | current = CodeSync.diff(previous) 55 | 56 | assert rel(mock, current.changed_paths) == [ 57 | "one/ebin/Elixir.FLAME.Test.CodeSyncMock.Mod1.beam", 58 | "one/ebin/Elixir.FLAME.Test.CodeSyncMock.Mod3.beam" 59 | ] 60 | 61 | assert rel(mock, current.deleted_paths) == [ 62 | "two/ebin/Elixir.FLAME.Test.CodeSyncMock.Mod2.beam" 63 | ] 64 | 65 | assert current.purge_modules == [FLAME.Test.CodeSyncMock.Mod2] 66 | 67 | # new diff should have no changes 68 | current = CodeSync.diff(current) 69 | assert current.changed_paths == [] 70 | assert current.deleted_paths == [] 71 | assert current.purge_modules == [] 72 | assert current.apps_to_start == [] 73 | end 74 | 75 | test "start_apps: false, does not sync started apps" do 76 | # cheap way to ensure apps are started on extract. Note async: false is required 77 | Application.stop(:logger) 78 | refute :logger in started_apps() 79 | mock = CodeSyncMock.new(start_apps: false) 80 | previous = CodeSync.new(mock.opts) 81 | assert previous.apps_to_start == [] 82 | 83 | Application.ensure_started(:logger) 84 | current = CodeSync.diff(previous) 85 | assert current.apps_to_start == [] 86 | end 87 | 88 | test "start_apps with a list syncs listed apps" do 89 | # cheap way to ensure apps are started on extract. Note async: false is required 90 | Application.stop(:logger) 91 | refute :logger in started_apps() 92 | mock = CodeSyncMock.new(start_apps: [:logger]) 93 | previous = CodeSync.new(mock.opts) 94 | assert previous.apps_to_start == [:logger] 95 | 96 | Application.ensure_started(:logger) 97 | current = CodeSync.diff(previous) 98 | assert current.apps_to_start == [] 99 | end 100 | 101 | test "compute_changed_paths packages and extracts packaged code and starts apps" do 102 | assert :logger in started_apps() 103 | mock = CodeSyncMock.new() 104 | 105 | code = 106 | mock.opts 107 | |> CodeSync.new() 108 | |> CodeSync.compute_changed_paths() 109 | 110 | assert %FLAME.CodeSync.PackagedStream{} = pkg = CodeSync.package_to_stream(code) 111 | assert File.exists?(pkg.stream.path) 112 | 113 | # cheap way to ensure apps are started on extract. 
Note async: false is required 114 | Application.stop(:logger) 115 | refute :logger in started_apps() 116 | 117 | assert CodeSync.extract_packaged_stream(pkg) == mock.extract_dir 118 | 119 | assert CodeSyncMock.extracted_rel_paths(mock) == [ 120 | "one/ebin/Elixir.FLAME.Test.CodeSyncMock.Mod1.beam", 121 | "two/ebin/Elixir.FLAME.Test.CodeSyncMock.Mod2.beam" 122 | ] 123 | 124 | assert :logger in started_apps() 125 | end 126 | end 127 | -------------------------------------------------------------------------------- /test/flame_test.exs: -------------------------------------------------------------------------------- 1 | defmodule FLAME.FLAMETest do 2 | use ExUnit.Case, async: true 3 | 4 | alias FLAME.Pool 5 | 6 | defp sim_long_running(pool, time \\ 1_000) do 7 | ref = make_ref() 8 | parent = self() 9 | 10 | task = 11 | Task.start_link(fn -> 12 | FLAME.call(pool, fn -> 13 | send(parent, {ref, :called}) 14 | Process.sleep(time) 15 | end) 16 | end) 17 | 18 | receive do 19 | {^ref, :called} -> task 20 | end 21 | end 22 | 23 | setup config do 24 | case config do 25 | %{runner: runner_opts} -> 26 | runner_sup = Module.concat(config.test, "RunnerSup") 27 | pool_pid = start_supervised!({Pool, Keyword.merge(runner_opts, name: config.test)}) 28 | 29 | {:ok, runner_sup: runner_sup, pool_pid: pool_pid} 30 | 31 | %{} -> 32 | :ok 33 | end 34 | end 35 | 36 | @tag runner: [min: 1, max: 2, max_concurrency: 2] 37 | test "init boots min runners synchronously and grows on demand", 38 | %{runner_sup: runner_sup} = config do 39 | min_pool = Supervisor.which_children(runner_sup) 40 | assert [{:undefined, _pid, :worker, [FLAME.Runner]}] = min_pool 41 | # execute against single runner 42 | assert FLAME.call(config.test, fn -> :works end) == :works 43 | 44 | # dynamically grows to max 45 | _task1 = sim_long_running(config.test) 46 | assert FLAME.call(config.test, fn -> :works end) == :works 47 | # max concurrency still below threshold 48 | assert Supervisor.which_children(runner_sup) == min_pool 49 | # max concurrency above threshold boots new runner 50 | _task2 = sim_long_running(config.test) 51 | assert FLAME.call(config.test, fn -> :works end) == :works 52 | new_pool = Supervisor.which_children(runner_sup) 53 | refute new_pool == min_pool 54 | assert length(new_pool) == 2 55 | # caller is now queued while waiting for available runner 56 | _task3 = sim_long_running(config.test) 57 | _task4 = sim_long_running(config.test) 58 | # task is queued and times out 59 | queued = spawn(fn -> FLAME.call(config.test, fn -> :queued end, timeout: 100) end) 60 | ref = Process.monitor(queued) 61 | assert_receive {:DOWN, ^ref, :process, _, {:timeout, _}}, 1000 62 | assert FLAME.call(config.test, fn -> :queued end) == :queued 63 | assert new_pool == Supervisor.which_children(runner_sup) 64 | end 65 | 66 | @tag runner: [min: 0, max: 1, max_concurrency: 2] 67 | test "concurrent calls on fully pending runners", 68 | %{runner_sup: runner_sup} = config do 69 | assert Supervisor.which_children(runner_sup) == [] 70 | parent = self() 71 | 72 | Task.start_link(fn -> 73 | FLAME.call(config.test, fn -> 74 | send(parent, :called) 75 | Process.sleep(:infinity) 76 | end) 77 | end) 78 | 79 | Task.start_link(fn -> 80 | FLAME.call(config.test, fn -> 81 | send(parent, :called) 82 | Process.sleep(:infinity) 83 | end) 84 | end) 85 | 86 | assert_receive :called 87 | assert_receive :called 88 | end 89 | 90 | def on_grow_start(meta) do 91 | send(:failure_test, {:grow_start, meta}) 92 | 93 | if Agent.get_and_update(:failure_test_counter, &{&1 + 1, &1 + 1}) 
<= 1 do 94 | raise "boom" 95 | end 96 | end 97 | 98 | def on_grow_end(result, meta) do 99 | send(:failure_test, {:grow_start_end, result, meta}) 100 | end 101 | 102 | @tag runner: [ 103 | min: 1, 104 | max: 2, 105 | max_concurrency: 1, 106 | on_grow_start: &__MODULE__.on_grow_start/1, 107 | on_grow_end: &__MODULE__.on_grow_end/2 108 | ] 109 | test "failure of pending async runner bootup", %{runner_sup: runner_sup} = config do 110 | parent = self() 111 | 112 | ExUnit.CaptureLog.capture_log(fn -> 113 | start_supervised!( 114 | {Agent, 115 | fn -> 116 | Process.register(self(), :failure_test_counter) 117 | 0 118 | end} 119 | ) 120 | 121 | Process.register(self(), :failure_test) 122 | assert [{:undefined, _pid, :worker, [FLAME.Runner]}] = Supervisor.which_children(runner_sup) 123 | # max concurrency above threshold tries to boot new runner 124 | _task2 = sim_long_running(config.test, :infinity) 125 | 126 | spawn_link(fn -> 127 | FLAME.cast(config.test, fn -> send(parent, :fullfilled) end) 128 | Process.sleep(:infinity) 129 | end) 130 | 131 | # first attempt fails 132 | refute_receive :fullfilled 133 | assert_receive {:grow_start, %{count: 2, pid: pid}} 134 | assert_receive {:grow_start_end, {:exit, _}, %{pid: ^pid, count: 1}} 135 | assert length(Supervisor.which_children(runner_sup)) == 1 136 | 137 | # retry attempt succeeds 138 | assert_receive {:grow_start, %{count: 2, pid: pid}}, 1000 139 | assert_receive {:grow_start_end, :ok, %{pid: ^pid, count: 2}} 140 | # queued og caller is now fullfilled from retried runner boot 141 | assert_receive :fullfilled 142 | assert FLAME.call(config.test, fn -> :works end) == :works 143 | assert length(Supervisor.which_children(runner_sup)) == 2 144 | end) 145 | end 146 | 147 | @tag runner: [min: 1, max: 2, max_concurrency: 2, idle_shutdown_after: 500] 148 | test "idle shutdown", %{runner_sup: runner_sup} = config do 149 | sim_long_running(config.test, 100) 150 | sim_long_running(config.test, 100) 151 | sim_long_running(config.test, 100) 152 | 153 | # we've scaled from min 1 to max 2 at this point 154 | assert [ 155 | {:undefined, runner1, :worker, [FLAME.Runner]}, 156 | {:undefined, runner2, :worker, [FLAME.Runner]} 157 | ] = Supervisor.which_children(runner_sup) 158 | 159 | Process.monitor(runner1) 160 | Process.monitor(runner2) 161 | assert_receive {:DOWN, _ref, :process, ^runner2, {:shutdown, :idle}}, 1000 162 | refute_receive {:DOWN, _ref, :process, ^runner1, {:shutdown, :idle}} 163 | 164 | assert [{:undefined, ^runner1, :worker, [FLAME.Runner]}] = 165 | Supervisor.which_children(runner_sup) 166 | end 167 | 168 | @tag runner: [min: 1, max: 1, max_concurrency: 2, idle_shutdown_after: 500] 169 | test "pool runner DOWN exits any active checkouts", %{runner_sup: runner_sup} = config do 170 | {:ok, active_checkout} = sim_long_running(config.test, 10_000) 171 | Process.unlink(active_checkout) 172 | Process.monitor(active_checkout) 173 | assert [{:undefined, runner, :worker, [FLAME.Runner]}] = Supervisor.which_children(runner_sup) 174 | Process.exit(runner, :brutal_kill) 175 | assert_receive {:DOWN, _ref, :process, ^active_checkout, :killed} 176 | end 177 | 178 | @tag runner: [min: 0, max: 1, max_concurrency: 2, idle_shutdown_after: 50] 179 | test "call links", %{runner_sup: runner_sup} = config do 180 | ExUnit.CaptureLog.capture_log(fn -> 181 | parent = self() 182 | # links by defaults 183 | Process.flag(:trap_exit, true) 184 | 185 | caught = 186 | try do 187 | FLAME.call( 188 | config.test, 189 | fn -> 190 | send(parent, {:called, self()}) 191 | 
Process.exit(self(), :kill) 192 | end 193 | ) 194 | catch 195 | kind, reason -> {kind, reason} 196 | end 197 | 198 | [{:undefined, runner, :worker, [FLAME.Runner]}] = Supervisor.which_children(runner_sup) 199 | Process.monitor(runner) 200 | assert {:exit, :killed} = caught 201 | assert_receive {:called, _flame_pid} 202 | assert_receive {:DOWN, _ref, :process, ^runner, {:shutdown, :idle}} 203 | 204 | # link: false 205 | Process.flag(:trap_exit, false) 206 | assert Supervisor.which_children(runner_sup) == [] 207 | parent = self() 208 | 209 | caught = 210 | try do 211 | FLAME.call( 212 | config.test, 213 | fn -> 214 | send(parent, {:called, self()}) 215 | raise "boom" 216 | end, 217 | link: false 218 | ) 219 | catch 220 | kind, reason -> {kind, reason} 221 | end 222 | 223 | [{:undefined, runner_pid, :worker, [FLAME.Runner]}] = Supervisor.which_children(runner_sup) 224 | Process.monitor(runner_pid) 225 | assert {:exit, {%RuntimeError{message: "boom"}, _}} = caught 226 | assert_receive {:called, flame_pid} 227 | Process.monitor(flame_pid) 228 | assert_receive {:DOWN, _ref, :process, ^flame_pid, :noproc} 229 | assert_receive {:DOWN, _ref, :process, ^runner_pid, {:shutdown, :idle}} 230 | assert Supervisor.which_children(runner_sup) == [] 231 | end) 232 | end 233 | 234 | @tag runner: [min: 0, max: 1, max_concurrency: 2, idle_shutdown_after: 50] 235 | test "cast with link false", %{runner_sup: runner_sup} = config do 236 | ExUnit.CaptureLog.capture_log(fn -> 237 | assert Supervisor.which_children(runner_sup) == [] 238 | parent = self() 239 | 240 | FLAME.cast( 241 | config.test, 242 | fn -> 243 | send(parent, {:called, self()}) 244 | raise "boom" 245 | end, 246 | link: false 247 | ) 248 | 249 | assert_receive {:called, flame_pid} 250 | Process.monitor(flame_pid) 251 | [{:undefined, runner_pid, :worker, [FLAME.Runner]}] = Supervisor.which_children(runner_sup) 252 | assert_receive {:DOWN, _ref, :process, ^flame_pid, :noproc} 253 | Process.monitor(runner_pid) 254 | assert_receive {:DOWN, _ref, :process, ^runner_pid, {:shutdown, :idle}} 255 | assert Supervisor.which_children(runner_sup) == [] 256 | end) 257 | end 258 | 259 | describe "cast" do 260 | @tag runner: [min: 1, max: 2, max_concurrency: 2, idle_shutdown_after: 500] 261 | test "normal execution", %{} = config do 262 | sim_long_running(config.test, 100) 263 | parent = self() 264 | 265 | assert FLAME.cast(config.test, fn -> 266 | send(parent, {:ran, self()}) 267 | 268 | receive do 269 | :continue -> :ok 270 | end 271 | end) == :ok 272 | 273 | assert_receive {:ran, cast_pid} 274 | Process.monitor(cast_pid) 275 | send(cast_pid, :continue) 276 | assert_receive {:DOWN, _ref, :process, ^cast_pid, :normal} 277 | end 278 | 279 | def growth_grow_start(meta) do 280 | send(Process.whereis(:current_test), {:grow_start, meta}) 281 | end 282 | 283 | @tag runner: [ 284 | min: 0, 285 | max: 2, 286 | max_concurrency: 1, 287 | on_grow_start: &__MODULE__.growth_grow_start/1 288 | ] 289 | test "pool growth", %{} = config do 290 | Process.register(self(), :current_test) 291 | parent = self() 292 | 293 | for i <- [1, 2, 3] do 294 | assert FLAME.cast(config.test, fn -> 295 | send(parent, {:ran, i, self()}) 296 | Process.sleep(500) 297 | end) == :ok 298 | end 299 | 300 | for i <- [1, 2, 3] do 301 | assert_receive {:ran, ^i, cast_pid} 302 | Process.monitor(cast_pid) 303 | assert_receive {:DOWN, _ref, :process, ^cast_pid, _}, 1000 304 | end 305 | 306 | assert_receive {:grow_start, %{count: 1}}, 1000 307 | assert_receive {:grow_start, %{count: 2}}, 1000 308 | 
refute_receive {:grow_start, _}, 1000 309 | end 310 | 311 | @tag runner: [min: 1, max: 2, max_concurrency: 2, idle_shutdown_after: 500] 312 | test "with exit and default link", %{} = config do 313 | ExUnit.CaptureLog.capture_log(fn -> 314 | Process.flag(:trap_exit, true) 315 | sim_long_running(config.test, 100) 316 | parent = self() 317 | 318 | assert FLAME.cast(config.test, fn -> 319 | send(parent, {:ran, self()}) 320 | 321 | receive do 322 | :continue -> exit(:boom) 323 | end 324 | end) == :ok 325 | 326 | assert_receive {:ran, cast_pid} 327 | Process.monitor(cast_pid) 328 | send(cast_pid, :continue) 329 | assert_receive {:EXIT, ^cast_pid, :boom} 330 | end) 331 | end 332 | end 333 | 334 | describe "process placement" do 335 | @tag runner: [min: 0, max: 2, max_concurrency: 2, idle_shutdown_after: 100] 336 | test "place_child/2", %{runner_sup: runner_sup} = config do 337 | assert [] = Supervisor.which_children(runner_sup) 338 | assert {:ok, pid} = FLAME.place_child(config.test, {Agent, fn -> 1 end}) 339 | Process.monitor(pid) 340 | 341 | assert [{:undefined, runner, :worker, [FLAME.Runner]}] = 342 | Supervisor.which_children(runner_sup) 343 | 344 | Process.monitor(runner) 345 | assert Agent.get(pid, & &1) == 1 346 | # does not idle down runner or actively placed children 347 | refute_receive {:DOWN, _ref, :process, _, _}, 1000 348 | # active caller to prevent idle down 349 | assert FLAME.cast(config.test, fn -> 350 | Process.sleep(1_000) 351 | end) == :ok 352 | 353 | Agent.stop(pid) 354 | assert_receive {:DOWN, _ref, :process, ^pid, _}, 100 355 | 356 | # runner does not idle down with active checkout from cast 357 | refute_receive {:DOWN, _ref, :process, ^runner, _}, 1000 358 | 359 | # runner idles down now that placed child and cast callers are gone 360 | assert_receive {:DOWN, _ref, :process, ^runner, _}, 1000 361 | end 362 | 363 | @tag runner: [min: 0, max: 2, max_concurrency: 2, idle_shutdown_after: 100] 364 | test "place_child links", %{runner_sup: runner_sup} = config do 365 | # links by default 366 | Process.flag(:trap_exit, true) 367 | assert {:ok, pid} = FLAME.place_child(config.test, {Agent, fn -> 1 end}) 368 | 369 | assert [{:undefined, runner, :worker, [FLAME.Runner]}] = 370 | Supervisor.which_children(runner_sup) 371 | 372 | Process.monitor(runner) 373 | 374 | Process.exit(pid, :kill) 375 | assert_receive {:EXIT, ^pid, :killed}, 100 376 | 377 | # runner idles down now that placed child and cast callers are gone 378 | assert_receive {:DOWN, _ref, :process, ^runner, _}, 1000 379 | 380 | # with explicit link: false 381 | Process.flag(:trap_exit, false) 382 | assert {:ok, pid} = FLAME.place_child(config.test, {Agent, fn -> 1 end}, link: false) 383 | Process.monitor(pid) 384 | 385 | assert [{:undefined, runner, :worker, [FLAME.Runner]}] = 386 | Supervisor.which_children(runner_sup) 387 | 388 | Process.monitor(runner) 389 | 390 | Process.exit(pid, :kill) 391 | assert_receive {:DOWN, _ref, :process, ^pid, :killed}, 100 392 | 393 | # runner idles down now that placed child and cast callers are gone 394 | assert_receive {:DOWN, _ref, :process, ^runner, _}, 1000 395 | end 396 | 397 | @tag runner: [min: 0, max: 2, max_concurrency: 2, idle_shutdown_after: 100] 398 | test "place_child when caller exits", %{runner_sup: runner_sup} = config do 399 | # links by default 400 | parent = self() 401 | 402 | caller = 403 | spawn(fn -> 404 | {:ok, pid} = FLAME.place_child(config.test, {Agent, fn -> 1 end}) 405 | send(parent, {:child, pid}) 406 | Process.sleep(:infinity) 407 | end) 408 | 409 | 
assert_receive {:child, placed_child} 410 | 411 | assert [{:undefined, runner, :worker, [FLAME.Runner]}] = 412 | Supervisor.which_children(runner_sup) 413 | 414 | Process.monitor(runner) 415 | Process.monitor(placed_child) 416 | 417 | Process.exit(caller, :kill) 418 | 419 | assert_receive {:DOWN, _ref, :process, ^placed_child, _} 420 | # runner idles down now that placed child and cast callers are gone 421 | assert_receive {:DOWN, _ref, :process, ^runner, _}, 1000 422 | 423 | # with link: false 424 | caller = 425 | spawn(fn -> 426 | {:ok, pid} = FLAME.place_child(config.test, {Agent, fn -> 1 end}, link: false) 427 | send(parent, {:child, pid}) 428 | Process.sleep(:infinity) 429 | end) 430 | 431 | assert_receive {:child, placed_child} 432 | 433 | assert [{:undefined, runner, :worker, [FLAME.Runner]}] = 434 | Supervisor.which_children(runner_sup) 435 | 436 | Process.monitor(runner) 437 | Process.monitor(placed_child) 438 | Process.exit(caller, :kill) 439 | 440 | refute_receive {:DOWN, _ref, :process, ^placed_child, _} 441 | # runner does not idle down when caller goes away since placed child still running 442 | refute_receive {:DOWN, _ref, :process, ^runner, _}, 1000 443 | 444 | Process.exit(placed_child, :kill) 445 | assert_receive {:DOWN, _ref, :process, ^placed_child, _} 446 | # runner idles down now that placed child and cast callers are gone 447 | assert_receive {:DOWN, _ref, :process, ^runner, _}, 1000 448 | end 449 | end 450 | 451 | describe "resource tracking" do 452 | @tag runner: [min: 0, max: 1] 453 | test "local", config do 454 | name = :"#{config.test}_trackable" 455 | ref = make_ref() 456 | trackable = %MyTrackable{name: name, ref: ref} 457 | non_trackable = URI.new!("/") 458 | 459 | {[{map}], [pid]} = 460 | FLAME.track_resources([{%{"yes" => trackable, "no" => non_trackable}}], [], node()) 461 | 462 | assert map_size(map) == 2 463 | assert ^non_trackable = map["no"] 464 | assert %MyTrackable{name: ^name, ref: ^ref, pid: ^pid} = map["yes"] 465 | assert Process.whereis(name) == pid 466 | 467 | monitor_ref = Process.monitor(pid) 468 | send(pid, {ref, :stop}) 469 | assert_receive {:DOWN, ^monitor_ref, _, _, :normal} 470 | end 471 | 472 | @tag runner: [min: 0, max: 2, max_concurrency: 2, idle_shutdown_after: 100] 473 | test "remote without tracking", config do 474 | name = :"#{config.test}_trackable" 475 | non_trackable = URI.new!("/") 476 | 477 | [{map}] = 478 | FLAME.call(config.test, fn -> 479 | ref = make_ref() 480 | trackable = %MyTrackable{name: name, ref: ref} 481 | [{%{"yes" => trackable, "no" => non_trackable}}] 482 | end) 483 | 484 | assert map_size(map) == 2 485 | assert ^non_trackable = map["no"] 486 | assert %MyTrackable{pid: nil} = map["yes"] 487 | end 488 | 489 | @tag runner: [min: 0, max: 2, max_concurrency: 2, idle_shutdown_after: 100] 490 | test "remote with tracking", %{runner_sup: runner_sup} = config do 491 | name = :"#{config.test}_trackable" 492 | non_trackable = URI.new!("/") 493 | 494 | [{map}] = 495 | FLAME.call( 496 | config.test, 497 | fn -> 498 | ref = make_ref() 499 | trackable = %MyTrackable{name: name, ref: ref} 500 | [{%{"yes" => trackable, "no" => non_trackable}}] 501 | end, 502 | track_resources: true 503 | ) 504 | 505 | assert [{:undefined, runner, :worker, [FLAME.Runner]}] = 506 | Supervisor.which_children(runner_sup) 507 | 508 | Process.monitor(runner) 509 | assert map_size(map) == 2 510 | assert ^non_trackable = map["no"] 511 | assert %MyTrackable{pid: pid} = trackable = map["yes"] 512 | assert Process.alive?(pid) 513 | refute_receive {:DOWN, 
_, _, ^runner, _}, 1000 514 | send(pid, {trackable.ref, :stop}) 515 | assert_receive {:DOWN, _, _, ^runner, {:shutdown, :idle}}, 1000 516 | end 517 | 518 | @tag runner: [ 519 | min: 0, 520 | max: 2, 521 | max_concurrency: 2, 522 | idle_shutdown_after: 100, 523 | track_resources: true 524 | ] 525 | test "remote with tracking enabled at pool level", %{runner_sup: runner_sup} = config do 526 | name = :"#{config.test}_trackable" 527 | non_trackable = URI.new!("/") 528 | 529 | [{map}] = 530 | FLAME.call( 531 | config.test, 532 | fn -> 533 | ref = make_ref() 534 | trackable = %MyTrackable{name: name, ref: ref} 535 | [{%{"yes" => trackable, "no" => non_trackable}}] 536 | end 537 | ) 538 | 539 | assert [{:undefined, runner, :worker, [FLAME.Runner]}] = 540 | Supervisor.which_children(runner_sup) 541 | 542 | Process.monitor(runner) 543 | assert map_size(map) == 2 544 | assert ^non_trackable = map["no"] 545 | assert %MyTrackable{pid: pid} = trackable = map["yes"] 546 | assert Process.alive?(pid) 547 | refute_receive {:DOWN, _, _, ^runner, _}, 1000 548 | send(pid, {trackable.ref, :stop}) 549 | assert_receive {:DOWN, _, _, ^runner, {:shutdown, :idle}}, 1000 550 | end 551 | 552 | @tag runner: [ 553 | min: 0, 554 | max: 1, 555 | max_concurrency: 1, 556 | idle_shutdown_after: 100, 557 | track_resources: true 558 | ] 559 | test "remote with tracking max concurrency", %{runner_sup: runner_sup} = config do 560 | non_trackable = URI.new!("/") 561 | 562 | call = fn count -> 563 | ref = make_ref() 564 | 565 | trackables = 566 | for _ <- 1..count, 567 | do: %MyTrackable{ 568 | name: :"#{config.test}_trackable_#{System.unique_integer()}", 569 | ref: ref 570 | } 571 | 572 | [{%{"yes" => trackables, "no" => non_trackable}}] 573 | end 574 | 575 | [{map}] = FLAME.call(config.test, fn -> call.(2) end) 576 | 577 | assert [{:undefined, runner, :worker, [FLAME.Runner]}] = 578 | Supervisor.which_children(runner_sup) 579 | 580 | Process.monitor(runner) 581 | assert map_size(map) == 2 582 | assert ^non_trackable = map["no"] 583 | assert [%MyTrackable{} = trackable1, %MyTrackable{} = trackable2] = map["yes"] 584 | 585 | # original trackables still occupies the slots 586 | assert Process.alive?(trackable1.pid) 587 | assert Process.alive?(trackable2.pid) 588 | refute_receive {:DOWN, _, _, ^runner, _}, 1000 589 | 590 | # check in the trackable 1 591 | send(trackable1.pid, {trackable1.ref, :stop}) 592 | 593 | # no idle down because second trackable still alive 594 | refute_receive {:DOWN, _, _, ^runner, _}, 1000 595 | 596 | # trackable2 occupies the only available slot, so next call times out 597 | caught = 598 | try do 599 | FLAME.call(config.test, fn -> call.(1) end, timeout: 1000) 600 | catch 601 | kind, reason -> {kind, reason} 602 | end 603 | 604 | assert {:exit, {:timeout, _}} = caught 605 | 606 | # check in the trackable 2 607 | send(trackable2.pid, {trackable2.ref, :stop}) 608 | 609 | # runner is now free for more work on open slot 610 | [{map}] = FLAME.call(config.test, fn -> call.(1) end) 611 | 612 | assert [{:undefined, runner, :worker, [FLAME.Runner]}] = 613 | Supervisor.which_children(runner_sup) 614 | 615 | Process.monitor(runner) 616 | assert map_size(map) == 2 617 | assert ^non_trackable = map["no"] 618 | assert [%MyTrackable{pid: pid} = trackable] = map["yes"] 619 | 620 | # check in the trackable 621 | send(pid, {trackable.ref, :stop}) 622 | 623 | # runner idles down 624 | assert_receive {:DOWN, _, _, ^runner, {:shutdown, :idle}}, 1000 625 | end 626 | end 627 | 628 | test "code_sync artifact cleaner", config do 
629 | mock = FLAME.Test.CodeSyncMock.new() 630 | 631 | cleaner = Module.concat(config.test, "Cleaner") 632 | 633 | pool_pid = 634 | start_supervised!( 635 | {Pool, min: 1, max: 1, max_concurrency: 1, name: config.test, code_sync: mock.opts} 636 | ) 637 | 638 | assert [artifact] = FLAME.Pool.Cleaner.list_paths(cleaner) 639 | assert File.exists?(artifact) 640 | assert FLAME.call(config.test, fn -> :works end) == :works 641 | Supervisor.stop(pool_pid) 642 | refute File.exists?(artifact) 643 | end 644 | end 645 | -------------------------------------------------------------------------------- /test/fly_backend_test.exs: -------------------------------------------------------------------------------- 1 | defmodule FLAME.FlyBackendTest do 2 | use ExUnit.Case, async: false 3 | 4 | alias FLAME.{Runner, FlyBackend} 5 | 6 | def new({backend, opts}) do 7 | Runner.new(backend: {backend, Keyword.merge([terminator_sup: __MODULE__], opts)}) 8 | end 9 | 10 | setup do 11 | Application.delete_env(:flame, :backend) 12 | Application.delete_env(:flame, FlyBackend) 13 | end 14 | 15 | test "explicit backend" do 16 | assert_raise ArgumentError, ~r/missing :token/, fn -> 17 | new({FlyBackend, []}) 18 | end 19 | 20 | assert_raise ArgumentError, ~r/missing :image/, fn -> 21 | new({FlyBackend, token: "123"}) 22 | end 23 | 24 | assert_raise ArgumentError, ~r/missing :app/, fn -> 25 | new({FlyBackend, token: "123", image: "img"}) 26 | end 27 | 28 | assert_raise ArgumentError, ~r/missing :app/, fn -> 29 | new({FlyBackend, token: "123", image: "img", boot_timeout: 55123}) 30 | end 31 | 32 | assert new({FlyBackend, token: "123", image: "img", app: "app"}) 33 | end 34 | 35 | test "extended opts" do 36 | opts = [ 37 | token: "123", 38 | image: "img", 39 | app: "app", 40 | host: "foo.local", 41 | env: %{"ONE" => "1"}, 42 | cpu_kind: "performance", 43 | cpus: 1, 44 | memory_mb: 256, 45 | gpu_kind: "a100-pcie-40gb" 46 | ] 47 | 48 | runner = new({FlyBackend, opts}) 49 | assert {:ok, init} = runner.backend_init 50 | assert init.host == "foo.local" 51 | assert init.cpu_kind == "performance" 52 | assert init.cpus == 1 53 | assert init.memory_mb == 256 54 | assert init.gpu_kind == "a100-pcie-40gb" 55 | 56 | assert %{ 57 | "ONE" => "1", 58 | "FLAME_PARENT" => _, 59 | "PHX_SERVER" => "false" 60 | } = init.env 61 | end 62 | 63 | test "global configured backend" do 64 | assert_raise ArgumentError, ~r/missing :token/, fn -> 65 | Application.put_env(:flame, FLAME.FlyBackend, []) 66 | Runner.new(backend: FLAME.FlyBackend) 67 | end 68 | 69 | assert_raise ArgumentError, ~r/missing :image/, fn -> 70 | Application.put_env(:flame, FLAME.FlyBackend, token: "123") 71 | Runner.new(backend: FLAME.FlyBackend) 72 | end 73 | 74 | assert_raise ArgumentError, ~r/missing :app/, fn -> 75 | Application.put_env(:flame, FLAME.FlyBackend, token: "123", image: "img") 76 | Runner.new(backend: FLAME.FlyBackend) 77 | end 78 | 79 | Application.put_env(:flame, :backend, FLAME.FlyBackend) 80 | Application.put_env(:flame, FLAME.FlyBackend, token: "123", image: "img", app: "app") 81 | 82 | assert Runner.new(backend: FLAME.FlyBackend) 83 | end 84 | 85 | test "parent backend attributes" do 86 | assert %FLAME.Parent{ 87 | pid: _, 88 | ref: _, 89 | backend: FLAME.FlyBackend, 90 | flame_vsn: vsn, 91 | backend_vsn: vsn, 92 | backend_app: :flame 93 | } = 94 | FLAME.Parent.new( 95 | make_ref(), 96 | self(), 97 | FLAME.FlyBackend, 98 | "app-flame-1", 99 | "FLY_PRIVATE_IP" 100 | ) 101 | end 102 | end 103 | 
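# Illustrative sketch (not part of the test suite): the options exercised above
# mirror what an application would set globally for the Fly backend, e.g. in
# config/runtime.exs (the environment variable names here are placeholders):
#
#     config :flame, :backend, FLAME.FlyBackend
#
#     config :flame, FLAME.FlyBackend,
#       token: System.fetch_env!("FLY_API_TOKEN"),
#       image: System.fetch_env!("FLY_IMAGE_REF"),
#       app: System.fetch_env!("FLY_APP_NAME"),
#       cpu_kind: "performance",
#       cpus: 1,
#       memory_mb: 256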
-------------------------------------------------------------------------------- /test/parser/json_test.exs: -------------------------------------------------------------------------------- 1 | defmodule FLAME.Parser.JSONTest do 2 | use ExUnit.Case, async: false 3 | 4 | alias FLAME.Parser.JSON 5 | 6 | describe "encode!/1" do 7 | test "should encode string" do 8 | assert JSON.encode!("foo") == "\"foo\"" 9 | end 10 | 11 | test "should encode atom" do 12 | assert JSON.encode!(:FLAME) == "\"FLAME\"" 13 | end 14 | 15 | test "should encode string maps" do 16 | assert JSON.encode!(%{"foo" => "bar"}) == "{\"foo\":\"bar\"}" 17 | end 18 | 19 | test "should encode atom maps" do 20 | assert JSON.encode!(%{foo: "bar"}) == "{\"foo\":\"bar\"}" 21 | end 22 | 23 | test "should encode nested maps" do 24 | assert JSON.encode!(%{foo: "bar", bar: %{baz: nil}}) in [ 25 | "{\"foo\":\"bar\",\"bar\":{\"baz\":null}}", 26 | "{\"bar\":{\"baz\":null},\"foo\":\"bar\"}" 27 | ] 28 | end 29 | 30 | test "should encode list" do 31 | assert JSON.encode!([%{foo: "bar"}]) == "[{\"foo\":\"bar\"}]" 32 | assert JSON.encode!([%{foo: "bar"}, %{bar: nil}]) == "[{\"foo\":\"bar\"},{\"bar\":null}]" 33 | end 34 | 35 | test "should encode nullable values" do 36 | assert JSON.encode!(%{foo: nil}) == "{\"foo\":null}" 37 | assert JSON.encode!(%{"foo" => nil}) == "{\"foo\":null}" 38 | assert JSON.encode!(nil) == "null" 39 | end 40 | end 41 | 42 | describe "decode!/1" do 43 | test "should decode string" do 44 | assert JSON.decode!("\"foo\"") == "foo" 45 | end 46 | 47 | test "should decode maps" do 48 | assert JSON.decode!("{\"foo\":\"bar\"}") == %{"foo" => "bar"} 49 | end 50 | 51 | test "should decode nested maps" do 52 | assert JSON.decode!("{\"bar\":{\"baz\":null},\"foo\":\"bar\"}") == %{ 53 | "foo" => "bar", 54 | "bar" => %{"baz" => nil} 55 | } 56 | end 57 | 58 | test "should decode list" do 59 | assert JSON.decode!("[{\"foo\":\"bar\"}]") == [%{"foo" => "bar"}] 60 | 61 | assert JSON.decode!("[{\"foo\":\"bar\"}, {\"bar\":null}]") == [ 62 | %{"foo" => "bar"}, 63 | %{"bar" => nil} 64 | ] 65 | end 66 | 67 | test "should decode nullable values" do 68 | assert JSON.decode!("{\"foo\":null}") == %{"foo" => nil} 69 | assert JSON.decode!("null") == nil 70 | end 71 | end 72 | 73 | describe "json parser" do 74 | test "correct json parser based on erlang json availability" do 75 | if Code.ensure_loaded?(:json) do 76 | assert JSON.json_parser() == :json 77 | else 78 | assert JSON.json_parser() == Jason 79 | end 80 | end 81 | end 82 | end 83 | -------------------------------------------------------------------------------- /test/queue_test.exs: -------------------------------------------------------------------------------- 1 | defmodule FLAME.QueueTest do 2 | use ExUnit.Case 3 | 4 | alias FLAME.Queue 5 | 6 | describe "new/0" do 7 | test "creates a new Queue" do 8 | assert %Queue{} = Queue.new() 9 | end 10 | end 11 | 12 | describe "insert/3" do 13 | test "inserts a new item into the queue" do 14 | queue = Queue.insert(Queue.new(), "item1", :key1) 15 | assert Queue.size(queue) == 1 16 | assert Queue.get_by_key(queue, :key1) == "item1" 17 | end 18 | end 19 | 20 | describe "pop/1" do 21 | test "pops the first item from the queue" do 22 | queue = 23 | Queue.new() 24 | |> Queue.insert("item1", :key1) 25 | |> Queue.insert("item2", :key2) 26 | 27 | assert queue.keys == %{key1: 0, key2: 1} 28 | 29 | {popped_item, %Queue{} = queue} = Queue.pop(queue) 30 | 31 | assert popped_item == {:key1, "item1"} 32 | assert Queue.size(queue) == 1 33 | assert queue.keys == 
%{key2: 1} 34 | 35 | assert {{:key2, "item2"}, %Queue{} = queue} = Queue.pop(queue) 36 | assert Queue.size(queue) == 0 37 | assert queue.idx == 0 38 | end 39 | 40 | test "returns an error when the queue is empty" do 41 | assert {nil, %Queue{}} == Queue.pop(Queue.new()) 42 | end 43 | end 44 | 45 | describe "pop_until/2" do 46 | test "pops until function returns true" do 47 | queue = Queue.new() 48 | assert Queue.pop_until(queue, fn _, _ -> true end) == {nil, queue} 49 | 50 | queue = 51 | Queue.new() 52 | |> Queue.insert(10, :key1) 53 | |> Queue.insert(11, :key2) 54 | |> Queue.insert(20, :key3) 55 | |> Queue.insert(30, :key4) 56 | 57 | assert {{:key3, 20}, %Queue{} = queue} = Queue.pop_until(queue, fn _key, i -> i >= 20 end) 58 | assert Queue.size(queue) == 1 59 | assert queue.keys == %{key4: 3} 60 | end 61 | end 62 | 63 | describe "access" do 64 | test "retrieves an item by index" do 65 | queue = 66 | Queue.new() 67 | |> Queue.insert("item1", :key1) 68 | |> Queue.insert("item2", :key2) 69 | 70 | assert Queue.get_by_key(queue, :key1) == "item1" 71 | assert Queue.get_by_key(queue, :key2) == "item2" 72 | end 73 | 74 | test "returns nil for un unknown index or key" do 75 | queue = Queue.new() 76 | assert Queue.get_by_key(queue, :nope) == nil 77 | end 78 | end 79 | 80 | describe "delete_by_key/2" do 81 | test "deletes an item by its secondary key" do 82 | queue = 83 | Queue.new() 84 | |> Queue.insert("item1", :key1) 85 | |> Queue.insert("item2", :key2) 86 | 87 | queue = Queue.delete_by_key(queue, :key1) 88 | assert Queue.get_by_key(queue, :key1) == nil 89 | assert Queue.get_by_key(queue, :key2) == "item2" 90 | assert Queue.size(queue) == 1 91 | assert queue.idx == 2 92 | queue = Queue.delete_by_key(queue, :key2) 93 | assert Queue.size(queue) == 0 94 | assert queue.idx == 0 95 | 96 | queue = Queue.insert(queue, "item3", :key3) 97 | assert Queue.get_by_key(queue, :key3) == "item3" 98 | assert Queue.size(queue) == 1 99 | assert queue.idx == 1 100 | end 101 | 102 | test "non-existent key" do 103 | queue = Queue.new() 104 | assert queue == Queue.delete_by_key(queue, :key1) 105 | end 106 | end 107 | end 108 | -------------------------------------------------------------------------------- /test/runner_test.exs: -------------------------------------------------------------------------------- 1 | defmodule FLAME.RunnerTest do 2 | use ExUnit.Case, async: false 3 | 4 | import Mox 5 | 6 | alias FLAME.{Runner, MockBackend} 7 | alias FLAME.Test.CodeSyncMock 8 | 9 | # Make sure mocks are verified when the test exits 10 | setup :set_mox_global 11 | setup :verify_on_exit! 
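  # Note: :set_mox_global is used because the Runner and Terminator processes
  # started by these tests (not the test process itself) invoke the MockBackend
  # callbacks; global mode lets expectations defined here be satisfied from any
  # process.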
12 | 13 | setup do 14 | term_sup = 15 | start_supervised!({DynamicSupervisor, name: __MODULE__.TermSup, strategy: :one_for_one}) 16 | 17 | {:ok, term_sup: term_sup} 18 | end 19 | 20 | @post_success %{ 21 | "id" => "app", 22 | "instance_id" => "iad-app", 23 | "private_ip" => node() |> to_string() |> String.split("@") |> Enum.at(-1) 24 | } 25 | 26 | defp remote_boot(state) do 27 | parent = FLAME.Parent.new(make_ref(), self(), MockBackend, "app-flame-1", "MY_HOST_IP") 28 | name = Module.concat(FLAME.TerminatorTest, to_string(System.unique_integer([:positive]))) 29 | opts = [name: name, parent: parent] 30 | spec = Supervisor.child_spec({FLAME.Terminator, opts}, restart: :temporary) 31 | {:ok, _sup_pid} = DynamicSupervisor.start_child(__MODULE__.TermSup, spec) 32 | 33 | case Process.whereis(name) do 34 | terminator_pid when is_pid(terminator_pid) -> {:ok, terminator_pid, state} 35 | end 36 | end 37 | 38 | def mock_successful_runner(executions, runner_opts \\ []) do 39 | test_pid = self() 40 | 41 | MockBackend 42 | |> expect(:init, fn _opts -> {:ok, :state} end) 43 | |> expect(:remote_boot, fn :state -> remote_boot(@post_success) end) 44 | |> expect(:handle_info, fn {_ref, {:remote_up, _pid}}, state -> {:noreply, state} end) 45 | |> expect(:remote_spawn_monitor, executions, fn @post_success = _state, func -> 46 | {:ok, spawn_monitor(func)} 47 | end) 48 | # we need to send and assert_receive to avoid the race of going down before mox verify 49 | |> expect(:system_shutdown, fn -> send(test_pid, :stopped) end) 50 | 51 | Runner.start_link( 52 | Keyword.merge( 53 | [backend: {MockBackend, image: "my-imag", app_name: "test", api_token: "secret"}], 54 | runner_opts 55 | ) 56 | ) 57 | end 58 | 59 | def wrap_exit(runner, func) do 60 | prev_trap = Process.flag(:trap_exit, true) 61 | Process.unlink(runner) 62 | ref = make_ref() 63 | 64 | ExUnit.CaptureLog.capture_log(fn -> 65 | error = 66 | try do 67 | func.() 68 | catch 69 | kind, reason -> {kind, reason} 70 | end 71 | 72 | send(self(), {ref, error}) 73 | end) 74 | 75 | receive do 76 | {^ref, error} -> 77 | Process.flag(:trap_exit, prev_trap) 78 | error 79 | end 80 | end 81 | 82 | setup_all do 83 | Mox.defmock(MockBackend, for: FLAME.Backend) 84 | Application.put_env(:flame, :backend, FLAME.MockBackend) 85 | :ok 86 | end 87 | 88 | test "backend success single_use" do 89 | test_pid = self() 90 | 91 | MockBackend 92 | |> expect(:init, fn _opts -> {:ok, :state} end) 93 | |> expect(:remote_boot, fn :state -> remote_boot(@post_success) end) 94 | |> expect(:handle_info, fn {_ref, {:remote_up, _pid}}, state -> {:noreply, state} end) 95 | |> expect(:remote_spawn_monitor, 2, fn @post_success = _state, func -> 96 | {:ok, spawn_monitor(func)} 97 | end) 98 | |> expect(:system_shutdown, fn -> send(test_pid, :stopped) end) 99 | 100 | {:ok, runner} = 101 | Runner.start_link( 102 | single_use: true, 103 | backend: {MockBackend, image: "my-imag", app_name: "test", api_token: "secret"} 104 | ) 105 | 106 | assert Runner.remote_boot(runner, nil) == :ok 107 | assert Runner.call(runner, self(), fn -> :works end) == {:works, []} 108 | assert_receive :stopped 109 | end 110 | 111 | test "backend success multi use" do 112 | {:ok, runner} = mock_successful_runner(4) 113 | 114 | assert Runner.remote_boot(runner, nil) == :ok 115 | assert Runner.remote_boot(runner, nil) == {:error, :already_booted} 116 | assert Runner.call(runner, self(), fn -> :works end) == {:works, []} 117 | refute_receive :stopped 118 | assert Runner.call(runner, self(), fn -> :still_works end) == {:still_works, 
[]} 119 | ref = Process.monitor(runner) 120 | assert Runner.shutdown(runner) == :ok 121 | assert_receive :stopped 122 | assert_receive {:DOWN, ^ref, :process, ^runner, :normal} 123 | end 124 | 125 | test "backend runner spawn connect failure" do 126 | MockBackend 127 | |> expect(:init, fn _opts -> {:ok, :state} end) 128 | |> expect(:remote_boot, fn :state -> {:error, :invalid_authentication} end) 129 | 130 | {:ok, runner} = 131 | Runner.start_link( 132 | backend: {MockBackend, image: "my-imag", app_name: "test", api_token: "secret"} 133 | ) 134 | 135 | assert {:exit, {{:shutdown, :invalid_authentication}, _}} = 136 | wrap_exit(runner, fn -> Runner.remote_boot(runner, nil) end) 137 | end 138 | 139 | test "backend runner boot failure" do 140 | MockBackend 141 | |> expect(:init, fn _opts -> {:ok, :state} end) 142 | |> expect(:remote_boot, fn :state -> {:error, :nxdomain} end) 143 | 144 | {:ok, runner} = 145 | Runner.start_link( 146 | backend: {MockBackend, image: "my-imag", app_name: "test", api_token: "secret"} 147 | ) 148 | 149 | assert {:exit, {{:shutdown, :nxdomain}, _}} = 150 | wrap_exit(runner, fn -> Runner.remote_boot(runner, nil) end) 151 | end 152 | 153 | describe "execution failure" do 154 | test "single use" do 155 | {:ok, runner} = mock_successful_runner(3, single_use: true) 156 | assert Runner.remote_boot(runner, nil) == :ok 157 | error = wrap_exit(runner, fn -> Runner.call(runner, self(), fn -> raise "boom" end) end) 158 | assert {:exit, {%RuntimeError{message: "boom"}, _}} = error 159 | assert_receive :stopped 160 | assert Runner.shutdown(runner) == :ok 161 | end 162 | 163 | test "multi use" do 164 | {:ok, runner} = mock_successful_runner(4) 165 | Process.monitor(runner) 166 | assert Runner.remote_boot(runner, nil) == :ok 167 | 168 | error = wrap_exit(runner, fn -> Runner.call(runner, self(), fn -> raise "boom" end) end) 169 | assert {:exit, {%RuntimeError{message: "boom"}, _}} = error 170 | refute_receive :stopped 171 | refute_receive {:DOWN, _ref, :process, ^runner, _} 172 | assert Runner.call(runner, self(), fn -> :works end) == {:works, []} 173 | assert Runner.shutdown(runner) == :ok 174 | end 175 | end 176 | 177 | describe "execution timeout" do 178 | test "single use" do 179 | timeout = 100 180 | {:ok, runner} = mock_successful_runner(3, timeout: timeout, single_use: true) 181 | 182 | Process.monitor(runner) 183 | assert Runner.remote_boot(runner, nil) == :ok 184 | 185 | error = 186 | wrap_exit(runner, fn -> 187 | Runner.call(runner, self(), fn -> Process.sleep(timeout * 2) end) 188 | end) 189 | 190 | assert error == {:exit, :timeout} 191 | 192 | assert_receive :stopped 193 | assert_receive {:DOWN, _ref, :process, _, :killed} 194 | assert Runner.shutdown(runner) == :ok 195 | end 196 | 197 | test "multi use" do 198 | timeout = 100 199 | {:ok, runner} = mock_successful_runner(4, timeout: timeout) 200 | 201 | Process.monitor(runner) 202 | assert Runner.remote_boot(runner, nil) == :ok 203 | 204 | error = 205 | wrap_exit(runner, fn -> 206 | Runner.call(runner, self(), fn -> Process.sleep(timeout * 2) end) 207 | end) 208 | 209 | assert error == {:exit, :timeout} 210 | 211 | refute_receive :stopped 212 | refute_receive {:DOWN, _ref, :process, ^runner, _} 213 | assert Runner.call(runner, self(), fn -> :works end, timeout: 1234) == {:works, []} 214 | assert Runner.shutdown(runner) == :ok 215 | end 216 | end 217 | 218 | describe "idle shutdown" do 219 | test "with time" do 220 | timeout = 500 221 | {:ok, runner} = mock_successful_runner(1, idle_shutdown_after: timeout) 222 | 223 | 
223 |       Process.unlink(runner)
224 |       Process.monitor(runner)
225 |       assert Runner.remote_boot(runner, nil) == :ok
226 | 
227 |       assert_receive :stopped, timeout * 2
228 |       assert_receive {:DOWN, _ref, :process, ^runner, _}
229 | 
230 |       {:ok, runner} = mock_successful_runner(2, idle_shutdown_after: timeout)
231 |       Process.unlink(runner)
232 |       Process.monitor(runner)
233 |       assert Runner.remote_boot(runner, nil) == :ok
234 |       assert Runner.call(runner, self(), fn -> :works end) == {:works, []}
235 |       assert_receive :stopped, timeout * 2
236 |       assert_receive {:DOWN, _ref, :process, ^runner, _}
237 |     end
238 | 
239 |     test "with timed check" do
240 |       agent = start_supervised!({Agent, fn -> false end})
241 |       timeout = 500
242 |       idle_after = {timeout, fn -> Agent.get(agent, & &1) end}
243 |       {:ok, runner} = mock_successful_runner(1, idle_shutdown_after: idle_after)
244 | 
245 |       Process.unlink(runner)
246 |       Process.monitor(runner)
247 |       assert Runner.remote_boot(runner, nil) == :ok
248 | 
249 |       refute_receive {:DOWN, _ref, :process, ^runner, _}, timeout * 2
250 |       Agent.update(agent, fn _ -> true end)
251 |       assert_receive :stopped, timeout * 2
252 |       assert_receive {:DOWN, _ref, :process, ^runner, _}
253 |     end
254 |   end
255 | 
256 |   describe "code_sync" do
257 |     test "copy_paths: true, copies the code paths and extracts on boot" do
258 |       mock = CodeSyncMock.new()
259 |       # the 4th invocation is the rpc to diff code paths
260 |       code_sync = FLAME.CodeSync.new(mock.opts)
261 |       stream = FLAME.CodeSync.package_to_stream(code_sync)
262 | 
263 |       {:ok, runner} = mock_successful_runner(4, code_sync: mock.opts)
264 | 
265 |       Process.monitor(runner)
266 |       assert Runner.remote_boot(runner, stream) == :ok
267 |       assert Runner.call(runner, self(), fn -> :works end, timeout: 1234) == {:works, []}
268 |       assert Runner.shutdown(runner) == :ok
269 |       # called on remote boot
270 |       assert_receive {CodeSyncMock, {_mock, :extract}}
271 | 
272 |       # called on :works call
273 |       assert_receive {CodeSyncMock, {_mock, :extract}}
274 |     end
275 | 
276 |     test "noops by default" do
277 |       {:ok, runner} = mock_successful_runner(3)
278 | 
279 |       Process.monitor(runner)
280 |       assert Runner.remote_boot(runner, nil) == :ok
281 |       assert Runner.call(runner, self(), fn -> :works end, timeout: 1234) == {:works, []}
282 |       assert Runner.shutdown(runner) == :ok
283 |       refute_receive {CodeSyncMock, _}
284 |     end
285 |   end
286 | end
287 | 
--------------------------------------------------------------------------------
/test/support/code_sync_mock.ex:
--------------------------------------------------------------------------------
1 | defmodule FLAME.Test.CodeSyncMock.Mod1 do
2 | end
3 | 
4 | defmodule FLAME.Test.CodeSyncMock.Mod1Modified do
5 | end
6 | 
7 | defmodule FLAME.Test.CodeSyncMock.Mod2 do
8 | end
9 | 
10 | defmodule FLAME.Test.CodeSyncMock.Mod3 do
11 | end
12 | 
13 | defmodule FLAME.Test.CodeSyncMock do
14 |   defstruct opts: nil, id: nil, extract_dir: nil
15 |   alias FLAME.Test.CodeSyncMock
16 | 
17 |   def new(opts \\ []) do
18 |     test_pid = self()
19 |     id = System.unique_integer([:positive])
20 |     tmp_dir = File.cwd!() |> Path.join("tmp") |> Path.expand()
21 |     working_dir = tmp_dir |> Path.join("#{id}") |> Path.expand()
22 |     File.rm_rf!(working_dir)
23 |     mod1_dir = Path.join([working_dir, "one", "ebin"])
24 |     mod2_dir = Path.join([working_dir, "two", "ebin"])
25 |     File.mkdir_p!(mod1_dir)
26 |     File.mkdir_p!(mod2_dir)
27 | 
28 |     File.write!(
29 |       Path.join(mod1_dir, "Elixir.FLAME.Test.CodeSyncMock.Mod1.beam"),
30 |       obj_code(FLAME.Test.CodeSyncMock.Mod1)
31 |     )
32 | 
33 |     File.write!(
34 |       Path.join(mod2_dir, "Elixir.FLAME.Test.CodeSyncMock.Mod2.beam"),
35 |       obj_code(FLAME.Test.CodeSyncMock.Mod2)
36 |     )
37 | 
38 |     extract_dir = Path.join([tmp_dir, "#{id}", "extracted_code"])
39 |     File.mkdir_p!(extract_dir)
40 | 
41 |     get_path =
42 |       fn ->
43 |         working_dir
44 |         |> Path.join("*/ebin")
45 |         |> Path.wildcard()
46 |       end
47 | 
48 |     default_opts = [
49 |       start_apps: true,
50 |       sync_beams: [working_dir],
51 |       tmp_dir: {Function, :identity, [tmp_dir]},
52 |       extract_dir: {__MODULE__, :extract_dir, [id, test_pid, extract_dir]},
53 |       get_path: get_path
54 |     ]
55 | 
56 |     %CodeSyncMock{id: id, opts: Keyword.merge(default_opts, opts), extract_dir: extract_dir}
57 |   end
58 | 
59 |   def extract_dir(id, test_pid, extract_dir) do
60 |     send(test_pid, {CodeSyncMock, {id, :extract}})
61 |     extract_dir
62 |   end
63 | 
64 |   def simulate_changes(%CodeSyncMock{id: id} = mock) do
65 |     # mod1 is modified
66 |     mod1_dir = Path.join([mfa(mock.opts[:tmp_dir]), "#{id}", "one", "ebin"])
67 |     mod2_dir = Path.join([mfa(mock.opts[:tmp_dir]), "#{id}", "two", "ebin"])
68 | 
69 |     File.write!(
70 |       Path.join(mod1_dir, "Elixir.FLAME.Test.CodeSyncMock.Mod1.beam"),
71 |       obj_code(FLAME.Test.CodeSyncMock.Mod1Modified)
72 |     )
73 | 
74 |     # mod2 is deleted
75 |     File.rm!(Path.join(mod2_dir, "Elixir.FLAME.Test.CodeSyncMock.Mod2.beam"))
76 | 
77 |     # mod3 is added
78 |     File.write!(
79 |       Path.join(mod1_dir, "Elixir.FLAME.Test.CodeSyncMock.Mod3.beam"),
80 |       obj_code(FLAME.Test.CodeSyncMock.Mod3)
81 |     )
82 | 
83 |     :ok
84 |   end
85 | 
86 |   defp mfa({mod, func, args}), do: apply(mod, func, args)
87 | 
88 |   def extracted_rel_paths(%CodeSyncMock{} = mock) do
89 |     extracted_beams = Path.wildcard(Path.join(mfa(mock.opts[:extract_dir]), "**/*.beam"))
90 | 
91 |     Enum.map(extracted_beams, fn path ->
92 |       path
93 |       |> Path.relative_to_cwd()
94 |       |> Path.relative_to(Path.join(["tmp", "#{mock.id}", "extracted_code", File.cwd!()]))
95 |       |> Path.relative_to(Path.join(["tmp", "#{mock.id}"]))
96 |     end)
97 |   end
98 | 
99 |   defp obj_code(mod) do
100 |     {^mod, beam_code, _path} = :code.get_object_code(mod)
101 |     beam_code
102 |   end
103 | end
104 | 
--------------------------------------------------------------------------------
/test/support/trackable.ex:
--------------------------------------------------------------------------------
1 | defmodule MyTrackable do
2 |   defstruct [:pid, :ref, :name]
3 | 
4 |   defimpl FLAME.Trackable do
5 |     def track(%{ref: ref, name: name} = data, acc, node) do
6 |       ^node = node(ref)
7 |       parent = self()
8 | 
9 |       {pid, monitor_ref} =
10 |         Node.spawn_monitor(node, fn ->
11 |           Process.register(self(), name)
12 |           send(parent, {ref, :started})
13 | 
14 |           receive do
15 |             {^ref, :stop} -> :ok
16 |           end
17 |         end)
18 | 
19 |       receive do
20 |         {^ref, :started} ->
21 |           Process.demonitor(monitor_ref)
22 |           {%{data | pid: pid}, [pid | acc]}
23 | 
24 |         {:DOWN, ^monitor_ref, _, _, reason} ->
25 |           exit(reason)
26 |       end
27 |     end
28 |   end
29 | end
30 | 
--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
1 | ExUnit.start()
2 | 
--------------------------------------------------------------------------------