├── .clang-format ├── .clang-tidy ├── .dockerignore ├── .env ├── .gdbinit ├── .github └── workflows │ ├── release.yml │ └── tests.yml ├── .gitignore ├── .readthedocs.yaml ├── .ycm_extra_conf.py ├── CMakeLists.txt ├── LICENSE.md ├── README.md ├── VERSION ├── bin ├── cli.sh ├── create_venv.sh ├── inv_wrapper.sh ├── planner_entrypoint.sh ├── run_clang_tidy.py ├── wait_for_venv.sh └── workon.sh ├── cliff.toml ├── cmake └── ExternalProjects.cmake ├── codecov.yml ├── conan-profile.txt ├── dist-test ├── build.sh ├── build_internal.sh ├── dev_server.sh └── run.sh ├── docker-compose.yml ├── docker ├── faabric-base.dockerfile ├── faabric.dockerfile └── planner.dockerfile ├── docs ├── .gitignore ├── Doxyfile ├── conf.py ├── index.rst └── source │ ├── development.md │ └── mpi.md ├── examples ├── CMakeLists.txt ├── check.cpp └── server.cpp ├── faabric.cpp ├── include └── faabric │ ├── batch-scheduler │ ├── BatchScheduler.h │ ├── BinPackScheduler.h │ ├── CompactScheduler.h │ ├── DecisionCache.h │ ├── SchedulingDecision.h │ └── SpotScheduler.h │ ├── endpoint │ ├── FaabricEndpoint.h │ └── FaabricEndpointHandler.h │ ├── executor │ ├── Executor.h │ ├── ExecutorContext.h │ ├── ExecutorFactory.h │ └── ExecutorTask.h │ ├── flat │ └── .gitignore │ ├── mpi │ ├── MpiContext.h │ ├── MpiMessage.h │ ├── MpiWorld.h │ ├── MpiWorldRegistry.h │ └── mpi.h │ ├── planner │ ├── Planner.h │ ├── PlannerApi.h │ ├── PlannerClient.h │ ├── PlannerEndpointHandler.h │ ├── PlannerServer.h │ └── PlannerState.h │ ├── redis │ └── Redis.h │ ├── runner │ └── FaabricMain.h │ ├── scheduler │ ├── FunctionCallApi.h │ ├── FunctionCallClient.h │ ├── FunctionCallServer.h │ ├── InMemoryMessageQueue.h │ └── Scheduler.h │ ├── snapshot │ ├── SnapshotApi.h │ ├── SnapshotClient.h │ ├── SnapshotRegistry.h │ └── SnapshotServer.h │ ├── state │ ├── InMemoryStateKeyValue.h │ ├── InMemoryStateRegistry.h │ ├── RedisStateKeyValue.h │ ├── State.h │ ├── StateClient.h │ ├── StateKeyValue.h │ └── StateServer.h │ ├── transport │ ├── 
Message.h │ ├── MessageEndpoint.h │ ├── MessageEndpointClient.h │ ├── MessageEndpointServer.h │ ├── PointToPointBroker.h │ ├── PointToPointCall.h │ ├── PointToPointClient.h │ ├── PointToPointServer.h │ ├── common.h │ ├── macros.h │ └── tcp │ │ ├── Address.h │ │ ├── RecvSocket.h │ │ ├── SendSocket.h │ │ ├── Socket.h │ │ └── SocketOptions.h │ ├── util │ ├── ExecGraph.h │ ├── PeriodicBackgroundThread.h │ ├── asio.h │ ├── barrier.h │ ├── batch.h │ ├── bytes.h │ ├── chaining.h │ ├── clock.h │ ├── compare.h │ ├── concurrent_map.h │ ├── config.h │ ├── crash.h │ ├── delta.h │ ├── dirty.h │ ├── environment.h │ ├── exception.h │ ├── files.h │ ├── func.h │ ├── gids.h │ ├── hwloc.h │ ├── json.h │ ├── latch.h │ ├── locks.h │ ├── logging.h │ ├── macros.h │ ├── memory.h │ ├── network.h │ ├── ptp.h │ ├── queue.h │ ├── random.h │ ├── snapshot.h │ ├── state.h │ ├── string_tools.h │ ├── testing.h │ ├── timing.h │ └── userfaultfd.h │ └── wasm │ └── wasm.h ├── leak-sanitizer-ignorelist.txt ├── pyproject.toml ├── requirements.txt ├── src ├── batch-scheduler │ ├── BatchScheduler.cpp │ ├── BinPackScheduler.cpp │ ├── CMakeLists.txt │ ├── CompactScheduler.cpp │ ├── DecisionCache.cpp │ ├── SchedulingDecision.cpp │ └── SpotScheduler.cpp ├── endpoint │ ├── CMakeLists.txt │ ├── FaabricEndpoint.cpp │ └── FaabricEndpointHandler.cpp ├── executor │ ├── CMakeLists.txt │ ├── Executor.cpp │ ├── ExecutorContext.cpp │ ├── ExecutorFactory.cpp │ └── ExecutorTask.cpp ├── flat │ ├── .gitignore │ ├── CMakeLists.txt │ ├── faabric.fbs │ └── flat.cpp ├── mpi │ ├── CMakeLists.txt │ ├── MpiContext.cpp │ ├── MpiMessage.cpp │ ├── MpiWorld.cpp │ ├── MpiWorldRegistry.cpp │ └── mpi.cpp ├── planner │ ├── CMakeLists.txt │ ├── Planner.cpp │ ├── PlannerClient.cpp │ ├── PlannerEndpointHandler.cpp │ ├── PlannerServer.cpp │ ├── is_app_migratable.cpp │ ├── planner.proto │ └── planner_server.cpp ├── proto │ ├── CMakeLists.txt │ └── faabric.proto ├── redis │ ├── CMakeLists.txt │ └── Redis.cpp ├── runner │ ├── CMakeLists.txt │ 
└── FaabricMain.cpp ├── scheduler │ ├── CMakeLists.txt │ ├── FunctionCallClient.cpp │ ├── FunctionCallServer.cpp │ └── Scheduler.cpp ├── snapshot │ ├── CMakeLists.txt │ ├── SnapshotClient.cpp │ ├── SnapshotRegistry.cpp │ └── SnapshotServer.cpp ├── state │ ├── CMakeLists.txt │ ├── InMemoryStateKeyValue.cpp │ ├── InMemoryStateRegistry.cpp │ ├── RedisStateKeyValue.cpp │ ├── State.cpp │ ├── StateClient.cpp │ ├── StateKeyValue.cpp │ └── StateServer.cpp ├── transport │ ├── CMakeLists.txt │ ├── Message.cpp │ ├── MessageEndpoint.cpp │ ├── MessageEndpointClient.cpp │ ├── MessageEndpointServer.cpp │ ├── PointToPointBroker.cpp │ ├── PointToPointClient.cpp │ ├── PointToPointServer.cpp │ └── tcp │ │ ├── Address.cpp │ │ ├── CMakeLists.txt │ │ ├── RecvSocket.cpp │ │ ├── SendSocket.cpp │ │ ├── Socket.cpp │ │ └── SocketOptions.cpp └── util │ ├── CMakeLists.txt │ ├── ExecGraph.cpp │ ├── PeriodicBackgroundThread.cpp │ ├── barrier.cpp │ ├── batch.cpp │ ├── bytes.cpp │ ├── clock.cpp │ ├── config.cpp │ ├── crash.cpp │ ├── delta.cpp │ ├── dirty.cpp │ ├── environment.cpp │ ├── files.cpp │ ├── func.cpp │ ├── gids.cpp │ ├── hwloc.cpp │ ├── json.cpp │ ├── latch.cpp │ ├── locks.cpp │ ├── logging.cpp │ ├── memory.cpp │ ├── network.cpp │ ├── ptp.cpp │ ├── queue.cpp │ ├── random.cpp │ ├── snapshot.cpp │ ├── state.cpp │ ├── string_tools.cpp │ ├── testing.cpp │ └── timing.cpp ├── tasks ├── __init__.py ├── call.py ├── dev.py ├── docker.py ├── docs.py ├── examples.py ├── format_code.py ├── git.py ├── tests.py └── util │ ├── __init__.py │ └── env.py ├── tests ├── dist │ ├── CMakeLists.txt │ ├── DistTestExecutor.cpp │ ├── DistTestExecutor.h │ ├── dist_test_fixtures.h │ ├── init.cpp │ ├── init.h │ ├── main.cpp │ ├── mpi │ │ ├── CMakeLists.txt │ │ ├── benchmarks │ │ │ ├── mpi_allreduce.cpp │ │ │ ├── mpi_bench.cpp │ │ │ ├── mpi_bench.hpp │ │ │ └── mpi_send_recv.cpp │ │ ├── examples │ │ │ ├── mpi_allgather.cpp │ │ │ ├── mpi_allreduce.cpp │ │ │ ├── mpi_alltoall.cpp │ │ │ ├── mpi_alltoall_sleep.cpp │ │ │ 
├── mpi_barrier.cpp │ │ │ ├── mpi_bcast.cpp │ │ │ ├── mpi_cart_create.cpp │ │ │ ├── mpi_cartesian.cpp │ │ │ ├── mpi_checks.cpp │ │ │ ├── mpi_gather.cpp │ │ │ ├── mpi_helloworld.cpp │ │ │ ├── mpi_isendrecv.cpp │ │ │ ├── mpi_migration.cpp │ │ │ ├── mpi_order.cpp │ │ │ ├── mpi_reduce.cpp │ │ │ ├── mpi_reduce_many.cpp │ │ │ ├── mpi_scan.cpp │ │ │ ├── mpi_scatter.cpp │ │ │ ├── mpi_send.cpp │ │ │ ├── mpi_send_many.cpp │ │ │ ├── mpi_send_sync_async.cpp │ │ │ ├── mpi_sendrecv.cpp │ │ │ ├── mpi_status.cpp │ │ │ └── mpi_typesize.cpp │ │ ├── functions.cpp │ │ ├── mpi_native.cpp │ │ ├── mpi_native.h │ │ ├── native │ │ │ ├── CMakeLists.txt │ │ │ ├── bench_mpi_allreduce.cpp │ │ │ └── bench_mpi_send_recv.cpp │ │ ├── test_mpi_functions.cpp │ │ └── test_multiple_mpi_worlds.cpp │ ├── scheduler │ │ ├── functions.cpp │ │ ├── test_exec_graph.cpp │ │ ├── test_funcs.cpp │ │ ├── test_hosts.cpp │ │ ├── test_snapshots.cpp │ │ └── test_threads.cpp │ ├── server.cpp │ └── transport │ │ ├── functions.cpp │ │ ├── test_coordination.cpp │ │ └── test_point_to_point.cpp ├── test │ ├── CMakeLists.txt │ ├── batch-scheduler │ │ ├── test_batch_scheduler.cpp │ │ ├── test_binpack_scheduler.cpp │ │ ├── test_compact_scheduler.cpp │ │ ├── test_scheduling_decisions.cpp │ │ └── test_spot_scheduler.cpp │ ├── endpoint │ │ └── test_endpoint.cpp │ ├── executor │ │ ├── test_executor.cpp │ │ ├── test_executor_context.cpp │ │ └── test_executor_reaping.cpp │ ├── main.cpp │ ├── mpi │ │ ├── test_mpi_context.cpp │ │ ├── test_mpi_exec_graph.cpp │ │ ├── test_mpi_message.cpp │ │ ├── test_mpi_world.cpp │ │ ├── test_multiple_mpi_worlds.cpp │ │ └── test_remote_mpi_worlds.cpp │ ├── planner │ │ ├── test_planner_client_server.cpp │ │ └── test_planner_endpoint.cpp │ ├── proto │ │ └── test_proto.cpp │ ├── redis │ │ └── test_redis.cpp │ ├── runner │ │ └── test_main.cpp │ ├── scheduler │ │ ├── test_function_client_server.cpp │ │ ├── test_function_migration.cpp │ │ └── test_scheduler.cpp │ ├── snapshot │ │ ├── 
test_snapshot_client_server.cpp │ │ ├── test_snapshot_diffs.cpp │ │ └── test_snapshot_registry.cpp │ ├── state │ │ ├── test_redis_state.cpp │ │ ├── test_state.cpp │ │ └── test_state_server.cpp │ ├── transport │ │ ├── test_message.cpp │ │ ├── test_message_endpoint_client.cpp │ │ ├── test_message_server.cpp │ │ ├── test_point_to_point.cpp │ │ ├── test_point_to_point_groups.cpp │ │ └── test_tcp_sockets.cpp │ └── util │ │ ├── test_barrier.cpp │ │ ├── test_batch.cpp │ │ ├── test_bytes.cpp │ │ ├── test_concurrent_map.cpp │ │ ├── test_config.cpp │ │ ├── test_delta.cpp │ │ ├── test_dirty.cpp │ │ ├── test_environment.cpp │ │ ├── test_exec_graph.cpp │ │ ├── test_files.cpp │ │ ├── test_func.cpp │ │ ├── test_gids.cpp │ │ ├── test_hwloc.cpp │ │ ├── test_json.cpp │ │ ├── test_latch.cpp │ │ ├── test_locks.cpp │ │ ├── test_memory.cpp │ │ ├── test_network.cpp │ │ ├── test_periodic_thread.cpp │ │ ├── test_queue.cpp │ │ ├── test_random.cpp │ │ ├── test_snapshot.cpp │ │ ├── test_state.cpp │ │ ├── test_strings.cpp │ │ └── test_tokens.cpp └── utils │ ├── CMakeLists.txt │ ├── DummyExecutor.cpp │ ├── DummyExecutor.h │ ├── DummyExecutorFactory.cpp │ ├── DummyExecutorFactory.h │ ├── exec_graph_utils.cpp │ ├── faabric_utils.h │ ├── fixtures.h │ ├── http_utils.cpp │ ├── message_utils.cpp │ ├── planner_utils.cpp │ └── scheduling_utils.cpp └── thread-sanitizer-ignorelist.txt /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Mozilla 3 | 4 | --- 5 | Language: Cpp 6 | 7 | # Avoid excessive linebreaks on function definitions 8 | AlwaysBreakAfterDefinitionReturnType: None 9 | AlwaysBreakAfterReturnType: None 10 | 11 | IndentWidth: 4 12 | DerivePointerAlignment: false 13 | 14 | # Precise control over braces alignment 15 | BreakBeforeBraces: Custom 16 | BraceWrapping: 17 | AfterClass: true 18 | AfterEnum: true 19 | AfterExternBlock: true 20 | AfterFunction: true 21 | AfterStruct: true 22 | AfterUnion: true 23 | 
SplitEmptyFunction: false 24 | SplitEmptyRecord: false 25 | --- 26 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | Checks: > 4 | clang-analyzer-*, 5 | -clang-diagnostic-unknown-attributes, 6 | -clang-diagnostic-return-type, 7 | modernize-*, 8 | -modernize-use-trailing-return-type, 9 | readability-*, 10 | -readability-magic-numbers, 11 | -readability-redundant-string-cstr, 12 | -readability-implicit-bool-conversion 13 | 14 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | build 3 | venv 4 | docker/*.dockerfile 5 | 6 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | FAABRIC_VERSION=0.20.0 2 | FAABRIC_CLI_IMAGE=ghcr.io/faasm/faabric:0.20.0 3 | COMPOSE_PROJECT_NAME=faabric-dev 4 | CONAN_CACHE_MOUNT_SOURCE=./conan-cache/ 5 | -------------------------------------------------------------------------------- /.gdbinit: -------------------------------------------------------------------------------- 1 | 2 | handle SIGSEGV nostop noprint 3 | 4 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.ref }} 10 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 11 | 12 | jobs: 13 | build-images: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | image: [faabric, planner] 18 | steps: 19 | - name: "Get the code" 20 | uses: actions/checkout@v4 21 | - name: "Get tag version" 22 | run: echo 
"TAG_VERSION=${GITHUB_REF#refs/tags/v*}" >> $GITHUB_ENV 23 | - name: "Print tag version" 24 | run: echo ${{ env.TAG_VERSION }} 25 | - name: "Set up QEMU" 26 | uses: docker/setup-qemu-action@v3 27 | - name: "Set up Docker Buildx" 28 | uses: docker/setup-buildx-action@v3 29 | - name: "Log in to ACR" 30 | uses: docker/login-action@v3 31 | with: 32 | registry: ghcr.io 33 | username: faasm 34 | password: ${{ secrets.GHCR_PAT }} 35 | - name: "Build Faabric container" 36 | uses: docker/build-push-action@v5.2.0 37 | with: 38 | push: true 39 | file: docker/${{ matrix.image }}.dockerfile 40 | context: . 41 | tags: ghcr.io/faasm/${{ matrix.image }}:${{ env.TAG_VERSION }} 42 | build-args: FAABRIC_VERSION=${{ env.TAG_VERSION }} 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | work/ 3 | conan-cache/ 4 | # Clang 5 | .clangd 6 | compile_commands.json 7 | 8 | # Faabric config file 9 | faabric.ini 10 | 11 | # Ansible 12 | *.retry 13 | 14 | # Vim 15 | *.swp 16 | 17 | # Protobuf 18 | *.pb.h 19 | 20 | # C 21 | 22 | # Prerequisites 23 | *.d 24 | 25 | # Object files 26 | *.o 27 | *.ko 28 | *.obj 29 | *.elf 30 | 31 | # Linker output 32 | *.ilk 33 | *.map 34 | *.exp 35 | 36 | # Precompiled Headers 37 | *.gch 38 | *.pch 39 | 40 | # Libraries 41 | *.lib 42 | *.a 43 | *.la 44 | *.lo 45 | 46 | # Shared objects (inc. 
Windows DLLs) 47 | *.dll 48 | *.so 49 | *.so.* 50 | *.dylib 51 | 52 | # Executables 53 | *.exe 54 | *.out 55 | *.app 56 | *.i*86 57 | *.x86_64 58 | *.hex 59 | 60 | # Debug files 61 | *.dSYM/ 62 | *.su 63 | *.idb 64 | *.pdb 65 | 66 | # Kernel Module Compile Results 67 | *.mod* 68 | *.cmd 69 | .tmp_versions/ 70 | modules.order 71 | Module.symvers 72 | Mkfile.old 73 | dkms.conf 74 | 75 | # C++ 76 | 77 | # Prerequisites 78 | *.d 79 | 80 | # Compiled Object files 81 | *.slo 82 | *.lo 83 | *.o 84 | *.obj 85 | 86 | # Precompiled Headers 87 | *.gch 88 | *.pch 89 | 90 | # Compiled Dynamic libraries 91 | *.so 92 | *.dylib 93 | *.dll 94 | 95 | # Fortran module files 96 | *.mod 97 | *.smod 98 | 99 | # Compiled Static libraries 100 | *.lai 101 | *.la 102 | *.a 103 | *.lib 104 | 105 | # Executables 106 | *.exe 107 | *.out 108 | *.app 109 | 110 | # Python 111 | __pycache__/ 112 | *.pyc 113 | venv/ 114 | venv-bm/ 115 | *.egg-info/ 116 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-20.04" 5 | tools: 6 | python: "3.9" 7 | 8 | sphinx: 9 | configuration: docs/conf.py 10 | 11 | python: 12 | install: 13 | - requirements: requirements.txt 14 | -------------------------------------------------------------------------------- /.ycm_extra_conf.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, realpath, join, exists 2 | 3 | _PROJ_ROOT = dirname(realpath(__file__)) 4 | 5 | 6 | def Settings(**kwargs): 7 | venv_interpreter = join(_PROJ_ROOT, "venv", "bin", "python") 8 | 9 | if not exists(venv_interpreter): 10 | parent_root = dirname(dirname(_PROJ_ROOT)) 11 | venv_interpreter = join(parent_root, "venv", "bin", "python") 12 | 13 | return {"interpreter_path": venv_interpreter} 14 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Faabric [![Faabric tests](https://github.com/faasm/faabric/workflows/Tests/badge.svg?branch=main)](https://github.com/faasm/faabric/actions) [![License](https://img.shields.io/github/license/faasm/faabric.svg)](https://github.com/faasm/faabric/blob/main/LICENSE.md) [![Release](https://img.shields.io/github/release/faasm/faabric.svg)](https://github.com/faasm/faabric/releases/) [![Contributors](https://img.shields.io/github/contributors/faasm/faabric.svg)](https://github.com/faasm/faabric/graphs/contributors/) [![codecov](https://codecov.io/gh/faasm/faabric/branch/main/graph/badge.svg?token=F7HBQ84OSD)](https://codecov.io/gh/faasm/faabric) 2 | 3 | Faabric is a library that provides scheduling, messaging and state for 4 | distributed serverless runtimes. 5 | 6 | It began life as part of [Faasm](https://github.com/faasm/faasm), but is now a 7 | stand-alone library that can be used as a base for other distributed runtimes. 8 | 9 | See the [documentation](https://faabric.readthedocs.io/en/latest/) for more 10 | details. 11 | 12 | ## Usage 13 | 14 | You can see a simple example in the [examples](examples) directory. 15 | 16 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.20.0 2 | -------------------------------------------------------------------------------- /bin/cli.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | THIS_DIR=$(dirname $(readlink -f $0)) 6 | PROJ_ROOT=${THIS_DIR}/.. 
7 | 8 | pushd ${PROJ_ROOT} > /dev/null 9 | 10 | echo "Running Faabric CLI (${FAABRIC_CLI_IMAGE})" 11 | 12 | INNER_SHELL=${SHELL:-"/bin/bash"} 13 | 14 | # Make sure the CLI is running already in the background (avoids creating a new 15 | # container every time) 16 | docker compose \ 17 | up \ 18 | --no-recreate \ 19 | -d \ 20 | cli 21 | 22 | FAABRIC_DOCKER="on" ./bin/wait_for_venv.sh 23 | 24 | # Attach to the CLI container 25 | docker compose \ 26 | exec \ 27 | cli \ 28 | ${INNER_SHELL} 29 | 30 | popd > /dev/null 31 | -------------------------------------------------------------------------------- /bin/create_venv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | THIS_DIR=$(dirname $(readlink -f $0)) 6 | PROJ_ROOT=${THIS_DIR}/.. 7 | 8 | # Set different virtual environment paths so that these don't clash when 9 | # mounting the code in a development container 10 | VENV_PATH="undetected" 11 | if [[ -z "$FAABRIC_DOCKER" ]]; then 12 | VENV_PATH="${PROJ_ROOT}/venv-bm" 13 | else 14 | VENV_PATH="${PROJ_ROOT}/venv" 15 | fi 16 | 17 | PIP=${VENV_PATH}/bin/pip3 18 | 19 | function pip_cmd { 20 | source ${VENV_PATH}/bin/activate && ${PIP} "$@" 21 | } 22 | 23 | pushd ${PROJ_ROOT} >> /dev/null 24 | 25 | if [ ! 
-d ${VENV_PATH} ]; then 26 | python3 -m venv ${VENV_PATH} 27 | fi 28 | 29 | pip_cmd install -U pip 30 | pip_cmd install -U setuptools wheel 31 | pip_cmd install -r requirements.txt 32 | 33 | touch ${VENV_PATH}/faabric_venv.BUILT 34 | 35 | popd >> /dev/null 36 | -------------------------------------------------------------------------------- /bin/inv_wrapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Wrapper script for running invoke in virtual env; "$@" is quoted so each argument is forwarded to inv unchanged 5 | 6 | source bin/workon.sh && inv "$@" 7 | -------------------------------------------------------------------------------- /bin/planner_entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # The planner needs to support being used both from faabric and from faasm, 6 | # and in both cases it must support mounting the built binaries to update it. 7 | # Thus, we add a wrapper around the entrypoint command that takes as an input 8 | # the binary dir, and waits until the binary file exists 9 | 10 | BINARY_DIR=${1:-/build/faabric/static/bin} 11 | BINARY_FILE=${BINARY_DIR}/planner_server 12 | 13 | until test -f ${BINARY_FILE} 14 | do 15 | echo "Waiting for planner server binary to be available at: ${BINARY_FILE}" 16 | sleep 3 17 | done 18 | 19 | # Once the binary file is available, run it 20 | ${BINARY_FILE} 21 | -------------------------------------------------------------------------------- /bin/wait_for_venv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | THIS_DIR=$(dirname $(readlink -f $0)) 6 | PROJ_ROOT=${THIS_DIR}/.. 
7 | 8 | pushd ${PROJ_ROOT} >> /dev/null 9 | 10 | if [[ -z "$FAABRIC_DOCKER" ]]; then 11 | VENV_PATH="${PROJ_ROOT}/venv-bm" 12 | else 13 | VENV_PATH="${PROJ_ROOT}/venv" 14 | fi 15 | 16 | until test -f ${VENV_PATH}/faabric_venv.BUILT 17 | do 18 | echo "Waiting for python virtual environment to be ready..." 19 | sleep 3 20 | done 21 | 22 | popd >> /dev/null 23 | -------------------------------------------------------------------------------- /cliff.toml: -------------------------------------------------------------------------------- 1 | # configuration file for git-cliff (0.1.0) 2 | 3 | [changelog] 4 | # changelog header 5 | header = """ 6 | Here is what has changed since last release: 7 | 8 | """ 9 | # template for the changelog body 10 | # https://tera.netlify.app/docs/#introduction 11 | body = """ 12 | {% if version %}\ 13 | ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }} 14 | {% else %}\ 15 | ## [unreleased] 16 | {% endif %}\ 17 | {% for commit in commits %} * {{ commit.message | upper_first | split(pat="\n") | first }} 18 | {% endfor %} 19 | 20 | """ 21 | # remove the leading and trailing whitespace from the template 22 | trim = true 23 | # changelog footer 24 | footer = """ 25 | 26 | """ 27 | 28 | [git] 29 | # parse the commits based on https://www.conventionalcommits.org 30 | conventional_commits = false 31 | # filter out the commits that are not conventional 32 | filter_unconventional = false 33 | # regex for parsing and grouping commits 34 | commit_parsers = [ 35 | { message = "^feat", group = "Features"}, 36 | { message = "^fix", group = "Bug Fixes"}, 37 | { message = "^doc", group = "Documentation"}, 38 | { message = "^perf", group = "Performance"}, 39 | { message = "^refactor", group = "Refactor"}, 40 | { message = "^style", group = "Styling"}, 41 | { message = "^test", group = "Testing"}, 42 | { message = "^chore\\(release\\): prepare for", skip = true}, 43 | { message = "^chore", group = "Miscellaneous 
Tasks"}, 44 | { body = ".*security", group = "Security"}, 45 | ] 46 | # filter out the commits that are not matched by commit parsers 47 | filter_commits = false 48 | # glob pattern for matching git tags 49 | tag_pattern = "v[0-9]*" 50 | # regex for skipping tags 51 | skip_tags = "v0.1.0-beta.1" 52 | # regex for ignoring tags 53 | ignore_tags = "" 54 | # sort the tags chronologically 55 | date_order = true 56 | # sort the commits inside sections by oldest/newest order 57 | sort_commits = "newest" 58 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | # Ideally we would not keep protobuf generated files in the source tree. 3 | # Until we fix that, we need to ignore them here 4 | - "**/*.pb.h" 5 | # Same with flatbuffers 6 | - "include/faabric/flat" 7 | # We exclude the tests from the coverage results 8 | - "tests" 9 | 10 | # Don't report actions as failed unless there's more than a 1% decrease in 11 | # coverage 12 | coverage: 13 | status: 14 | project: 15 | default: 16 | threshold: 1% 17 | -------------------------------------------------------------------------------- /conan-profile.txt: -------------------------------------------------------------------------------- 1 | [settings] 2 | os=Linux 3 | os_build=Linux 4 | arch=x86_64 5 | arch_build=x86_64 6 | compiler=clang 7 | compiler.version=17 8 | compiler.libcxx=libstdc++11 9 | compiler.cppstd=20 10 | build_type=Release 11 | [options] 12 | [build_requires] 13 | [env] 14 | CC=/usr/bin/clang-17 15 | CXX=/usr/bin/clang++-17 16 | -------------------------------------------------------------------------------- /dist-test/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | export PROJ_ROOT=$(dirname $(dirname $(readlink -f $0))) 6 | pushd ${PROJ_ROOT} >> /dev/null 7 | 8 | # Run the build 9 | export 
FAABRIC_DEPLOYMENT_TYPE=gha-ci 10 | docker compose \ 11 | run \ 12 | --rm \ 13 | cli \ 14 | /code/faabric/dist-test/build_internal.sh 15 | 16 | popd >> /dev/null 17 | -------------------------------------------------------------------------------- /dist-test/build_internal.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | export PROJ_ROOT=$(dirname $(dirname $(readlink -f $0))) 6 | pushd ${PROJ_ROOT} >> /dev/null 7 | 8 | # Activate the Python venv 9 | source ./bin/workon.sh 10 | 11 | # Run the debug build 12 | inv dev.cmake --build=Debug --clean 13 | inv dev.cc faabric_dist_tests 14 | inv dev.cc faabric_dist_test_server 15 | inv dev.cc planner_server 16 | 17 | popd >> /dev/null 18 | -------------------------------------------------------------------------------- /dist-test/dev_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | THIS_DIR=$(dirname $(readlink -f $0)) 5 | PROJ_ROOT=${THIS_DIR}/.. 
6 | pushd ${PROJ_ROOT} > /dev/null 7 | 8 | export OVERRIDE_CPU_COUNT=4 9 | 10 | if [[ -z "$1" ]]; then 11 | docker compose up -d dist-test-server 12 | elif [[ "$1" == "restart" ]]; then 13 | docker compose restart dist-test-server 14 | elif [[ "$1" == "stop" ]]; then 15 | docker compose stop dist-test-server 16 | elif [[ "$1" == "rm" ]]; then 17 | docker compose rm dist-test-server 18 | else 19 | echo "Unrecognised argument: $1" 20 | echo "" 21 | echo "Usage:" 22 | echo "" 23 | echo "./dist-test/dev_server.sh [restart|stop|rm]" 24 | exit 1 25 | fi 26 | 27 | popd > /dev/null 28 | -------------------------------------------------------------------------------- /dist-test/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PROJ_ROOT=$(dirname $(dirname $(readlink -f $0))) 4 | pushd ${PROJ_ROOT} >> /dev/null 5 | 6 | RETURN_VAL=0 7 | 8 | export OVERRIDE_CPU_COUNT=4 9 | 10 | # Run the test server in the background 11 | docker compose \ 12 | up \ 13 | -d \ 14 | dist-test-server 15 | 16 | # Run the tests directly 17 | docker compose \ 18 | run \ 19 | --rm \ 20 | cli \ 21 | /build/faabric/static/bin/faabric_dist_tests 22 | 23 | RETURN_VAL=$? 24 | 25 | echo "-------------------------------------------" 26 | echo " SERVER LOGS " 27 | echo "-------------------------------------------" 28 | docker compose logs dist-test-server 29 | 30 | # Stop everything 31 | docker compose stop 32 | 33 | popd >> /dev/null 34 | 35 | exit $RETURN_VAL 36 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | redis: 5 | image: redis 6 | 7 | planner: 8 | image: ghcr.io/faasm/planner:${FAABRIC_VERSION} 9 | # The planner entrypoint supports changing the location of the 10 | # planner_server binary. 
If the binary is not found, it will wait until 11 | # it is built 12 | command: ${PLANNER_ENTRYPOINT_BINARY:-/build/faabric/static/bin} 13 | volumes: 14 | - ./:/code/faabric 15 | - ./build:/build/faabric 16 | environment: 17 | - LOG_LEVEL=debug 18 | 19 | cli: 20 | image: ${FAABRIC_CLI_IMAGE} 21 | volumes: 22 | - /var/run/docker.sock:/var/run/docker.sock 23 | - /usr/bin/docker:/usr/bin/docker 24 | - ./:/code/faabric 25 | - ./build:/build/faabric 26 | - ${CONAN_CACHE_MOUNT_SOURCE}:/root/.conan 27 | working_dir: /code/faabric 28 | stdin_open: true 29 | tty: true 30 | privileged: true 31 | environment: 32 | - DEPLOYMENT_TYPE=${FAABRIC_DEPLOYMENT_TYPE:-compose} 33 | - LOG_LEVEL=${LOG_LEVEL:-debug} 34 | - PLANNER_HOST=planner 35 | - PLANNER_PORT=8080 36 | - POINT_TO_POINT_SERVER_THREADS=2 37 | - REDIS_STATE_HOST=redis 38 | - REDIS_QUEUE_HOST=redis 39 | - OVERRIDE_CPU_COUNT=${OVERRIDE_CPU_COUNT:-0} 40 | depends_on: 41 | - planner 42 | - redis 43 | 44 | dist-test-server: 45 | image: ${FAABRIC_CLI_IMAGE} 46 | volumes: 47 | - ./:/code/faabric 48 | - ./build:/build/faabric 49 | - ./conan-cache/:/root/.conan 50 | working_dir: /build/faabric/static 51 | environment: 52 | - DEPLOYMENT_TYPE=${FAABRIC_DEPLOYMENT_TYPE:-compose} 53 | - LOG_LEVEL=debug 54 | - PLANNER_HOST=planner 55 | - PLANNER_PORT=8080 56 | - POINT_TO_POINT_SERVER_THREADS=2 57 | - REDIS_STATE_HOST=redis 58 | - REDIS_QUEUE_HOST=redis 59 | - OVERRIDE_CPU_COUNT=${OVERRIDE_CPU_COUNT:-0} 60 | command: ./bin/faabric_dist_test_server 61 | depends_on: 62 | - planner 63 | - redis 64 | -------------------------------------------------------------------------------- /docker/faabric.dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/faasm/faabric-base:0.15.0 2 | ARG FAABRIC_VERSION 3 | 4 | # faabric-base image is not re-built often, so tag may be behind 5 | SHELL ["/bin/bash", "-c"] 6 | 7 | # Flag to say we're in a container 8 | ENV FAABRIC_DOCKER="on" 9 | 10 | # Put 
the code in place 11 | WORKDIR /code 12 | RUN git clone \ 13 | -b v${FAABRIC_VERSION} https://github.com/faasm/faabric \ 14 | && git config --global --add safe.directory /code/faabric 15 | 16 | WORKDIR /code/faabric 17 | 18 | # Python set-up and code builds 19 | RUN ./bin/create_venv.sh \ 20 | && source venv/bin/activate \ 21 | # Static build 22 | && inv dev.cmake --build=Release \ 23 | && inv dev.cc faabric \ 24 | # Shared build 25 | && inv dev.cmake --shared --build=Release \ 26 | && inv dev.cc faabric --shared \ 27 | && inv dev.install faabric --shared 28 | 29 | # GDB config, allow loading repo-specific config 30 | RUN echo "set auto-load safe-path /" > /root/.gdbinit 31 | 32 | # CLI setup 33 | ENV TERM=xterm-256color 34 | 35 | RUN echo ". /code/faabric/bin/workon.sh" >> ~/.bashrc 36 | CMD ["/bin/bash", "-l"] 37 | -------------------------------------------------------------------------------- /docker/planner.dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/faasm/faabric-base:0.15.0 2 | ARG FAABRIC_VERSION 3 | 4 | # Flag to say we're in a container 5 | ENV FAABRIC_DOCKER="on" 6 | SHELL ["/bin/bash", "-c"] 7 | 8 | # Put the code in place 9 | RUN rm -rf /code \ 10 | && mkdir -p /code/faabric \ 11 | && git clone \ 12 | -b v${FAABRIC_VERSION} \ 13 | https://github.com/faasm/faabric \ 14 | /code/faabric \ 15 | && cd /code/faabric \ 16 | && ./bin/create_venv.sh \ 17 | && source venv/bin/activate \ 18 | && inv dev.cmake --build=Release \ 19 | && inv dev.cc planner_server 20 | 21 | ENTRYPOINT ["/code/faabric/bin/planner_entrypoint.sh"] 22 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | doxygen/ 2 | apidoc/ 3 | sphinx/ 4 | -------------------------------------------------------------------------------- /docs/Doxyfile: 
-------------------------------------------------------------------------------- 1 | DOXYFILE_ENCODING = UTF-8 2 | PROJECT_NAME = "Faabric" 3 | OUTPUT_DIRECTORY = doxygen 4 | OUTPUT_LANGUAGE = English 5 | 6 | FULL_PATH_NAMES = NO 7 | MARKDOWN_SUPPORT = YES 8 | RECURSIVE = YES 9 | QUIET = YES 10 | 11 | GENERATE_LATEX = NO 12 | GENERATE_XML = YES 13 | GENERATE_HTML = NO 14 | 15 | INPUT = ../include ../src 16 | FILE_PATTERNS = *.h *.cpp 17 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, realpath, join 2 | from subprocess import run 3 | from os import makedirs 4 | 5 | # ---------------------------------------- 6 | # This is the Sphinx configuration file, which wraps 7 | # the calls to Doxygen and Breathe. 8 | # ---------------------------------------- 9 | 10 | DOCS_ROOT = dirname(realpath(__file__)) 11 | DOXYGEN_OUT = join(DOCS_ROOT, "doxygen", "xml") 12 | APIDOC_OUT_DIR = join(DOCS_ROOT, "apidoc") 13 | 14 | makedirs(APIDOC_OUT_DIR, exist_ok=True) 15 | 16 | run("doxygen", cwd=DOCS_ROOT, check=True, shell=True) 17 | 18 | run( 19 | "breathe-apidoc {} -o {} -f -m".format(DOXYGEN_OUT, APIDOC_OUT_DIR), 20 | cwd=DOCS_ROOT, 21 | check=True, 22 | shell=True, 23 | ) 24 | 25 | project = "Faabric" 26 | copyright = "2022, Simon Shillaker" 27 | author = "Simon Shillaker" 28 | 29 | extensions = ["breathe", "myst_parser", "sphinx_rtd_theme"] 30 | 31 | templates_path = ["source/templates"] 32 | html_static_path = ["source/static"] 33 | 34 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 35 | 36 | html_theme = "sphinx_rtd_theme" 37 | 38 | breathe_projects = {"Faabric": DOXYGEN_OUT} 39 | breathe_default_project = "Faabric" 40 | breathe_default_members = ("members", "undoc-members") 41 | -------------------------------------------------------------------------------- /docs/index.rst: 
-------------------------------------------------------------------------------- 1 | Faabric Documentation 2 | =================================== 3 | 4 | These docs include some pages on specific topics, as well as the autogenerated 5 | docs from the code. 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | :caption: Topics 10 | 11 | source/development 12 | source/mpi 13 | 14 | .. toctree:: 15 | :maxdepth: 1 16 | :caption: Code 17 | 18 | apidoc/namespacelist 19 | apidoc/classlist 20 | -------------------------------------------------------------------------------- /docs/source/mpi.md: -------------------------------------------------------------------------------- 1 | # Native MPI execution in Faabric 2 | 3 | Faabric supports linking MPI binaries against our custom MPI implementation 4 | used in [Faasm](https://github.com/faasm/faasm). This way, you can test the 5 | compliance of your MPI application with our API (a subset of the standard) 6 | without the burden of cross-compiling to WebAssembly. 7 | 8 | To run native MPI applications, you can check the examples in the distributed 9 | tests (`tests/dist/mpi/examples`). If you need to implement a new method, check 10 | first how it is done in [faasm](https://github.com/faasm/faasm/blob/main/src/wavm/mpi.cpp).
11 | 12 | To run the distributed test set for MPI, follow the instructions to set up the 13 | distributed tests in the [development docs](https://github.com/faasm/faabric/blob/main/docs/source/development.md) 14 | and use the tag `[mpi]`, i.e.: 15 | 16 | ```bash 17 | faabric_dist_tests [mpi] 18 | ``` 19 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.21.0) 2 | project(faabric-examples) 3 | 4 | set(CMAKE_CXX_STANDARD 20) 5 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 6 | set(CMAKE_CXX_EXTENSIONS OFF) 7 | 8 | # ----------------------------------------------- 9 | # These examples must support being compiled as stand-alone project. 10 | # Don't include them in the main CMake build, instead link them against 11 | # the installed Faabric library as an external user would. 12 | # ----------------------------------------------- 13 | set(FAABRIC_LIB_DIR "/build/faabric/install/lib") 14 | 15 | include(../cmake/ExternalProjects.cmake) 16 | 17 | function(add_example example_name) 18 | add_executable(${example_name} ${example_name}.cpp) 19 | 20 | target_link_libraries(${example_name} 21 | ${FAABRIC_LIB_DIR}/libfaabric.so 22 | ${FAABRIC_LIB_DIR}/libfaabricmpi.so 23 | faabric::common_dependencies 24 | ) 25 | 26 | set_property(TARGET ${example_name} PROPERTY BUILD_RPATH "${FAABRIC_LIB_DIR}") 27 | 28 | set(ALL_EXAMPLES ${ALL_EXAMPLES} ${example_name} PARENT_SCOPE) 29 | endfunction() 30 | 31 | add_example(check) 32 | add_example(server) 33 | 34 | add_custom_target(all_examples DEPENDS ${ALL_EXAMPLES}) 35 | -------------------------------------------------------------------------------- /examples/check.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | faabric::util::initLogging(); 7 | 8 | // Build a message just to check 
things work 9 | faabric::Message msg = faabric::util::messageFactory("foo", "bar"); 10 | std::string msgString = faabric::util::funcToString(msg, true); 11 | 12 | SPDLOG_DEBUG("Message: {}", msgString); 13 | 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /examples/server.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace faabric::executor; 7 | 8 | class ExampleExecutor : public Executor 9 | { 10 | public: 11 | ExampleExecutor(faabric::Message& msg) 12 | : Executor(msg) 13 | {} 14 | 15 | ~ExampleExecutor() {} 16 | 17 | int32_t executeTask(int threadPoolIdx, 18 | int msgIdx, 19 | std::shared_ptr req) 20 | { 21 | SPDLOG_INFO("Hello world!"); 22 | faabric::Message& msg = req->mutable_messages()->at(msgIdx); 23 | msg.set_outputdata("This is hello output!"); 24 | 25 | return 0; 26 | } 27 | }; 28 | 29 | class ExampleExecutorFactory : public ExecutorFactory 30 | { 31 | protected: 32 | std::shared_ptr createExecutor(faabric::Message& msg) override 33 | { 34 | return std::make_shared(msg); 35 | } 36 | }; 37 | 38 | int main() 39 | { 40 | faabric::util::initLogging(); 41 | 42 | // Start the worker pool 43 | SPDLOG_INFO("Starting executor pool in the background"); 44 | std::shared_ptr fac = 45 | std::make_shared(); 46 | faabric::runner::FaabricMain m(fac); 47 | m.startBackground(); 48 | 49 | // Start endpoint, will block until it receives a signal 50 | SPDLOG_INFO("Starting endpoint"); 51 | faabric::endpoint::FaabricEndpoint endpoint; 52 | endpoint.start(faabric::endpoint::EndpointMode::SIGNAL); 53 | 54 | SPDLOG_INFO("Shutting down endpoint"); 55 | m.shutdown(); 56 | return EXIT_SUCCESS; 57 | } 58 | -------------------------------------------------------------------------------- /faabric.cpp: -------------------------------------------------------------------------------- 1 | // Stub to link together 
top-level CMake library 2 | -------------------------------------------------------------------------------- /include/faabric/batch-scheduler/BinPackScheduler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace faabric::batch_scheduler { 9 | 10 | class BinPackScheduler final : public BatchScheduler 11 | { 12 | public: 13 | std::shared_ptr makeSchedulingDecision( 14 | HostMap& hostMap, 15 | const InFlightReqs& inFlightReqs, 16 | std::shared_ptr req) override; 17 | 18 | private: 19 | bool isFirstDecisionBetter( 20 | std::shared_ptr decisionA, 21 | std::shared_ptr decisionB) override; 22 | 23 | std::vector getSortedHosts( 24 | HostMap& hostMap, 25 | const InFlightReqs& inFlightReqs, 26 | std::shared_ptr req, 27 | const DecisionType& decisionType) override; 28 | }; 29 | } 30 | -------------------------------------------------------------------------------- /include/faabric/batch-scheduler/CompactScheduler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace faabric::batch_scheduler { 8 | 9 | // This batch scheduler behaves in the same way as BinPack for NEW and 10 | // SCALE_CHANGE requests, but for DIST_CHANGE requests it tries to compact 11 | // to the fewest number of VMs.
12 | class CompactScheduler final : public BatchScheduler 13 | { 14 | public: 15 | std::shared_ptr makeSchedulingDecision( 16 | HostMap& hostMap, 17 | const InFlightReqs& inFlightReqs, 18 | std::shared_ptr req) override; 19 | 20 | private: 21 | bool isFirstDecisionBetter( 22 | std::shared_ptr decisionA, 23 | std::shared_ptr decisionB) override; 24 | 25 | bool isFirstDecisionBetter(HostMap& hostMap, 26 | std::shared_ptr decisionA, 27 | std::shared_ptr decisionB); 28 | 29 | std::vector getSortedHosts( 30 | HostMap& hostMap, 31 | const InFlightReqs& inFlightReqs, 32 | std::shared_ptr req, 33 | const DecisionType& decisionType) override; 34 | }; 35 | } 36 | -------------------------------------------------------------------------------- /include/faabric/batch-scheduler/DecisionCache.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | namespace faabric::batch_scheduler { 9 | /** 10 | * A record of a decision already taken for the given size of batch request 11 | * for the given function. This doesn't contain the messages themselves, 12 | * just the hosts and group ID that was used. 13 | */ 14 | class CachedDecision 15 | { 16 | public: 17 | CachedDecision(const std::vector& hostsIn, int groupIdIn); 18 | 19 | std::vector getHosts() { return hosts; } 20 | 21 | int getGroupId() const { return groupId; } 22 | 23 | private: 24 | std::vector hosts; 25 | int groupId = 0; 26 | }; 27 | 28 | /** 29 | * Repository for cached scheduling decisions. Object is not thread safe as we 30 | * assume only a single executor will be caching decisions for a given function 31 | * and size of batch request on one host at a time. 
32 | */ 33 | class DecisionCache 34 | { 35 | public: 36 | std::shared_ptr getCachedDecision( 37 | std::shared_ptr req); 38 | 39 | void addCachedDecision(std::shared_ptr req, 40 | SchedulingDecision& decision); 41 | 42 | void clear(); 43 | 44 | private: 45 | std::string getCacheKey(std::shared_ptr req); 46 | 47 | std::unordered_map> 48 | cachedDecisions; 49 | }; 50 | 51 | DecisionCache& getSchedulingDecisionCache(); 52 | } 53 | -------------------------------------------------------------------------------- /include/faabric/batch-scheduler/SpotScheduler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace faabric::batch_scheduler { 8 | 9 | // This batch scheduler behaves in the same way as BinPack for NEW and 10 | // SCALE_CHANGE requests, but for DIST_CHANGE it considers if any of the 11 | // hosts in the Host Map have been tainted with the eviction mark. In which 12 | // case it first tries to migrate them to other running hosts and, if not 13 | // enough hosts are available, freezes the messages.
14 | class SpotScheduler final : public BatchScheduler 15 | { 16 | public: 17 | std::shared_ptr makeSchedulingDecision( 18 | HostMap& hostMap, 19 | const InFlightReqs& inFlightReqs, 20 | std::shared_ptr req) override; 21 | 22 | private: 23 | bool isFirstDecisionBetter( 24 | std::shared_ptr decisionA, 25 | std::shared_ptr decisionB) override; 26 | 27 | std::vector getSortedHosts( 28 | HostMap& hostMap, 29 | const InFlightReqs& inFlightReqs, 30 | std::shared_ptr req, 31 | const DecisionType& decisionType) override; 32 | }; 33 | } 34 | -------------------------------------------------------------------------------- /include/faabric/endpoint/FaabricEndpoint.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace faabric::endpoint { 11 | 12 | enum class EndpointMode 13 | { 14 | SIGNAL, 15 | BG_THREAD 16 | }; 17 | 18 | namespace detail { 19 | class EndpointState; 20 | } 21 | 22 | struct HttpRequestContext 23 | { 24 | asio::io_context& ioc; 25 | asio::any_io_executor executor; 26 | std::function sendFunction; 27 | }; 28 | 29 | class HttpRequestHandler 30 | { 31 | public: 32 | virtual void onRequest(HttpRequestContext&& ctx, 33 | faabric::util::BeastHttpRequest&& request) = 0; 34 | }; 35 | 36 | class FaabricEndpoint 37 | { 38 | public: 39 | FaabricEndpoint(); 40 | 41 | FaabricEndpoint(int port, 42 | int threadCount, 43 | std::shared_ptr requestHandlerIn); 44 | 45 | FaabricEndpoint(const FaabricEndpoint&) = delete; 46 | 47 | FaabricEndpoint(FaabricEndpoint&&) = delete; 48 | 49 | FaabricEndpoint& operator=(const FaabricEndpoint&) = delete; 50 | 51 | FaabricEndpoint& operator=(FaabricEndpoint&&) = delete; 52 | 53 | virtual ~FaabricEndpoint(); 54 | 55 | void start(EndpointMode mode = EndpointMode::SIGNAL); 56 | 57 | void stop(); 58 | 59 | private: 60 | int port; 61 | int threadCount; 62 | std::unique_ptr state; 63 | std::shared_ptr 
requestHandler; 64 | }; 65 | } 66 | -------------------------------------------------------------------------------- /include/faabric/endpoint/FaabricEndpointHandler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::endpoint { 7 | class FaabricEndpointHandler final 8 | : public HttpRequestHandler 9 | , public std::enable_shared_from_this 10 | { 11 | public: 12 | void onRequest(HttpRequestContext&& ctx, 13 | faabric::util::BeastHttpRequest&& request) override; 14 | 15 | private: 16 | void executeFunction(HttpRequestContext&& ctx, 17 | faabric::util::BeastHttpResponse&& partialResponse, 18 | std::shared_ptr ber, 19 | size_t messageIndex); 20 | 21 | void onFunctionResult(HttpRequestContext&& ctx, 22 | faabric::util::BeastHttpResponse&& partialResponse, 23 | faabric::Message& msg); 24 | }; 25 | } 26 | -------------------------------------------------------------------------------- /include/faabric/executor/ExecutorContext.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace faabric::executor { 8 | 9 | class ExecutorContextException : public faabric::util::FaabricException 10 | { 11 | public: 12 | explicit ExecutorContextException(std::string message) 13 | : FaabricException(std::move(message)) 14 | {} 15 | }; 16 | 17 | /** 18 | * Globally-accessible wrapper that allows executing applications to query 19 | * their execution context. The context is thread-local, so applications can 20 | * query which specific message they are executing. 
21 | */ 22 | class ExecutorContext 23 | { 24 | public: 25 | ExecutorContext(Executor* executorIn, 26 | std::shared_ptr reqIn, 27 | int msgIdx); 28 | 29 | static bool isSet(); 30 | 31 | static void set(Executor* executorIn, 32 | std::shared_ptr reqIn, 33 | int msgIdxIn); 34 | 35 | static void unset(); 36 | 37 | static std::shared_ptr get(); 38 | 39 | Executor* getExecutor() { return executor; } 40 | 41 | std::shared_ptr getBatchRequest() 42 | { 43 | return req; 44 | } 45 | 46 | faabric::Message& getMsg() 47 | { 48 | if (req == nullptr) { 49 | throw std::runtime_error( 50 | "Getting message when no request set in context"); 51 | } 52 | return req->mutable_messages()->at(msgIdx); 53 | } 54 | 55 | int getMsgIdx() { return msgIdx; } 56 | 57 | private: 58 | Executor* executor = nullptr; 59 | std::shared_ptr req = nullptr; 60 | int msgIdx = 0; 61 | }; 62 | } 63 | -------------------------------------------------------------------------------- /include/faabric/executor/ExecutorFactory.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace faabric::executor { 6 | 7 | class ExecutorFactory 8 | { 9 | public: 10 | virtual ~ExecutorFactory(){}; 11 | 12 | virtual std::shared_ptr createExecutor(faabric::Message& msg) = 0; 13 | 14 | virtual void flushHost(); 15 | }; 16 | 17 | void setExecutorFactory(std::shared_ptr fac); 18 | 19 | std::shared_ptr getExecutorFactory(); 20 | } 21 | -------------------------------------------------------------------------------- /include/faabric/executor/ExecutorTask.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace faabric::executor { 6 | 7 | class ExecutorTask 8 | { 9 | public: 10 | ExecutorTask() = default; 11 | 12 | ExecutorTask(int messageIndexIn, 13 | std::shared_ptr reqIn); 14 | 15 | // Delete everything copy-related, default everything move-related 16 | ExecutorTask(const 
ExecutorTask& other) = delete; 17 | 18 | ExecutorTask& operator=(const ExecutorTask& other) = delete; 19 | 20 | ExecutorTask(ExecutorTask&& other) = default; 21 | 22 | ExecutorTask& operator=(ExecutorTask&& other) = default; 23 | 24 | std::shared_ptr req; 25 | int messageIndex = 0; 26 | }; 27 | } 28 | -------------------------------------------------------------------------------- /include/faabric/flat/.gitignore: -------------------------------------------------------------------------------- 1 | # Flatbuffers output 2 | *.fb.h 3 | faabric_generated.h 4 | -------------------------------------------------------------------------------- /include/faabric/mpi/MpiContext.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::mpi { 7 | class MpiContext 8 | { 9 | public: 10 | MpiContext(); 11 | 12 | int createWorld(faabric::Message& msg); 13 | 14 | void joinWorld(faabric::Message& msg); 15 | 16 | bool getIsMpi() const; 17 | 18 | int getRank() const; 19 | 20 | int getWorldId() const; 21 | 22 | private: 23 | bool isMpi = false; 24 | int rank = -1; 25 | int worldId = -1; 26 | }; 27 | } 28 | -------------------------------------------------------------------------------- /include/faabric/mpi/MpiMessage.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::mpi { 7 | 8 | enum MpiMessageType : int32_t 9 | { 10 | NORMAL = 0, 11 | BARRIER_JOIN = 1, 12 | BARRIER_DONE = 2, 13 | SCATTER = 3, 14 | GATHER = 4, 15 | ALLGATHER = 5, 16 | REDUCE = 6, 17 | SCAN = 7, 18 | ALLREDUCE = 8, 19 | ALLTOALL = 9, 20 | ALLTOALL_PACKED = 10, 21 | SENDRECV = 11, 22 | BROADCAST = 12, 23 | // Special message type for async messages that have not been unacked yet 24 | UNACKED_MPI_MESSAGE = 13, 25 | HANDSHAKE = 14, 26 | }; 27 | 28 | /* Simple fixed-size C-struct to capture the state of an MPI message 
moving 29 | * through Faabric. 30 | * 31 | * We require fixed-size, and no unique pointers to be able to use 32 | * high-throughput in-memory ring-buffers to send the messages around. 33 | * This also means that we manually malloc/free the data pointer. The message 34 | * size is: 35 | * 7 * int32_t = 7 * 4 bytes = 28 bytes 36 | * 1 * int32_t (padding) = 4 bytes 37 | * 1 * void* = 1 * 8 bytes = 8 bytes 38 | * total = 40 bytes = 5 * 8 so the struct is 8 byte-aligned 39 | */ 40 | struct MpiMessage 41 | { 42 | int32_t id; 43 | int32_t worldId; 44 | int32_t sendRank; 45 | int32_t recvRank; 46 | int32_t typeSize; 47 | int32_t count; 48 | // This field is only used for async messages, but it helps making the 49 | // struct 8-aligned 50 | int32_t requestId; 51 | MpiMessageType messageType; 52 | void* buffer; 53 | }; 54 | static_assert((sizeof(MpiMessage) % 8) == 0, "MPI message must be 8-aligned!"); 55 | 56 | inline size_t payloadSize(const MpiMessage& msg) 57 | { 58 | return msg.typeSize * msg.count; 59 | } 60 | 61 | inline size_t msgSize(const MpiMessage& msg) 62 | { 63 | return sizeof(MpiMessage) + payloadSize(msg); 64 | } 65 | 66 | void serializeMpiMsg(std::vector& buffer, const MpiMessage& msg); 67 | 68 | void parseMpiMsg(const std::vector& bytes, MpiMessage* msg); 69 | } 70 | -------------------------------------------------------------------------------- /include/faabric/mpi/MpiWorldRegistry.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::mpi { 7 | class MpiWorldRegistry 8 | { 9 | public: 10 | MpiWorldRegistry() = default; 11 | 12 | MpiWorld& createWorld(faabric::Message& msg, 13 | int worldId, 14 | std::string hostOverride = ""); 15 | 16 | MpiWorld& getOrInitialiseWorld(faabric::Message& msg); 17 | 18 | MpiWorld& getWorld(int worldId); 19 | 20 | bool worldExists(int worldId); 21 | 22 | void clearWorld(int worldId); 23 | 24 | void clear(); 25 | 26 | 
private: 27 | faabric::util::ConcurrentMap> worldMap; 28 | }; 29 | 30 | MpiWorldRegistry& getMpiWorldRegistry(); 31 | } 32 | -------------------------------------------------------------------------------- /include/faabric/planner/PlannerApi.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace faabric::planner { 4 | enum PlannerCalls 5 | { 6 | NoPlanerCall = 0, 7 | // Util 8 | Ping = 1, 9 | // Host-membership calls 10 | GetAvailableHosts = 2, 11 | RegisterHost = 3, 12 | RemoveHost = 4, 13 | // Scheduling calls 14 | SetMessageResult = 8, 15 | GetMessageResult = 9, 16 | GetBatchResults = 10, 17 | GetSchedulingDecision = 11, 18 | GetNumMigrations = 12, 19 | CallBatch = 13, 20 | PreloadSchedulingDecision = 14, 21 | }; 22 | } 23 | -------------------------------------------------------------------------------- /include/faabric/planner/PlannerEndpointHandler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace faabric::planner { 6 | class PlannerEndpointHandler final 7 | : public faabric::endpoint::HttpRequestHandler 8 | , public std::enable_shared_from_this 9 | { 10 | public: 11 | void onRequest(faabric::endpoint::HttpRequestContext&& ctx, 12 | faabric::util::BeastHttpRequest&& request) override; 13 | }; 14 | } 15 | -------------------------------------------------------------------------------- /include/faabric/planner/PlannerServer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::planner { 7 | class PlannerServer final : public faabric::transport::MessageEndpointServer 8 | { 9 | public: 10 | PlannerServer(); 11 | 12 | protected: 13 | void doAsyncRecv(transport::Message& message) override; 14 | 15 | std::unique_ptr doSyncRecv( 16 | transport::Message& message) override; 17 | 18 | // Asynchronous calls 19 | 20 | void 
recvSetMessageResult(std::span buffer); 21 | 22 | // Synchronous calls 23 | 24 | std::unique_ptr recvPing(); 25 | 26 | std::unique_ptr recvGetAvailableHosts(); 27 | 28 | std::unique_ptr recvRegisterHost( 29 | std::span buffer); 30 | 31 | std::unique_ptr recvRemoveHost( 32 | std::span buffer); 33 | 34 | std::unique_ptr recvGetMessageResult( 35 | std::span buffer); 36 | 37 | std::unique_ptr recvGetBatchResults( 38 | std::span buffer); 39 | 40 | std::unique_ptr recvGetSchedulingDecision( 41 | std::span buffer); 42 | 43 | std::unique_ptr recvGetNumMigrations( 44 | std::span buffer); 45 | 46 | std::unique_ptr recvPreloadSchedulingDecision( 47 | std::span buffer); 48 | 49 | std::unique_ptr recvCallBatch( 50 | std::span buffer); 51 | 52 | private: 53 | faabric::planner::Planner& planner; 54 | }; 55 | } 56 | -------------------------------------------------------------------------------- /include/faabric/runner/FaabricMain.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace faabric::runner { 12 | class FaabricMain 13 | { 14 | public: 15 | FaabricMain(std::shared_ptr fac); 16 | 17 | void startBackground(); 18 | 19 | void startRunner(); 20 | 21 | void startFunctionCallServer(); 22 | 23 | void startStateServer(); 24 | 25 | void startSnapshotServer(); 26 | 27 | void startPointToPointServer(); 28 | 29 | void shutdown(); 30 | 31 | private: 32 | faabric::state::StateServer stateServer; 33 | faabric::scheduler::FunctionCallServer functionServer; 34 | faabric::snapshot::SnapshotServer snapshotServer; 35 | faabric::transport::PointToPointServer pointToPointServer; 36 | }; 37 | } 38 | -------------------------------------------------------------------------------- /include/faabric/scheduler/FunctionCallApi.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace 
faabric::scheduler { 4 | enum FunctionCalls 5 | { 6 | NoFunctionCall = 0, 7 | ExecuteFunctions = 1, 8 | Flush = 2, 9 | SetMessageResult = 3, 10 | }; 11 | } 12 | -------------------------------------------------------------------------------- /include/faabric/scheduler/FunctionCallClient.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace faabric::scheduler { 11 | 12 | // ----------------------------------- 13 | // Mocking 14 | // ----------------------------------- 15 | std::vector> getFunctionCalls(); 16 | 17 | std::vector> getFlushCalls(); 18 | 19 | std::vector< 20 | std::pair>> 21 | getBatchRequests(); 22 | 23 | std::vector>> 24 | getMessageResults(); 25 | 26 | void clearMockRequests(); 27 | 28 | // ----------------------------------- 29 | // Function Call Client 30 | // ----------------------------------- 31 | 32 | /* 33 | * The function call client is used to interact with the function call server, 34 | * faabric's RPC like client/server implementation 35 | */ 36 | class FunctionCallClient : public faabric::transport::MessageEndpointClient 37 | { 38 | public: 39 | explicit FunctionCallClient(const std::string& hostIn); 40 | 41 | void sendFlush(); 42 | 43 | void executeFunctions(std::shared_ptr req); 44 | 45 | void setMessageResult(std::shared_ptr msg); 46 | }; 47 | 48 | // ----------------------------------- 49 | // Static setter/getters 50 | // ----------------------------------- 51 | 52 | std::shared_ptr getFunctionCallClient( 53 | const std::string& otherHost); 54 | 55 | void clearFunctionCallClients(); 56 | } 57 | -------------------------------------------------------------------------------- /include/faabric/scheduler/FunctionCallServer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 
namespace faabric::scheduler { 9 | class FunctionCallServer final 10 | : public faabric::transport::MessageEndpointServer 11 | { 12 | public: 13 | FunctionCallServer(); 14 | 15 | private: 16 | Scheduler& scheduler; 17 | 18 | void doAsyncRecv(transport::Message& message) override; 19 | 20 | std::unique_ptr doSyncRecv( 21 | transport::Message& message) override; 22 | 23 | std::unique_ptr recvFlush( 24 | std::span buffer); 25 | 26 | void recvExecuteFunctions(std::span buffer); 27 | 28 | void recvSetMessageResult(std::span buffer); 29 | }; 30 | } 31 | -------------------------------------------------------------------------------- /include/faabric/scheduler/InMemoryMessageQueue.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::scheduler { 7 | typedef faabric::util::Queue InMemoryMessageQueue; 8 | typedef std::pair InMemoryMessageQueuePair; 9 | } 10 | -------------------------------------------------------------------------------- /include/faabric/snapshot/SnapshotApi.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace faabric::snapshot { 4 | enum SnapshotCalls 5 | { 6 | NoSnapshotCall = 0, 7 | PushSnapshot = 1, 8 | PushSnapshotUpdate = 2, 9 | DeleteSnapshot = 3, 10 | ThreadResult = 4, 11 | }; 12 | } 13 | -------------------------------------------------------------------------------- /include/faabric/snapshot/SnapshotRegistry.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace faabric::snapshot { 12 | 13 | class SnapshotRegistry 14 | { 15 | public: 16 | SnapshotRegistry() = default; 17 | 18 | std::shared_ptr getSnapshot( 19 | const std::string& key); 20 | 21 | bool snapshotExists(const std::string& key); 22 | 23 | void registerSnapshot(const 
std::string& key, 24 | std::shared_ptr data); 25 | 26 | void deleteSnapshot(const std::string& key); 27 | 28 | size_t getSnapshotCount(); 29 | 30 | void clear(); 31 | 32 | private: 33 | std::unordered_map> 35 | snapshotMap; 36 | 37 | std::shared_mutex snapshotsMx; 38 | 39 | int writeSnapshotToFd(const std::string& key, 40 | faabric::util::SnapshotData& data); 41 | }; 42 | 43 | SnapshotRegistry& getSnapshotRegistry(); 44 | 45 | } 46 | -------------------------------------------------------------------------------- /include/faabric/snapshot/SnapshotServer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace faabric::snapshot { 12 | class SnapshotServer final : public faabric::transport::MessageEndpointServer 13 | { 14 | public: 15 | SnapshotServer(); 16 | 17 | protected: 18 | void doAsyncRecv(transport::Message& message) override; 19 | 20 | std::unique_ptr doSyncRecv( 21 | transport::Message& message) override; 22 | 23 | std::unique_ptr recvPushSnapshot( 24 | std::span buffer); 25 | 26 | std::unique_ptr recvPushSnapshotUpdate( 27 | std::span buffer); 28 | 29 | void recvDeleteSnapshot(std::span buffer); 30 | 31 | std::unique_ptr recvThreadResult( 32 | faabric::transport::Message& message); 33 | 34 | private: 35 | faabric::transport::PointToPointBroker& broker; 36 | faabric::snapshot::SnapshotRegistry& reg; 37 | }; 38 | } 39 | -------------------------------------------------------------------------------- /include/faabric/state/InMemoryStateRegistry.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace faabric::state { 8 | class InMemoryStateRegistry 9 | { 10 | public: 11 | InMemoryStateRegistry() = default; 12 | 13 | std::string getMasterIP(const std::string& user, 14 | const std::string& key, 15 | const 
std::string& thisIP, 16 | bool claim); 17 | 18 | std::string getMasterIPForOtherMaster(const std::string& userIn, 19 | const std::string& keyIn, 20 | const std::string& thisIP); 21 | 22 | void clear(); 23 | 24 | private: 25 | std::unordered_map mainMap; 26 | std::shared_mutex mainMapMutex; 27 | }; 28 | 29 | InMemoryStateRegistry& getInMemoryStateRegistry(); 30 | } 31 | -------------------------------------------------------------------------------- /include/faabric/state/RedisStateKeyValue.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace faabric::state { 10 | class RedisStateKeyValue final : public StateKeyValue 11 | { 12 | public: 13 | RedisStateKeyValue(const std::string& userIn, 14 | const std::string& keyIn, 15 | size_t sizeIn); 16 | 17 | RedisStateKeyValue(const std::string& userIn, const std::string& keyIn); 18 | 19 | static size_t getStateSizeFromRemote(const std::string& userIn, 20 | const std::string& keyIn); 21 | 22 | static void deleteFromRemote(const std::string& userIn, 23 | const std::string& keyIn); 24 | 25 | static void clearAll(bool global); 26 | 27 | private: 28 | const std::string joinedKey; 29 | 30 | void pullFromRemote() override; 31 | 32 | void pullChunkFromRemote(long offset, size_t length) override; 33 | 34 | void pushToRemote() override; 35 | 36 | void pushPartialToRemote( 37 | const std::vector& dirtyChunks) override; 38 | 39 | void appendToRemote(const uint8_t* data, size_t length) override; 40 | 41 | void pullAppendedFromRemote(uint8_t* data, 42 | size_t length, 43 | long nValues) override; 44 | 45 | void clearAppendedFromRemote() override; 46 | }; 47 | } 48 | -------------------------------------------------------------------------------- /include/faabric/state/State.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 
#include 7 | 8 | namespace faabric::state { 9 | 10 | // State client-server API 11 | enum StateCalls 12 | { 13 | NoStateCall = 0, 14 | Pull = 1, 15 | Push = 2, 16 | Size = 3, 17 | Append = 4, 18 | ClearAppended = 5, 19 | PullAppended = 6, 20 | Delete = 7, 21 | }; 22 | 23 | class State 24 | { 25 | public: 26 | explicit State(std::string thisIPIn); 27 | 28 | size_t getStateSize(const std::string& user, const std::string& keyIn); 29 | 30 | std::shared_ptr getKV(const std::string& user, 31 | const std::string& key, 32 | size_t size); 33 | 34 | std::shared_ptr getKV(const std::string& user, 35 | const std::string& key); 36 | 37 | void forceClearAll(bool global); 38 | 39 | void deleteKV(const std::string& userIn, const std::string& keyIn); 40 | 41 | void deleteKVLocally(const std::string& userIn, const std::string& keyIn); 42 | 43 | size_t getKVCount(); 44 | 45 | std::string getThisIP(); 46 | 47 | private: 48 | const std::string thisIP; 49 | 50 | std::unordered_map> kvMap; 51 | std::shared_mutex mapMutex; 52 | 53 | std::shared_ptr doGetKV(const std::string& user, 54 | const std::string& key, 55 | bool sizeless, 56 | size_t size); 57 | }; 58 | 59 | State& getGlobalState(); 60 | } 61 | -------------------------------------------------------------------------------- /include/faabric/state/StateClient.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace faabric::state { 10 | class StateClient : public faabric::transport::MessageEndpointClient 11 | { 12 | public: 13 | explicit StateClient(const std::string& userIn, 14 | const std::string& keyIn, 15 | const std::string& hostIn); 16 | 17 | const std::string user; 18 | const std::string key; 19 | 20 | void pushChunks(const std::vector& chunks); 21 | 22 | void pullChunks(const std::vector& chunks, 23 | uint8_t* bufferStart); 24 | 25 | void append(const uint8_t* data, size_t length); 26 | 27 | void 
pullAppended(uint8_t* buffer, size_t length, long nValues); 28 | 29 | void clearAppended(); 30 | 31 | size_t stateSize(); 32 | 33 | void deleteState(); 34 | 35 | void lock(); 36 | 37 | void unlock(); 38 | 39 | private: 40 | void sendStateRequest(faabric::state::StateCalls header, 41 | const uint8_t* data, 42 | int length); 43 | 44 | void logRequest(const std::string& op); 45 | }; 46 | } 47 | -------------------------------------------------------------------------------- /include/faabric/state/StateServer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace faabric::state { 8 | class StateServer final : public faabric::transport::MessageEndpointServer 9 | { 10 | public: 11 | explicit StateServer(State& stateIn); 12 | 13 | private: 14 | State& state; 15 | 16 | void logOperation(const std::string& op); 17 | 18 | void doAsyncRecv(transport::Message& message) override; 19 | 20 | std::unique_ptr doSyncRecv( 21 | transport::Message& message) override; 22 | 23 | // Sync methods 24 | 25 | std::unique_ptr recvSize( 26 | std::span buffer); 27 | 28 | std::unique_ptr recvPull( 29 | std::span buffer); 30 | 31 | std::unique_ptr recvPush( 32 | std::span buffer); 33 | 34 | std::unique_ptr recvAppend( 35 | std::span buffer); 36 | 37 | std::unique_ptr recvPullAppended( 38 | std::span buffer); 39 | 40 | std::unique_ptr recvClearAppended( 41 | std::span buffer); 42 | 43 | std::unique_ptr recvDelete( 44 | std::span buffer); 45 | }; 46 | } 47 | -------------------------------------------------------------------------------- /include/faabric/transport/MessageEndpointClient.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace faabric::transport { 11 | class MessageEndpointClient 12 | { 13 | public: 14 | MessageEndpointClient(std::string hostIn, 
15 | int asyncPort, 16 | int syncPort, 17 | int timeoutMs = DEFAULT_SOCKET_TIMEOUT_MS); 18 | 19 | void asyncSend(int header, 20 | google::protobuf::Message* msg, 21 | int sequenceNum = NO_SEQUENCE_NUM); 22 | 23 | void asyncSend(int header, 24 | const uint8_t* buffer, 25 | size_t bufferSize, 26 | int sequenceNum = NO_SEQUENCE_NUM); 27 | 28 | void syncSend(int header, 29 | google::protobuf::Message* msg, 30 | google::protobuf::Message* response); 31 | 32 | void syncSend(int header, 33 | const uint8_t* buffer, 34 | size_t bufferSize, 35 | google::protobuf::Message* response); 36 | 37 | protected: 38 | const std::string host; 39 | 40 | const int asyncPort; 41 | 42 | const int syncPort; 43 | 44 | // Optional: nullopt in mock mode, to avoid connecting to invalid hosts 45 | std::optional asyncEndpoint; 46 | 47 | std::optional syncEndpoint; 48 | }; 49 | } 50 | -------------------------------------------------------------------------------- /include/faabric/transport/PointToPointCall.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace faabric::transport { 4 | 5 | enum PointToPointCall 6 | { 7 | MAPPING = 0, 8 | MESSAGE = 1, 9 | LOCK_GROUP = 2, 10 | LOCK_GROUP_RECURSIVE = 3, 11 | UNLOCK_GROUP = 4, 12 | UNLOCK_GROUP_RECURSIVE = 5, 13 | }; 14 | } 15 | -------------------------------------------------------------------------------- /include/faabric/transport/PointToPointClient.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace faabric::transport { 8 | 9 | std::vector> 10 | getSentMappings(); 11 | 12 | std::vector> 13 | getSentPointToPointMessages(); 14 | 15 | std::vector> 18 | getSentLockMessages(); 19 | 20 | void clearSentMessages(); 21 | 22 | class PointToPointClient : public faabric::transport::MessageEndpointClient 23 | { 24 | public: 25 | PointToPointClient(const std::string& hostIn); 26 | 27 | 
void sendMappings(faabric::PointToPointMappings& mappings); 28 | 29 | void sendMessage(faabric::PointToPointMessage& msg, 30 | int sequenceNum = NO_SEQUENCE_NUM); 31 | 32 | void groupLock(int appId, 33 | int groupId, 34 | int groupIdx, 35 | bool recursive = false); 36 | 37 | void groupUnlock(int appId, 38 | int groupId, 39 | int groupIdx, 40 | bool recursive = false); 41 | 42 | private: 43 | void makeCoordinationRequest(int appId, 44 | int groupId, 45 | int groupIdx, 46 | faabric::transport::PointToPointCall call); 47 | }; 48 | } 49 | -------------------------------------------------------------------------------- /include/faabric/transport/PointToPointServer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::transport { 7 | 8 | class PointToPointServer final : public MessageEndpointServer 9 | { 10 | public: 11 | PointToPointServer(); 12 | 13 | private: 14 | PointToPointBroker& broker; 15 | 16 | void doAsyncRecv(transport::Message& message) override; 17 | 18 | std::unique_ptr doSyncRecv( 19 | transport::Message& message) override; 20 | 21 | void onWorkerStop() override; 22 | 23 | std::unique_ptr doRecvMappings( 24 | std::span buffer); 25 | 26 | void recvGroupLock(std::span buffer, bool recursive); 27 | 28 | void recvGroupUnlock(std::span buffer, bool recursive); 29 | }; 30 | } 31 | -------------------------------------------------------------------------------- /include/faabric/transport/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define ANY_HOST "0.0.0.0" 4 | 5 | #define DEFAULT_STATE_HOST ANY_HOST 6 | #define DEFAULT_FUNCTION_CALL_HOST ANY_HOST 7 | #define DEFAULT_SNAPSHOT_HOST ANY_HOST 8 | 9 | #define STATE_ASYNC_PORT 8003 10 | #define STATE_SYNC_PORT 8004 11 | #define STATE_INPROC_LABEL "state" 12 | 13 | #define FUNCTION_CALL_ASYNC_PORT 8005 14 | #define FUNCTION_CALL_SYNC_PORT 8006 
15 | #define FUNCTION_INPROC_LABEL "function" 16 | 17 | #define SNAPSHOT_ASYNC_PORT 8007 18 | #define SNAPSHOT_SYNC_PORT 8008 19 | #define SNAPSHOT_INPROC_LABEL "snapshot" 20 | 21 | #define POINT_TO_POINT_ASYNC_PORT 8009 22 | #define POINT_TO_POINT_SYNC_PORT 8010 23 | #define POINT_TO_POINT_INPROC_LABEL "ptp" 24 | 25 | #define PLANNER_ASYNC_PORT 8011 26 | #define PLANNER_SYNC_PORT 8012 27 | #define PLANNER_INPROC_LABEL "planner" 28 | 29 | #define MPI_BASE_PORT 8020 30 | -------------------------------------------------------------------------------- /include/faabric/transport/macros.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define PARSE_MSG(T, data, size) \ 4 | T parsedMsg; \ 5 | if (!parsedMsg.ParseFromArray(data, size)) { \ 6 | throw std::runtime_error("Error deserialising message"); \ 7 | } 8 | 9 | #define SEND_FB_MSG(T, _mb) \ 10 | { \ 11 | const uint8_t* _buffer = _mb.GetBufferPointer(); \ 12 | int _size = _mb.GetSize(); \ 13 | faabric::EmptyResponse _response; \ 14 | syncSend(T, _buffer, _size, &_response); \ 15 | } 16 | 17 | #define SEND_FB_MSG_ASYNC(T, _mb) \ 18 | { \ 19 | const uint8_t* _buffer = _mb.GetBufferPointer(); \ 20 | int _size = _mb.GetSize(); \ 21 | asyncSend(T, _buffer, _size); \ 22 | } 23 | -------------------------------------------------------------------------------- /include/faabric/transport/tcp/Address.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace faabric::transport::tcp { 8 | class Address 9 | { 10 | public: 11 | Address(const std::string& host, int port); 12 | 13 | Address(int port); 14 | 15 | sockaddr* get() const; 16 | 17 | private: 18 | sockaddr_in addr; 19 | }; 20 | 21 | } 22 | -------------------------------------------------------------------------------- /include/faabric/transport/tcp/RecvSocket.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | namespace faabric::transport::tcp { 10 | class RecvSocket 11 | { 12 | public: 13 | RecvSocket(int port, const std::string& host = ANY_HOST); 14 | RecvSocket(const RecvSocket& recvSocket) = delete; 15 | ~RecvSocket(); 16 | 17 | // Start BIND socket and mark it as a passive socket 18 | void listen(); 19 | 20 | // Accept a connection into the socket, and return the connection fd 21 | int accept(); 22 | 23 | // Receive bytes from a connection 24 | void recvOne(int conn, uint8_t* buffer, size_t bufferSize); 25 | 26 | private: 27 | Address addr; 28 | Socket sock; 29 | 30 | std::string host; 31 | int port; 32 | 33 | std::deque openConnections; 34 | 35 | void setSocketOptions(int connFd); 36 | }; 37 | } 38 | -------------------------------------------------------------------------------- /include/faabric/transport/tcp/SendSocket.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | namespace faabric::transport::tcp { 11 | class SendSocket 12 | { 13 | public: 14 | SendSocket(const std::string& host, int port); 15 | 16 | void dial(); 17 | 18 | void sendOne(const uint8_t* buffer, size_t bufferSize); 19 | 20 | private: 21 | Address addr; 22 | Socket sock; 23 | bool connected; 24 | 25 | std::string host; 26 | int port; 27 | 28 | void setSocketOptions(int connFd); 29 | }; 30 | } 31 | -------------------------------------------------------------------------------- /include/faabric/transport/tcp/Socket.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::transport::tcp { 7 | 8 | const int SocketTimeoutMs = 5000; 9 | // We get this value from OpenMPI's recommended TCP settings (FAQ 9): 10 
| // https://www.open-mpi.org/faq/?category=tcp 11 | const size_t SocketBufferSizeBytes = 16777216; 12 | 13 | class Socket 14 | { 15 | public: 16 | Socket(); 17 | Socket(int connFd); 18 | Socket(const Socket& socket) = delete; 19 | ~Socket(); 20 | 21 | int get() const { return connFd; } 22 | 23 | private: 24 | int connFd; 25 | }; 26 | } 27 | -------------------------------------------------------------------------------- /include/faabric/transport/tcp/SocketOptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace faabric::transport::tcp { 6 | void setReuseAddr(int connFd); 7 | void setNoDelay(int connFd); 8 | void setQuickAck(int connFd); 9 | 10 | // Blocking/Non-blocking sockets 11 | void setNonBlocking(int connFd); 12 | void setBlocking(int connFd); 13 | bool isNonBlocking(int connFd); 14 | 15 | // Enable busy polling for non-blocking sockets 16 | void setBusyPolling(int connFd); 17 | 18 | // Set timeout for blocking sockets 19 | void setRecvTimeoutMs(int connFd, int timeoutMs); 20 | void setSendTimeoutMs(int connFd, int timeoutMs); 21 | 22 | // Set send/recv buffer sizes (important to guarantee MPI progress). Note that 23 | // these options can never exceed the values set in net.core.{r,w}mem_max. 
To 24 | // this end, these functions must be used in conjunction with the adequate 25 | // TCP configuration 26 | void setRecvBufferSize(int connFd, size_t bufferSizeBytes); 27 | void setSendBufferSize(int connFd, size_t bufferSizeBytes); 28 | } 29 | -------------------------------------------------------------------------------- /include/faabric/util/ExecGraph.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace faabric::util { 10 | 11 | class ExecGraphNodeNotFoundException : public faabric::util::FaabricException 12 | { 13 | public: 14 | explicit ExecGraphNodeNotFoundException(std::string message) 15 | : FaabricException(std::move(message)) 16 | {} 17 | }; 18 | 19 | struct ExecGraphNode 20 | { 21 | faabric::Message msg; 22 | std::vector children; 23 | }; 24 | 25 | struct ExecGraph 26 | { 27 | ExecGraphNode rootNode; 28 | }; 29 | 30 | ExecGraphNode getFunctionExecGraphNode(int appId, int msgId); 31 | 32 | ExecGraph getFunctionExecGraph(const faabric::Message& msg); 33 | 34 | void logChainedFunction(faabric::Message& parentMessage, 35 | const faabric::Message& chainedMessage); 36 | 37 | std::set getChainedFunctions(const faabric::Message& msg); 38 | 39 | int countExecGraphNodes(const ExecGraph& graph); 40 | 41 | std::set getExecGraphHosts(const ExecGraph& graph); 42 | 43 | std::vector getMpiRankHostsFromExecGraph(const ExecGraph& graph); 44 | 45 | std::pair, std::vector> 46 | getMigratedMpiRankHostsFromExecGraph(const ExecGraph& graph); 47 | 48 | std::string execNodeToJson(const ExecGraphNode& node); 49 | 50 | std::string execGraphToJson(const ExecGraph& graph); 51 | 52 | void addDetail(faabric::Message& msg, 53 | const std::string& key, 54 | const std::string& value); 55 | 56 | void incrementCounter(faabric::Message& msg, 57 | const std::string& key, 58 | const int valueToIncrement = 1); 59 | } 60 | 
-------------------------------------------------------------------------------- /include/faabric/util/PeriodicBackgroundThread.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #define DEFAULT_BACKGROUND_INTERVAL_SECONDS 30 8 | 9 | namespace faabric::util { 10 | 11 | /** 12 | * Wrapper around periodic background thread that repeatedly does some arbitrary 13 | * work after a given interval. 14 | */ 15 | class PeriodicBackgroundThread 16 | { 17 | public: 18 | /** 19 | * Start the background thread with the given wake-up interval in seconds. 20 | */ 21 | void start(int intervalSecondsIn); 22 | 23 | /** 24 | * Stop and wait for this thread to finish. 25 | */ 26 | void stop(); 27 | 28 | virtual void doWork() = 0; 29 | 30 | virtual void tidyUp(); 31 | 32 | int getIntervalSeconds() { return intervalSeconds; } 33 | 34 | protected: 35 | int intervalSeconds = DEFAULT_BACKGROUND_INTERVAL_SECONDS; 36 | 37 | private: 38 | std::unique_ptr workThread = nullptr; 39 | 40 | std::mutex mx; 41 | 42 | std::condition_variable_any timeoutCv; 43 | }; 44 | } 45 | -------------------------------------------------------------------------------- /include/faabric/util/asio.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace asio = boost::asio; 9 | namespace beast = boost::beast; 10 | 11 | namespace faabric::util { 12 | using BeastHttpRequest = beast::http::request; 13 | using BeastHttpResponse = beast::http::response; 14 | } 15 | -------------------------------------------------------------------------------- /include/faabric/util/barrier.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace faabric::util { 8 | 9 | #define DEFAULT_BARRIER_TIMEOUT_MS 10000 10 | 11 | 
class Barrier 12 | { 13 | public: 14 | // WARNING: this barrier must be shared between threads using a shared 15 | // pointer, otherwise there seems to be some nasty race conditions related 16 | // to its destruction. 17 | static std::shared_ptr create( 18 | int count, 19 | std::function completionFunctionIn, 20 | int timeoutMs = DEFAULT_BARRIER_TIMEOUT_MS); 21 | 22 | static std::shared_ptr create( 23 | int count, 24 | int timeoutMs = DEFAULT_BARRIER_TIMEOUT_MS); 25 | 26 | explicit Barrier(int countIn, 27 | std::function completionFunctionIn, 28 | int timeoutMsIn); 29 | 30 | void wait(); 31 | 32 | private: 33 | int count = 0; 34 | int visits = 0; 35 | int currentPhase = 1; 36 | 37 | std::function completionFunction; 38 | 39 | int timeoutMs; 40 | 41 | std::mutex mx; 42 | std::condition_variable cv; 43 | }; 44 | } 45 | -------------------------------------------------------------------------------- /include/faabric/util/batch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace faabric::util { 6 | 7 | // ---------- 8 | // Batch Execute Requests (BER) 9 | // ---------- 10 | 11 | std::shared_ptr batchExecFactory(); 12 | 13 | std::shared_ptr batchExecFactory( 14 | const std::string& user, 15 | const std::string& function, 16 | int count = 1); 17 | 18 | bool isBatchExecRequestValid(std::shared_ptr ber); 19 | 20 | void updateBatchExecAppId(std::shared_ptr ber, 21 | int newAppId); 22 | 23 | void updateBatchExecGroupId(std::shared_ptr ber, 24 | int newGroupId); 25 | 26 | // ---------- 27 | // Batch Execute Requests' Status 28 | // ---------- 29 | 30 | std::shared_ptr batchExecStatusFactory( 31 | int32_t appId); 32 | 33 | std::shared_ptr batchExecStatusFactory( 34 | std::shared_ptr ber); 35 | 36 | // Get the number of messages in a BER Status that have actually finished (i.e. 
37 | // those that have not been migrated) 38 | int getNumFinishedMessagesInBatch( 39 | std::shared_ptr berStatus); 40 | } 41 | -------------------------------------------------------------------------------- /include/faabric/util/chaining.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::util { 7 | class ChainedCallFailedException : public faabric::util::FaabricException 8 | { 9 | public: 10 | explicit ChainedCallFailedException(std::string message) 11 | : FaabricException(std::move(message)) 12 | {} 13 | }; 14 | } 15 | -------------------------------------------------------------------------------- /include/faabric/util/clock.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace faabric::util { 6 | typedef std::chrono::steady_clock::time_point TimePoint; 7 | 8 | class Clock 9 | { 10 | public: 11 | Clock(); 12 | 13 | const TimePoint now(); 14 | 15 | const long epochMillis(); 16 | 17 | const long timeDiff(const TimePoint& t1, const TimePoint& t2); 18 | 19 | const long timeDiffNano(const TimePoint& t1, const TimePoint& t2); 20 | 21 | const long timeDiffMicro(const TimePoint& t1, const TimePoint& t2); 22 | }; 23 | 24 | Clock& getGlobalClock(); 25 | } 26 | -------------------------------------------------------------------------------- /include/faabric/util/compare.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::util { 7 | template 8 | bool compareArrays(T* v1, T* v2, int size) 9 | { 10 | for (int i = 0; i < size; i++) { 11 | if (v1[i] != v2[i]) { 12 | return false; 13 | } 14 | } 15 | 16 | return true; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /include/faabric/util/config.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define MPI_HOST_STATE_LEN 20 6 | 7 | #define DEFAULT_TIMEOUT 60000 8 | #define RESULT_KEY_EXPIRY 30000 9 | #define STATUS_KEY_EXPIRY 300000 10 | 11 | namespace faabric::util { 12 | class SystemConfig 13 | { 14 | 15 | public: 16 | // System 17 | std::string serialisation; 18 | std::string logLevel; 19 | std::string logFile; 20 | std::string stateMode; 21 | std::string deltaSnapshotEncoding; 22 | 23 | // Redis 24 | std::string redisStateHost; 25 | std::string redisQueueHost; 26 | std::string redisPort; 27 | 28 | // Scheduling 29 | int overrideCpuCount; 30 | int overrideFreeCpuStart; 31 | std::string batchSchedulerMode; 32 | 33 | // Worker-related timeouts 34 | int globalMessageTimeout; 35 | int boundTimeout; 36 | int reaperIntervalSeconds; 37 | 38 | // MPI 39 | int defaultMpiWorldSize; 40 | 41 | // Endpoint 42 | std::string endpointInterface; 43 | std::string endpointHost; 44 | int endpointPort; 45 | int endpointNumThreads; 46 | 47 | // Transport 48 | int functionServerThreads; 49 | int stateServerThreads; 50 | int snapshotServerThreads; 51 | int pointToPointServerThreads; 52 | 53 | // Dirty tracking 54 | std::string dirtyTrackingMode; 55 | std::string diffingMode; 56 | 57 | // Planner 58 | std::string plannerHost; 59 | int plannerPort; 60 | 61 | SystemConfig(); 62 | 63 | void print(); 64 | 65 | void reset(); 66 | 67 | private: 68 | int getSystemConfIntParam(const char* name, const char* defaultValue); 69 | 70 | void initialise(); 71 | }; 72 | 73 | SystemConfig& getSystemConfig(); 74 | } 75 | -------------------------------------------------------------------------------- /include/faabric/util/crash.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | namespace faabric::util { 3 | 4 | /* 5 | * Sets up crash handling. By default covers a number of signals that would 6 | * otherwise cause a crash. 
Signal argument can be provided to reinstate crash 7 | * handling for a specific signal after it's been used elsewhere in the 8 | * application (e.g. for dirty tracking). 9 | */ 10 | void setUpCrashHandler(int sig = -1); 11 | 12 | /* 13 | * Prints the stack trace for a given signal. Only to be called in signal 14 | * handlers. 15 | */ 16 | void handleCrash(int sig); 17 | 18 | } 19 | -------------------------------------------------------------------------------- /include/faabric/util/delta.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace faabric::util { 9 | 10 | struct DeltaSettings 11 | { 12 | // pages=SIZE; 13 | bool usePages = true; 14 | size_t pageSize = 4096; 15 | // xor; 16 | bool xorWithOld = true; 17 | // zstd=LEVEL; 18 | bool useZstd = true; 19 | int zstdLevel = 1; 20 | 21 | explicit DeltaSettings(const std::string& definition); 22 | std::string toString() const; 23 | }; 24 | 25 | inline constexpr uint8_t DELTA_PROTOCOL_VERSION = 1; 26 | inline constexpr int DELTA_ZSTD_COMPRESS_LEVEL = 1; 27 | 28 | enum DeltaCommand : uint8_t 29 | { 30 | // followed by u32(total size) 31 | DELTACMD_TOTAL_SIZE = 0x00, 32 | // followed by u64(compressed length), u64(decompressed length), 33 | // bytes(compressed commands) 34 | DELTACMD_ZSTD_COMPRESSED_COMMANDS = 0x01, 35 | // followed by u32(offset), u32(length), bytes(data) 36 | DELTACMD_DELTA_OVERWRITE = 0x02, 37 | // followed by u32(offset), u32(length), bytes(data) 38 | DELTACMD_DELTA_XOR = 0x03, 39 | // final command 40 | DELTACMD_END = 0xFE, 41 | }; 42 | 43 | std::vector serializeDelta(const DeltaSettings& cfg, 44 | const uint8_t* oldDataStart, 45 | size_t oldDataLen, 46 | const uint8_t* newDataStart, 47 | size_t newDataLen); 48 | 49 | void applyDelta(const std::vector& delta, 50 | std::function setDataSize, 51 | std::function getDataPointer); 52 | 53 | } 54 | 
-------------------------------------------------------------------------------- /include/faabric/util/environment.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace faabric::util { 6 | std::string getEnvVar(const std::string& key, const std::string& deflt); 7 | 8 | std::string setEnvVar(const std::string& varName, const std::string& value); 9 | 10 | void unsetEnvVar(const std::string& varName); 11 | 12 | unsigned int getUsableCores(); 13 | } 14 | -------------------------------------------------------------------------------- /include/faabric/util/exception.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::util { 7 | class FaabricException : public std::exception 8 | { 9 | public: 10 | explicit FaabricException(std::string message) 11 | : _message(std::move(message)) 12 | {} 13 | 14 | [[nodiscard]] const char* what() const noexcept override 15 | { 16 | return _message.c_str(); 17 | } 18 | 19 | protected: 20 | std::string _message; 21 | }; 22 | } 23 | -------------------------------------------------------------------------------- /include/faabric/util/files.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace faabric::util { 8 | std::string readFileToString(const std::string& path); 9 | 10 | std::vector readFileToBytes(const std::string& path); 11 | 12 | void writeBytesToFile(const std::string& path, 13 | const std::vector& data); 14 | 15 | bool isWasm(const std::vector& bytes); 16 | } 17 | -------------------------------------------------------------------------------- /include/faabric/util/func.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define 
MIGRATED_FUNCTION_RETURN_VALUE -99 9 | #define FROZEN_FUNCTION_RETURN_VALUE -98 10 | 11 | namespace faabric::util { 12 | 13 | class FunctionFrozenException : public faabric::util::FaabricException 14 | { 15 | public: 16 | explicit FunctionFrozenException(std::string message) 17 | : FaabricException(std::move(message)) 18 | {} 19 | }; 20 | 21 | class FunctionMigratedException : public faabric::util::FaabricException 22 | { 23 | public: 24 | explicit FunctionMigratedException(std::string message) 25 | : FaabricException(std::move(message)) 26 | {} 27 | }; 28 | 29 | std::string funcToString(const faabric::Message& msg, bool includeId); 30 | 31 | std::string funcToString( 32 | const std::shared_ptr& req); 33 | 34 | unsigned int setMessageId(faabric::Message& msg); 35 | 36 | std::string buildAsyncResponse(const faabric::Message& msg); 37 | 38 | std::shared_ptr messageFactoryShared( 39 | const std::string& user, 40 | const std::string& function); 41 | 42 | faabric::Message messageFactory(const std::string& user, 43 | const std::string& function); 44 | 45 | std::string resultKeyFromMessageId(unsigned int mid); 46 | 47 | std::string statusKeyFromMessageId(unsigned int mid); 48 | 49 | std::vector messageToBytes(const faabric::Message& msg); 50 | 51 | std::vector getArgvForMessage(const faabric::Message& msg); 52 | 53 | /* 54 | * Gets the key for the main thread snapshot for the given message. Result will 55 | * be the same on all hosts. 
56 | */ 57 | std::string getMainThreadSnapshotKey(const faabric::Message& msg); 58 | 59 | } 60 | -------------------------------------------------------------------------------- /include/faabric/util/gids.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace faabric::util { 6 | unsigned int generateGid(); 7 | } 8 | -------------------------------------------------------------------------------- /include/faabric/util/hwloc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::util { 7 | 8 | const int NO_CPU_IDX = -1; 9 | const int GHA_CPU_IDX = -2; 10 | 11 | class FaabricCpuSet 12 | { 13 | public: 14 | FaabricCpuSet(int cpuIdxIn = NO_CPU_IDX); 15 | FaabricCpuSet& operator=(const FaabricCpuSet&) = delete; 16 | FaabricCpuSet(const FaabricCpuSet&) = delete; 17 | 18 | ~FaabricCpuSet(); 19 | 20 | cpu_set_t* get() { return &cpuSet; } 21 | 22 | private: 23 | cpu_set_t cpuSet; 24 | 25 | // CPU index in internal CPU accounting 26 | int cpuIdx = NO_CPU_IDX; 27 | }; 28 | 29 | // Pin thread to any "unpinned" CPUs. Returns the CPU set it was pinned to. 
30 | // We return a unique pointer to enforce RAII on the pinned-to CPU 31 | std::unique_ptr pinThreadToFreeCpu(pthread_t thread); 32 | } 33 | -------------------------------------------------------------------------------- /include/faabric/util/json.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace faabric::util { 8 | std::string messageToJson(const google::protobuf::Message& msg); 9 | 10 | void jsonToMessage(const std::string& jsonStr, google::protobuf::Message* msg); 11 | 12 | class JsonSerialisationException : public faabric::util::FaabricException 13 | { 14 | public: 15 | explicit JsonSerialisationException(std::string message) 16 | : FaabricException(std::move(message)) 17 | {} 18 | }; 19 | } 20 | -------------------------------------------------------------------------------- /include/faabric/util/latch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace faabric::util { 8 | 9 | #define DEFAULT_LATCH_TIMEOUT_MS 10000 10 | 11 | class Latch : public std::enable_shared_from_this 12 | { 13 | public: 14 | // WARNING: this latch must be shared between threads using a shared 15 | // pointer, otherwise there seems to be some nasty race conditions related 16 | // to its destruction. 
17 | static std::shared_ptr create( 18 | int count, 19 | int timeoutMs = DEFAULT_LATCH_TIMEOUT_MS); 20 | 21 | explicit Latch(int countIn, int timeoutMsIn); 22 | 23 | void wait(); 24 | 25 | private: 26 | int count; 27 | int waiters = 0; 28 | 29 | int timeoutMs; 30 | 31 | std::mutex mx; 32 | std::condition_variable cv; 33 | }; 34 | } 35 | -------------------------------------------------------------------------------- /include/faabric/util/locks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define DEFAULT_FLAG_WAIT_MS 10000 12 | 13 | namespace faabric::util { 14 | typedef std::unique_lock UniqueLock; 15 | typedef std::unique_lock FullLock; 16 | typedef std::shared_lock SharedLock; 17 | 18 | class FlagWaiter : public std::enable_shared_from_this 19 | { 20 | public: 21 | FlagWaiter(int timeoutMsIn = DEFAULT_FLAG_WAIT_MS); 22 | 23 | void waitOnFlag(); 24 | 25 | void setFlag(bool value); 26 | 27 | private: 28 | int timeoutMs; 29 | 30 | std::mutex flagMx; 31 | std::condition_variable cv; 32 | std::atomic flag; 33 | }; 34 | } 35 | -------------------------------------------------------------------------------- /include/faabric/util/logging.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // See spdlog source for definition of levels, e.g. SPDLOG_LEVEL_TRACE=0, 4 | // SPDLOG_LEVEL_DEBUG=1 5 | // https://github.com/gabime/spdlog/blob/v1.x/include/spdlog/common.h 6 | 7 | // Note that defining SPDLOG_ACTIVE_LEVEL sets the *minimum available* log 8 | // level, however, we must also programmatically set the logging level using 9 | // spdlog::set_level (or it will default to info). 10 | // 11 | // Defining this minimum level is to noop debug and trace logging statements 12 | // in a release build for performance reasons. 
13 | 14 | #ifdef NDEBUG 15 | // Allow info and up in release build 16 | #define SPDLOG_ACTIVE_LEVEL 2 17 | #else 18 | // Allow all levels in debug build 19 | #define SPDLOG_ACTIVE_LEVEL 0 20 | #endif 21 | 22 | #include 23 | 24 | namespace faabric::util { 25 | void initLogging(); 26 | } 27 | -------------------------------------------------------------------------------- /include/faabric/util/macros.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | template 6 | uint8_t* BYTES(T* arr) 7 | { 8 | return reinterpret_cast(arr); 9 | } 10 | 11 | template 12 | const uint8_t* BYTES_CONST(const T* arr) 13 | { 14 | return reinterpret_cast(arr); 15 | } 16 | 17 | #define SLEEP_MS(ms) usleep((ms) * 1000) 18 | 19 | #define UNUSED(x) (void)(x) 20 | -------------------------------------------------------------------------------- /include/faabric/util/network.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define LOCALHOST "127.0.0.1" 6 | 7 | namespace faabric::util { 8 | std::string getIPFromHostname(const std::string& hostname); 9 | 10 | std::string getPrimaryIPForThisHost(const std::string& interface); 11 | } 12 | -------------------------------------------------------------------------------- /include/faabric/util/ptp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::util { 7 | faabric::PointToPointMappings ptpMappingsFromSchedulingDecision( 8 | std::shared_ptr decision); 9 | } 10 | -------------------------------------------------------------------------------- /include/faabric/util/random.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::util { 7 | std::string randomString(int len); 8 | 9 | std::string 
/**
 * Renders the elements of a vector as a bracketed, comma-separated list,
 * e.g. {1, 2, 3} becomes "[1, 2, 3]" and an empty vector becomes "[]".
 *
 * Elements are written with operator<<, so T must be streamable.
 *
 * The vector is taken by const reference so that logging a large vector does
 * not copy it first.
 */
template<class T>
std::string vectorToString(const std::vector<T>& vec)
{
    std::stringstream ss;

    ss << "[";

    // Emit the separator before every element except the first. This avoids
    // the signed/unsigned index comparison and the `size() - 1` arithmetic
    // that wraps around on an empty vector.
    bool isFirst = true;
    for (const auto& elem : vec) {
        if (!isFirst) {
            ss << ", ";
        }
        ss << elem;
        isFirst = false;
    }

    ss << "]";

    return ss.str();
}
| } 12 | -------------------------------------------------------------------------------- /include/faabric/util/timing.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #ifdef TRACE_ALL 7 | #define PROF_BEGIN faabric::util::startGlobalTimer(); 8 | #define PROF_START(name) \ 9 | const faabric::util::TimePoint name = faabric::util::startTimer(); 10 | #define PROF_END(name) faabric::util::logEndTimer(#name, name); 11 | #define PROF_SUMMARY faabric::util::printTimerTotals(); 12 | #else 13 | #define PROF_BEGIN 14 | #define PROF_START(name) 15 | #define PROF_END(name) 16 | #define PROF_SUMMARY 17 | #endif 18 | 19 | namespace faabric::util { 20 | faabric::util::TimePoint startTimer(); 21 | 22 | long getTimeDiffNanos(const faabric::util::TimePoint& begin); 23 | 24 | long getTimeDiffMicros(const faabric::util::TimePoint& begin); 25 | 26 | double getTimeDiffMillis(const faabric::util::TimePoint& begin); 27 | 28 | void logEndTimer(const std::string& label, 29 | const faabric::util::TimePoint& begin); 30 | 31 | void startGlobalTimer(); 32 | 33 | void printTimerTotals(); 34 | 35 | uint64_t timespecToNanos(struct timespec* nativeTimespec); 36 | 37 | void nanosToTimespec(uint64_t nanos, struct timespec* nativeTimespec); 38 | } 39 | -------------------------------------------------------------------------------- /include/faabric/wasm/wasm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | int helloFaabricWasm(); 4 | -------------------------------------------------------------------------------- /leak-sanitizer-ignorelist.txt: -------------------------------------------------------------------------------- 1 | # For local MPI messages we send malloc-ed pointers through in-memory queues, 2 | # what makes LSAN unhappy 3 | leak:MpiWorld::send 4 | -------------------------------------------------------------------------------- 
/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 79 3 | 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | black>=24.4.0 2 | breathe>=4.35.0 3 | flake8>=7.0.0 4 | invoke>=2.0.0 5 | myst_parser>=2.0.0 6 | PyGithub>=1.55 7 | sphinx>=7.3.6 8 | sphinx-rtd-theme>=2.0.0 9 | -------------------------------------------------------------------------------- /src/batch-scheduler/BatchScheduler.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace faabric::batch_scheduler { 9 | 10 | // We have one static instance of the BatchScheduler globally. Note that the 11 | // BatchScheduler is stateless, so having one static instance is very much like 12 | // having a C++ interface 13 | static std::shared_ptr batchScheduler = nullptr; 14 | 15 | std::shared_ptr getBatchScheduler() 16 | { 17 | if (batchScheduler != nullptr) { 18 | return batchScheduler; 19 | } 20 | 21 | std::string mode = faabric::util::getSystemConfig().batchSchedulerMode; 22 | 23 | if (mode == "bin-pack") { 24 | batchScheduler = std::make_shared(); 25 | } else if (mode == "compact") { 26 | batchScheduler = std::make_shared(); 27 | } else if (mode == "spot") { 28 | batchScheduler = std::make_shared(); 29 | } else { 30 | SPDLOG_ERROR("Unrecognised batch scheduler mode: {}", mode); 31 | throw std::runtime_error("Unrecognised batch scheduler mode"); 32 | } 33 | 34 | return batchScheduler; 35 | } 36 | 37 | void resetBatchScheduler() 38 | { 39 | batchScheduler = nullptr; 40 | } 41 | 42 | void resetBatchScheduler(const std::string& newMode) 43 | { 44 | resetBatchScheduler(); 45 | 46 | faabric::util::getSystemConfig().batchSchedulerMode = newMode; 47 | 48 | getBatchScheduler(); 49 | } 
50 | 51 | DecisionType BatchScheduler::getDecisionType( 52 | const InFlightReqs& inFlightReqs, 53 | std::shared_ptr req) 54 | { 55 | int appId = req->appid(); 56 | 57 | if (!inFlightReqs.contains(appId)) { 58 | return DecisionType::NEW; 59 | } 60 | 61 | if (req->type() == BatchExecuteRequest_BatchExecuteType_MIGRATION) { 62 | return DecisionType::DIST_CHANGE; 63 | } 64 | 65 | return DecisionType::SCALE_CHANGE; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/batch-scheduler/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | faabric_lib(scheduling_util 2 | DecisionCache.cpp 3 | SchedulingDecision.cpp 4 | ) 5 | 6 | faabric_lib(batch_scheduler 7 | BatchScheduler.cpp 8 | BinPackScheduler.cpp 9 | CompactScheduler.cpp 10 | SpotScheduler.cpp 11 | ) 12 | 13 | target_link_libraries(batch_scheduler PRIVATE 14 | faabric::util 15 | faabric::scheduling_util 16 | ) 17 | -------------------------------------------------------------------------------- /src/endpoint/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | faabric_lib(endpoint 3 | FaabricEndpoint.cpp 4 | FaabricEndpointHandler.cpp 5 | ) 6 | 7 | target_link_libraries(endpoint PRIVATE faabric::util) 8 | -------------------------------------------------------------------------------- /src/executor/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | faabric_lib(executor 2 | Executor.cpp 3 | ExecutorContext.cpp 4 | ExecutorFactory.cpp 5 | ExecutorTask.cpp 6 | ) 7 | 8 | # FIXME: do we need all these deps here? 
9 | target_link_libraries(executor PRIVATE 10 | faabric::scheduling_util 11 | faabric::snapshot 12 | faabric::state 13 | faabric::redis 14 | ) 15 | -------------------------------------------------------------------------------- /src/executor/ExecutorContext.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace faabric::executor { 4 | 5 | static thread_local std::shared_ptr context = nullptr; 6 | 7 | ExecutorContext::ExecutorContext( 8 | Executor* executorIn, 9 | std::shared_ptr reqIn, 10 | int msgIdxIn) 11 | : executor(executorIn) 12 | , req(reqIn) 13 | , msgIdx(msgIdxIn) 14 | {} 15 | 16 | bool ExecutorContext::isSet() 17 | { 18 | return context != nullptr; 19 | } 20 | 21 | void ExecutorContext::set(Executor* executorIn, 22 | std::shared_ptr reqIn, 23 | int appIdxIn) 24 | { 25 | context = std::make_shared(executorIn, reqIn, appIdxIn); 26 | } 27 | 28 | void ExecutorContext::unset() 29 | { 30 | context = nullptr; 31 | } 32 | 33 | std::shared_ptr ExecutorContext::get() 34 | { 35 | if (context == nullptr) { 36 | SPDLOG_ERROR("No executor context set"); 37 | throw ExecutorContextException("No executor context set"); 38 | } 39 | return context; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/executor/ExecutorFactory.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace faabric::executor { 5 | 6 | static std::shared_ptr _factory; 7 | 8 | void ExecutorFactory::flushHost() 9 | { 10 | SPDLOG_WARN("Using default flush method"); 11 | } 12 | 13 | void setExecutorFactory(std::shared_ptr fac) 14 | { 15 | _factory = fac; 16 | } 17 | 18 | std::shared_ptr getExecutorFactory() 19 | { 20 | if (_factory == nullptr) { 21 | throw std::runtime_error("No executor factory set"); 22 | } 23 | 24 | return _factory; 25 | } 26 | } 27 | 
-------------------------------------------------------------------------------- /src/executor/ExecutorTask.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace faabric::executor { 4 | 5 | ExecutorTask::ExecutorTask(int messageIndexIn, 6 | std::shared_ptr reqIn) 7 | : req(std::move(reqIn)) 8 | , messageIndex(messageIndexIn) 9 | {} 10 | } 11 | -------------------------------------------------------------------------------- /src/flat/.gitignore: -------------------------------------------------------------------------------- 1 | # Flatbuffers/ gRPC output 2 | *.fb.h 3 | *.fb.cc 4 | faabric_generated.h 5 | -------------------------------------------------------------------------------- /src/flat/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------- 2 | # Flatbuffers 3 | # ---------------------------------------------- 4 | 5 | set(FB_HEADER "${CMAKE_CURRENT_LIST_DIR}/faabric_generated.h") 6 | set(FB_HEADER_COPIED "${FAABRIC_INCLUDE_DIR}/faabric/flat/faabric_generated.h") 7 | 8 | # flatc command to generate flatbuffers files 9 | add_custom_command( 10 | OUTPUT "${FB_HEADER}" 11 | DEPENDS faabric.fbs 12 | WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} 13 | COMMAND ${CONAN_FLATBUFFERS_ROOT}/bin/flatc 14 | ARGS --cpp --gen-mutable --raw-binary faabric.fbs 15 | ) 16 | 17 | # Copy the generated headers into place 18 | add_custom_command( 19 | OUTPUT "${FB_HEADER_COPIED}" 20 | DEPENDS "${FB_HEADER}" 21 | COMMAND ${CMAKE_COMMAND} -E copy ${FB_HEADER} ${FB_HEADER_COPIED} 22 | ) 23 | 24 | add_custom_target( 25 | faabric_fbs_copied 26 | DEPENDS ${FB_HEADER_COPIED} 27 | ) 28 | 29 | # ---------------------------------------------- 30 | # Faabric wrapper library 31 | # ---------------------------------------------- 32 | 33 | faabric_lib(flat flat.cpp) 34 | add_dependencies(faabric_common_dependencies faabric_fbs_copied) 35 | 
-------------------------------------------------------------------------------- /src/flat/faabric.fbs: -------------------------------------------------------------------------------- 1 | table SnapshotMergeRegionRequest { 2 | offset:int; 3 | length:ulong; 4 | data_type:int; 5 | merge_op:int; 6 | } 7 | 8 | table SnapshotPushRequest { 9 | key:string; 10 | max_size:ulong; 11 | contents:[ubyte]; 12 | merge_regions:[SnapshotMergeRegionRequest]; 13 | } 14 | 15 | table SnapshotDeleteRequest { 16 | key:string; 17 | } 18 | 19 | table SnapshotDiffRequest { 20 | offset:int; 21 | data_type:int; 22 | merge_op:int; 23 | data:[ubyte]; 24 | } 25 | 26 | table SnapshotUpdateRequest { 27 | key:string; 28 | merge_regions:[SnapshotMergeRegionRequest]; 29 | diffs:[SnapshotDiffRequest]; 30 | } 31 | 32 | table ThreadResultRequest { 33 | app_id: int; 34 | message_id:int; 35 | return_value:int; 36 | key:string; 37 | diffs:[SnapshotDiffRequest]; 38 | } 39 | -------------------------------------------------------------------------------- /src/flat/flat.cpp: -------------------------------------------------------------------------------- 1 | // Stub to link together the module 2 | // https://stackoverflow.com/a/29518149/10662021 3 | -------------------------------------------------------------------------------- /src/mpi/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.13.0) 2 | project(faabricmpi) 3 | 4 | # ----------------------------------------------- 5 | # Standalone Faabric MPI library 6 | # ----------------------------------------------- 7 | 8 | set(FAABRICMPI_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/../../include) 9 | set(FAABRICMPI_PUBLIC_HEADERS ${FAABRICMPI_INCLUDE_DIR}/faabric/mpi/mpi.h) 10 | 11 | add_library(faabricmpi mpi.cpp ${FAABRICMPI_PUBLIC_HEADERS}) 12 | target_include_directories(faabricmpi PUBLIC ${FAABRICMPI_INCLUDE_DIR}) 13 | add_library(faabric::faabricmpi ALIAS faabricmpi) 14 | 15 | 
set_target_properties(faabricmpi 16 | PROPERTIES PUBLIC_HEADER "${FAABRICMPI_PUBLIC_HEADERS}" 17 | ) 18 | 19 | if(CMAKE_SYSTEM_NAME STREQUAL "Wasm") 20 | message(STATUS "Faabric MPI WebAssembly build") 21 | set(LIB_DIRECTORY ${CMAKE_SYSROOT}/lib/wasm32-wasi) 22 | 23 | install(TARGETS faabricmpi 24 | ARCHIVE DESTINATION ${LIB_DIRECTORY} 25 | LIBRARY DESTINATION ${LIB_DIRECTORY} 26 | PUBLIC_HEADER DESTINATION ${CMAKE_SYSROOT}/include/faabric/mpi 27 | ) 28 | else() 29 | message(STATUS "Faabric MPI native build") 30 | install(TARGETS faabricmpi 31 | PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_PREFIX}/include/faabric/mpi 32 | ) 33 | endif() 34 | 35 | 36 | # ----------------------------------------------- 37 | # MPI Implementation for Faabric 38 | # ----------------------------------------------- 39 | 40 | if (NOT ("${CMAKE_PROJECT_NAME}" STREQUAL "faabricmpi")) 41 | faabric_lib(mpi 42 | MpiContext.cpp 43 | MpiMessage.cpp 44 | MpiWorld.cpp 45 | MpiWorldRegistry.cpp 46 | ) 47 | 48 | target_link_libraries(mpi PRIVATE 49 | faabric::scheduler 50 | faabric::transport_tcp 51 | faabric::faabricmpi 52 | ) 53 | endif () 54 | -------------------------------------------------------------------------------- /src/mpi/MpiContext.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace faabric::mpi { 8 | MpiContext::MpiContext() 9 | : isMpi(false) 10 | , rank(-1) 11 | , worldId(-1) 12 | {} 13 | 14 | int MpiContext::createWorld(faabric::Message& msg) 15 | { 16 | 17 | if (msg.mpirank() > 0) { 18 | SPDLOG_ERROR("Attempting to initialise world for non-zero rank {}", 19 | msg.mpirank()); 20 | throw std::runtime_error("Initialising world on non-zero rank"); 21 | } 22 | 23 | worldId = (int)faabric::util::generateGid(); 24 | SPDLOG_DEBUG("Initialising world {}", worldId); 25 | 26 | // Create the MPI world 27 | MpiWorldRegistry& reg = getMpiWorldRegistry(); 28 | reg.createWorld(msg, 
worldId); 29 | 30 | // Set up this context 31 | isMpi = true; 32 | rank = 0; 33 | 34 | // Return the world id to store it in the original message 35 | return worldId; 36 | } 37 | 38 | void MpiContext::joinWorld(faabric::Message& msg) 39 | { 40 | if (!msg.ismpi()) { 41 | // Not an MPI call 42 | return; 43 | } 44 | 45 | isMpi = true; 46 | worldId = msg.mpiworldid(); 47 | rank = msg.mpirank(); 48 | 49 | // Register with the world 50 | MpiWorldRegistry& registry = getMpiWorldRegistry(); 51 | registry.getOrInitialiseWorld(msg); 52 | } 53 | 54 | bool MpiContext::getIsMpi() const 55 | { 56 | return isMpi; 57 | } 58 | 59 | int MpiContext::getRank() const 60 | { 61 | return rank; 62 | } 63 | 64 | int MpiContext::getWorldId() const 65 | { 66 | return worldId; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/mpi/MpiMessage.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace faabric::mpi { 9 | 10 | void parseMpiMsg(const std::vector& bytes, MpiMessage* msg) 11 | { 12 | assert(msg != nullptr); 13 | assert(bytes.size() >= sizeof(MpiMessage)); 14 | std::memcpy(msg, bytes.data(), sizeof(MpiMessage)); 15 | size_t thisPayloadSize = bytes.size() - sizeof(MpiMessage); 16 | assert(thisPayloadSize == payloadSize(*msg)); 17 | 18 | if (thisPayloadSize == 0) { 19 | msg->buffer = nullptr; 20 | return; 21 | } 22 | 23 | msg->buffer = faabric::util::malloc(thisPayloadSize); 24 | std::memcpy( 25 | msg->buffer, bytes.data() + sizeof(MpiMessage), thisPayloadSize); 26 | } 27 | 28 | void serializeMpiMsg(std::vector& buffer, const MpiMessage& msg) 29 | { 30 | std::memcpy(buffer.data(), &msg, sizeof(MpiMessage)); 31 | size_t payloadSz = payloadSize(msg); 32 | if (payloadSz > 0 && msg.buffer != nullptr) { 33 | std::memcpy(buffer.data() + sizeof(MpiMessage), msg.buffer, payloadSz); 34 | } 35 | } 36 | } 37 | 
-------------------------------------------------------------------------------- /src/planner/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------- 2 | # Protobuf generation 3 | # ---------------------------------------------- 4 | 5 | set(PLANNER_PB_HEADER_COPIED "${FAABRIC_INCLUDE_DIR}/faabric/planner/planner.pb.h") 6 | 7 | protobuf_generate_cpp(PLANNER_PB_SRC PLANNER_PB_HEADER planner.proto) 8 | 9 | # Copy the generated headers into place 10 | add_custom_command( 11 | OUTPUT "${PLANNER_PB_HEADER_COPIED}" 12 | DEPENDS "${PLANNER_PB_HEADER}" 13 | COMMAND ${CMAKE_COMMAND} 14 | ARGS -E copy ${PLANNER_PB_HEADER} ${FAABRIC_INCLUDE_DIR}/faabric/planner/ 15 | ) 16 | 17 | add_custom_target( 18 | planner_pbh_copied 19 | DEPENDS ${PLANNER_PB_HEADER_COPIED} 20 | ) 21 | 22 | faabric_lib(planner 23 | Planner.cpp 24 | PlannerClient.cpp 25 | PlannerEndpointHandler.cpp 26 | PlannerServer.cpp 27 | ${PLANNER_PB_SRC} 28 | ) 29 | 30 | # Make sure the protobuf headers are built before we build anything else. 
31 | # Otherwise some pre-processor checks will fail 32 | add_dependencies(faabric_common_dependencies planner_pbh_copied) 33 | 34 | target_link_libraries(planner PRIVATE 35 | faabric::batch_scheduler 36 | faabric::endpoint 37 | faabric::transport 38 | faabric::util 39 | ) 40 | 41 | add_executable(planner_server planner_server.cpp) 42 | target_link_libraries(planner_server PRIVATE faabric::faabric) 43 | 44 | add_executable(is_app_migratable is_app_migratable.cpp) 45 | target_link_libraries(is_app_migratable PRIVATE faabric::faabric) 46 | -------------------------------------------------------------------------------- /src/planner/planner_server.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int main() 10 | { 11 | // Initialise logging 12 | faabric::util::initLogging(); 13 | 14 | // Initialise crash handler 15 | faabric::util::setUpCrashHandler(); 16 | 17 | // Start both the planner server and the planner http endpoint 18 | SPDLOG_INFO("Starting planner server"); 19 | faabric::planner::PlannerServer plannerServer; 20 | // The RPC server starts in the background 21 | plannerServer.start(); 22 | 23 | // Start also a snapshot server to synchronise snapshots 24 | SPDLOG_INFO("Starting planner snapshot server"); 25 | faabric::snapshot::SnapshotServer snapshotServer; 26 | snapshotServer.start(); 27 | 28 | // The faabric endpoint starts in the foreground 29 | SPDLOG_INFO("Starting planner endpoint"); 30 | // We get the port from the global config, but the number of threads from 31 | // the planner config 32 | faabric::endpoint::FaabricEndpoint endpoint( 33 | faabric::util::getSystemConfig().plannerPort, 34 | faabric::planner::getPlanner().getConfig().numthreadshttpserver(), 35 | std::make_shared()); 36 | endpoint.start(faabric::endpoint::EndpointMode::SIGNAL); 37 | 38 | SPDLOG_INFO("Planner snapshot server shutting down"); 39 | 
snapshotServer.stop(); 40 | 41 | SPDLOG_INFO("Planner server shutting down"); 42 | plannerServer.stop(); 43 | } 44 | -------------------------------------------------------------------------------- /src/proto/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------- 2 | # Protobuf/gRPC generation 3 | # ---------------------------------------------- 4 | 5 | set(PB_HEADER_COPIED "${FAABRIC_INCLUDE_DIR}/faabric/proto/faabric.pb.h") 6 | 7 | protobuf_generate_cpp(PB_SRC PB_HEADER faabric.proto) 8 | 9 | # Copy the generated headers into place 10 | add_custom_command( 11 | OUTPUT "${PB_HEADER_COPIED}" 12 | DEPENDS "${PB_HEADER}" 13 | COMMAND ${CMAKE_COMMAND} 14 | ARGS -E copy ${PB_HEADER} ${FAABRIC_INCLUDE_DIR}/faabric/proto/ 15 | ) 16 | 17 | add_custom_target( 18 | faabric_pbh_copied 19 | DEPENDS ${PB_HEADER_COPIED} 20 | ) 21 | 22 | # ---------------------------------------------- 23 | # Faabric wrapper library 24 | # ---------------------------------------------- 25 | 26 | faabric_lib(proto ${PB_SRC}) 27 | add_dependencies(faabric_common_dependencies faabric_pbh_copied) 28 | target_include_directories(proto_deps INTERFACE ${CMAKE_CURRENT_BINARY_DIR}) 29 | -------------------------------------------------------------------------------- /src/redis/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | faabric_lib(redis Redis.cpp) 3 | 4 | target_link_libraries(redis PRIVATE faabric::util) 5 | -------------------------------------------------------------------------------- /src/runner/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | faabric_lib(runner FaabricMain.cpp) 3 | 4 | target_link_libraries(runner PRIVATE 5 | faabric::planner 6 | faabric::scheduler 7 | faabric::util 8 | faabric::endpoint 9 | ) 10 | 
-------------------------------------------------------------------------------- /src/scheduler/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | faabric_lib(scheduler 2 | FunctionCallClient.cpp 3 | FunctionCallServer.cpp 4 | Scheduler.cpp 5 | ) 6 | 7 | # FIXME: do we need all these deps here? 8 | target_link_libraries(scheduler PRIVATE 9 | faabric::scheduling_util 10 | faabric::snapshot 11 | faabric::state 12 | faabric::redis 13 | ) 14 | -------------------------------------------------------------------------------- /src/snapshot/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | faabric_lib(snapshot 3 | SnapshotClient.cpp 4 | SnapshotRegistry.cpp 5 | SnapshotServer.cpp 6 | ) 7 | 8 | target_link_libraries(snapshot PRIVATE 9 | faabric::proto 10 | faabric::flat 11 | faabric::transport 12 | faabric::util 13 | ) 14 | -------------------------------------------------------------------------------- /src/state/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | faabric_lib(state 3 | InMemoryStateKeyValue.cpp 4 | InMemoryStateRegistry.cpp 5 | State.cpp 6 | StateClient.cpp 7 | StateKeyValue.cpp 8 | StateServer.cpp 9 | RedisStateKeyValue.cpp 10 | ) 11 | 12 | target_link_libraries(state PRIVATE 13 | faabric::proto 14 | faabric::redis 15 | faabric::transport 16 | ) 17 | -------------------------------------------------------------------------------- /src/transport/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------- 2 | # Faabric transport library: NNG or raw TCP 3 | # ---------------------------------------------- 4 | 5 | # Raw TCP sockets 6 | add_subdirectory(tcp) 7 | 8 | # NNG sockets 9 | faabric_lib(transport 10 | Message.cpp 11 | MessageEndpoint.cpp 12 | MessageEndpointClient.cpp 13 | MessageEndpointServer.cpp 14 | # 
TODO: move to src/ptp 15 | PointToPointBroker.cpp 16 | PointToPointClient.cpp 17 | PointToPointServer.cpp 18 | ) 19 | 20 | target_link_libraries(transport PRIVATE 21 | faabric::transport_tcp 22 | faabric::util 23 | faabric::proto 24 | faabric::scheduling_util 25 | ) 26 | -------------------------------------------------------------------------------- /src/transport/Message.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | namespace faabric::transport { 7 | 8 | Message::Message(size_t bufferSize) 9 | { 10 | if (int ec = nng_msg_alloc(&nngMsg, bufferSize); ec != 0) { 11 | SPDLOG_CRITICAL("Error allocating a message of size {}: {}", 12 | bufferSize, 13 | nng_strerror(ec)); 14 | throw std::bad_alloc(); 15 | } 16 | } 17 | 18 | Message::Message(nng_msg* nngMsg) 19 | : nngMsg(nngMsg) 20 | {} 21 | 22 | Message::Message(MessageResponseCode responseCodeIn) 23 | : responseCode(responseCodeIn) 24 | {} 25 | 26 | Message::~Message() 27 | { 28 | if (nngMsg != nullptr) { 29 | nng_msg_free(nngMsg); 30 | nngMsg = nullptr; 31 | } 32 | } 33 | 34 | std::span Message::data() 35 | { 36 | auto udat = udata(); 37 | return std::span(reinterpret_cast(udat.data()), udat.size_bytes()); 38 | } 39 | 40 | std::span Message::data() const 41 | { 42 | auto udat = udata(); 43 | return std::span(reinterpret_cast(udat.data()), 44 | udat.size_bytes()); 45 | } 46 | 47 | std::span Message::udata() 48 | { 49 | return allData().size() < HEADER_MSG_SIZE 50 | ? std::span() 51 | : allData().subspan(HEADER_MSG_SIZE); 52 | } 53 | 54 | std::span Message::udata() const 55 | { 56 | return allData().size() < HEADER_MSG_SIZE 57 | ? 
std::span() 58 | : allData().subspan(HEADER_MSG_SIZE); 59 | } 60 | 61 | std::vector Message::dataCopy() const 62 | { 63 | return std::vector(udata().begin(), udata().end()); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/transport/tcp/Address.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace faabric::transport::tcp { 9 | Address::Address(const std::string& host, int port) 10 | { 11 | std::memset(&addr, 0, sizeof(sockaddr_in)); 12 | addr.sin_family = AF_INET; 13 | addr.sin_port = htons(port); 14 | if (host != ANY_HOST) { 15 | inet_pton(AF_INET, host.c_str(), &addr.sin_addr); 16 | } 17 | } 18 | 19 | Address::Address(int port) 20 | : Address(ANY_HOST, port){}; 21 | 22 | sockaddr* Address::get() const 23 | { 24 | return (sockaddr*)&addr; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/transport/tcp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | faabric_lib(transport_tcp 2 | Address.cpp 3 | RecvSocket.cpp 4 | SendSocket.cpp 5 | Socket.cpp 6 | SocketOptions.cpp 7 | ) 8 | -------------------------------------------------------------------------------- /src/transport/tcp/Socket.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | namespace faabric::transport::tcp { 8 | Socket::Socket() 9 | : connFd(socket(AF_INET, SOCK_STREAM, 0)) 10 | { 11 | if (get() <= 0) { 12 | SPDLOG_ERROR("Failed to create TCP socket: {}", std::strerror(errno)); 13 | throw std::runtime_error("Failed to create TCP socket"); 14 | } 15 | } 16 | 17 | Socket::Socket(int conn) 18 | : connFd(conn) 19 | {} 20 | 21 | Socket::~Socket() 22 | { 23 | ::close(connFd); 24 | } 25 | } 26 | 
-------------------------------------------------------------------------------- /src/util/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | faabric_lib(util 3 | ExecGraph.cpp 4 | PeriodicBackgroundThread.cpp 5 | barrier.cpp 6 | batch.cpp 7 | bytes.cpp 8 | config.cpp 9 | clock.cpp 10 | crash.cpp 11 | delta.cpp 12 | dirty.cpp 13 | environment.cpp 14 | files.cpp 15 | func.cpp 16 | gids.cpp 17 | hwloc.cpp 18 | json.cpp 19 | latch.cpp 20 | locks.cpp 21 | logging.cpp 22 | memory.cpp 23 | network.cpp 24 | ptp.cpp 25 | queue.cpp 26 | random.cpp 27 | snapshot.cpp 28 | state.cpp 29 | string_tools.cpp 30 | timing.cpp 31 | testing.cpp 32 | ) 33 | -------------------------------------------------------------------------------- /src/util/PeriodicBackgroundThread.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace faabric::util { 6 | 7 | void PeriodicBackgroundThread::start(int intervalSecondsIn) 8 | { 9 | intervalSeconds = intervalSecondsIn; 10 | if (intervalSecondsIn <= 0) { 11 | SPDLOG_DEBUG("Skipping starting periodic background thread"); 12 | return; 13 | } 14 | 15 | SPDLOG_DEBUG("Starting periodic background thread with interval {}s", 16 | intervalSeconds); 17 | 18 | workThread = std::make_unique([&](std::stop_token st) { 19 | while (!st.stop_requested()) { 20 | faabric::util::UniqueLock lock(mx); 21 | if (st.stop_requested()) { 22 | break; 23 | } 24 | 25 | bool isStopped = timeoutCv.wait_for( 26 | lock, 27 | st, 28 | std::chrono::milliseconds(intervalSeconds * 1000), 29 | [&st] { return st.stop_requested(); }); 30 | 31 | // If we hit the timeout it means we have not been notified to 32 | // stop. 
Thus we can do work 33 | if (!isStopped) { 34 | doWork(); 35 | } 36 | }; 37 | 38 | SPDLOG_DEBUG("Exiting periodic background thread"); 39 | }); 40 | } 41 | 42 | void PeriodicBackgroundThread::tidyUp() 43 | { 44 | // Hook for subclasses 45 | } 46 | 47 | void PeriodicBackgroundThread::stop() 48 | { 49 | if (workThread == nullptr) { 50 | return; 51 | } 52 | 53 | SPDLOG_TRACE("Stopping periodic background thread"); 54 | 55 | workThread->request_stop(); 56 | timeoutCv.notify_one(); 57 | 58 | // Join to make sure no background tasks are running 59 | if (workThread->joinable()) { 60 | workThread->join(); 61 | } 62 | 63 | // Hook into tidy up function 64 | tidyUp(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/util/barrier.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace faabric::util { 6 | 7 | std::shared_ptr Barrier::create( 8 | int count, 9 | std::function completionFunctionIn, 10 | int timeoutMs) 11 | { 12 | return std::make_shared(count, completionFunctionIn, timeoutMs); 13 | } 14 | 15 | std::shared_ptr Barrier::create(int count, int timeoutMs) 16 | { 17 | return std::make_shared( 18 | count, []() {}, timeoutMs); 19 | } 20 | 21 | Barrier::Barrier(int countIn, 22 | std::function completionFunctionIn, 23 | int timeoutMsIn) 24 | : count(countIn) 25 | , completionFunction(completionFunctionIn) 26 | , timeoutMs(timeoutMsIn) 27 | {} 28 | 29 | void Barrier::wait() 30 | { 31 | UniqueLock lock(mx); 32 | 33 | visits++; 34 | int phaseCompletionVisits = currentPhase * count; 35 | 36 | if (visits == phaseCompletionVisits) { 37 | completionFunction(); 38 | currentPhase++; 39 | cv.notify_all(); 40 | } else { 41 | auto timePoint = std::chrono::system_clock::now() + 42 | std::chrono::milliseconds(timeoutMs); 43 | 44 | if (!cv.wait_until(lock, timePoint, [this, phaseCompletionVisits] { 45 | return visits >= phaseCompletionVisits; 46 | 
})) { 47 | std::string msg = 48 | fmt::format("Barrier timed out ({}ms)", timeoutMs); 49 | SPDLOG_ERROR(msg); 50 | throw std::runtime_error("Barrier timed out"); 51 | } 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/util/clock.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace faabric::util { 4 | Clock& getGlobalClock() 5 | { 6 | static Clock instance; 7 | return instance; 8 | } 9 | 10 | Clock::Clock() = default; 11 | 12 | const TimePoint Clock::now() 13 | { 14 | return std::chrono::steady_clock::now(); 15 | } 16 | 17 | const long Clock::epochMillis() 18 | { 19 | long millis = std::chrono::duration_cast( 20 | std::chrono::system_clock::now().time_since_epoch()) 21 | .count(); 22 | 23 | return millis; 24 | } 25 | 26 | const long Clock::timeDiff(const TimePoint& t1, const TimePoint& t2) 27 | { 28 | long age = 29 | std::chrono::duration_cast(t1 - t2).count(); 30 | return age; 31 | } 32 | 33 | const long Clock::timeDiffMicro(const TimePoint& t1, const TimePoint& t2) 34 | { 35 | long age = 36 | std::chrono::duration_cast(t1 - t2).count(); 37 | return age; 38 | } 39 | 40 | const long Clock::timeDiffNano(const TimePoint& t1, const TimePoint& t2) 41 | { 42 | long age = 43 | std::chrono::duration_cast(t1 - t2).count(); 44 | return age; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/util/crash.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | const std::string_view ABORT_MSG = "Caught stack backtrace:\n"; 13 | constexpr int TEST_SIGNAL = 12341234; 14 | 15 | // Must be async-signal-safe - don't call allocating functions 16 | void crashHandler(int sig) noexcept 17 | { 18 | faabric::util::handleCrash(sig); 19 | } 20 | 21 | 
namespace faabric::util { 22 | 23 | void handleCrash(int sig) 24 | { 25 | std::array stackPtrs; 26 | size_t filledStacks = backtrace(stackPtrs.data(), stackPtrs.size()); 27 | if (sig != TEST_SIGNAL) { 28 | write(STDERR_FILENO, ABORT_MSG.data(), ABORT_MSG.size()); 29 | } 30 | backtrace_symbols_fd(stackPtrs.data(), 31 | std::min(filledStacks, stackPtrs.size()), 32 | STDERR_FILENO); 33 | if (sig != TEST_SIGNAL) { 34 | signal(sig, SIG_DFL); 35 | raise(sig); 36 | _exit(1); /* async-signal-safe fallback; exit() would run atexit handlers and flush stdio */ 37 | } 38 | } 39 | 40 | void setUpCrashHandler(int sig) 41 | { 42 | std::vector sigs; 43 | if (sig >= 0) { 44 | sigs = { sig }; 45 | } else { 46 | fputs("Testing crash handler backtrace:\n", stderr); 47 | fflush(stderr); 48 | crashHandler(TEST_SIGNAL); 49 | SPDLOG_INFO("Installing crash handler"); 50 | 51 | // We don't handle SIGSEGV here because segfault handling is 52 | // necessary for dirty tracking and if this handler gets initialised 53 | // after the one for dirty tracking it thinks legitimate dirty tracking 54 | // segfaults are crashes 55 | sigs = { SIGABRT, SIGILL, SIGFPE }; 56 | } 57 | 58 | for (auto signo : sigs) { 59 | if (signal(signo, &crashHandler) == SIG_ERR) { 60 | SPDLOG_WARN("Couldn't install handler for signal {}", signo); 61 | } else { 62 | SPDLOG_INFO("Installed handler for signal {}", signo); 63 | } 64 | } 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/util/environment.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | namespace faabric::util { 8 | std::string getEnvVar(std::string const& key, std::string const& deflt) 9 | { 10 | char const* val = getenv(key.c_str()); 11 | 12 | if (val == nullptr) { 13 | return deflt; 14 | } 15 | 16 | std::string retVal(val); 17 | 18 | if (retVal.length() == 0) { 19 | return deflt; 20 | } else { 21 | return retVal; 22 | } 23 | } 24 | 25 | std::string setEnvVar(const std::string& 
varName, const std::string& value) 26 | { 27 | std::string original = getEnvVar(varName, ""); 28 | setenv(varName.c_str(), value.c_str(), 1); 29 | 30 | return original; 31 | } 32 | 33 | void unsetEnvVar(const std::string& varName) 34 | { 35 | unsetenv(varName.c_str()); 36 | } 37 | 38 | unsigned int getUsableCores() 39 | { 40 | auto conf = faabric::util::getSystemConfig(); 41 | unsigned int nCores; 42 | 43 | if (conf.overrideCpuCount == 0) { 44 | nCores = std::jthread::hardware_concurrency(); 45 | } else { 46 | nCores = conf.overrideCpuCount; 47 | } 48 | 49 | // Returns zero when unable to detect 50 | if (nCores == 0) { 51 | throw std::runtime_error("Unable to detect number of cores"); 52 | } 53 | 54 | return nCores; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/util/gids.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | static std::atomic_int counter = 0; 10 | static std::atomic_size_t gidKeyHash = 0; 11 | static std::mutex gidMx; 12 | 13 | #define GID_LEN 20 14 | 15 | namespace faabric::util { 16 | unsigned int generateGid() 17 | { 18 | if (gidKeyHash.load(std::memory_order_relaxed) == 0) { 19 | faabric::util::UniqueLock lock(gidMx); 20 | if (gidKeyHash == 0) { 21 | // Generate random hash 22 | std::string gidKey = faabric::util::randomString(GID_LEN); 23 | gidKeyHash = std::hash{}(gidKey); 24 | } 25 | } 26 | 27 | unsigned int intHash = 28 | gidKeyHash.load(std::memory_order_relaxed) % INT32_MAX; 29 | unsigned int result = intHash + counter.fetch_add(1); 30 | if (result) { 31 | return result; 32 | } else { 33 | return intHash + counter.fetch_add(1); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/util/json.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 
4 | 5 | #include 6 | 7 | namespace faabric::util { 8 | std::string messageToJson(const google::protobuf::Message& msg) 9 | { 10 | std::string jsonStr; 11 | 12 | // Set the JSON print options. This is very important to ensure backwards- 13 | // compatibility with clients sending HTTP requests to faabric 14 | google::protobuf::util::JsonOptions jsonOptions; 15 | jsonOptions.always_print_enums_as_ints = true; 16 | 17 | google::protobuf::util::Status status = 18 | google::protobuf::util::MessageToJsonString(msg, &jsonStr, jsonOptions); 19 | if (!status.ok()) { 20 | SPDLOG_ERROR("Serialising protobuf message to JSON string: {}", 21 | status.message().data()); 22 | throw faabric::util::JsonSerialisationException( 23 | status.message().data()); 24 | } 25 | 26 | return jsonStr; 27 | } 28 | 29 | void jsonToMessage(const std::string& jsonStr, google::protobuf::Message* msg) 30 | { 31 | google::protobuf::util::Status status = 32 | google::protobuf::util::JsonStringToMessage(jsonStr, msg); 33 | if (!status.ok()) { 34 | SPDLOG_ERROR("Deserialising JSON string to protobuf message: {}", 35 | status.message().data()); 36 | throw faabric::util::JsonSerialisationException( 37 | status.message().data()); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/util/latch.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace faabric::util { 6 | 7 | std::shared_ptr Latch::create(int count, int timeoutMs) 8 | { 9 | return std::make_shared(count, timeoutMs); 10 | } 11 | 12 | Latch::Latch(int countIn, int timeoutMsIn) 13 | : count(countIn) 14 | , timeoutMs(timeoutMsIn) 15 | {} 16 | 17 | void Latch::wait() 18 | { 19 | // Keep the this shared_ptr alive to prevent heap-use-after-free 20 | std::shared_ptr _keepMeAlive = shared_from_this(); 21 | UniqueLock lock(mx); 22 | 23 | waiters++; 24 | 25 | if (waiters > count) { 26 | SPDLOG_ERROR("Latch already 
used: {} > {}", waiters, count); 27 | throw std::runtime_error("Latch already used"); 28 | } 29 | 30 | if (waiters == count) { 31 | cv.notify_all(); 32 | } else { 33 | auto timePoint = std::chrono::system_clock::now() + 34 | std::chrono::milliseconds(timeoutMs); 35 | 36 | if (!cv.wait_until(lock, timePoint, [&] { return waiters >= count; })) { 37 | throw std::runtime_error("Latch timed out"); 38 | } 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/util/locks.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace faabric::util { 4 | 5 | FlagWaiter::FlagWaiter(int timeoutMsIn) 6 | : timeoutMs(timeoutMsIn) 7 | {} 8 | 9 | void FlagWaiter::waitOnFlag() 10 | { 11 | // Keep the this shared_ptr alive to prevent heap-use-after-free 12 | std::shared_ptr _keepMeAlive = shared_from_this(); 13 | // Check 14 | if (flag.load()) { 15 | return; 16 | } 17 | 18 | // Wait for flag to be set 19 | UniqueLock lock(flagMx); 20 | if (!cv.wait_for(lock, std::chrono::milliseconds(timeoutMs), [this] { 21 | return flag.load(); 22 | })) { 23 | 24 | SPDLOG_ERROR("Timed out waiting for flag"); 25 | throw std::runtime_error("Timed out waiting for flag"); 26 | } 27 | } 28 | 29 | void FlagWaiter::setFlag(bool value) 30 | { 31 | // Keep the this shared_ptr alive to prevent heap-use-after-free 32 | std::shared_ptr _keepMeAlive = shared_from_this(); 33 | UniqueLock lock(flagMx); 34 | flag.store(value); 35 | cv.notify_all(); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/util/logging.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace faabric::util { 5 | 6 | void initLogging() 7 | { 8 | // Docs: https://github.com/gabime/spdlog/wiki/3.-Custom-formatting 9 | spdlog::set_pattern("%^[%H:%M:%S] [%t] [%L]%$ %v"); 10 | 11 | // Set the current level for 
the default logger (note that the 12 | // minimum log level is determined in the header). 13 | const SystemConfig& conf = getSystemConfig(); 14 | if (conf.logLevel == "trace") { 15 | spdlog::set_level(spdlog::level::trace); 16 | 17 | // Check the minimum level permits trace logging 18 | #if SPDLOG_ACTIVE_LEVEL > SPDLOG_LEVEL_TRACE 19 | SPDLOG_WARN( 20 | "Logging set to trace but minimum log level set too high ({})", 21 | SPDLOG_ACTIVE_LEVEL); 22 | #endif 23 | } else if (conf.logLevel == "debug") { 24 | spdlog::set_level(spdlog::level::debug); 25 | 26 | // Check the minimum level permits debug logging 27 | #if SPDLOG_ACTIVE_LEVEL > SPDLOG_LEVEL_DEBUG 28 | SPDLOG_WARN( 29 | "Logging set to debug but minimum log level set too high ({})", 30 | SPDLOG_ACTIVE_LEVEL); 31 | #endif 32 | } else { 33 | spdlog::set_level(spdlog::level::info); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/util/ptp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace faabric::util { 4 | faabric::PointToPointMappings ptpMappingsFromSchedulingDecision( 5 | std::shared_ptr decision) 6 | { 7 | faabric::PointToPointMappings mappings; 8 | mappings.set_appid(decision->appId); 9 | mappings.set_groupid(decision->groupId); 10 | for (int i = 0; i < decision->hosts.size(); i++) { 11 | auto* mapping = mappings.add_mappings(); 12 | mapping->set_host(decision->hosts.at(i)); 13 | mapping->set_messageid(decision->messageIds.at(i)); 14 | mapping->set_appidx(decision->appIdxs.at(i)); 15 | mapping->set_groupidx(decision->groupIdxs.at(i)); 16 | } 17 | 18 | return mappings; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/util/queue.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace faabric::util { 4 | TokenPool::TokenPool(int nTokens) 5 | : _size(nTokens) 
6 | { 7 | // Initialise all tokens as available 8 | for (int i = 0; i < nTokens; i++) { 9 | this->queue.enqueue(i); 10 | } 11 | }; 12 | 13 | int TokenPool::size() 14 | { 15 | return _size; 16 | } 17 | 18 | int TokenPool::taken() 19 | { 20 | return _size - ((int)queue.size()); 21 | } 22 | 23 | int TokenPool::free() 24 | { 25 | return (int)queue.size(); 26 | } 27 | 28 | /** 29 | * Blocking call to get an available token 30 | */ 31 | int TokenPool::getToken() 32 | { 33 | if (_size == 0) { 34 | return -1; 35 | } 36 | 37 | return queue.dequeue(); 38 | } 39 | 40 | void TokenPool::releaseToken(int token) 41 | { 42 | queue.enqueue(token); 43 | } 44 | 45 | void TokenPool::reset() 46 | { 47 | queue.reset(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/util/random.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace faabric::util { 6 | std::string randomString(int len) 7 | { 8 | std::string result(len, '\0'); 9 | 10 | static const char alphanum[] = "123456789" 11 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 12 | "abcdefghijklmnopqrstuvwxyz"; 13 | 14 | static std::random_device rd; 15 | static std::mt19937 rng(rd()); 16 | 17 | // Range cannot include last element of alphanum array as this is a null 18 | // terminator 19 | std::uniform_int_distribution uni(0, sizeof(alphanum) - 2); 20 | 21 | for (int i = 0; i < len; ++i) { 22 | int r = uni(rng); 23 | result[i] = alphanum[r]; 24 | } 25 | 26 | return result; 27 | } 28 | 29 | std::string randomStringFromSet(const std::unordered_set& s) 30 | { 31 | static std::random_device rd; 32 | static std::mt19937 rng(rd()); 33 | 34 | unsigned long nElems = s.size(); 35 | if (nElems == 0) { 36 | return ""; 37 | } 38 | 39 | std::uniform_int_distribution uni(0, nElems - 1); 40 | int random_integer = uni(rng); 41 | auto it = s.begin(); 42 | std::advance(it, random_integer); 43 | 44 | return *it; 45 | } 46 | } 47 | 
-------------------------------------------------------------------------------- /src/util/state.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace faabric::util { 5 | std::string keyForUser(const std::string& user, const std::string& key) 6 | { 7 | if (user.empty() || key.empty()) { 8 | throw std::runtime_error( 9 | fmt::format("Cannot have empty user or key ({}/{})", user, key)); 10 | } 11 | 12 | std::string fullKey = user + "_" + key; 13 | 14 | return fullKey; 15 | } 16 | 17 | void maskDouble(unsigned int* maskArray, unsigned long idx) 18 | { 19 | // We assume int is half size of double 20 | unsigned long intIdx = 2 * idx; 21 | maskArray[intIdx] |= STATE_MASK_32; 22 | maskArray[intIdx + 1] |= STATE_MASK_32; 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/util/string_tools.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | namespace faabric::util { 7 | 8 | bool isAllWhitespace(const std::string& input) 9 | { 10 | return std::all_of(input.begin(), input.end(), isspace); 11 | } 12 | 13 | bool startsWith(const std::string& input, const std::string& subStr) 14 | { 15 | if (subStr.empty()) { 16 | return false; 17 | } 18 | 19 | return input.rfind(subStr, 0) == 0; 20 | } 21 | 22 | bool endsWith(std::string const& value, std::string const& ending) 23 | { 24 | if (ending.empty()) { 25 | return false; 26 | } else if (ending.size() > value.size()) { 27 | return false; 28 | } 29 | return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); 30 | } 31 | 32 | bool contains(const std::string& input, const std::string& subStr) 33 | { 34 | if (input.find(subStr) != std::string::npos) { 35 | return true; 36 | } else { 37 | return false; 38 | } 39 | } 40 | 41 | std::string removeSubstr(const std::string& input, const std::string& toErase) 42 | { 43 | 
std::string output = input; 44 | 45 | size_t pos = output.find(toErase); 46 | 47 | if (pos != std::string::npos) { 48 | output.erase(pos, toErase.length()); 49 | } 50 | 51 | return output; 52 | } 53 | 54 | bool stringIsInt(const std::string& input) 55 | { 56 | return !input.empty() && 57 | input.find_first_not_of("0123456789") == std::string::npos; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/util/testing.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace faabric::util { 6 | static std::atomic testMode = false; 7 | static std::atomic mockMode = false; 8 | 9 | void setTestMode(bool val) 10 | { 11 | testMode.store(val, std::memory_order_release); 12 | } 13 | 14 | bool isTestMode() 15 | { 16 | return testMode.load(std::memory_order_acquire); 17 | } 18 | void setMockMode(bool val) 19 | { 20 | mockMode.store(val, std::memory_order_release); 21 | } 22 | 23 | bool isMockMode() 24 | { 25 | return mockMode.load(std::memory_order_acquire); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from invoke import Collection 2 | 3 | from . import call 4 | from . import dev 5 | from . import docker 6 | from . import docs 7 | from . import examples 8 | from . import git 9 | from . import format_code 10 | from . 
import tests 11 | 12 | ns = Collection( 13 | call, 14 | dev, 15 | docker, 16 | docs, 17 | examples, 18 | git, 19 | format_code, 20 | tests, 21 | ) 22 | -------------------------------------------------------------------------------- /tasks/call.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from invoke import task 3 | 4 | 5 | @task 6 | def call(ctx, user, func, data=None, host="localhost"): 7 | """ 8 | Calls a function using the HTTP API 9 | """ 10 | msg = { 11 | "user": user, 12 | "function": func, 13 | } 14 | 15 | if data: 16 | msg["input"] = data 17 | 18 | url = "http://{}:8080/f/{}/{}".format(host, user, func) 19 | response = requests.post(url, json=msg) 20 | 21 | if response.status_code >= 400: 22 | print("Request failed: status = {}".format(response.status_code)) 23 | exit(1) 24 | 25 | print(response.text) 26 | -------------------------------------------------------------------------------- /tasks/docs.py: -------------------------------------------------------------------------------- 1 | from os import makedirs 2 | from os.path import join 3 | from subprocess import run 4 | 5 | from tasks.util.env import PROJ_ROOT 6 | 7 | from invoke import task 8 | 9 | DOCS_DIR = join(PROJ_ROOT, "docs") 10 | SPHINX_OUT_DIR = join(DOCS_DIR, "sphinx") 11 | 12 | 13 | @task(default=True) 14 | def generate(ctx): 15 | makedirs(SPHINX_OUT_DIR, exist_ok=True) 16 | 17 | run( 18 | "sphinx-build -b html {} {}".format(DOCS_DIR, SPHINX_OUT_DIR), 19 | cwd=DOCS_DIR, 20 | check=True, 21 | shell=True, 22 | ) 23 | -------------------------------------------------------------------------------- /tasks/examples.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | from invoke import task 3 | from os import makedirs, environ 4 | from os.path import join, exists 5 | from shutil import rmtree 6 | from subprocess import run 7 | from tasks.util.env import ( 8 | 
FAABRIC_INSTALL_PREFIX, 9 | LLVM_VERSION_MAJOR, 10 | PROJ_ROOT, 11 | ) 12 | 13 | 14 | EXAMPLES_DIR = join(PROJ_ROOT, "examples") 15 | BUILD_DIR = join(EXAMPLES_DIR, "build") 16 | INCLUDE_DIR = "{}/include".format(FAABRIC_INSTALL_PREFIX) 17 | LIB_DIR = "{}/lib".format(FAABRIC_INSTALL_PREFIX) 18 | 19 | 20 | @task(default=True) 21 | def build(ctx, clean=False): 22 | """ 23 | Builds the examples 24 | """ 25 | if clean and exists(BUILD_DIR): 26 | rmtree(BUILD_DIR) 27 | 28 | if not exists(BUILD_DIR): 29 | makedirs(BUILD_DIR) 30 | 31 | # Cmake 32 | cmake_cmd = " ".join( 33 | [ 34 | "cmake", 35 | "-GNinja", 36 | "-DCMAKE_BUILD_TYPE=Release", 37 | "-DCMAKE_CXX_FLAGS=-I{}".format(INCLUDE_DIR), 38 | "-DCMAKE_EXE_LINKER_FLAGS=-L{}".format(LIB_DIR), 39 | "-DCMAKE_CXX_COMPILER=/usr/bin/clang++-{}".format( 40 | LLVM_VERSION_MAJOR 41 | ), 42 | "-DCMAKE_C_COMPILER=/usr/bin/clang-{}".format(LLVM_VERSION_MAJOR), 43 | EXAMPLES_DIR, 44 | ] 45 | ) 46 | print(cmake_cmd) 47 | 48 | run( 49 | cmake_cmd, 50 | cwd=BUILD_DIR, 51 | shell=True, 52 | check=True, 53 | ) 54 | 55 | # Build 56 | run( 57 | "cmake --build . 
--target all_examples", 58 | cwd=BUILD_DIR, 59 | shell=True, 60 | check=True, 61 | ) 62 | 63 | 64 | @task 65 | def execute(ctx, example): 66 | """ 67 | Runs the given example 68 | """ 69 | exe_path = join(BUILD_DIR, example) 70 | 71 | if not exists(exe_path): 72 | raise RuntimeError("Did not find {} as expected".format(exe_path)) 73 | 74 | shell_env = copy(environ) 75 | shell_env.update( 76 | { 77 | "LD_LIBRARY_PATH": LIB_DIR, 78 | } 79 | ) 80 | 81 | run(exe_path, env=shell_env, shell=True, check=True) 82 | -------------------------------------------------------------------------------- /tasks/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faasm/faabric/057fe909b078edc2f07b05f37fd16629c59c70b7/tasks/util/__init__.py -------------------------------------------------------------------------------- /tasks/util/env.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | from os.path import dirname, exists, realpath, join, expanduser 3 | import configparser 4 | 5 | HOME_DIR = expanduser("~") 6 | PROJ_ROOT = dirname(dirname(dirname(realpath(__file__)))) 7 | 8 | _FAABRIC_BUILD_DIR = environ.get("FAABRIC_BUILD_DIR", "/build/faabric") 9 | 10 | FAABRIC_SHARED_BUILD_DIR = join(_FAABRIC_BUILD_DIR, "shared") 11 | FAABRIC_STATIC_BUILD_DIR = join(_FAABRIC_BUILD_DIR, "static") 12 | 13 | FAABRIC_INSTALL_PREFIX = join(_FAABRIC_BUILD_DIR, "install") 14 | 15 | FAABRIC_CONFIG_FILE = join(PROJ_ROOT, "faabric.ini") 16 | 17 | CR_NAME = "ghcr.io/faasm" 18 | 19 | # This LLVM version is for the LLVM that we use to compile regular C/C++ code 20 | # to x86. For the LLVM version to cross-compile code to WebAssembly see 21 | # faasm/cpp/faasmtools/env.py. Ideally, both versions will be in sync but it 22 | # is not strictly necessary. 
23 | LLVM_VERSION = "17.0.6" 24 | LLVM_VERSION_MAJOR = LLVM_VERSION.split(".")[0] 25 | 26 | 27 | def get_version(): 28 | ver_file = join(PROJ_ROOT, "VERSION") 29 | 30 | with open(ver_file, "r") as fh: 31 | version = fh.read() 32 | 33 | version = version.strip() 34 | return version 35 | 36 | 37 | def get_faabric_config(): 38 | config = configparser.ConfigParser() 39 | if not exists(FAABRIC_CONFIG_FILE): 40 | print("Creating config file at {}".format(FAABRIC_CONFIG_FILE)) 41 | 42 | with open(FAABRIC_CONFIG_FILE, "w") as fh: 43 | config.write(fh) 44 | else: 45 | config.read(FAABRIC_CONFIG_FILE) 46 | 47 | return config 48 | -------------------------------------------------------------------------------- /tests/dist/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB_RECURSE TEST_FILES ${CMAKE_CURRENT_LIST_DIR} test_*.cpp) 2 | 3 | add_subdirectory(mpi) 4 | 5 | # Shared 6 | add_library(faabric_dist_tests_lib 7 | init.h 8 | init.cpp 9 | DistTestExecutor.h 10 | DistTestExecutor.cpp 11 | ${MPI_DIST_TEST_SOURCES} 12 | scheduler/functions.cpp 13 | transport/functions.cpp 14 | ) 15 | 16 | target_link_libraries(faabric_dist_tests_lib PUBLIC faabric::test_utils faabric_dist_tests_mpinative_lib) 17 | target_include_directories(faabric_dist_tests_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 18 | 19 | # Tests 20 | add_executable( 21 | faabric_dist_tests 22 | main.cpp 23 | ${TEST_FILES} 24 | ) 25 | 26 | target_link_libraries(faabric_dist_tests PRIVATE faabric_dist_tests_lib) 27 | 28 | add_test(NAME faabric_dist_tests COMMAND "tests/dist/faabric_dist_tests") 29 | 30 | # Server 31 | add_executable( 32 | faabric_dist_test_server 33 | server.cpp 34 | ) 35 | 36 | target_link_libraries(faabric_dist_test_server PRIVATE faabric_dist_tests_lib) 37 | -------------------------------------------------------------------------------- /tests/dist/DistTestExecutor.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace tests { 8 | 9 | #define DIST_TEST_EXECUTOR_MEMORY_SIZE (30 * faabric::util::HOST_PAGE_SIZE) 10 | 11 | class DistTestExecutor final : public faabric::executor::Executor 12 | { 13 | public: 14 | DistTestExecutor(faabric::Message& msg); 15 | 16 | ~DistTestExecutor(); 17 | 18 | int32_t executeTask( 19 | int threadPoolIdx, 20 | int msgIdx, 21 | std::shared_ptr req) override; 22 | 23 | void reset(faabric::Message& msg) override; 24 | 25 | std::span getMemoryView() override; 26 | 27 | std::span getDummyMemory(); 28 | 29 | // Helper method to execute threads in a distributed test 30 | std::vector> executeThreads( 31 | std::shared_ptr req, 32 | const std::vector& mergeRegions); 33 | 34 | protected: 35 | void setMemorySize(size_t newSize) override; 36 | 37 | private: 38 | faabric::util::MemoryRegion dummyMemory = nullptr; 39 | 40 | size_t dummyMemorySize = DIST_TEST_EXECUTOR_MEMORY_SIZE; 41 | 42 | void setUpDummyMemory(size_t memSize); 43 | }; 44 | 45 | class DistTestExecutorFactory : public faabric::executor::ExecutorFactory 46 | { 47 | protected: 48 | std::shared_ptr createExecutor( 49 | faabric::Message& msg) override; 50 | }; 51 | 52 | typedef int (*ExecutorFunction)( 53 | tests::DistTestExecutor* exec, 54 | int threadPoolIdx, 55 | int msgIdx, 56 | std::shared_ptr req); 57 | 58 | void registerDistTestExecutorCallback(const char* user, 59 | const char* funcName, 60 | ExecutorFunction func); 61 | 62 | } 63 | -------------------------------------------------------------------------------- /tests/dist/init.cpp: -------------------------------------------------------------------------------- 1 | #include "init.h" 2 | #include "DistTestExecutor.h" 3 | 4 | #include 5 | 6 | namespace tests { 7 | void initDistTests() 8 | { 9 | SPDLOG_INFO("Registering distributed test server functions"); 10 | 11 | 
tests::registerMpiTestFunctions(); 12 | tests::registerSchedulerTestFunctions(); 13 | tests::registerTransportTestFunctions(); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /tests/dist/init.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace tests { 4 | 5 | void initDistTests(); 6 | 7 | // Specific test functions 8 | void registerMpiTestFunctions(); 9 | void registerSchedulerTestFunctions(); 10 | void registerTransportTestFunctions(); 11 | 12 | } 13 | -------------------------------------------------------------------------------- /tests/dist/main.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_RUNNER 2 | 3 | // Disable catch signal catching to avoid interfering with dirty tracking 4 | #define CATCH_CONFIG_NO_POSIX_SIGNALS 1 5 | 6 | #include 7 | 8 | #include "DistTestExecutor.h" 9 | #include "faabric_utils.h" 10 | #include "init.h" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | FAABRIC_CATCH_LOGGER 18 | 19 | int main(int argc, char* argv[]) 20 | { 21 | faabric::util::setUpCrashHandler(); 22 | 23 | faabric::util::initLogging(); 24 | tests::initDistTests(); 25 | 26 | std::shared_ptr fac = 27 | std::make_shared(); 28 | 29 | // WARNING: all 0MQ sockets have to have gone *out of scope* before we shut 30 | // down the context, therefore this segment must be in a nested scope (or 31 | // another function). 
32 | int result; 33 | { 34 | SPDLOG_INFO("Starting distributed test server on main"); 35 | faabric::runner::FaabricMain m(fac); 36 | m.startBackground(); 37 | 38 | // Run the tests 39 | result = Catch::Session().run(argc, argv); 40 | fflush(stdout); 41 | 42 | // Shut down 43 | SPDLOG_INFO("Shutting down"); 44 | m.shutdown(); 45 | } 46 | return result; 47 | } 48 | -------------------------------------------------------------------------------- /tests/dist/mpi/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # First we build the MPI native library 2 | add_library(faabric_dist_tests_mpinative_lib 3 | mpi_native.cpp 4 | mpi_native.h 5 | ) 6 | 7 | target_link_libraries(faabric_dist_tests_mpinative_lib PUBLIC faabric::test_utils) 8 | 9 | # Second we set all the source files for the dist tests library 10 | set(ALL_MPI_EXAMPLES 11 | ${CMAKE_CURRENT_LIST_DIR}/benchmarks/mpi_allreduce.cpp 12 | ${CMAKE_CURRENT_LIST_DIR}/benchmarks/mpi_bench.cpp 13 | ${CMAKE_CURRENT_LIST_DIR}/benchmarks/mpi_send_recv.cpp 14 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_allgather.cpp 15 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_allreduce.cpp 16 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_alltoall.cpp 17 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_alltoall_sleep.cpp 18 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_bcast.cpp 19 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_barrier.cpp 20 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_cart_create.cpp 21 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_cartesian.cpp 22 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_checks.cpp 23 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_gather.cpp 24 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_helloworld.cpp 25 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_isendrecv.cpp 26 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_migration.cpp 27 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_order.cpp 28 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_reduce.cpp 29 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_reduce_many.cpp 30 | 
${CMAKE_CURRENT_LIST_DIR}/examples/mpi_scan.cpp 31 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_scatter.cpp 32 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_send.cpp 33 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_send_many.cpp 34 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_send_sync_async.cpp 35 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_sendrecv.cpp 36 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_status.cpp 37 | ${CMAKE_CURRENT_LIST_DIR}/examples/mpi_typesize.cpp 38 | ) 39 | 40 | set(MPI_DIST_TEST_SOURCES 41 | ${CMAKE_CURRENT_LIST_DIR}/functions.cpp 42 | ${CMAKE_CURRENT_LIST_DIR}/mpi_native.h 43 | ${ALL_MPI_EXAMPLES} 44 | PARENT_SCOPE 45 | ) 46 | -------------------------------------------------------------------------------- /tests/dist/mpi/benchmarks/mpi_bench.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | #ifdef USE_REAL_MPI 7 | #include 8 | #else 9 | #include "mpi/mpi_native.h" 10 | #include 11 | #include 12 | #include 13 | 14 | int MPI_Get_library_version(char* p, int* n); 15 | 16 | #endif 17 | 18 | #define PRN(fmt, ...) std::fprintf(stdout, fmt "\n", __VA_ARGS__) 19 | 20 | #define PRN_IF(cond, fmt, ...) 
\ 21 | if (cond) { \ 22 | PRN(fmt, __VA_ARGS__); \ 23 | } 24 | 25 | #define S(x) (x).c_str() 26 | 27 | using CLOCK = std::chrono::high_resolution_clock; 28 | using duration = std::chrono::duration; 29 | 30 | std::vector small_sizes(); 31 | 32 | std::vector resnet50_grad_sizes(); 33 | 34 | std::string show_size(int64_t workload); 35 | 36 | std::string show_rate(int64_t workload, duration d); 37 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_allgather.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace tests::mpi { 5 | 6 | int allGather() 7 | { 8 | MPI_Init(NULL, NULL); 9 | 10 | int rank, worldSize; 11 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 12 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 13 | 14 | int nPerRank = 4; 15 | int n = worldSize * nPerRank; 16 | 17 | // Arrays for sending and receiving 18 | int* thisChunk = new int[nPerRank]; 19 | int* expected = new int[n]; 20 | int* actual = new int[n]; 21 | 22 | int startIdx = rank * nPerRank; 23 | for (int i = 0; i < nPerRank; i++) { 24 | thisChunk[i] = startIdx + i; 25 | } 26 | 27 | for (int i = 0; i < n; i++) { 28 | expected[i] = i; 29 | actual[i] = -1; 30 | } 31 | 32 | MPI_Allgather( 33 | thisChunk, nPerRank, MPI_INT, actual, nPerRank, MPI_INT, MPI_COMM_WORLD); 34 | 35 | for (int i = 0; i < n; i++) { 36 | if (actual[i] != expected[i]) { 37 | printf("Allgather failed!\n"); 38 | return 1; 39 | } 40 | } 41 | 42 | printf("Rank %i: gather as expected\n", rank); 43 | 44 | delete[] thisChunk; 45 | delete[] actual; 46 | delete[] expected; 47 | 48 | MPI_Finalize(); 49 | 50 | return 0; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_allreduce.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace tests::mpi { 6 | 7 | int 
allReduce() 8 | { 9 | MPI_Init(NULL, NULL); 10 | 11 | int rank; 12 | int worldSize; 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 15 | 16 | // Create an array of three numbers for this process 17 | int numsThisProc[3] = { rank, 10 * rank, 100 * rank }; 18 | int* expected = new int[3]; 19 | int* result = new int[3]; 20 | 21 | // Build expectation 22 | memset(expected, 0, 3 * sizeof(int)); 23 | for (int r = 0; r < worldSize; r++) { 24 | expected[0] += r; 25 | expected[1] += 10 * r; 26 | expected[2] += 100 * r; 27 | } 28 | 29 | // Call allreduce 30 | MPI_Allreduce(numsThisProc, result, 3, MPI_INT, MPI_SUM, MPI_COMM_WORLD); 31 | 32 | // Check vs. expectation 33 | for (int i = 0; i < 3; i++) { 34 | if (result[i] != expected[i]) { 35 | printf("Allreduce failed!\n"); 36 | return 1; 37 | } 38 | } 39 | 40 | printf("Rank %i: Allreduce as expected\n", rank); 41 | 42 | MPI_Finalize(); 43 | 44 | return MPI_SUCCESS; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_alltoall.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | namespace tests::mpi { 7 | 8 | int allToAll() 9 | { 10 | MPI_Init(NULL, NULL); 11 | 12 | int rank; 13 | int worldSize; 14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 15 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 16 | 17 | int chunkSize = 2; 18 | int fullSize = worldSize * chunkSize; 19 | 20 | // Arrays for sending and receiving 21 | int* sendBuf = new int[fullSize]; 22 | int* expected = new int[fullSize]; 23 | int* actual = new int[fullSize]; 24 | 25 | // Populate data 26 | for (int i = 0; i < fullSize; i++) { 27 | // Send buffer from this rank 28 | sendBuf[i] = (rank * 10) + i; 29 | 30 | // Work out which rank this chunk of the expectation will come from 31 | int rankOffset = (rank * chunkSize) + (i % chunkSize); 32 | int recvRank = i / chunkSize; 33 | 
expected[i] = (recvRank * 10) + rankOffset; 34 | } 35 | 36 | MPI_Alltoall( 37 | sendBuf, chunkSize, MPI_INT, actual, chunkSize, MPI_INT, MPI_COMM_WORLD); 38 | 39 | int returnValue = 0; 40 | 41 | for (int i = 0; i < fullSize; i++) { 42 | if (actual[i] != expected[i]) { 43 | returnValue = 1; 44 | } 45 | } 46 | 47 | if (returnValue == 0) { 48 | printf("Rank %i: alltoall as expected\n", rank); 49 | } else { 50 | std::string expectedMsg = "[" + std::to_string(expected[0]); 51 | std::string actualMsg = "[" + std::to_string(actual[0]); 52 | for (int i = 1; i < fullSize; i++) { 53 | expectedMsg += "," + std::to_string(expected[i]); 54 | actualMsg += "," + std::to_string(actual[i]); 55 | } 56 | expectedMsg += "]"; 57 | actualMsg += "]"; 58 | 59 | printf("Rank %i: alltoall failed\n\texpected: %s\n\tgot:%s\n", 60 | rank, 61 | expectedMsg.c_str(), 62 | actualMsg.c_str()); 63 | } 64 | 65 | delete[] sendBuf; 66 | delete[] actual; 67 | delete[] expected; 68 | 69 | MPI_Finalize(); 70 | 71 | return returnValue; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_alltoall_sleep.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "mpi/mpi_native.h" 6 | 7 | #include 8 | #include 9 | 10 | namespace tests::mpi { 11 | 12 | int allToAllAndSleep() 13 | { 14 | MPI_Init(NULL, NULL); 15 | 16 | int rank; 17 | int worldSize; 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 19 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 20 | 21 | // Call barrier and all-to-all multiple times 22 | int numBarriers = 500; 23 | for (int i = 0; i < numBarriers; i++) { 24 | MPI_Barrier(MPI_COMM_WORLD); 25 | doAllToAll(rank, worldSize, i); 26 | } 27 | 28 | int timeToSleepSec = 5; 29 | SPDLOG_INFO("Rank {} going to sleep for {} seconds", rank, timeToSleepSec); 30 | SLEEP_MS(timeToSleepSec * 1000); 31 | SPDLOG_INFO("Rank {} waking up", rank); 32 | 33 | for (int 
i = 0; i < numBarriers; i++) { 34 | MPI_Barrier(MPI_COMM_WORLD); 35 | doAllToAll(rank, worldSize, i); 36 | } 37 | 38 | MPI_Finalize(); 39 | 40 | return 0; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_barrier.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "mpi/mpi_native.h" 4 | 5 | namespace tests::mpi { 6 | 7 | int barrier() 8 | { 9 | MPI_Init(NULL, NULL); 10 | 11 | int rank; 12 | int worldSize; 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 15 | 16 | // Call barrier multiple times. Note that there are assertions in the code 17 | // that check that the barrier works alright. 18 | int numBarriers = 100; 19 | for (int i = 0; i < numBarriers; i++) { 20 | MPI_Barrier(MPI_COMM_WORLD); 21 | doAllToAll(rank, worldSize, i); 22 | } 23 | 24 | MPI_Finalize(); 25 | 26 | return 0; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_bcast.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace tests::mpi { 6 | 7 | int broadcast() 8 | { 9 | MPI_Init(NULL, NULL); 10 | 11 | int rank; 12 | int worldSize; 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 15 | 16 | if (worldSize < 3) { 17 | printf("Need world of size 3 or more\n"); 18 | return 1; 19 | } 20 | 21 | int root = 2; 22 | int expected[4] = { 0, 1, 2, 3 }; 23 | int actual[4] = { -1, -1, -1, -1 }; 24 | 25 | if (rank == root) { 26 | memcpy(actual, expected, 4 * sizeof(int)); 27 | } 28 | 29 | // Broadcast (all should subsequently agree) 30 | MPI_Bcast(actual, 4, MPI_INT, root, MPI_COMM_WORLD); 31 | 32 | for (int i = 0; i < 4; i++) { 33 | if (actual[i] != expected[i]) { 34 | printf("Broadcast failed!\n"); 35 | return 1; 36 | } 37 | } 
38 | 39 | printf("Broadcast succeeded\n"); 40 | 41 | MPI_Finalize(); 42 | 43 | return 0; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_cart_create.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace tests::mpi { 6 | 7 | int cartCreate() 8 | { 9 | MPI_Init(NULL, NULL); 10 | 11 | int rank; 12 | int worldSize; 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 15 | 16 | // Prepare arguments 17 | int ndims = 2; 18 | int sideLength = static_cast(std::floor(std::sqrt(worldSize))); 19 | int dims[2] = { sideLength, sideLength }; 20 | int periods[2] = { 0, 0 }; 21 | int reorder = 0; 22 | MPI_Comm cart1, cart2; 23 | 24 | // Create two different communicators 25 | if (MPI_Cart_create( 26 | MPI_COMM_WORLD, ndims, dims, periods, reorder, &cart1) != 27 | MPI_SUCCESS) { 28 | printf("MPI_Cart_create failed!\n"); 29 | return 1; 30 | } 31 | if (MPI_Cart_create( 32 | MPI_COMM_WORLD, ndims, dims, periods, reorder, &cart2) != 33 | MPI_SUCCESS) { 34 | printf("MPI_Cart_create failed!\n"); 35 | return 1; 36 | } 37 | 38 | // Check that they have been allocated at different addresses. This is to 39 | // prevent situations in which memory is not properly allocated and we 40 | // end up creating an object at the base of the wasm memory array. 
41 | if (&cart1 == &cart2) { 42 | printf("Both communicators allocated at the same address.\n"); 43 | return 1; 44 | } 45 | printf("MPI_Cart_create correctly allocated.\n"); 46 | 47 | MPI_Finalize(); 48 | 49 | return 0; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_gather.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace tests::mpi { 5 | 6 | int gather() 7 | { 8 | MPI_Init(NULL, NULL); 9 | 10 | int rank; 11 | int worldSize; 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 14 | 15 | // Build an array the right size 16 | int nPerRank = 4; 17 | int n = worldSize * nPerRank; 18 | 19 | // Set up containers in root 20 | int* expected = nullptr; 21 | int* actual = nullptr; 22 | int root = 2; 23 | if (rank == root) { 24 | expected = new int[n]; 25 | for (int i = 0; i < n; i++) { 26 | expected[i] = i; 27 | } 28 | 29 | actual = new int[n]; 30 | } 31 | 32 | // Build the data chunk for this rank 33 | int startIdx = rank * nPerRank; 34 | int* thisChunk = new int[nPerRank]; 35 | for (int i = 0; i < nPerRank; i++) { 36 | thisChunk[i] = startIdx + i; 37 | } 38 | 39 | MPI_Gather(thisChunk, 40 | nPerRank, 41 | MPI_INT, 42 | actual, 43 | nPerRank, 44 | MPI_INT, 45 | root, 46 | MPI_COMM_WORLD); 47 | 48 | if (rank == root) { 49 | for (int i = 0; i < n; i++) { 50 | if (actual[i] != expected[i]) { 51 | printf("Gather failed!\n"); 52 | return 1; 53 | } 54 | } 55 | printf("Gather as expected\n"); 56 | } 57 | 58 | delete[] thisChunk; 59 | delete[] actual; 60 | delete[] expected; 61 | 62 | MPI_Finalize(); 63 | 64 | return 0; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_helloworld.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace 
tests::mpi { 5 | 6 | int helloWorld() 7 | { 8 | SPDLOG_INFO("Hello world from Faabric MPI Main!"); 9 | 10 | MPI_Init(NULL, NULL); 11 | 12 | int rank; 13 | int worldSize; 14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 15 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 16 | 17 | SPDLOG_INFO("Hello faabric from process {} of {}", rank + 1, worldSize); 18 | 19 | MPI_Finalize(); 20 | 21 | return 0; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_isendrecv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace tests::mpi { 5 | 6 | int iSendRecv() 7 | { 8 | MPI_Init(NULL, NULL); 9 | 10 | int rank; 11 | int worldSize; 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 14 | 15 | // Send and receive messages asynchronously in a ring 16 | int right = (rank + 1) % worldSize; 17 | int maxRank = worldSize - 1; 18 | int left = rank > 0 ? 
rank - 1 : maxRank; 19 | 20 | // Asynchronously receive from the left 21 | int recvValue = -1; 22 | MPI_Request recvRequest; 23 | MPI_Irecv(&recvValue, 1, MPI_INT, left, 0, MPI_COMM_WORLD, &recvRequest); 24 | 25 | // Asynchronously send to the right 26 | int sendValue = rank; 27 | MPI_Request sendRequest; 28 | MPI_Isend(&sendValue, 1, MPI_INT, right, 0, MPI_COMM_WORLD, &sendRequest); 29 | 30 | // Wait for both 31 | MPI_Wait(&recvRequest, MPI_STATUS_IGNORE); 32 | MPI_Wait(&sendRequest, MPI_STATUS_IGNORE); 33 | 34 | // Check the received value is as expected 35 | if (recvValue != left) { 36 | printf("Rank %i - async not working properly (got %i expected %i)\n", 37 | rank, 38 | recvValue, 39 | left); 40 | return 1; 41 | } 42 | printf("Rank %i - async working properly\n", rank); 43 | 44 | delete sendRequest; 45 | delete recvRequest; 46 | 47 | MPI_Finalize(); 48 | 49 | return 0; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_migration.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "mpi/mpi_native.h" 7 | 8 | namespace tests::mpi { 9 | 10 | // Outer wrapper, and re-entry point after migration 11 | int migration(int nLoops) 12 | { 13 | bool mustCheck = nLoops == NUM_MIGRATION_LOOPS; 14 | 15 | // Initialisation 16 | int res = MPI_Init(NULL, NULL); 17 | if (res != MPI_SUCCESS) { 18 | printf("Failed on MPI init\n"); 19 | return 1; 20 | } 21 | 22 | int rank; 23 | int worldSize; 24 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 25 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 26 | 27 | for (int i = 0; i < nLoops; i++) { 28 | // Make sure everyone is in sync (including those ranks that have been 29 | // migrated) 30 | MPI_Barrier(MPI_COMM_WORLD); 31 | 32 | tests::mpi::doAllToAll(rank, worldSize, i); 33 | 34 | if (mustCheck && (i % CHECK_EVERY == 0) && (i / CHECK_EVERY > 0)) { 35 | mustCheck = false; 
36 | if (rank == 0) { 37 | SPDLOG_INFO( 38 | "Checking for migrations at iteration {}/{}", i, nLoops); 39 | } 40 | // Migration point, which may or may not resume the 41 | // benchmark on another host for the remaining iterations. 42 | // This would eventually be MPI_Barrier 43 | MPI_Barrier(MPI_COMM_WORLD); 44 | mpiMigrationPoint(nLoops - i - 1); 45 | } 46 | } 47 | 48 | SPDLOG_DEBUG("Rank {} exitting the migration loop", rank); 49 | MPI_Barrier(MPI_COMM_WORLD); 50 | 51 | // Shutdown 52 | MPI_Finalize(); 53 | 54 | return MPI_SUCCESS; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_order.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace tests::mpi { 5 | 6 | int order() 7 | { 8 | MPI_Init(NULL, NULL); 9 | 10 | int rank; 11 | int worldSize; 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 14 | 15 | if (worldSize < 4) { 16 | printf("Need world size of 3 or more\n"); 17 | return 1; 18 | } 19 | 20 | if (rank == 0) { 21 | // Send messages out 22 | int out[3] = { 111, 222, 333 }; 23 | MPI_Send(&out[0], 1, MPI_INT, 1, 0, MPI_COMM_WORLD); 24 | MPI_Send(&out[1], 1, MPI_INT, 2, 0, MPI_COMM_WORLD); 25 | MPI_Send(&out[2], 1, MPI_INT, 3, 0, MPI_COMM_WORLD); 26 | 27 | // Get responses out of order 28 | int in[3] = { 0, 0, 0 }; 29 | MPI_Recv(&in[2], 1, MPI_INT, 3, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 30 | MPI_Recv(&in[0], 1, MPI_INT, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 31 | MPI_Recv(&in[1], 1, MPI_INT, 2, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 32 | 33 | // Check values 34 | if (out[0] != in[0] || out[1] != in[1] || out[2] != in[2]) { 35 | printf("Responses not as expected (got [%i, %i, %i], expected [%i, " 36 | "%i, %i]\n", 37 | in[0], 38 | in[1], 39 | in[2], 40 | out[0], 41 | out[1], 42 | out[2]); 43 | return 1; 44 | } 45 | printf("MPI order check successful\n"); 46 | } 
else if (rank > 0 && rank <= 3) { 47 | // Echo message back to main 48 | int receivedNumber = 0; 49 | MPI_Recv( 50 | &receivedNumber, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 51 | MPI_Send(&receivedNumber, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); 52 | } 53 | 54 | MPI_Finalize(); 55 | 56 | return 0; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_reduce.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace tests::mpi { 6 | 7 | int reduce() 8 | { 9 | MPI_Init(NULL, NULL); 10 | 11 | int rank; 12 | int worldSize; 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 15 | 16 | int root = 0; 17 | int* expected = nullptr; 18 | int* result = nullptr; 19 | 20 | if (rank == root) { 21 | // Build expectation 22 | expected = new int[3]; 23 | memset(expected, 0, 3 * sizeof(int)); 24 | 25 | for (int r = 0; r < worldSize; r++) { 26 | expected[0] += r; 27 | expected[1] += 10 * r; 28 | expected[2] += 100 * r; 29 | } 30 | 31 | // Create a buffer for the result, and put the values for the 32 | // root in straight away 33 | result = new int[3]; 34 | result[0] = root; 35 | result[1] = 10 * root; 36 | result[2] = 100 * root; 37 | 38 | // Call the reduce in place on the root 39 | MPI_Reduce( 40 | MPI_IN_PLACE, result, 3, MPI_INT, MPI_SUM, root, MPI_COMM_WORLD); 41 | 42 | // Check vs. 
expectation 43 | for (int i = 0; i < 3; i++) { 44 | if (result[i] != expected[i]) { 45 | printf("Reduce failed\n"); 46 | return 1; 47 | } 48 | } 49 | 50 | printf("Reduce as expected\n"); 51 | } else { 52 | // Create an array of three numbers specific to this rank 53 | int numsThisProc[3] = { rank, 10 * rank, 100 * rank }; 54 | MPI_Reduce( 55 | numsThisProc, result, 3, MPI_INT, MPI_SUM, root, MPI_COMM_WORLD); 56 | } 57 | 58 | MPI_Finalize(); 59 | 60 | return MPI_SUCCESS; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_reduce_many.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace tests::mpi { 6 | 7 | int reduceMany() 8 | { 9 | MPI_Init(NULL, NULL); 10 | 11 | int rank; 12 | int worldSize; 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 15 | 16 | int root = 0; 17 | int* expected = nullptr; 18 | int* result = nullptr; 19 | 20 | int numReduce = 100; 21 | if (rank == root) { 22 | expected = new int[3]; 23 | result = new int[3]; 24 | 25 | for (int r = 0; r < numReduce; r++) { 26 | // Build expectation 27 | memset(expected, 0, 3 * sizeof(int)); 28 | memset(result, 0, 3 * sizeof(int)); 29 | 30 | for (int r = 0; r < worldSize; r++) { 31 | expected[0] += r; 32 | expected[1] += 10 * r; 33 | expected[2] += 100 * r; 34 | } 35 | 36 | // Create a buffer for the result, and put the values for the 37 | // root in straight away 38 | result[0] = root; 39 | result[1] = 10 * root; 40 | result[2] = 100 * root; 41 | 42 | // Call the reduce in place on the root 43 | MPI_Reduce( 44 | MPI_IN_PLACE, result, 3, MPI_INT, MPI_SUM, root, MPI_COMM_WORLD); 45 | 46 | // Check vs. 
expectation 47 | for (int i = 0; i < 3; i++) { 48 | if (result[i] != expected[i]) { 49 | printf("Reduce failed\n"); 50 | return 1; 51 | } 52 | } 53 | } 54 | 55 | printf("Reduce as expected\n"); 56 | 57 | delete[] expected; 58 | delete[] result; 59 | } else { 60 | // Create an array of three numbers specific to this rank 61 | int numsThisProc[3] = { rank, 10 * rank, 100 * rank }; 62 | for (int r = 0; r < numReduce; r++) { 63 | MPI_Reduce( 64 | numsThisProc, result, 3, MPI_INT, MPI_SUM, root, MPI_COMM_WORLD); 65 | } 66 | } 67 | 68 | MPI_Finalize(); 69 | 70 | return MPI_SUCCESS; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_scan.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | namespace tests::mpi { 7 | 8 | int scan() 9 | { 10 | MPI_Init(NULL, NULL); 11 | 12 | int rank; 13 | int worldSize; 14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 15 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 16 | 17 | int count = 3; 18 | int* expected = nullptr; 19 | int* input = nullptr; 20 | int* result = nullptr; 21 | 22 | expected = new int[count]; 23 | memset(expected, 0, count * sizeof(int)); 24 | input = new int[count]; 25 | memset(input, 0, count * sizeof(int)); 26 | result = new int[count]; 27 | memset(result, 0, count * sizeof(int)); 28 | 29 | // Prepare input and expected data 30 | for (int i = 0; i < count; i++) { 31 | input[i] = rank * 10 + i; 32 | for (int r = 0; r <= rank; r++) { 33 | expected[i] += r * 10 + i; 34 | } 35 | } 36 | 37 | // Call the scan operation 38 | MPI_Scan(input, result, count, MPI_INT, MPI_SUM, MPI_COMM_WORLD); 39 | 40 | // Check vs. 
expectation 41 | if (!faabric::util::compareArrays(result, expected, count)) { 42 | return 1; 43 | } 44 | printf("MPI_Scan not in-place as expected.\n"); 45 | 46 | MPI_Barrier(MPI_COMM_WORLD); 47 | 48 | // Check operation in place 49 | MPI_Scan(input, input, count, MPI_INT, MPI_SUM, MPI_COMM_WORLD); 50 | 51 | // Check vs. expectation 52 | if (!faabric::util::compareArrays(input, expected, count)) { 53 | return 1; 54 | } 55 | printf("MPI_Scan in-place as expected.\n"); 56 | 57 | MPI_Finalize(); 58 | 59 | return MPI_SUCCESS; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_scatter.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace tests::mpi { 5 | 6 | int scatter() 7 | { 8 | MPI_Init(NULL, NULL); 9 | 10 | int rank; 11 | int worldSize; 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 14 | 15 | // Build an array the right size 16 | int nPerRank = 4; 17 | int n = worldSize * nPerRank; 18 | 19 | // Set up data in root rank 20 | int* allData = nullptr; 21 | int root = 2; 22 | if (rank == root) { 23 | allData = new int[n]; 24 | for (int i = 0; i < n; i++) { 25 | allData[i] = i; 26 | } 27 | } 28 | 29 | // Build the expectation 30 | int startIdx = rank * nPerRank; 31 | int* expected = new int[nPerRank]; 32 | for (int i = 0; i < nPerRank; i++) { 33 | expected[i] = startIdx + i; 34 | } 35 | 36 | // Do the scatter 37 | int* actual = new int[nPerRank]; 38 | MPI_Scatter(allData, 39 | nPerRank, 40 | MPI_INT, 41 | actual, 42 | nPerRank, 43 | MPI_INT, 44 | root, 45 | MPI_COMM_WORLD); 46 | 47 | for (int i = 0; i < nPerRank; i++) { 48 | if (actual[i] != expected[i]) { 49 | printf("Scatter failed!\n"); 50 | return 1; 51 | } 52 | } 53 | 54 | printf("Scatter %i: [%i, %i, %i, %i]\n", 55 | rank, 56 | actual[0], 57 | actual[1], 58 | actual[2], 59 | actual[3]); 60 | 61 | delete[] allData; 62 | 
delete[] actual; 63 | delete[] expected; 64 | 65 | MPI_Finalize(); 66 | 67 | return 0; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_send_sync_async.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace tests::mpi { 5 | 6 | int sendSyncAsync() 7 | { 8 | MPI_Init(NULL, NULL); 9 | 10 | int rank; 11 | int worldSize; 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 14 | 15 | // Send two messages to all ranks other than 0: once synchronously and once 16 | // asynchronously. 17 | if (rank == 0) { 18 | // Asynchronously send to the right 19 | MPI_Request sendRequest; 20 | for (int r = 1; r < worldSize; r++) { 21 | MPI_Isend(&r, 1, MPI_INT, r, 0, MPI_COMM_WORLD, &sendRequest); 22 | MPI_Send(&r, 1, MPI_INT, r, 0, MPI_COMM_WORLD); 23 | MPI_Wait(&sendRequest, MPI_STATUS_IGNORE); 24 | } 25 | delete sendRequest; 26 | } else { 27 | // Asynchronously receive twice from rank 0 28 | int recvValue1 = -1; 29 | int recvValue2 = -1; 30 | MPI_Request recvRequest1; 31 | MPI_Request recvRequest2; 32 | MPI_Irecv(&recvValue1, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &recvRequest1); 33 | MPI_Irecv(&recvValue2, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &recvRequest2); 34 | 35 | // Wait for both out of order 36 | MPI_Wait(&recvRequest2, MPI_STATUS_IGNORE); 37 | MPI_Wait(&recvRequest1, MPI_STATUS_IGNORE); 38 | 39 | // Check the received value is as expected 40 | if (recvValue1 != rank || recvValue2 != rank) { 41 | printf("Rank %i - async not working properly (got %i-%i expected " 42 | "%i-%i)\n", 43 | rank, 44 | recvValue1, 45 | rank, 46 | recvValue2, 47 | rank); 48 | return 1; 49 | } 50 | delete recvRequest1; 51 | delete recvRequest2; 52 | } 53 | printf("Rank %i - send sync and async working properly\n", rank); 54 | 55 | MPI_Finalize(); 56 | 57 | return 0; 58 | } 59 | } 60 | 
-------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_sendrecv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace tests::mpi { 5 | 6 | int sendRecv() 7 | { 8 | MPI_Init(NULL, NULL); 9 | 10 | int rank; 11 | int worldSize; 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 13 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 14 | 15 | // Send and receive messages in a ring 16 | // Sendrecv prevents worrying about possible deadlocks 17 | int right = (rank + 1) % worldSize; 18 | int maxRank = worldSize - 1; 19 | int left = rank > 0 ? rank - 1 : maxRank; 20 | 21 | // Receive from the left and send to the right 22 | int sendValue = rank; 23 | int recvValue = -1; 24 | printf( 25 | "Rank: %i. Receiving from: %i - Sending to: %i\n", rank, left, right); 26 | MPI_Sendrecv(&sendValue, 27 | 1, 28 | MPI_INT, 29 | right, 30 | 0, 31 | &recvValue, 32 | 1, 33 | MPI_INT, 34 | left, 35 | 0, 36 | MPI_COMM_WORLD, 37 | nullptr); 38 | 39 | // Check the received value is as expected 40 | if (recvValue != left) { 41 | printf("Rank %i - sendrecv not working properly (got %i expected %i)\n", 42 | rank, 43 | recvValue, 44 | left); 45 | return 1; 46 | } 47 | printf("Rank %i - sendrecv working properly\n", rank); 48 | 49 | MPI_Finalize(); 50 | 51 | return 0; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_status.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace tests::mpi { 5 | 6 | int status() 7 | { 8 | MPI_Init(NULL, NULL); 9 | 10 | const int maxCount = 100; 11 | auto numbers = new int[maxCount]; 12 | 13 | int rank; 14 | int worldSize; 15 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 16 | MPI_Comm_size(MPI_COMM_WORLD, &worldSize); 17 | 18 | if (rank == 0) { 19 | // Send a number of values 20 | const int actualCount = 40; 
21 | MPI_Send(numbers, actualCount, MPI_INT, 1, 0, MPI_COMM_WORLD); 22 | printf("Sent %d numbers to 1\n", actualCount); 23 | } else if (rank == 1) { 24 | // Receive more than the actual count 25 | MPI_Status status; 26 | MPI_Recv(numbers, maxCount, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); 27 | 28 | // After receiving the message, check the status to determine 29 | // how many numbers were actually received 30 | int expectedCount = 40; 31 | int actualCount; 32 | MPI_Get_count(&status, MPI_INT, &actualCount); 33 | 34 | if (actualCount != expectedCount) { 35 | printf( 36 | "Not expected: asked for %i values, expecting %i, but got %i\n", 37 | maxCount, 38 | expectedCount, 39 | actualCount); 40 | return 1; 41 | } 42 | printf("As expected, asked for %i values but got %i\n", 43 | maxCount, 44 | actualCount); 45 | } 46 | 47 | delete[] numbers; 48 | 49 | MPI_Finalize(); 50 | 51 | return 0; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /tests/dist/mpi/examples/mpi_typesize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | bool checkTypeSize(MPI_Datatype dt, int expected, const char* name) 5 | { 6 | int actual; 7 | MPI_Type_size(dt, &actual); 8 | if (actual != expected) { 9 | printf("MPI %s size not as expected (got %i, expected %i)\n", 10 | name, 11 | actual, 12 | expected); 13 | return false; 14 | } else { 15 | return true; 16 | } 17 | } 18 | 19 | namespace tests::mpi { 20 | 21 | int typeSize() 22 | { 23 | MPI_Init(NULL, NULL); 24 | 25 | int rank; 26 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 27 | 28 | if (rank == 0) { 29 | if (!checkTypeSize(MPI_INT, sizeof(int), "int")) { 30 | return 1; 31 | } 32 | if (!checkTypeSize(MPI_LONG, sizeof(long), "long")) { 33 | return 1; 34 | } 35 | if (!checkTypeSize(MPI_LONG_LONG, sizeof(long long), "long long")) { 36 | return 1; 37 | } 38 | if (!checkTypeSize( 39 | MPI_LONG_LONG_INT, sizeof(long long int), "long long 
int")) { 40 | return 1; 41 | } 42 | if (!checkTypeSize(MPI_DOUBLE, sizeof(double), "double")) { 43 | return 1; 44 | } 45 | struct 46 | { 47 | double a; 48 | int b; 49 | } s; 50 | if (!checkTypeSize(MPI_DOUBLE_INT, sizeof s, "double int")) { 51 | return 1; 52 | } 53 | if (!checkTypeSize(MPI_FLOAT, sizeof(float), "float")) { 54 | return 1; 55 | } 56 | if (!checkTypeSize(MPI_DOUBLE, sizeof(double), "double")) { 57 | return 1; 58 | } 59 | if (!checkTypeSize(MPI_CHAR, sizeof(char), "char")) { 60 | return 1; 61 | } 62 | 63 | printf("MPI type sizes as expected\n"); 64 | } 65 | 66 | MPI_Finalize(); 67 | 68 | return 0; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /tests/dist/mpi/mpi_native.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #define NUM_MIGRATION_LOOPS 10000 7 | #define CHECK_EVERY 5000 8 | 9 | namespace tests::mpi { 10 | 11 | // --- List of MPI functions --- 12 | 13 | int allGather(); 14 | 15 | int allReduce(); 16 | 17 | int allToAll(); 18 | 19 | int allToAllAndSleep(); 20 | 21 | int barrier(); 22 | 23 | int broadcast(); 24 | 25 | int cartCreate(); 26 | 27 | int cartesian(); 28 | 29 | int checks(); 30 | 31 | int gather(); 32 | 33 | int helloWorld(); 34 | 35 | int iSendRecv(); 36 | 37 | int migration(int nLoops); 38 | 39 | int oneSided(); 40 | 41 | int order(); 42 | 43 | int probe(); 44 | 45 | int reduce(); 46 | 47 | int reduceMany(); 48 | 49 | int scan(); 50 | 51 | int scatter(); 52 | 53 | int send(); 54 | 55 | int sendMany(); 56 | 57 | int sendRecv(); 58 | 59 | int sendSyncAsync(); 60 | 61 | int status(); 62 | 63 | int typeSize(); 64 | 65 | int winCreate(); 66 | 67 | // Other helper functions 68 | int doAllToAll(int rank, int worldSize, int i); 69 | 70 | int bench_allreduce(); 71 | 72 | int bench_send_recv(); 73 | 74 | void mpiMigrationPoint(int entrypointFuncArg); 75 | } 76 | 
# Standalone build of the MPI benchmarks
cmake_minimum_required(VERSION 3.5)
project(project)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_BUILD_TYPE Release)

# Benchmark sources shared with the rest of the distributed tests
set(BENCH_SRC_DIR ${CMAKE_CURRENT_LIST_DIR}/../benchmarks)

# add_bench_target(<target> <source>...)
# Builds one benchmark executable from the given sources, defines
# USE_REAL_MPI=1 for it and links it against the `mpi` library.
function(add_bench_target target)
    add_executable(${target} ${ARGN})
    target_compile_definitions(${target} PRIVATE USE_REAL_MPI=1)
    target_link_libraries(${target} mpi)
endfunction()

add_bench_target(bench_mpi_allreduce
    bench_mpi_allreduce.cpp
    ${BENCH_SRC_DIR}/mpi_allreduce.cpp
    ${BENCH_SRC_DIR}/mpi_bench.cpp
)

add_bench_target(bench_mpi_send_recv
    bench_mpi_send_recv.cpp
    ${BENCH_SRC_DIR}/mpi_send_recv.cpp
    ${BENCH_SRC_DIR}/mpi_bench.cpp
)
#include "init.h" 6 | 7 | #include 8 | 9 | namespace tests { 10 | TEST_CASE_METHOD(DistTestsFixture, "Test available hosts", "[scheduler]") 11 | { 12 | auto& sch = faabric::scheduler::getScheduler(); 13 | sch.addHostToGlobalSet(); 14 | 15 | // Check the available hosts 16 | auto availableHosts = plannerCli.getAvailableHosts(); 17 | std::set actual; 18 | for (auto& host : availableHosts) { 19 | actual.insert(host.ip()); 20 | } 21 | 22 | std::set expected = { getMasterIP(), getWorkerIP() }; 23 | REQUIRE(actual == expected); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /tests/dist/server.cpp: -------------------------------------------------------------------------------- 1 | #include "DistTestExecutor.h" 2 | #include "init.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int main() 10 | { 11 | faabric::util::initLogging(); 12 | 13 | tests::initDistTests(); 14 | 15 | int slots = 4; 16 | SPDLOG_INFO("Forcing distributed test server to have {} slots", slots); 17 | faabric::HostResources res; 18 | res.set_slots(slots); 19 | faabric::scheduler::getScheduler().setThisHostResources(res); 20 | 21 | SPDLOG_INFO("Starting distributed test server on worker"); 22 | std::shared_ptr fac = 23 | std::make_shared(); 24 | faabric::runner::FaabricMain faabricMain(fac); 25 | faabricMain.startBackground(); 26 | 27 | SPDLOG_INFO("---------------------------------"); 28 | SPDLOG_INFO("Distributed test server started"); 29 | SPDLOG_INFO("---------------------------------"); 30 | 31 | // Endpoint will block until killed 32 | SPDLOG_INFO("Starting HTTP endpoint on worker"); 33 | faabric::endpoint::FaabricEndpoint endpoint; 34 | endpoint.start(faabric::endpoint::EndpointMode::SIGNAL); 35 | 36 | SPDLOG_INFO("Shutting down"); 37 | faabricMain.shutdown(); 38 | 39 | return EXIT_SUCCESS; 40 | } 41 | -------------------------------------------------------------------------------- /tests/dist/transport/test_coordination.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "dist_test_fixtures.h" 4 | #include "faabric_utils.h" 5 | #include "init.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace tests { 14 | 15 | TEST_CASE_METHOD(DistTestsFixture, "Test distributed lock", "[ptp][transport]") 16 | { 17 | // Set up the host resources. The distributed lock test will start 10 other 18 | // functions (so we need 11 slots). We give each host 8 slots for an even 19 | // distribution 20 | int nSlotsPerHost = 8; 21 | faabric::HostResources res; 22 | res.set_slots(nSlotsPerHost); 23 | sch.setThisHostResources(res); 24 | sch.addHostToGlobalSet(getWorkerIP(), std::make_shared(res)); 25 | 26 | // Set up the request 27 | std::shared_ptr req = 28 | faabric::util::batchExecFactory("ptp", "lock", 1); 29 | 30 | plannerCli.callFunctions(req); 31 | 32 | faabric::Message& m = req->mutable_messages()->at(0); 33 | faabric::Message result = plannerCli.getMessageResult(m, 30000); 34 | REQUIRE(result.returnvalue() == 0); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /tests/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB_RECURSE TEST_FILES "${CMAKE_CURRENT_LIST_DIR}/test_*.cpp") 2 | 3 | add_executable( 4 | faabric_tests 5 | main.cpp 6 | ${TEST_FILES} 7 | ) 8 | 9 | target_include_directories(faabric_tests PUBLIC 10 | ${CMAKE_CURRENT_SOURCE_DIR} 11 | ) 12 | 13 | target_link_libraries(faabric_tests PRIVATE 14 | faabric::test_utils 15 | faabric::common_dependencies 16 | ) 17 | 18 | add_test(NAME faabric_tests COMMAND "tests/test/faabric_tests") 19 | -------------------------------------------------------------------------------- /tests/test/main.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_RUNNER 2 | 3 | // Disable catch signal 
catching to avoid interfering with dirty tracking 4 | #define CATCH_CONFIG_NO_POSIX_SIGNALS 1 5 | 6 | #include 7 | 8 | #include "faabric_utils.h" 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | FAABRIC_CATCH_LOGGER 15 | 16 | int main(int argc, char* argv[]) 17 | { 18 | faabric::util::setUpCrashHandler(); 19 | 20 | faabric::util::setTestMode(true); 21 | faabric::util::initLogging(); 22 | 23 | int result = Catch::Session().run(argc, argv); 24 | 25 | fflush(stdout); 26 | return result; 27 | } 28 | -------------------------------------------------------------------------------- /tests/test/transport/test_message.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "faabric_utils.h" 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace faabric::transport; 13 | 14 | namespace tests { 15 | 16 | TEST_CASE("Test message moving", "[transport]") 17 | { 18 | size_t msgSize = 100; 19 | 20 | faabric::transport::Message m(msgSize); 21 | uint8_t* dataPtr = m.udata().data(); 22 | 23 | // Set some data 24 | dataPtr[0] = 1; 25 | dataPtr[1] = 2; 26 | dataPtr[2] = 3; 27 | 28 | // Create moved copy 29 | faabric::transport::Message mB(std::move(m)); 30 | 31 | // Check we can still access the pointer 32 | REQUIRE(mB.udata().data() == dataPtr); 33 | 34 | // Check the old message is no longer accessible 35 | REQUIRE(m.udata().data() == nullptr); 36 | 37 | REQUIRE(dataPtr[0] == 1); 38 | REQUIRE(dataPtr[1] == 2); 39 | REQUIRE(dataPtr[2] == 3); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /tests/test/util/test_barrier.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "faabric_utils.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace faabric::util; 13 | 14 | namespace tests { 15 | TEST_CASE("Test barrier 
operation", "[util]") 16 | { 17 | int nThreads = 5; 18 | int nSums = 1000; 19 | std::vector> sums(nSums); 20 | 21 | std::atomic completionCount = 0; 22 | 23 | // Shared barrier between all threads 24 | auto b = 25 | Barrier::create(nThreads, [&completionCount] { completionCount++; }); 26 | 27 | // Have n-1 threads iterating through sums, adding, then waiting on the 28 | // barrier 29 | std::vector threads; 30 | for (int i = 1; i < nThreads; i++) { 31 | threads.emplace_back([nSums, &b, &sums]() { 32 | for (int s = 0; s < nSums; s++) { 33 | sums.at(s).fetch_add(s + 1); 34 | b->wait(); 35 | } 36 | }); 37 | } 38 | 39 | for (int s = 0; s < nSums; s++) { 40 | b->wait(); 41 | 42 | REQUIRE(sums.at(s) == (nThreads - 1) * (s + 1)); 43 | 44 | // Only the latest completion function should have run 45 | REQUIRE(completionCount == s + 1); 46 | } 47 | 48 | for (auto& t : threads) { 49 | if (t.joinable()) { 50 | t.join(); 51 | } 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /tests/test/util/test_environment.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "faabric_utils.h" 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | using namespace faabric::util; 11 | 12 | namespace tests { 13 | 14 | TEST_CASE("Test default environment variables", "[util]") 15 | { 16 | std::string key = "JUNK_VAR"; 17 | 18 | // Sanity check for null pointer when env var not set 19 | char const* val = getenv(key.c_str()); 20 | REQUIRE(val == nullptr); 21 | 22 | REQUIRE(getEnvVar(key, "blah") == "blah"); 23 | } 24 | 25 | TEST_CASE("Test setting environment variables", "[util]") 26 | { 27 | unsetEnvVar("MY_VAR"); 28 | 29 | // Sanity check for null pointer once the env var has been unset 30 | char* currentValue = getenv("MY_VAR"); 31 | REQUIRE(currentValue == nullptr); 32 | 33 | REQUIRE(getEnvVar("MY_VAR", "alpha") == "alpha"); 34 | 35 | // Check original is returned when resetting 36 | 
const std::string original = setEnvVar("MY_VAR", "beta"); 37 | REQUIRE(original == ""); 38 | 39 | const std::string original2 = setEnvVar("MY_VAR", "gamma"); 40 | REQUIRE(original2 == "beta"); 41 | } 42 | 43 | TEST_CASE("Test overriding CPU count", "[util]") 44 | { 45 | // Check default cores is same as usable cores 46 | auto& conf = getSystemConfig(); 47 | unsigned int defaultCores = getUsableCores(); 48 | REQUIRE(defaultCores == getUsableCores()); 49 | 50 | // Check it can be overridden 51 | conf.overrideCpuCount = 1234; 52 | REQUIRE(getUsableCores() == 1234); 53 | 54 | // Reset the conf 55 | conf.reset(); 56 | 57 | // Check we're back to the default 58 | REQUIRE(getUsableCores() == defaultCores); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /tests/test/util/test_files.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace faabric::util; 8 | 9 | namespace tests { 10 | 11 | TEST_CASE("Test writing to a file", "[util]") 12 | { 13 | std::string dummyFile = "/tmp/faasmTest1.txt"; 14 | 15 | // Write to the file 16 | std::vector bytesIn = { 0, 1, 2, 10, 20 }; 17 | faabric::util::writeBytesToFile(dummyFile, bytesIn); 18 | 19 | // Read in 20 | std::vector actual = faabric::util::readFileToBytes(dummyFile); 21 | 22 | // Check they match 23 | REQUIRE(actual == bytesIn); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /tests/test/util/test_gids.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | using namespace faabric::util; 11 | 12 | namespace tests { 13 | TEST_CASE("Test multithreaded gid generation", "[util]") 14 | { 15 | // Generate gids 16 | int nThreads = 10; 17 | int nLoops = 1000; 18 | int nValues = nThreads * nLoops; 19 | 
20 | std::vector generated; 21 | std::mutex mx; 22 | std::vector threads; // starts empty: emplace_back below adds exactly nThreads workers 23 | for (int i = 0; i < nThreads; i++) { 24 | threads.emplace_back(std::jthread([&generated, &mx, nLoops] { 25 | for (int j = 0; j < nLoops; j++) { 26 | faabric::util::UniqueLock lock(mx); 27 | generated.push_back(faabric::util::generateGid()); 28 | } 29 | })); 30 | } 31 | 32 | for (auto& t : threads) { 33 | if (t.joinable()) { 34 | t.join(); 35 | } 36 | } 37 | 38 | REQUIRE(generated.size() == nValues); 39 | 40 | // Check that there are no duplicates (if there's a problem there should 41 | // reliably be several) 42 | std::unordered_set uniques; 43 | for (auto g : generated) { 44 | if (uniques.count(g) > 0) { 45 | SPDLOG_ERROR("Found duplicate gid - {}", g); 46 | FAIL(); 47 | } else { 48 | uniques.insert(g); 49 | } 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /tests/test/util/test_latch.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "faabric_utils.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | using namespace faabric::util; 13 | 14 | namespace tests { 15 | TEST_CASE("Test latch operation", "[util]") 16 | { 17 | auto l = Latch::create(3); 18 | 19 | auto t1 = std::jthread([l] { l->wait(); }); 20 | auto t2 = std::jthread([l] { l->wait(); }); 21 | 22 | l->wait(); 23 | 24 | if (t1.joinable()) { 25 | t1.join(); 26 | } 27 | 28 | if (t2.joinable()) { 29 | t2.join(); 30 | } 31 | 32 | REQUIRE_THROWS(l->wait()); 33 | } 34 | 35 | TEST_CASE("Test latch timeout", "[util]") 36 | { 37 | int timeoutMs = 500; 38 | auto l = Latch::create(2, timeoutMs); 39 | REQUIRE_THROWS(l->wait()); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /tests/test/util/test_network.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | 
using namespace faabric::util; 6 | 7 | namespace tests { 8 | TEST_CASE("Test getting primary IP", "[util]") 9 | { 10 | // Can only really check this is not empty 11 | std::string ipA = getPrimaryIPForThisHost(""); 12 | REQUIRE(!ipA.empty()); 13 | 14 | // Test IP on junk interface is empty 15 | std::string ipB = getPrimaryIPForThisHost("foobar"); 16 | REQUIRE(ipB.empty()); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /tests/test/util/test_periodic_thread.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "faabric_utils.h" 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | using namespace faabric::util; 12 | 13 | namespace tests { 14 | 15 | class DummyPeriodicThread : public PeriodicBackgroundThread 16 | { 17 | public: 18 | DummyPeriodicThread(std::shared_ptr barrierIn) 19 | : barrier(barrierIn) 20 | {} 21 | 22 | void doWork() override 23 | { 24 | isRunning.store(true); 25 | workCount++; 26 | barrier->wait(); 27 | } 28 | 29 | void tidyUp() override { isRunning.store(false); } 30 | 31 | int getWorkCount() { return workCount.load(); } 32 | 33 | std::atomic isRunning = false; 34 | 35 | private: 36 | std::shared_ptr barrier; 37 | 38 | std::atomic workCount = 0; 39 | }; 40 | 41 | TEST_CASE("Test periodic background operation", "[util]") 42 | { 43 | int intervalSeconds = 1; 44 | 45 | auto b = Barrier::create(2); 46 | 47 | DummyPeriodicThread t(b); 48 | REQUIRE(t.getWorkCount() == 0); 49 | 50 | // Start and wait on the barrier twice 51 | t.start(intervalSeconds); 52 | b->wait(); 53 | REQUIRE(t.getWorkCount() == 1); 54 | REQUIRE(t.isRunning.load()); 55 | 56 | b->wait(); 57 | REQUIRE(t.getWorkCount() == 2); 58 | 59 | // Stop the thread 60 | t.stop(); 61 | REQUIRE(!t.isRunning.load()); 62 | 63 | // Check the count again 64 | REQUIRE(t.getWorkCount() == 2); 65 | } 66 | 67 | TEST_CASE( 68 | "Test periodic background thread does not start with 
non-positive interval", 69 | "[util]") 70 | { 71 | int intervalSeconds = 0; 72 | 73 | auto b = Barrier::create(2); 74 | 75 | DummyPeriodicThread t(b); 76 | REQUIRE(t.getWorkCount() == 0); 77 | 78 | // Start and wait for the interval 79 | t.start(intervalSeconds); 80 | SLEEP_MS(intervalSeconds * 1000); 81 | REQUIRE(!t.isRunning.load()); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /tests/test/util/test_random.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | 7 | using namespace faabric::util; 8 | 9 | namespace tests { 10 | TEST_CASE("Test random string generation", "[util]") 11 | { 12 | std::string actualA = randomString(100); 13 | REQUIRE(actualA.size() == 100); 14 | 15 | std::string actualB = randomString(100); 16 | REQUIRE(actualB.size() == 100); 17 | 18 | REQUIRE(actualA != actualB); 19 | } 20 | 21 | TEST_CASE("Test random value from set", "[random]") 22 | { 23 | std::unordered_set s; 24 | 25 | // Should return empty string if nothing 26 | REQUIRE(faabric::util::randomStringFromSet(s).empty()); 27 | 28 | s.insert("foo"); 29 | s.insert("bar"); 30 | s.insert("baz"); 31 | s.insert("qux"); 32 | 33 | std::unordered_set actual; 34 | for (int i = 0; i < 1000; i++) { 35 | actual.insert(faabric::util::randomStringFromSet(s)); 36 | } 37 | 38 | REQUIRE(actual.size() == 4); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /tests/test/util/test_state.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | using namespace faabric::util; 6 | 7 | namespace tests { 8 | 9 | TEST_CASE("Test creating key for user", "[util]") 10 | { 11 | REQUIRE(faabric::util::keyForUser("foo", "bar") == "foo_bar"); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /tests/utils/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | 2 | add_library(faabric_test_utils 3 | DummyExecutor.cpp 4 | DummyExecutorFactory.cpp 5 | exec_graph_utils.cpp 6 | http_utils.cpp 7 | message_utils.cpp 8 | planner_utils.cpp 9 | scheduling_utils.cpp 10 | ) 11 | 12 | target_compile_options(faabric_test_utils PUBLIC -fno-omit-frame-pointer) 13 | target_link_options(faabric_test_utils PUBLIC -Wl,--export-dynamic) 14 | 15 | target_include_directories(faabric_test_utils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 16 | 17 | target_link_libraries(faabric_test_utils PUBLIC 18 | faabric::common_dependencies 19 | faabric::faabric 20 | curl 21 | Catch2::Catch2 22 | ) 23 | 24 | add_library(faabric::test_utils ALIAS faabric_test_utils) 25 | -------------------------------------------------------------------------------- /tests/utils/DummyExecutor.cpp: -------------------------------------------------------------------------------- 1 | #include "DummyExecutor.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define SHORT_SLEEP_MS 50 10 | 11 | namespace faabric::executor { 12 | 13 | DummyExecutor::DummyExecutor(faabric::Message& msg) 14 | : Executor(msg) 15 | {} 16 | 17 | int32_t DummyExecutor::executeTask( 18 | int threadPoolIdx, 19 | int msgIdx, 20 | std::shared_ptr req) 21 | { 22 | 23 | faabric::Message& msg = req->mutable_messages()->at(msgIdx); 24 | SPDLOG_DEBUG("DummyExecutor executing task {}", msg.id()); 25 | 26 | msg.set_outputdata(fmt::format("DummyExecutor executed {}", msg.id())); 27 | 28 | // Make sure the executor stays busy and cannot accept another task while 29 | // the scheduler is executing its logic. TSan tests are sensitive to this. 
30 | SLEEP_MS(SHORT_SLEEP_MS); 31 | 32 | return 0; 33 | } 34 | 35 | std::span DummyExecutor::getMemoryView() 36 | { 37 | return {}; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /tests/utils/DummyExecutor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace faabric::executor { 6 | 7 | class DummyExecutor final : public Executor 8 | { 9 | public: 10 | DummyExecutor(faabric::Message& msg); 11 | 12 | protected: 13 | int32_t executeTask( 14 | int threadPoolIdx, 15 | int msgIdx, 16 | std::shared_ptr req) override; 17 | 18 | std::span getMemoryView() override; 19 | }; 20 | 21 | } 22 | -------------------------------------------------------------------------------- /tests/utils/DummyExecutorFactory.cpp: -------------------------------------------------------------------------------- 1 | #include "DummyExecutorFactory.h" 2 | #include "DummyExecutor.h" 3 | 4 | #include 5 | 6 | namespace faabric::executor { 7 | 8 | std::shared_ptr DummyExecutorFactory::createExecutor( 9 | faabric::Message& msg) 10 | { 11 | return std::make_shared(msg); 12 | } 13 | 14 | int DummyExecutorFactory::getFlushCount() 15 | { 16 | return flushCount; 17 | } 18 | 19 | void DummyExecutorFactory::flushHost() 20 | { 21 | flushCount++; 22 | } 23 | 24 | void DummyExecutorFactory::reset() 25 | { 26 | flushCount = 0; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /tests/utils/DummyExecutorFactory.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace faabric::executor { 6 | 7 | class DummyExecutorFactory : public ExecutorFactory 8 | { 9 | public: 10 | void reset(); 11 | 12 | int getFlushCount(); 13 | 14 | std::shared_ptr createExecutor(faabric::Message& msg) override; 15 | 16 | protected: 17 | void flushHost() override; 18 | 19 | private: 20 | 
int flushCount = 0; 21 | }; 22 | } 23 | -------------------------------------------------------------------------------- /tests/utils/exec_graph_utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "faabric_utils.h" 4 | 5 | #include 6 | 7 | using namespace faabric::util; 8 | 9 | namespace tests { 10 | void checkExecGraphNodeEquality(const ExecGraphNode& nodeA, 11 | const ExecGraphNode& nodeB) 12 | { 13 | // Check the message itself 14 | checkMessageEquality(nodeA.msg, nodeB.msg); 15 | 16 | if (nodeA.children.size() != nodeB.children.size()) { 17 | FAIL(fmt::format("Children not same size: {} vs {}", 18 | nodeA.children.size(), 19 | nodeB.children.size())); 20 | } 21 | 22 | // Assume children are in same order; recurse pairwise (unsigned index matches size()) 23 | for (size_t i = 0; i < nodeA.children.size(); i++) { 24 | checkExecGraphNodeEquality(nodeA.children.at(i), nodeB.children.at(i)); 25 | } 26 | } 27 | 28 | void checkExecGraphEquality(const ExecGraph& graphA, const ExecGraph& graphB) 29 | { 30 | checkExecGraphNodeEquality(graphA.rootNode, graphB.rootNode); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /tests/utils/message_utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "faabric_utils.h" 4 | 5 | namespace tests { 6 | void checkMessageEquality(const faabric::Message& msgA, 7 | const faabric::Message& msgB) 8 | { 9 | REQUIRE(msgA.id() == msgB.id()); 10 | REQUIRE(msgA.type() == msgB.type()); 11 | 12 | REQUIRE(msgA.user() == msgB.user()); 13 | REQUIRE(msgA.function() == msgB.function()); 14 | REQUIRE(msgA.executedhost() == msgB.executedhost()); 15 | 16 | REQUIRE(msgA.starttimestamp() == msgB.starttimestamp()); 17 | REQUIRE(msgA.snapshotkey() == msgB.snapshotkey()); 18 | REQUIRE(msgA.funcptr() == msgB.funcptr()); 19 | 20 | REQUIRE(msgA.pythonuser() == msgB.pythonuser()); 21 | REQUIRE(msgA.pythonfunction() == 
msgB.pythonfunction()); 22 | REQUIRE(msgA.pythonentry() == msgB.pythonentry()); 23 | REQUIRE(msgA.ispython() == msgB.ispython()); 24 | 25 | REQUIRE(msgA.returnvalue() == msgB.returnvalue()); 26 | 27 | REQUIRE(msgA.inputdata() == msgB.inputdata()); 28 | REQUIRE(msgA.outputdata() == msgB.outputdata()); 29 | 30 | REQUIRE(msgA.resultkey() == msgB.resultkey()); 31 | REQUIRE(msgA.statuskey() == msgB.statuskey()); 32 | 33 | REQUIRE(msgA.ismpi() == msgB.ismpi()); 34 | REQUIRE(msgA.mpiworldid() == msgB.mpiworldid()); 35 | REQUIRE(msgA.mpirank() == msgB.mpirank()); 36 | REQUIRE(msgA.mpiworldsize() == msgB.mpiworldsize()); 37 | 38 | REQUIRE(msgA.cmdline() == msgB.cmdline()); 39 | 40 | REQUIRE(msgA.recordexecgraph() == msgB.recordexecgraph()); 41 | checkMessageMapEquality(msgA.execgraphdetails(), msgB.execgraphdetails()); 42 | checkMessageMapEquality(msgA.intexecgraphdetails(), 43 | msgB.intexecgraphdetails()); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /tests/utils/scheduling_utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "faabric_utils.h" 4 | 5 | namespace tests { 6 | 7 | void checkSchedulingDecisionEquality( 8 | const faabric::batch_scheduler::SchedulingDecision& decisionA, 9 | const faabric::batch_scheduler::SchedulingDecision& decisionB) 10 | { 11 | REQUIRE(decisionA.appId == decisionB.appId); 12 | REQUIRE(decisionA.nFunctions == decisionB.nFunctions); 13 | REQUIRE(decisionA.messageIds == decisionB.messageIds); 14 | REQUIRE(decisionA.hosts == decisionB.hosts); 15 | REQUIRE(decisionA.appIdxs == decisionB.appIdxs); 16 | REQUIRE(decisionA.returnHost == decisionB.returnHost); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /thread-sanitizer-ignorelist.txt: -------------------------------------------------------------------------------- 1 | # Config only changes in tests, and in places where 
being slightly racy doesn't matter 2 | race:faabric::util::SystemConfig::* 3 | # Catch2 allocates in its signal handler, this prevents showing the wrong crash report 4 | signal:* 5 | 6 | # Tsan doesn't see through the readerwriterqueue's semaphore implementation 7 | race:moodycamel::BlockingReaderWriterCircularBuffer* 8 | 9 | # TODO: Remove: There's something weird going on with MPI code I don't understand 10 | race:faabric::mpi::MpiWorld::* 11 | 12 | # Race in ReaderWriterQueue 13 | race:moodycamel::* 14 | --------------------------------------------------------------------------------