├── mosec ├── py.typed ├── mixin │ ├── __init__.py │ ├── numbin_worker.py │ ├── msgpack_worker.py │ ├── typed_worker.py │ ├── plasma_worker.py │ └── redis_worker.py ├── __init__.py ├── utils.py ├── env.py ├── errors.py └── protocol.py ├── docs ├── source │ ├── _static │ │ └── .gitkeep │ ├── development │ │ ├── contributing.md │ │ └── index.md │ ├── robots.txt │ ├── license.md │ ├── reference │ │ ├── arguments.md │ │ ├── index.md │ │ ├── interface.md │ │ ├── migration.md │ │ └── concept.md │ ├── examples │ │ ├── rerank.md │ │ ├── embedding.md │ │ ├── index.md │ │ ├── stable_diffusion.md │ │ ├── echo.md │ │ ├── compression.md │ │ ├── env.md │ │ ├── jax.md │ │ ├── validate.md │ │ ├── ipc.md │ │ ├── metric.md │ │ ├── multi_route.md │ │ └── pytorch.md │ ├── index.md │ └── conf.py ├── Makefile └── make.bat ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── feature_request.yaml │ └── bug_report.yaml ├── release.yml ├── workflows │ ├── links.yml │ ├── deny.yml │ ├── label.yml │ ├── nightly.yml │ ├── page.yml │ ├── check.yml │ ├── codeql.yml │ └── package.yml └── dependabot.yml ├── rust-toolchain.toml ├── lychee.toml ├── examples ├── monitor │ ├── mosec_datasource.yml │ ├── mosec_dashboard.yml │ ├── dockerfile │ ├── prometheus.yml │ ├── docker-compose.yml │ ├── README.md │ └── python_side_metrics.py ├── README.md ├── __init__.py ├── stable_diffusion │ ├── build.envd │ ├── client.py │ └── server.py ├── embedding │ ├── client.py │ └── server.py ├── type_validation │ ├── client.py │ └── server.py ├── jax_single_layer │ ├── client.py │ └── server.py ├── rerank │ ├── client.py │ └── server.py ├── resnet50_msgpack │ ├── client.py │ └── server.py ├── server_side_event │ ├── client.py │ └── server.py ├── multi_route │ ├── client.py │ └── server.py ├── custom_env.py ├── segment │ ├── client.py │ └── server.py ├── echo.py ├── shm_ipc │ ├── redis.py │ └── plasma_legacy.py └── distil_bert_server_pytorch.py ├── rustfmt.toml ├── .devcontainer ├── devcontainer.json └── 
Dockerfile ├── CITATION.cff ├── tests ├── __init__.py ├── services │ ├── __init__.py │ ├── mixin_typed_service.py │ ├── square_service.py │ ├── mixin_numbin_service.py │ ├── sse_service.py │ ├── bad_service.py │ ├── timeout_service.py │ ├── multi_route_service.py │ ├── openapi_service.py │ └── mixin_ipc_shm_service.py ├── test_utils.py ├── test_log.py ├── mock_socket.py ├── bad_req.py ├── test_protocol.py └── utils.py ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── .readthedocs.yaml ├── src ├── errors.rs ├── apidoc.rs ├── config.rs ├── layouts.rs ├── metrics.rs └── main.rs ├── Cargo.toml ├── Dockerfile ├── Makefile ├── .gitignore └── pyproject.toml /mosec/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/_static/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @kemingy 2 | -------------------------------------------------------------------------------- /docs/source/development/contributing.md: -------------------------------------------------------------------------------- 1 | ```{include} ../../../CONTRIBUTING.md 2 | ``` 3 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "stable" 3 | components = ["rustfmt", "clippy"] 4 | -------------------------------------------------------------------------------- /docs/source/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | 3 | Sitemap: https://mosecorg.github.io/mosec/sitemap.xml 4 | 
-------------------------------------------------------------------------------- /docs/source/license.md: -------------------------------------------------------------------------------- 1 | --- 2 | orphan: true 3 | --- 4 | 5 | # License 6 | 7 | ```{include} ../../LICENSE 8 | ``` 9 | -------------------------------------------------------------------------------- /lychee.toml: -------------------------------------------------------------------------------- 1 | accept = "100..=103, 200..=208, 403" 2 | scheme = ["https", "http", "mailto"] 3 | exclude_loopback = true 4 | -------------------------------------------------------------------------------- /docs/source/development/index.md: -------------------------------------------------------------------------------- 1 | # Development 2 | 3 | ```{toctree} 4 | --- 5 | hidden: 6 | --- 7 | 8 | contributing 9 | ``` 10 | 11 | - {doc}`contributing` 12 | -------------------------------------------------------------------------------- /examples/monitor/mosec_datasource.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: 'Prometheus' 5 | type: prometheus 6 | access: proxy 7 | url: prometheus:9090 -------------------------------------------------------------------------------- /docs/source/reference/arguments.md: -------------------------------------------------------------------------------- 1 | # CLI Arguments 2 | 3 | ```shell 4 | python echo.py --help 5 | ``` 6 | 7 | ```{program-output} python ../../examples/echo.py --help 8 | ``` 9 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | All the examples in this section are self-contained and tested. 2 | 3 | See https://mosecorg.github.io/mosec/examples/index.html for detailed explanations. 
4 | -------------------------------------------------------------------------------- /examples/monitor/mosec_dashboard.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: 'Mosec Dashboards' 5 | type: file 6 | options: 7 | path: /etc/grafana/provisioning/dashboards 8 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | reorder_imports = true 2 | 3 | # unstable 4 | unstable_features = true 5 | format_strings = true 6 | imports_granularity = "Module" 7 | group_imports = "StdExternalCrate" 8 | reorder_impl_items = true 9 | -------------------------------------------------------------------------------- /docs/source/reference/index.md: -------------------------------------------------------------------------------- 1 | # Reference 2 | 3 | ```{toctree} 4 | --- 5 | hidden: 6 | --- 7 | 8 | arguments 9 | interface 10 | concept 11 | migration 12 | ``` 13 | 14 | - {doc}`arguments` 15 | - {doc}`interface` 16 | - {doc}`concept` 17 | - {doc}`migration` 18 | -------------------------------------------------------------------------------- /examples/monitor/dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim-buster 2 | 3 | COPY python_side_metrics.py /root/python_side_metrics.py 4 | 5 | RUN pip install -U mosec prometheus_client 6 | 7 | ENTRYPOINT ["python", "/root/python_side_metrics.py"] 8 | 9 | EXPOSE 8000 5000 10 | -------------------------------------------------------------------------------- /examples/monitor/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 1s 3 | evaluation_interval: 1s 4 | 5 | scrape_configs: 6 | - job_name: mosec_rust 7 | static_configs: 8 | - targets: ['mosec:8000'] 9 | - job_name: mosec_python 10 | 
static_configs: 11 | - targets: ['mosec:5000'] 12 | - job_name: prometheus 13 | static_configs: 14 | - targets: ['prometheus:9090'] 15 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mosec", 3 | "build": { "dockerfile": "Dockerfile" }, 4 | "extensions": [ 5 | "rust-lang.rust-analyzer", 6 | "ms-python.python" 7 | ], 8 | "remoteUser": "dev", 9 | "updateRemoteUserUID": true, 10 | "remoteEnv": { "PATH": "${containerEnv:PATH}:/home/dev/.local/bin" }, 11 | "settings": { 12 | "editor.formatOnSave": true, 13 | "files.exclude": { 14 | "**/LICENSE": true 15 | } 16 | } 17 | } -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | 3 | contact_links: 4 | - name: Have you read the docs? 
5 | url: https://mosecorg.github.io/mosec/ 6 | about: Much help can be found in the docs 7 | - name: Ask a question 8 | url: https://github.com/mosecorg/mosec/discussions/new/choose 9 | about: Ask a question or start a discussion 10 | - name: Chat on Discord 11 | url: https://discord.gg/Jq5vxuH69W 12 | about: Maybe chatting with the community can help 13 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | ENV TERM xterm-256color 4 | 5 | RUN useradd -p "" -u 1000 -s /bin/bash -m dev 6 | 7 | RUN apt-get update && apt-get install -y git ripgrep && \ 8 | curl --proto '=https' --tlsv1.2 -sSf https://starship.rs/install.sh | sh -s -- -y && \ 9 | printf 'eval "$(starship init bash)"' >> /etc/bash.bashrc && \ 10 | rm -rf /tmp/* /var/lib/apt/lists/* 11 | 12 | USER dev 13 | 14 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 15 | 16 | WORKDIR /home/dev 17 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Yang" 5 | given-names: "Keming" 6 | orcid: "https://orcid.org/0000-0002-1351-2342" 7 | - family-names: "Liu" 8 | given-names: "Zichen" 9 | orcid: "https://orcid.org/0000-0001-9451-8625" 10 | - family-names: "Cheng" 11 | given-names: "Philip" 12 | title: "MOSEC: Model Serving made Efficient in the Cloud" 13 | url: "https://github.com/mosecorg/mosec" 14 | type: software 15 | date-released: 2021-09-27 16 | -------------------------------------------------------------------------------- /docs/source/examples/rerank.md: -------------------------------------------------------------------------------- 1 | # Cross-Encoder model for reranking 2 | 3 | This example shows how to use a cross-encoder model to rerank a list of passages based on a query. This is useful for hybrid search that combines multiple retrieval results. 4 | 5 | 6 | ## Server 7 | 8 | ```bash 9 | python examples/rerank/server.py 10 | ``` 11 | 12 | ```{include} ../../../examples/rerank/server.py 13 | :code: python 14 | ``` 15 | 16 | ## Client 17 | 18 | ```bash 19 | python examples/rerank/client.py 20 | ``` 21 | 22 | ```{include} ../../../examples/rerank/client.py 23 | :code: python 24 | ``` 25 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | exclude: 3 | labels: 4 | - dependencies 5 | authors: 6 | - dependabot 7 | categories: 8 | - title: Changes 🛠 9 | labels: 10 | - bug_fix 11 | - title: Exciting New Features 🎉 12 | labels: 13 | - enhancement 14 | - title: More Documentation 📚 15 | labels: 16 | - documentation 17 | - title: Refactoring 🧬 18 | labels: 19 | - refactoring 20 | - title: Minor changes 🧹 21 | labels: 22 | - chore 23 | - title: Others 🔔 24 | labels: 25 | - "*" 26 | -------------------------------------------------------------------------------- /docs/source/reference/interface.md: 
-------------------------------------------------------------------------------- 1 | # Interface 2 | 3 | ## Server 4 | 5 | ```{eval-rst} 6 | .. automodule:: mosec.server 7 | :members: 8 | ``` 9 | 10 | ## Worker 11 | 12 | ```{eval-rst} 13 | .. automodule:: mosec.worker 14 | :members: 15 | ``` 16 | 17 | ## Runtime 18 | 19 | ```{eval-rst} 20 | .. automodule:: mosec.runtime 21 | :members: Runtime 22 | ``` 23 | 24 | ## Errors 25 | 26 | ```{eval-rst} 27 | .. automodule:: mosec.errors 28 | :members: 29 | :show-inheritance: 30 | ``` 31 | 32 | ## Mixins 33 | 34 | ```{eval-rst} 35 | .. automodule:: mosec.mixin 36 | :members: 37 | :show-inheritance: 38 | ``` 39 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/services/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /.github/workflows/links.yml: -------------------------------------------------------------------------------- 1 | name: Links 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - '.github/workflows/links.yml' 9 | - '**.md' 10 | - '**.html' 11 | pull_request: 12 | paths: 13 | - '.github/workflows/links.yml' 14 | - '**.md' 15 | - '**.html' 16 | workflow_dispatch: 17 | 18 | jobs: 19 | linkChecker: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v6 23 | 24 | - name: Link Checker 25 | uses: lycheeverse/lychee-action@v2 26 | with: 27 | fail: true 28 | args: --verbose --no-progress --format detailed . 
29 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v6.0.0 4 | hooks: 5 | - id: check-docstring-first 6 | - id: check-merge-conflict 7 | - id: check-yaml 8 | - id: end-of-file-fixer 9 | - id: trailing-whitespace 10 | - repo: local 11 | hooks: 12 | - id: make-lint 13 | name: Lint 14 | entry: make lint semantic_lint 15 | language: system 16 | types: [python, rust] 17 | pass_filenames: false 18 | always_run: true 19 | - repo: https://github.com/crate-ci/typos 20 | rev: v1.35.5 21 | hooks: 22 | - id: typos 23 | -------------------------------------------------------------------------------- /examples/stable_diffusion/build.envd: -------------------------------------------------------------------------------- 1 | # syntax=v1 2 | 3 | 4 | def basic(): 5 | install.cuda(version="11.6.2") 6 | install.python() 7 | install.python_packages( 8 | name=[ 9 | "torch --extra-index-url https://download.pytorch.org/whl/cu116", 10 | "diffusers[torch]", 11 | "transformers", 12 | "accelerate", 13 | "msgpack", 14 | "mosec", 15 | ] 16 | ) 17 | 18 | 19 | def build(): 20 | base(dev=True) 21 | basic() 22 | runtime.expose(envd_port=8000, host_port=8000, service="sd") 23 | 24 | 25 | def serving(): 26 | basic() 27 | io.copy("server.py", "/") 28 | config.entrypoint(["python", "server.py", "--timeout", "30000"]) 29 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 
6 | SPHINXOPTS ?= -W --keep-going 7 | SPHINXBUILD ?= uv run sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/source/examples/embedding.md: -------------------------------------------------------------------------------- 1 | # OpenAI compatible embedding service 2 | 3 | This example shows how to create an embedding service that is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings). 4 | 5 | In this example, we use the embedding model from [Hugging Face LeaderBoard](https://huggingface.co/spaces/mteb/leaderboard). 
6 | 7 | 8 | ## Server 9 | 10 | ```bash 11 | EMB_MODEL=thenlper/gte-base python examples/embedding/server.py 12 | ``` 13 | 14 | ```{include} ../../../examples/embedding/server.py 15 | :code: python 16 | ``` 17 | 18 | ## Client 19 | 20 | ```bash 21 | EMB_MODEL=thenlper/gte-base python examples/embedding/client.py 22 | ``` 23 | 24 | ```{include} ../../../examples/embedding/client.py 25 | :code: python 26 | ``` 27 | -------------------------------------------------------------------------------- /docs/source/examples/index.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | ```{toctree} 4 | --- 5 | hidden: 6 | --- 7 | 8 | echo 9 | embedding 10 | env 11 | jax 12 | ipc 13 | metric 14 | multi_route 15 | pytorch 16 | rerank 17 | stable_diffusion 18 | validate 19 | compression 20 | ``` 21 | 22 | We provide examples across different ML frameworks and for various tasks in this section. 23 | 24 | ## Requirements 25 | 26 | All the examples in this section are self-contained and tested. Feel free to grab one and run: 27 | 28 | ```shell 29 | python model_server.py 30 | ``` 31 | 32 | To test the server, we use [`httpie`](https://github.com/httpie/httpie) and [`httpx`](https://github.com/encode/httpx) by default. Any other HTTP client works as well, but if you want to install these two: 33 | 34 | ```shell 35 | pip install httpie httpx 36 | ``` 37 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to `Mosec` 2 | 3 | Before contributing to this repository, please first discuss the change you wish to make with the owners of this repository via issue, email, or any other method. 4 | 5 | ## Pull Request Process 6 | 7 | 1. After you have forked this repository, run `make install` once (only needed *the first time*) to install the local development dependencies. 8 | 2. 
Before committing your changes, you can use `make format && make lint` to ensure the code follows our style standards. 9 | 3. Please add corresponding tests for your change if it is related to a new feature or API, and ensure `make test` passes. 10 | 4. Submit your pull request. 11 | 12 | ## Contacts 13 | 14 | - [Keming](mailto:kemingy94@gmail.com) 15 | - [zclzc](mailto:lkevinzc@gmail.com) 16 | -------------------------------------------------------------------------------- /docs/source/examples/stable_diffusion.md: -------------------------------------------------------------------------------- 1 | # Stable Diffusion 2 | 3 | This example provides a demo service for stable diffusion. You can develop this in the container environment by using [envd](https://github.com/tensorchord/envd): `envd up -p examples/stable_diffusion`. 4 | 5 | You should be able to try this demo under the `mosec/examples/stable_diffusion/` directory. 6 | 7 | ## Server 8 | 9 | ```shell 10 | envd build -t sd:serving 11 | docker run --rm --gpus all -p 8000:8000 sd:serving 12 | ``` 13 | 14 | ```{include} ../../../examples/stable_diffusion/server.py 15 | :code: python 16 | ``` 17 | 18 | ```shell 19 | python server.py --timeout 30000 20 | ``` 21 | 22 | ## Client 23 | 24 | ```shell 25 | python client.py --prompt "a cute cat site on the basketball" 26 | ``` 27 | 28 | ```{include} ../../../examples/stable_diffusion/client.py 29 | :code: python 30 | ``` 31 | -------------------------------------------------------------------------------- /docs/source/examples/echo.md: -------------------------------------------------------------------------------- 1 | # Echo Example 2 | 3 | An echo server is usually the very first server you want to implement to get familiar with the framework. 4 | 5 | This server sleeps for a given period and then returns. It is a simple illustration of how a **multi-stage workload** is implemented. It also shows how to write a simple **validation** for input data. 
6 | 7 | The default JSON protocol will be used since the (de)serialization methods are not overridden in this demo. In particular, the input `data` of `Preprocess`'s `forward` is a dictionary decoded by JSON from the request body's bytes; and the output dictionary of `Postprocess`'s `forward` will be JSON-encoded as a mirrored process. 8 | 9 | ## **`echo.py`** 10 | 11 | ```{include} ../../../examples/echo.py 12 | :code: python 13 | ``` 14 | 15 | ## Start 16 | 17 | ```shell 18 | python echo.py 19 | ``` 20 | 21 | ## Test 22 | 23 | ```shell 24 | http :8000/inference time=1.5 25 | ``` 26 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | # Machine Learning Model Serving made Efficient in the Cloud 2 | 3 | ```{eval-rst} 4 | .. 
meta:: 5 | :description lang=en: 6 | MOSEC is a high-performance serving framework for ML models, offering dynamic batching and multi-stage pipelines to fully exploit your compute machine. 7 | ``` 8 | 9 | ```{note} 10 | {doc}`mosec is licensed under the Apache-2 <license>`. 11 | ``` 12 | 13 | ```{include} ../../README.md 14 | ``` 15 | 16 | ```{toctree} 17 | --- 18 | maxdepth: 2 19 | hidden: 20 | caption: User Guide 21 | --- 22 | reference/index 23 | examples/index 24 | ``` 25 | 26 | ```{toctree} 27 | --- 28 | maxdepth: 2 29 | hidden: 30 | caption: Dev Guide 31 | --- 32 | development/index 33 | ``` 34 | 35 | ```{toctree} 36 | --- 37 | hidden: 38 | caption: Project Links 39 | --- 40 | 41 | GitHub <https://github.com/mosecorg/mosec> 42 | Discord <https://discord.gg/Jq5vxuH69W> 43 | ``` 44 | 45 | ## Indices and tables 46 | 47 | - {ref}`genindex` 48 | -------------------------------------------------------------------------------- /docs/source/examples/compression.md: -------------------------------------------------------------------------------- 1 | # Compression 2 | 3 | This example demonstrates how to use the `--compression` feature for segmentation tasks. We use the example from the [Segment Anything Model 2](https://github.com/facebookresearch/sam2/blob/main/notebooks/image_predictor_example.ipynb). The request includes an image and its low-resolution mask; the response is the final mask. Since there are lots of duplicate values in the mask, we can use `gzip` or `zstd` to compress it. 4 | 5 | ## Server 6 | 7 | ```shell 8 | python examples/segment/server.py --compression 9 | ``` 10 | 11 | <details>
12 | <summary>segment.py</summary> 13 | 14 | ```{include} ../../../examples/segment/server.py 15 | :code: python 16 | ``` 17 | 18 | </details>
19 | 20 | ## Client 21 | 22 | ```shell 23 | python examples/segment/client.py 24 | ``` 25 | 26 | <details>
27 | <summary>segment.py</summary> 28 | 29 | ```{include} ../../../examples/segment/client.py 30 | :code: python 31 | ``` 32 | 33 | </details>
34 | -------------------------------------------------------------------------------- /examples/monitor/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | 5 | mosec: 6 | build: . 7 | container_name: mosec 8 | ports: 9 | - "8000:8000" 10 | - "5000:5000" 11 | 12 | prometheus: 13 | image: prom/prometheus:v2.30.0 14 | container_name: prometheus 15 | ports: 16 | - "9090:9090" 17 | volumes: 18 | - ./prometheus.yml:/etc/prometheus/prometheus.yml 19 | command: 20 | - --config.file=/etc/prometheus/prometheus.yml 21 | restart: always 22 | depends_on: 23 | - mosec 24 | 25 | grafana: 26 | image: grafana/grafana:8.2.2 27 | container_name: grafana 28 | ports: 29 | - "3000:3000" 30 | volumes: 31 | - ./mosec_datasource.yml:/etc/grafana/provisioning/datasources/mosec_datasource.yml 32 | - ./mosec_dashboard.yml:/etc/grafana/provisioning/dashboards/mosec_dashboard.yml 33 | - ./mosec_dashboard.json:/etc/grafana/provisioning/dashboards/mosec_dashboard.json 34 | restart: always 35 | depends_on: 36 | - prometheus 37 | -------------------------------------------------------------------------------- /examples/embedding/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """OpenAI embedding client example.""" 16 | 17 | import os 18 | 19 | from openai import Client 20 | 21 | DEFAULT_MODEL = "thenlper/gte-base" 22 | 23 | client = Client(api_key="fake", base_url="http://127.0.0.1:8000/") 24 | emb = client.embeddings.create( 25 | model=os.getenv("EMB_MODEL", DEFAULT_MODEL), 26 | input="Hello world!", 27 | ) 28 | print(emb.data[0].embedding) # type: ignore 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yaml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Feature request for mosec 3 | labels: ["enhancement"] 4 | title: "feat: " 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to fill out this feature request! 10 | - type: textarea 11 | attributes: 12 | label: Describe the feature 13 | description: A clear and concise description of what the feature is. 14 | validations: 15 | required: true 16 | - type: textarea 17 | attributes: 18 | label: Why do you need this feature? 19 | description: A clear and concise description of why you need this feature. 20 | validations: 21 | required: false 22 | - type: textarea 23 | attributes: 24 | label: Additional context 25 | description: Add any other context about the problem here. 26 | validations: 27 | required: false 28 | - type: markdown 29 | attributes: 30 | value: | 31 | Love this enhancement proposal? Give it a 👍. We prioritise the proposals with the most 👍. 
32 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.11" 12 | # You can also specify other tool versions: 13 | # rust: "1.70" 14 | 15 | # Build documentation in the "docs/" directory with Sphinx 16 | sphinx: 17 | configuration: docs/source/conf.py 18 | 19 | # Optionally build your docs in additional formats such as PDF and ePub 20 | formats: 21 | - pdf 22 | - epub 23 | 24 | # Optional but recommended, declare the Python requirements required 25 | # to build your documentation 26 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 27 | # https://docs.readthedocs.io/en/stable/config-file/v2.html#python-install 28 | python: 29 | install: 30 | - requirements: requirements/doc.txt 31 | - requirements: requirements/mixin.txt 32 | - requirements: requirements/dev.txt 33 | - method: pip 34 | path: . 35 | -------------------------------------------------------------------------------- /examples/type_validation/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from http import HTTPStatus 16 | 17 | import httpx 18 | import msgspec 19 | 20 | req = { 21 | "bin": b"hello mosec", 22 | "name": "type check", 23 | } 24 | 25 | resp = httpx.post( 26 | "http://127.0.0.1:8000/inference", content=msgspec.msgpack.encode(req) 27 | ) 28 | if resp.status_code == HTTPStatus.OK: 29 | print(f"OK: {msgspec.msgpack.decode(resp.content)}") 30 | else: 31 | print(f"err[{resp.status_code}] {resp.text}") 32 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2022 MOSEC Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use crate::tasks::TaskCode; 16 | 17 | #[derive(Debug, derive_more::From, derive_more::Display, derive_more::Error)] 18 | pub(crate) enum ServiceError { 19 | #[display("inference timeout")] 20 | Timeout, 21 | 22 | #[display("too many request: task queue is full")] 23 | TooManyRequests, 24 | 25 | #[display("mosec unknown error")] 26 | UnknownError, 27 | 28 | #[display("SSE inference error: {_0}")] 29 | SSEError(TaskCode), 30 | } 31 | -------------------------------------------------------------------------------- /mosec/mixin/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Provide useful mixin to extend MOSEC.""" 16 | 17 | from mosec.mixin.msgpack_worker import MsgpackMixin 18 | from mosec.mixin.numbin_worker import NumBinIPCMixin 19 | from mosec.mixin.plasma_worker import PlasmaShmIPCMixin 20 | from mosec.mixin.redis_worker import RedisShmIPCMixin 21 | from mosec.mixin.typed_worker import TypedMsgPackMixin 22 | 23 | __all__ = [ 24 | "MsgpackMixin", 25 | "NumBinIPCMixin", 26 | "PlasmaShmIPCMixin", 27 | "RedisShmIPCMixin", 28 | "TypedMsgPackMixin", 29 | ] 30 | -------------------------------------------------------------------------------- /.github/workflows/deny.yml: -------------------------------------------------------------------------------- 1 | name: Cargo Deny 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '30 10 * * 1' # Every Monday at 10:30 AM UTC 7 | pull_request: 8 | paths: 9 | - 'Cargo.toml' 10 | - 'Cargo.lock' 11 | - '.github/workflows/deny.yml' 12 | - 'deny.toml' 13 | merge_group: 14 | 15 | concurrency: 16 | group: ${{ github.ref }}-${{ github.workflow }} 17 | cancel-in-progress: true 18 | 19 | jobs: 20 | deny: 21 | runs-on: ubuntu-latest 22 | env: 23 | CARGO_TERM_COLOR: always 24 | VERSION: 0.18.9 25 | CMD: "cargo-deny" 26 | DIR: "/tmp/cargo-deny" 27 | steps: 28 | - uses: actions/checkout@v6 29 | - name: Set up Rust 30 | uses: dtolnay/rust-toolchain@stable 31 | - name: Install Deny 32 | run: | 33 | mkdir -p $DIR 34 | curl -L -o $DIR/archive.tar.gz https://github.com/EmbarkStudios/$CMD/releases/download/$VERSION/$CMD-$VERSION-x86_64-unknown-linux-musl.tar.gz 35 | tar -xzvf $DIR/archive.tar.gz --strip-components=1 -C $DIR 36 | - name: Deny 37 | run: $DIR/$CMD -L warn check bans licenses advisories --show-stats 38 | -------------------------------------------------------------------------------- /examples/jax_single_layer/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, 
Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Example: Client of the Jax server.""" 15 | 16 | import random 17 | from http import HTTPStatus 18 | 19 | import httpx 20 | 21 | input_data = [random.randint(-99, 99), random.randint(-99, 99), random.randint(-99, 99)] 22 | print("Client : sending data : ", input_data) 23 | 24 | prediction = httpx.post( 25 | "http://127.0.0.1:8000/inference", 26 | json={"array": input_data}, 27 | ) 28 | if prediction.status_code == HTTPStatus.OK: 29 | print(prediction.json()) 30 | else: 31 | print(prediction.status_code, prediction.json()) 32 | -------------------------------------------------------------------------------- /docs/source/examples/env.md: -------------------------------------------------------------------------------- 1 | # Customized GPU Allocation 2 | 3 | This is an example demonstrating how to give different worker processes customized environment variables to control things like GPU device allocation, etc. 4 | 5 | Assume your machine has 4 GPUs, and you hope to deploy your model to all of them to handle inference requests in parallel, maximizing your service's throughput. With MOSEC, we provide parallel workers with customized environment variables to satisfy this need. 6 | 7 | As shown in the code below, we can define our inference worker together with a list of environment variable dictionaries, each of which will be passed to the corresponding worker process. 
For example, if we set `CUDA_VISIBLE_DEVICES` to `0-3`, (the same copy of) our model will be deployed on 4 different GPUs and be queried in parallel, largely improving the system's throughput. You could verify this either from the server logs or the client response. 8 | 9 | ## **`custom_env.py`** 10 | 11 | ```{include} ../../../examples/custom_env.py 12 | :code: python 13 | ``` 14 | 15 | ## Start 16 | 17 | ```shell 18 | python custom_env.py 19 | ``` 20 | 21 | ## Test 22 | 23 | ```shell 24 | http :8000/inference dummy=0 25 | ``` 26 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "cargo" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "monthly" 12 | commit-message: 13 | prefix: "chore(cargo)" 14 | groups: 15 | all-crates: 16 | patterns: 17 | - "*" 18 | 19 | - package-ecosystem: "pip" 20 | directory: "/" 21 | schedule: 22 | interval: "monthly" 23 | commit-message: 24 | prefix: "chore(pip)" 25 | groups: 26 | all-pips: 27 | patterns: 28 | - "*" 29 | 30 | - package-ecosystem: "github-actions" 31 | directory: "/" 32 | schedule: 33 | interval: "monthly" 34 | commit-message: 35 | prefix: "chore(actions)" 36 | groups: 37 | all-actions: 38 | patterns: 39 | - "*" 40 | -------------------------------------------------------------------------------- /examples/rerank/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 
MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from http import HTTPStatus 16 | 17 | import httpx 18 | import msgspec 19 | 20 | req = { 21 | "query": "talk is cheap, show me the code", 22 | "docs": [ 23 | "what a nice day", 24 | "life is short, use python", 25 | "early bird catches the worm", 26 | ], 27 | } 28 | 29 | resp = httpx.post( 30 | "http://127.0.0.1:8000/inference", content=msgspec.msgpack.encode(req) 31 | ) 32 | if resp.status_code == HTTPStatus.OK: 33 | print(f"OK: {msgspec.msgpack.decode(resp.content)}") 34 | else: 35 | print(f"err[{resp.status_code}] {resp.text}") 36 | -------------------------------------------------------------------------------- /examples/resnet50_msgpack/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Example: Sample Resnet client.""" 15 | 16 | from http import HTTPStatus 17 | 18 | import httpx 19 | import msgpack # type: ignore 20 | 21 | dog_bytes = httpx.get( 22 | "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg" 23 | ).content 24 | 25 | 26 | prediction = httpx.post( 27 | "http://127.0.0.1:8000/inference", 28 | content=msgpack.packb({"image": dog_bytes}), 29 | ) 30 | if prediction.status_code == HTTPStatus.OK: 31 | print(msgpack.unpackb(prediction.content)) 32 | else: 33 | print(prediction.status_code, prediction.content) 34 | -------------------------------------------------------------------------------- /tests/services/mixin_typed_service.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Test MsgPack mixin.""" 16 | 17 | from typing import Any, List 18 | 19 | from msgspec import Struct 20 | 21 | from mosec import Server, Worker 22 | from mosec.mixin import TypedMsgPackMixin 23 | 24 | 25 | class Request(Struct): 26 | media: str 27 | binary: bytes 28 | 29 | 30 | class Inference(TypedMsgPackMixin, Worker): 31 | def forward(self, data: List[Request]) -> Any: 32 | return [len(req.binary) for req in data] 33 | 34 | 35 | if __name__ == "__main__": 36 | server = Server() 37 | server.append_worker(Inference, max_batch_size=4) 38 | server.run() 39 | -------------------------------------------------------------------------------- /mosec/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """MOSEC is a machine learning model serving framework.""" 16 | 17 | from mosec.errors import ( 18 | ClientError, 19 | DecodingError, 20 | EncodingError, 21 | ServerError, 22 | ValidationError, 23 | ) 24 | from mosec.log import get_logger 25 | from mosec.runtime import Runtime 26 | from mosec.server import Server 27 | from mosec.worker import SSEWorker, Worker 28 | 29 | __all__ = [ 30 | "ClientError", 31 | "DecodingError", 32 | "EncodingError", 33 | "Runtime", 34 | "SSEWorker", 35 | "Server", 36 | "ServerError", 37 | "ValidationError", 38 | "Worker", 39 | "get_logger", 40 | ] 41 | -------------------------------------------------------------------------------- /docs/source/examples/jax.md: -------------------------------------------------------------------------------- 1 | # Jax jitted inference 2 | 3 | This example shows how to utilize the [Jax framework](https://github.com/google/jax) to build a just-in-time (JIT) compiled inference server. You could install Jax following their official guide and you also need `chex` to run this example (`pip install -U chex`). 4 | 5 | We use a single layer neural network for this minimal example. You could also experiment with the speedup of JIT by setting the environment variable `USE_JIT=true` and observing the latency difference. Note that in the `__init__` of the worker we set the `self.multi_examples` as a list of example inputs to warm up, because different batch sizes will trigger re-jitting when they are traced for the first time. 
6 | 7 | ## Server 8 | 9 | ```shell 10 | USE_JIT=true python examples/jax_single_layer/server.py 11 | ``` 12 | 13 | <details> 14 | <summary>jax_single_layer.py</summary> 15 | 16 | ```{include} ../../../examples/jax_single_layer/server.py 17 | :code: python 18 | ``` 19 | 20 | </details> 21 | 22 | ## Client 23 | 24 | ```shell 25 | python examples/jax_single_layer/client.py 26 | ``` 27 | 28 | <details> 29 | <summary>jax_single_layer_cli.py</summary> 30 | 31 | ```{include} ../../../examples/jax_single_layer/client.py 32 | :code: python 33 | ``` 34 | 35 | </details> 36 | -------------------------------------------------------------------------------- /tests/services/square_service.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Test basic `forward` logic for single/concurrency request.""" 16 | 17 | from typing import List 18 | 19 | from mosec import Server, Worker 20 | from mosec.errors import ValidationError 21 | 22 | 23 | class SquareService(Worker): 24 | def forward(self, data: List[dict]) -> List[dict]: 25 | try: 26 | result = [{"x": int(req["x"]) ** 2} for req in data] 27 | except KeyError as err: 28 | raise ValidationError(err) from err 29 | return result 30 | 31 | 32 | if __name__ == "__main__": 33 | server = Server() 34 | server.append_worker(SquareService, max_batch_size=8) 35 | server.run() 36 | -------------------------------------------------------------------------------- /examples/server_side_event/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import httpx 16 | from httpx_sse import connect_sse 17 | 18 | with ( 19 | httpx.Client() as client, 20 | connect_sse( 21 | client, "POST", "http://127.0.0.1:8000/inference", json={"text": "mosec"} 22 | ) as event_source, 23 | ): 24 | for sse in event_source.iter_sse(): 25 | print(f"Event({sse.event}): {sse.data}") 26 | 27 | # error handling 28 | with ( 29 | httpx.Client() as client, 30 | connect_sse( 31 | client, "POST", "http://127.0.0.1:8000/inference", json={"error": "mosec"} 32 | ) as event_source, 33 | ): 34 | for sse in event_source.iter_sse(): 35 | print(f"Event({sse.event}): {sse.data}") 36 | -------------------------------------------------------------------------------- /.github/workflows/label.yml: -------------------------------------------------------------------------------- 1 | name: Label 2 | on: 3 | pull_request_target: 4 | types: [opened, edited] 5 | 6 | # make sure you have the following labels: 7 | # [documentation, enhancement, bug_fix, refactoring, chore] 8 | jobs: 9 | pr_label: 10 | name: PR label 11 | permissions: 12 | pull-requests: write 13 | contents: read 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 17 | PR: ${{ github.event.number }} 18 | steps: 19 | - uses: actions/checkout@v6 20 | - run: gh pr edit $PR --remove-label documentation --remove-label enhancement --remove-label bug_fix --remove-label refactoring --remove-label chore 21 | - run: gh pr edit $PR --add-label documentation 22 | if: ${{ startsWith(github.event.pull_request.title, 'doc') }} 23 | - run: gh pr edit $PR --add-label enhancement 24 | if: ${{ startsWith(github.event.pull_request.title, 'feat') }} 25 | - run: gh pr edit $PR --add-label bug_fix 26 | if: ${{ startsWith(github.event.pull_request.title, 'fix') }} 27 | - run: gh pr edit $PR --add-label refactoring 28 | if: ${{ startsWith(github.event.pull_request.title, 'refact') }} 29 | - run: gh pr edit $PR --add-label chore 30 | if: ${{ 
startsWith(github.event.pull_request.title, 'chore') }} 31 | 32 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Test util functions.""" 16 | 17 | from typing import List 18 | 19 | from msgspec import Struct 20 | 21 | from mosec import Worker 22 | from mosec.utils import ParseTarget, parse_func_type 23 | 24 | 25 | class Request(Struct): 26 | name: str 27 | 28 | 29 | class Demo(Worker): 30 | def forward(self, data: Request): 31 | pass 32 | 33 | def batch_forward(self, data: List[Request]): 34 | pass 35 | 36 | 37 | def test_parse_forward_input_type(): 38 | demo = Demo() 39 | 40 | single = parse_func_type(demo.forward, ParseTarget.INPUT) 41 | assert single is Request, single 42 | 43 | batch = parse_func_type(demo.batch_forward, ParseTarget.INPUT) 44 | assert batch is Request, batch 45 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yaml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Bug report for mosec 3 | labels: ["bug"] 4 | title: "bug: <title>" 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to fill out this bug 
report! 10 | - type: textarea 11 | attributes: 12 | label: Describe the bug 13 | description: A clear and concise description of what the bug is. 14 | validations: 15 | required: true 16 | - type: textarea 17 | attributes: 18 | label: To Reproduce 19 | description: Steps to reproduce the behavior. 20 | validations: 21 | required: true 22 | - type: textarea 23 | attributes: 24 | label: Expected behavior 25 | description: A clear and concise description of what you expected to happen. 26 | validations: 27 | required: false 28 | - type: textarea 29 | attributes: 30 | label: The mosec version 31 | description: The output of `pip show mosec` command. 32 | validations: 33 | required: true 34 | - type: textarea 35 | attributes: 36 | label: Additional context 37 | description: Add any other context about the problem here. 38 | validations: 39 | required: false 40 | - type: markdown 41 | attributes: 42 | value: | 43 | Impacted by this bug? Give it a 👍. We prioritise the issues with the most 👍. 44 | -------------------------------------------------------------------------------- /tests/test_log.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Test multiprocessing logging configuration.""" 16 | 17 | import logging 18 | 19 | from mosec.log import get_log_level, get_logger, set_logger 20 | from tests.utils import env_context 21 | 22 | 23 | def test_get_logger(): 24 | with env_context(): 25 | logger = get_logger() 26 | assert logger.level == logging.INFO 27 | 28 | with env_context(MOSEC_LOG_LEVEL="warning"): 29 | set_logger(get_log_level()) 30 | logger = get_logger() 31 | assert logger.level == logging.WARNING 32 | 33 | # `--debug` has higher priority 34 | with env_context(MOSEC_DEBUG="true", MOSEC_LOG_LEVEL="warning"): 35 | set_logger(get_log_level()) 36 | logger = get_logger() 37 | assert logger.level == logging.DEBUG 38 | -------------------------------------------------------------------------------- /docs/source/examples/validate.md: -------------------------------------------------------------------------------- 1 | # Validate Request 2 | 3 | This example shows how to use the `TypedMsgPackMixin` to validate the request with the help of [`msgspec`](https://github.com/jcrist/msgspec). 4 | 5 | Request validation can provide the following benefits: 6 | 7 | - The client can know the exact expected data schema from the type definition. 8 | - Validation failure will return the details of the failure reason to help the client debug. 9 | - Ensure that the service is working on the correct data without fear. 10 | 11 | First of all, define the request type with `msgspec.Struct` like: 12 | 13 | ```python 14 | class Request(msgspec.Struct): 15 | media: str 16 | binary: bytes 17 | ``` 18 | 19 | Then, apply the `TypedMsgPackMixin` mixin and add the type you defined to the annotation of `forward(self, data)`: 20 | 21 | ```python 22 | class Inference(TypedMsgPackMixin, Worker): 23 | def forward(self, data: Request): 24 | pass 25 | ``` 26 | 27 | ```{note} 28 | If you are using dynamic **batch** inference as the first stage, just use the `List[Request]` as the annotation. 
29 | ``` 30 | 31 | You can check the full demo code below. 32 | 33 | ## Server 34 | 35 | ```{include} ../../../examples/type_validation/server.py 36 | :code: python 37 | ``` 38 | 39 | ## Client 40 | 41 | ```{include} ../../../examples/type_validation/client.py 42 | :code: python 43 | ``` 44 | 45 | ## Test 46 | 47 | ```shell 48 | python client.py 49 | ``` 50 | -------------------------------------------------------------------------------- /tests/services/mixin_numbin_service.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Test NumBin IPC mixin.""" 16 | 17 | from typing import Dict, List 18 | 19 | import numpy as np 20 | 21 | from mosec import Server, Worker 22 | from mosec.mixin import NumBinIPCMixin 23 | 24 | 25 | class Preprocess(NumBinIPCMixin, Worker): 26 | def forward(self, data: Dict[str, str]) -> np.ndarray: 27 | num = int(data.get("num", 10)) 28 | arr = np.ones(num) * (1 / num) 29 | return arr 30 | 31 | 32 | class Inference(NumBinIPCMixin, Worker): 33 | def forward(self, data: List[np.ndarray]) -> List[str]: 34 | res = ["equal" if np.equal(1, arr.sum()) else "unequal" for arr in data] 35 | return res 36 | 37 | 38 | if __name__ == "__main__": 39 | server = Server() 40 | server.append_worker(Preprocess) 41 | server.append_worker(Inference, max_batch_size=8) 42 | server.run() 43 | -------------------------------------------------------------------------------- /.github/workflows/nightly.yml: -------------------------------------------------------------------------------- 1 | name: Nightly Test 2 | 3 | on: 4 | schedule: 5 | - cron: '30 7 * * *' 6 | pull_request: 7 | paths: 8 | - '.github/workflows/nightly.yml' 9 | - 'mosec/**' 10 | - 'src/**' 11 | - 'pyproject.toml' 12 | - 'Cargo.lock' 13 | - 'Cargo.toml' 14 | - 'tests/bad_req.py' 15 | push: 16 | branches: 17 | - main 18 | paths: 19 | - '.github/workflows/nightly.yml' 20 | - 'mosec/**' 21 | - 'src/**' 22 | - 'pyproject.toml' 23 | - 'Cargo.lock' 24 | - 'Cargo.toml' 25 | - 'tests/bad_req.py' 26 | workflow_dispatch: 27 | 28 | concurrency: 29 | group: ${{ github.ref }}-${{ github.workflow }} 30 | cancel-in-progress: true 31 | 32 | env: 33 | SCCACHE_GHA_ENABLED: "true" 34 | RUSTC_WRAPPER: "sccache" 35 | 36 | jobs: 37 | test: 38 | name: "stressful bad requests test" 39 | runs-on: ${{ matrix.os }} 40 | timeout-minutes: 45 41 | strategy: 42 | matrix: 43 | os: [ubuntu-latest] 44 | 45 | steps: 46 | - uses: actions/checkout@v6 47 | - name: Install uv 48 | uses: astral-sh/setup-uv@v7 49 | with: 50 | enable-cache: true 51 | - 
name: Set up Rust 52 | uses: dtolnay/rust-toolchain@stable 53 | - name: Run sccache-cache 54 | uses: mozilla-actions/sccache-action@v0.0.9 55 | - name: Install dependencies 56 | run: make install_py 57 | - name: Test 58 | run: make test_chaos 59 | -------------------------------------------------------------------------------- /examples/multi_route/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import json 16 | from http import HTTPStatus 17 | 18 | import httpx 19 | import msgpack # type: ignore 20 | 21 | typed_req = { 22 | "bin": b"hello mosec with type check", 23 | "name": "type check", 24 | } 25 | 26 | print(">> requesting for the typed route with msgpack serde") 27 | resp = httpx.post( 28 | "http://127.0.0.1:8000/v1/inference", content=msgpack.packb(typed_req) 29 | ) 30 | if resp.status_code == HTTPStatus.OK: 31 | print(f"OK: {msgpack.unpackb(resp.content)}") 32 | else: 33 | print(f"err[{resp.status_code}] {resp.text}") 34 | 35 | print(">> requesting for the untyped route with json serde") 36 | resp = httpx.post("http://127.0.0.1:8000/inference", content=b"hello mosec") 37 | if resp.status_code == HTTPStatus.OK: 38 | print(f"OK: {json.loads(resp.content)}") 39 | else: 40 | print(f"err[{resp.status_code}] {resp.text}") 41 | -------------------------------------------------------------------------------- /examples/server_side_event/server.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from mosec import Server, SSEWorker, ValidationError, Worker, get_logger 16 | 17 | logger = get_logger() 18 | 19 | 20 | class Preprocess(Worker): 21 | def forward(self, data): 22 | text = data.get("text") 23 | if text is None: 24 | raise ValidationError("text is required") 25 | return text 26 | 27 | 28 | class Inference(SSEWorker): 29 | def forward(self, data): 30 | epoch = 5 31 | for i in range(epoch): 32 | for j in range(len(data)): 33 | self.send_stream_event( 34 | f"inference: ({i + 1}/{epoch}) {data[j]}", index=j 35 | ) 36 | 37 | # this return value will be ignored 38 | return data 39 | 40 | 41 | if __name__ == "__main__": 42 | server = Server() 43 | server.append_worker(Preprocess) 44 | server.append_worker(Inference, max_batch_size=2) 45 | server.run() 46 | -------------------------------------------------------------------------------- /tests/services/sse_service.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
"""Test Server-Sent Event."""

from mosec import Server, SSEWorker, ValidationError, Worker, get_logger

logger = get_logger()
EPOCH = 5


class Preprocess(Worker):
    """Pull the ``text`` value out of the request payload."""

    def forward(self, data):
        """Return ``data.get("text")``.

        Raises:
            ValidationError: when the payload has no ``text`` value.
        """
        text = data.get("text")
        if text is None:
            raise ValidationError("text is required")
        return text


class Inference(SSEWorker):
    """Echo every batch item back as a stream event, ``EPOCH`` times."""

    def forward(self, data):
        """Send ``EPOCH`` rounds of events, one per item of the batch ``data``.

        Each event carries the item's position in the batch as ``index``.
        """
        for _ in range(EPOCH):
            # enumerate replaces the manual range(len(...)) index loop, so no
            # lint suppression is required
            for j, item in enumerate(data):
                self.send_stream_event(f"{item}", index=j)

        # this return value will be ignored
        return data


if __name__ == "__main__":
    server = Server()
    server.append_worker(Preprocess)
    server.append_worker(Inference, max_batch_size=2)
    server.run()
"""Client demo: send a prompt to the service and save the returned image."""

import argparse
from http import HTTPStatus

import httpx
import msgpack  # type: ignore

parser = argparse.ArgumentParser(prog="stable diffusion client demo")
parser.add_argument(
    "-p",
    "--prompt",
    default="a photo of an astronaut riding a horse on mars",
)
parser.add_argument(
    "-o",
    "--output",
    default="stable_diffusion_result.jpg",
    help="output filename",
)
parser.add_argument("--port", default=8000, type=int, help="service port")

args = parser.parse_args()

# the prompt travels as a msgpack-encoded string; allow 20 seconds overall
endpoint = f"http://127.0.0.1:{args.port}/inference"
payload = msgpack.packb(args.prompt)
resp = httpx.post(endpoint, content=payload, timeout=httpx.Timeout(20))

if resp.status_code != HTTPStatus.OK:
    print(f"ERROR: <{resp.status_code}> {resp.text}")
else:
    # the response body is msgpack; the decoded value is written out as-is
    data = msgpack.unpackb(resp.content)
    with open(args.output, "wb") as f:
        f.write(data)
We recommend using `RedisShmIPCMixin` for better performance and longer-lasting updates. 8 | 9 | ```{warning} 10 | `plasma` is deprecated. Please use Redis instead. 11 | ``` 12 | 13 | The additional subprocess can be registered as a daemon, so mosec checks it regularly and triggers a graceful shutdown when the daemon exits. 14 | 15 | ## **`plasma_legacy.py`** 16 | 17 | ```{include} ../../../examples/shm_ipc/plasma_legacy.py 18 | :code: python 19 | ``` 20 | ## **`redis.py`** 21 | 22 | ```{include} ../../../examples/shm_ipc/redis.py 23 | :code: python 24 | ``` 25 | 26 | ## Start 27 | 28 | ```shell 29 | python examples/shm_ipc/plasma_legacy.py 30 | ``` 31 | 32 | or 33 | 34 | ```shell 35 | python examples/shm_ipc/redis.py 36 | ``` 37 | ## Test 38 | 39 | ```shell 40 | http :8000/inference size=100 41 | ``` 42 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mosec" 3 | version = "0.9.6" 4 | authors = ["Keming <kemingy94@gmail.com>", "Zichen <lkevinzc@gmail.com>"] 5 | edition = "2024" 6 | license = "Apache-2.0" 7 | readme = "README.md" 8 | repository = "https://github.com/mosecorg/mosec" 9 | description = "Model Serving made Efficient in the Cloud." 
10 | documentation = "https://docs.rs/mosec" 11 | categories = ["science"] 12 | keywords = [ 13 | "machine-learning", 14 | "deep-learning", 15 | "cloud", 16 | "model-serving", 17 | "service", 18 | ] 19 | exclude = ["target", "examples", "tests", "scripts"] 20 | rust-version = "1.85" 21 | 22 | [dependencies] 23 | bytes = "1.11" 24 | tokio = { version = "1.48", features = [ 25 | "rt", 26 | "rt-multi-thread", 27 | "time", 28 | "macros", 29 | "sync", 30 | "signal", 31 | "io-util", 32 | ] } 33 | derive_more = { version = "2.0.1", features = ["display", "error", "from"] } 34 | # MPMS that only one consumer sees each message & async 35 | async-channel = "2.5" 36 | prometheus-client = "0.24.0" 37 | axum = { version = "0.8.7", default-features = false, features = [ 38 | "matched-path", 39 | "original-uri", 40 | "query", 41 | "tokio", 42 | "http1", 43 | "http2", 44 | ] } 45 | async-stream = "0.3.6" 46 | serde = "1.0" 47 | serde_json = "1.0" 48 | utoipa = "5.4" 49 | utoipa-swagger-ui = { version = "9", features = ["axum"] } 50 | tower = "0.5.2" 51 | tower-http = { version = "0.6.7", features = [ 52 | "compression-zstd", 53 | "decompression-zstd", 54 | "compression-gzip", 55 | "decompression-gzip", 56 | ] } 57 | log = { version = "0.4.28", features = ["kv"] } 58 | logforth = { version = "0.29.1", features = ["starter-log"] } 59 | jiff = "0.2.15" 60 | -------------------------------------------------------------------------------- /docs/source/examples/metric.md: -------------------------------------------------------------------------------- 1 | # Customized Metrics 2 | 3 | This is an example demonstrating how to add your customized Python side Prometheus metrics. 
4 | 5 | Mosec already has the Rust side metrics, including: 6 | 7 | * throughput for the inference endpoint 8 | * duration for each stage (including the IPC time) 9 | * batch size (only for the `max_batch_size > 1` workers) 10 | * number of remaining tasks to be processed 11 | 12 | If you need to monitor more details about the inference process, you can add some Python side metrics. E.g., the inference result distribution, the duration of some CPU-bound or GPU-bound processing, the IPC time (get from `rust_step_duration - python_step_duration`). 13 | 14 | This example has a simple WSGI app as the monitoring metrics service. In each worker process, the `Counter` will collect the inference results and export them to the metrics service. For the inference part, it parses the batch data and compares them with the average value. 15 | 16 | For more information about the multiprocess mode for the metrics, check the [Prometheus doc](https://github.com/prometheus/client_python#multiprocess-mode-eg-gunicorn). 17 | 18 | ## **`python_side_metrics.py`** 19 | 20 | ```{include} ../../../examples/monitor/python_side_metrics.py 21 | :code: python 22 | ``` 23 | 24 | ## Start 25 | 26 | ```shell 27 | python python_side_metrics.py 28 | ``` 29 | 30 | ## Test 31 | 32 | ```shell 33 | http POST :8000/inference num=1 34 | ``` 35 | 36 | ## Check the Python side metrics 37 | 38 | ```shell 39 | http :8080 40 | ``` 41 | 42 | ## Check the Rust side metrics 43 | 44 | ```shell 45 | http :8000/metrics 46 | ``` 47 | 48 | ```{include} ../../../examples/monitor/README.md 49 | ``` 50 | -------------------------------------------------------------------------------- /examples/rerank/server.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
"""Example: score documents against a query with a cross-encoder."""

from os import getenv
from typing import List

from msgspec import Struct
from sentence_transformers import CrossEncoder  # type: ignore

from mosec import Server, Worker
from mosec.mixin import TypedMsgPackMixin

DEFAULT_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
WORKER_NUM = int(getenv("WORKER_NUM", default="1"))


# request payload: one query plus the candidate documents to score
class Request(Struct, kw_only=True):
    query: str
    docs: List[str]


# response payload: one score per entry of Request.docs, in the same order
class Response(Struct, kw_only=True):
    scores: List[float]


class Encoder(TypedMsgPackMixin, Worker):
    """Worker holding a ``CrossEncoder`` selected by the ``MODEL_NAME`` env var."""

    def __init__(self):
        # fall back to DEFAULT_MODEL when MODEL_NAME is not set
        name = getenv("MODEL_NAME", default=DEFAULT_MODEL)
        self.model_name = name
        self.model = CrossEncoder(name)

    def forward(self, data: Request) -> Response:
        """Predict a score for every ``[query, doc]`` pair of the request."""
        pairs = [[data.query, doc] for doc in data.docs]
        predicted = self.model.predict(pairs)
        return Response(scores=predicted.tolist())


if __name__ == "__main__":
    server = Server()
    server.append_worker(Encoder, num=WORKER_NUM)
    server.run()
"""Request validation example."""

from typing import Any, List

from msgspec import Struct

from mosec import Server, Worker
from mosec.mixin import TypedMsgPackMixin


class Request(Struct):
    """User request struct."""

    # pylint: disable=too-few-public-methods

    bin: bytes
    name: str = "test"


class Preprocess(TypedMsgPackMixin, Worker):
    """First stage: receives the request typed as `Request` and forwards its bytes."""

    def forward(self, data: Request) -> Any:
        """Log the typed request and hand its ``bin`` field to the next stage."""
        print(f"received {data}")
        payload = data.bin
        return payload


class Inference(TypedMsgPackMixin, Worker):
    """Dummy batch stage that measures each buffer."""

    def forward(self, data: List[bytes]) -> List[int]:
        """Return the length of every buffer in the batch, in order."""
        return list(map(len, data))


if __name__ == "__main__":
    server = Server()
    # stage order matters: Preprocess first, then the batched Inference
    for stage, options in ((Preprocess, {}), (Inference, {"max_batch_size": 16})):
        server.append_worker(stage, **options)
    server.run()
4 | 5 | ## From the `Triton Inference Server` 6 | 7 | Both [`PyTriton`](https://github.com/triton-inference-server/pytriton) and [`Triton Python Backend`](https://github.com/triton-inference-server/python_backend) are using [`Triton Inference Server`](https://github.com/triton-inference-server). 8 | 9 | - `mosec` doesn't require a specific client, you can use any HTTP client library 10 | - dynamic batching is configured when calling the [`append_worker`](mosec.server.Server.append_worker) 11 | - `mosec` doesn't need to declare the `inputs` and `outputs`. If you want to validate the request, you can use the [`TypedMsgPackMixin`](mosec.mixin.typed_worker.TypedMsgPackMixin) (ref [Validate Request](https://mosecorg.github.io/mosec/examples/validate.html)) 12 | 13 | ### `Triton Python Backend` 14 | 15 | - change the `TritonPythonModel` class to a worker class that inherits [`mosec.Worker`](mosec.worker.Worker) 16 | - move the `initialize` method to the `__init__` method in the new class 17 | - move the `execute` method to the `forward` method in the new class 18 | - if you still prefer to use the `auto_complete_config` method, you can merge it into the `__init__` method 19 | - `mosec` doesn't have the corresponding `finalize` method as an unloading handler 20 | - `mosec` doesn't require any special model directories or configurations 21 | - to run multiple replicas, configure the `num` in [`append_worker`](mosec.server.Server.append_worker) 22 | 23 | ### `PyTriton` 24 | 25 | - move the model loading logic to the `__init__` method, since this happens in a different process 26 | - move the `infer_func` function to the `forward` method 27 | -------------------------------------------------------------------------------- /docs/source/examples/multi_route.md: -------------------------------------------------------------------------------- 1 | # Multi-Route 2 | 3 | This example shows how to use the multi-route feature. 
4 | 5 | You will need this feature if you want to: 6 | 7 | - Serve multiple models in one service on different endpoints. 8 | - e.g. register `/embedding` & `/classify` with different models 9 | - Serve one model to multiple different endpoints in one service. 10 | - e.g. register LLaMA with `/inference` and `/v1/chat/completions` to make it compatible with the OpenAI API 11 | - Share a worker in different routes 12 | - The shared worker will collect the dynamic batch from multiple previous stages. 13 | - If you want to have multiple runtimes with sharing, you can declare multiple runtime instances with the same worker class. 14 | 15 | The worker definition part is the same as for a single route. The only difference is how you register the worker with the server. 16 | 17 | Here we expose a new [concept](../reference/concept.md) called [`Runtime`](mosec.runtime.Runtime). 18 | 19 | You can create the `Runtime` and register on the server with a `{endpoint: [Runtime]}` dictionary. 20 | 21 | See the complete demo code below. This will run a service with two endpoints: 22 | 23 | - `/inference` with `Preprocess` and `Inference` 24 | - `/v1/inference` with `TypedProcess`, `Inference` and `TypedPostprocess` 25 | 26 | And the `Inference` worker is shared between the two routes. 
27 | 28 | ## Server 29 | 30 | <details> 31 | <summary>multi_route_server.py</summary> 32 | 33 | ```{include} ../../../examples/multi_route/server.py 34 | :code: python 35 | ``` 36 | 37 | </details> 38 | 39 | ## Client 40 | 41 | <details> 42 | <summary>multi_route_client.py</summary> 43 | 44 | ```{include} ../../../examples/multi_route/client.py 45 | :code: python 46 | ``` 47 | 48 | </details> 49 | -------------------------------------------------------------------------------- /examples/custom_env.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
"""Example: Custom Environment setup"""

import os

from mosec import Server, Worker, get_logger

logger = get_logger()


class Inference(Worker):
    """Worker that reports which CUDA device its process was given."""

    def __init__(self):
        super().__init__()
        # initialize your models here and allocate dedicated device to it
        logger.info(
            "initializing model on device=%s",
            os.environ.get("CUDA_VISIBLE_DEVICES"),
        )

    def forward(self, data: dict) -> dict:
        """Return the ``CUDA_VISIBLE_DEVICES`` value seen by this worker."""
        visible = os.environ.get("CUDA_VISIBLE_DEVICES")
        # NOTE self.worker_id is 1-indexed
        logger.info("worker=%d on device=%s is processing...", self.worker_id, visible)
        return {"device": visible}


if __name__ == "__main__":
    NUM_DEVICE = 4

    # one environment mapping per worker replica, pinning replica i to device i
    worker_envs = [{"CUDA_VISIBLE_DEVICES": str(cid)} for cid in range(NUM_DEVICE)]

    server = Server()
    server.append_worker(Inference, num=NUM_DEVICE, env=worker_envs)
    server.run()
"""MOSEC NumBin IPC worker mixin.

Features:

    * deserialize IPC data with numbin
    * serialize IPC data with numbin

Attention: numbin only supports NumPy ndarray types.
"""

# pylint: disable=import-outside-toplevel

from typing import Any

from mosec.errors import DecodingError, EncodingError


class NumBinIPCMixin:
    """Mixin that swaps the IPC (de)serialization for NumBin."""

    # pylint: disable=no-self-use

    def serialize_ipc(self, data: Any) -> bytes:
        """Encode ``data`` with ``numbin.dumps`` for inter-process transfer.

        Raises:
            EncodingError: chained from any exception raised while encoding.
        """
        # imported lazily so numbin is only required when the mixin is used
        import numbin

        try:
            return numbin.dumps(data)
        except Exception as err:
            raise EncodingError from err

    def deserialize_ipc(self, data: bytes) -> Any:
        """Decode IPC bytes with ``numbin.loads``.

        Raises:
            DecodingError: chained from any exception raised while decoding.
        """
        import numbin

        try:
            return numbin.loads(data)
        except Exception as err:
            raise DecodingError from err
6 | 7 | ## Starting the monitoring system 8 | Clone the repository containing the docker-compose.yaml file: 9 | ```bash 10 | git clone https://github.com/mosecorg/mosec.git 11 | ``` 12 | 13 | Navigate to the directory containing the docker-compose.yaml file: 14 | ```bash 15 | cd mosec/examples/monitor 16 | ``` 17 | 18 | Start the monitoring system by running the following command: 19 | ```bash 20 | docker-compose up -d 21 | ``` 22 | This command will start three containers: Mosec, Prometheus, and Grafana. 23 | 24 | 25 | ## Test 26 | Run test and feed metrics to Prometheus. 27 | ```shell 28 | http POST :8000/inference num=1 29 | ``` 30 | 31 | ## Accessing Prometheus 32 | Prometheus is a monitoring and alerting system that collects metrics from Mosec. You can access the Prometheus UI by visiting http://127.0.0.1:9090 in your web browser. 33 | 34 | ## Accessing Grafana 35 | Grafana is a visualization tool for monitoring and analyzing metrics. You can access the Grafana UI by visiting http://127.0.0.1:3000 in your web browser. The default username and password are both admin. 36 | 37 | ## Stopping the monitoring system 38 | To stop the monitoring system, run the following command: 39 | 40 | ```bash 41 | docker-compose down 42 | ``` 43 | This command will stop and remove the containers created by Docker Compose. 44 | -------------------------------------------------------------------------------- /examples/segment/client.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
"""Client demo: post an image, a mask and point prompts; check the result shape."""

import gzip
from http import HTTPStatus
from io import BytesIO

import httpx
import msgpack  # type: ignore
import numbin
import numpy as np
from PIL import Image  # type: ignore

image_bytes = httpx.get(
    "https://raw.githubusercontent.com/facebookresearch/sam2/main/notebooks/images/truck.jpg"
).content
truck_image = Image.open(BytesIO(image_bytes))
array = np.array(truck_image.convert("RGB"))
# assume we have obtained the low resolution mask from the previous step
mask = np.zeros((256, 256))

# arrays are numbin-encoded, the whole request is msgpack-packed and gzipped
request_body = gzip.compress(
    msgpack.packb(  # type: ignore
        {
            "image": numbin.dumps(array),
            "mask": numbin.dumps(mask),
            "labels": [1, 1],
            "point_coords": [[500, 375], [1125, 625]],
        }
    )
)
resp = httpx.post(
    "http://127.0.0.1:8000/inference",
    content=request_body,
    headers={"Accept-Encoding": "gzip", "Content-Encoding": "gzip"},
)
assert resp.status_code == HTTPStatus.OK, resp.status_code

res = numbin.loads(msgpack.loads(resp.content))
# the returned array must match the image's first two dimensions
expected_shape = array.shape[:2]
assert res.shape == expected_shape, f"expect {array.shape[:2]}, got {res.shape}"
paths: 14 | - 'mosec/**' 15 | - 'docs/**' 16 | - '.github/workflows/page.yml' 17 | - 'examples/**' 18 | - '**.md' 19 | # Allows you to run this workflow manually from the Actions tab 20 | workflow_dispatch: 21 | 22 | concurrency: 23 | group: ${{ github.ref }}-${{ github.workflow }} 24 | cancel-in-progress: true 25 | 26 | env: 27 | SCCACHE_GHA_ENABLED: "true" 28 | RUSTC_WRAPPER: "sccache" 29 | 30 | jobs: 31 | build: 32 | runs-on: ubuntu-latest 33 | steps: 34 | - uses: actions/checkout@v6 35 | - name: Setup Pages 36 | uses: actions/configure-pages@v5 37 | - name: Install uv 38 | uses: astral-sh/setup-uv@v7 39 | with: 40 | enable-cache: true 41 | - name: Set up Rust 42 | uses: dtolnay/rust-toolchain@stable 43 | - name: Run sccache-cache 44 | uses: mozilla-actions/sccache-action@v0.0.9 45 | - name: Install dependencies 46 | run: | 47 | make install_py 48 | - name: Generate docs 49 | run: | 50 | cd docs && make html 51 | - name: Upload artifact 52 | uses: actions/upload-pages-artifact@v4 53 | with: 54 | # Upload entire repository 55 | path: 'docs/build/html' 56 | 57 | deploy: 58 | runs-on: ubuntu-latest 59 | needs: build 60 | if: ${{ github.event_name == 'push' }} 61 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 62 | permissions: 63 | pages: write 64 | id-token: write 65 | environment: 66 | name: github-pages 67 | url: ${{ steps.deployment.outputs.page_url }} 68 | steps: 69 | - name: Deploy to GitHub Pages 70 | id: deployment 71 | uses: actions/deploy-pages@v4 72 | -------------------------------------------------------------------------------- /examples/stable_diffusion/server.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
"""Example: serve a Stable Diffusion pipeline that returns JPEG bytes."""

from io import BytesIO
from typing import List

import torch  # type: ignore
from diffusers import StableDiffusionPipeline  # type: ignore

from mosec import Server, Worker, get_logger
from mosec.mixin import MsgpackMixin

logger = get_logger()


class StableDiffusion(MsgpackMixin, Worker):
    """Batched text-to-image worker with msgpack request/response encoding."""

    def __init__(self):
        pipeline = StableDiffusionPipeline.from_pretrained(
            "sd-legacy/stable-diffusion-v1-5",
            torch_dtype=torch.float16,
        )
        pipeline.enable_model_cpu_offload()
        self.pipe = pipeline
        self.example = ["useless example prompt"] * 4  # warmup (bs=4)

    def forward(self, data: List[str]) -> List[memoryview]:
        """Run the pipeline on a batch of prompts and return one JPEG per image."""
        logger.debug("generate images for %s", data)
        res = self.pipe(data)  # type: ignore
        # the second element of the result is logged under the NSFW label
        logger.debug("NSFW: %s", res[1])

        def as_jpeg(img) -> memoryview:
            # encode into an in-memory file and expose its buffer
            sink = BytesIO()
            img.save(sink, format="JPEG")  # type: ignore
            return sink.getbuffer()

        return [as_jpeg(img) for img in res[0]]  # type: ignore


if __name__ == "__main__":
    server = Server()
    server.append_worker(StableDiffusion, num=1, max_batch_size=4, max_wait_time=10)
    server.run()
"""Example: Sample structures for using mosec server."""

import time
from types import MappingProxyType as ImmutableDict
from typing import List

from mosec import Server, ValidationError, Worker, get_logger

logger = get_logger()


class Preprocess(Worker):
    """First stage: read the requested sleep time from the payload."""

    example = ImmutableDict({"time": 0})

    def forward(self, data: dict) -> float:
        """Return ``data["time"]`` as a float.

        Raises:
            ValidationError: when the ``time`` key is missing.
        """
        logger.debug("pre received %s", data)
        # Customized, simple input validation
        try:
            requested = data["time"]
        except KeyError as err:
            raise ValidationError(f"cannot find key {err}") from err
        return float(requested)


class Inference(Worker):
    """Batched stage: sleep once for the longest time in the batch."""

    example = (0, 1e-5, 2e-4)

    def forward(self, data: List[float]) -> List[float]:
        """Sleep for ``max(data)`` seconds and pass the batch through unchanged."""
        longest = max(data)
        logger.info("sleeping for %s seconds", longest)
        time.sleep(longest)
        return data


class Postprocess(Worker):
    """Last stage: wrap the slept duration in a response message."""

    def forward(self, data: float) -> dict:
        """Return ``{"msg": ...}`` describing the duration."""
        logger.debug("post received %f", data)
        message = f"sleep {data} seconds"
        return {"msg": message}


if __name__ == "__main__":
    server = Server()
    server.append_worker(Preprocess)
    server.append_worker(Inference, max_batch_size=32)
    server.append_worker(Postprocess)
    server.run()
"""Simulate bad requests:

- Preprocess: raise ValidationError
- Inference: raise random ServerError
- client: disconnection
"""

import time
from random import random
from typing import List

from mosec import Server, ServerError, ValidationError, Worker, get_logger

logger = get_logger()
LUCKY_THRESHOLD = 0.5


class Preprocess(Worker):
    """First stage: read the requested sleep time from the payload."""

    def forward(self, data: dict) -> float:
        """Return ``data["time"]`` as a float.

        Raises:
            ValidationError: when the ``time`` key is missing.
        """
        logger.debug("pre received %s", data)
        try:
            requested = data["time"]
        except KeyError as err:
            raise ValidationError(f"cannot find key {err}") from err
        return float(requested)


class Inference(Worker):
    """Batched stage that fails at random to simulate server errors."""

    def forward(self, data: List[float]) -> List[float]:
        """Pass the batch through, sleeping for its maximum, unless it is dropped.

        Raises:
            ServerError: when the random draw falls below ``LUCKY_THRESHOLD``.
        """
        # special case: {"time": 0}
        is_single_zero = len(data) == 1 and data[0] == 0
        if is_single_zero:
            return data

        # chaos
        if random() < LUCKY_THRESHOLD:
            logger.info("bad luck, this batch will be drop")
            raise ServerError("no way")

        longest = max(data)
        logger.info("sleeping for %s seconds", longest)
        time.sleep(longest)
        return data


if __name__ == "__main__":
    server = Server()
    server.append_worker(Preprocess, num=2)
    server.append_worker(Inference, max_batch_size=32)
    server.run()
"""Mock socket interface."""


class MockSocket:
    """Mock socket object used to test protocol.

    Bytes written with :meth:`sendall` are appended to one in-memory buffer
    and handed back, in order, by :meth:`recv` and :meth:`recv_into`.
    """

    def __init__(self, family=None):
        self.family = family
        self.buffer = b""
        self.timeout = None

    def recv(self, bufsize, flags=None) -> bytes:
        """Receive data from buffer with size=bufsize."""
        data = self.buffer[:bufsize]
        self.buffer = self.buffer[bufsize:]
        return data

    def recv_into(self, buf: memoryview, nbytes=1):
        """Copy up to ``nbytes`` buffered bytes into the start of ``buf``.

        ``nbytes`` defaults to 1 so callers drain the buffer byte by byte.

        Returns:
            The number of bytes actually copied. This is smaller than
            ``nbytes`` (possibly 0) when fewer bytes are buffered.
        """
        chunk = self.buffer[:nbytes]
        received = len(chunk)
        # slice by the real chunk length: assigning a shorter chunk to
        # buf[:nbytes] raises ValueError on a memoryview
        buf[:received] = chunk
        self.buffer = self.buffer[received:]
        return received

    def settimeout(self, timeout):
        """Record the timeout; it has no effect on the mock."""
        self.timeout = timeout

    def setblocking(self, flag):
        """Accept and ignore the blocking flag."""

    def listen(self, backlog):
        """Accept and ignore the backlog."""

    def sendall(self, data, flags=None):
        """Append ``data`` to the buffer and return its length."""
        self.buffer += data
        return len(data)

    # pylint: disable=no-self-use
    def getpeername(self):
        """Return a fixed placeholder peer address."""
        return ("peer-address", "peer-port")

    def close(self):
        """Do nothing; there is no real resource to release."""

    def connect(self, host):
        """Accept and ignore the target address."""


class Socket:
    """Stand-in exposing the constants and factory the tests need."""

    AF_UNIX = "AF_UNIX"
    SOCK_STREAM = "SOCK_STREAM"

    @staticmethod
    def socket(family=None, typ=None, protocol=None):
        """Create a :class:`MockSocket`; only ``family`` is kept."""
        return MockSocket(family)
/opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ 34 | echo "conda activate base" >> ~/.bashrc && \ 35 | find /opt/conda/ -follow -type f -name '*.a' -delete && \ 36 | find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ 37 | /opt/conda/bin/conda clean -afy 38 | 39 | ENV PYTHON_PREFIX=/opt/conda/bin 40 | ENV PATH="$PATH:/opt/conda/bin" 41 | 42 | RUN update-alternatives --install /usr/bin/python python ${PYTHON_PREFIX}/python 1 && \ 43 | update-alternatives --install /usr/bin/python3 python3 ${PYTHON_PREFIX}/python3 1 && \ 44 | update-alternatives --install /usr/bin/pip pip ${PYTHON_PREFIX}/pip 1 && \ 45 | update-alternatives --install /usr/bin/pip3 pip3 ${PYTHON_PREFIX}/pip3 1 46 | 47 | RUN pip install mosec 48 | 49 | RUN mkdir -p /workspace 50 | WORKDIR /workspace 51 | 52 | CMD [ "/bin/bash" ] 53 | -------------------------------------------------------------------------------- /examples/shm_ipc/redis.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Example: Using Redis store with mosec mixin RedisShmIPCMixin. 16 | 17 | We start a subprocess for the Redis server, and pass the url 18 | to the redis client which serves as the shm mixin. 
19 | We also register the redis server process as a daemon, so 20 | that when it exits the service is able to gracefully shut down 21 | and be restarted by the orchestrator. 22 | """ 23 | 24 | import subprocess 25 | 26 | import numpy as np 27 | 28 | from mosec import Server, ValidationError, Worker 29 | from mosec.mixin import RedisShmIPCMixin 30 | 31 | 32 | class DataProducer(RedisShmIPCMixin, Worker): 33 | """Sample Data Producer.""" 34 | 35 | def forward(self, data: dict) -> np.ndarray: 36 | # pylint: disable=duplicate-code 37 | try: 38 | nums = np.random.rand(int(data["size"])) 39 | except KeyError as err: 40 | raise ValidationError(err) from err 41 | return nums 42 | 43 | 44 | class DataConsumer(RedisShmIPCMixin, Worker): 45 | """Sample Data Consumer.""" 46 | 47 | def forward(self, data: np.ndarray) -> dict: 48 | return {"ipc test data": data.tolist()} 49 | 50 | 51 | if __name__ == "__main__": 52 | with subprocess.Popen(["redis-server"]) as p: # start the redis server 53 | # configure the redis url 54 | RedisShmIPCMixin.set_redis_url("redis://localhost:6379/0") 55 | 56 | server = Server() 57 | # register this process to be monitored 58 | server.register_daemon("redis-server", p) 59 | server.append_worker(DataProducer, num=2) 60 | server.append_worker(DataConsumer, num=2) 61 | server.run() 62 | -------------------------------------------------------------------------------- /tests/services/timeout_service.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Test worker timeout configuration.""" 16 | 17 | import os 18 | import time 19 | from typing import Any 20 | 21 | from mosec import Runtime, Server, Worker, get_logger 22 | 23 | logger = get_logger() 24 | 25 | 26 | class SleepyInference(Worker): 27 | """Sample Class.""" 28 | 29 | def forward(self, data: Any) -> Any: 30 | sleep_duration = float(os.getenv("SLEEP_DURATION", default="0")) 31 | logger.info("sleep_duration %s", sleep_duration) 32 | time.sleep(sleep_duration) 33 | return data 34 | 35 | 36 | if __name__ == "__main__": 37 | import argparse 38 | 39 | parser = argparse.ArgumentParser() 40 | 41 | parser.add_argument("--sleep-duration", type=float, help="worker sleep duration") 42 | parser.add_argument("--worker-timeout", type=float, help="worker timeout") 43 | parser.add_argument("--port", type=int, help="port") 44 | parser.add_argument( 45 | "--runtime", 46 | action="store_true", 47 | help="use runtime register instead of append worker", 48 | ) 49 | 50 | args = parser.parse_args() 51 | 52 | sleep_duration = args.sleep_duration 53 | worker_timeout = args.worker_timeout 54 | server = Server() 55 | if args.runtime: 56 | sleepy = Runtime( 57 | SleepyInference, 58 | timeout=worker_timeout, 59 | env=[{"SLEEP_DURATION": str(sleep_duration)}], 60 | ) 61 | server.register_runtime({"/inference": [sleepy]}) 62 | else: 63 | server.append_worker( 64 | SleepyInference, 65 | timeout=worker_timeout, 66 | env=[{"SLEEP_DURATION": str(sleep_duration)}], 67 | ) 68 | server.run() 69 | 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PY_SOURCE_FILES=mosec tests examples 2 | RUST_SOURCE_FILES=src/* 3 | RUST_BACKTRACE=1 4 | 5 | install_py: 6 | uv venv 7 | uv sync --all-groups --all-extras 8 | uv run -- prek install 9 | 10 | install_rs: 11 | rustup toolchain install nightly --no-self-update 12 | rustup component add rustfmt clippy --toolchain nightly 13 | 14 | install: install_py install_rs 15 | 16 | test: 17 | echo "Running tests for the main logic and mixin(!shm)" 18 | uv run -- pytest tests -vv -s -m "not shm" 19 | cargo test -vv 20 | 21 | test_unit: 22 | echo "Running tests for the main logic" 23 | uv run -- pytest -vv -s tests/test_log.py tests/test_protocol.py tests/test_coordinator.py 24 | cargo test -vv 25 | 26 | test_shm: 27 | echo "Running tests for the shm mixin" 28 | uv run -- pytest tests -vv -s -m "shm" 29 | 30 | test_all: 31 | echo "Running tests for the all features" 32 | uv run -- pytest tests -vv -s 33 | cargo test -vv 34 | 35 | test_chaos: 36 | @uv run -m tests.bad_req 37 | 38 | doc: 39 | @cd docs && make html && cd ../ 40 | @uv run -m http.server -d docs/build/html 7291 -b 127.0.0.1 41 | 42 | clean: 43 | @cargo clean 44 | @uv cache clean 45 | @-rm -rf build/ dist/ .eggs/ site/ *.egg-info .pytest_cache .mypy_cache .ruff_cache 46 | @-find . -name '*.pyc' -type f -exec rm -rf {} + 47 | @-find . 
-name '__pycache__' -exec rm -rf {} + 48 | 49 | package: clean 50 | uv run -- maturin build --release --out dist 51 | 52 | publish: package 53 | uv run -- twine upload dist/* 54 | 55 | format: 56 | @uv run -- ruff check --fix ${PY_SOURCE_FILES} 57 | @uv run -- ruff format ${PY_SOURCE_FILES} 58 | @cargo +nightly fmt --all 59 | 60 | lint: 61 | @uv run -- ruff check ${PY_SOURCE_FILES} 62 | @uv run -- ruff format --check ${PY_SOURCE_FILES} 63 | @-rm mosec/_version.py 64 | @uv run -- pyright --stats 65 | @uv run -- mypy --non-interactive --install-types ${PY_SOURCE_FILES} 66 | @cargo +nightly fmt -- --check 67 | 68 | semantic_lint: 69 | @cargo clippy -- -D warnings 70 | 71 | version: 72 | @cargo metadata --format-version 1 | jq -r '.packages[] | select(.name == "mosec") | .version' 73 | 74 | add_license: 75 | @addlicense -c "MOSEC Authors" **/*.py **/*.rs **/**/*.py 76 | 77 | dep_license: 78 | @cargo license --direct-deps-only --authors --avoid-build-deps --avoid-dev-deps --do-not-bundle --all-features --json > license.json 79 | 80 | .PHONY: test doc 81 | -------------------------------------------------------------------------------- /examples/multi_route/server.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from typing import Any 16 | 17 | from msgspec import Struct 18 | 19 | from mosec import Runtime, Server, Worker 20 | from mosec.mixin import TypedMsgPackMixin 21 | 22 | 23 | class Request(Struct): 24 | """User request struct.""" 25 | 26 | # pylint: disable=too-few-public-methods 27 | 28 | bin: bytes 29 | name: str = "test" 30 | 31 | 32 | class TypedPreprocess(TypedMsgPackMixin, Worker): 33 | """Dummy preprocess to exit early if the validation failed.""" 34 | 35 | def forward(self, data: Request) -> Any: 36 | """Input will be parsed as the `Request`.""" 37 | print(f"received from {data.name} with {data.bin!r}") 38 | return data.bin 39 | 40 | 41 | class Preprocess(Worker): 42 | """Dummy preprocess worker.""" 43 | 44 | def deserialize(self, data: bytes) -> Any: 45 | return data 46 | 47 | def forward(self, data: Any) -> Any: 48 | return data 49 | 50 | 51 | class Inference(Worker): 52 | """Dummy inference worker.""" 53 | 54 | def forward(self, data: Any) -> Any: 55 | return [{"length": len(datum)} for datum in data] 56 | 57 | 58 | class TypedPostprocess(TypedMsgPackMixin, Worker): 59 | """Dummy postprocess with msgpack.""" 60 | 61 | def forward(self, data: Any) -> Any: 62 | return data 63 | 64 | 65 | if __name__ == "__main__": 66 | server = Server() 67 | typed_pre = Runtime(TypedPreprocess) 68 | pre = Runtime(Preprocess) 69 | inf = Runtime(Inference, max_batch_size=16) 70 | typed_post = Runtime(TypedPostprocess) 71 | server.register_runtime( 72 | { 73 | "/v1/inference": [typed_pre, inf, typed_post], 74 | "/inference": [pre, inf], 75 | } 76 | ) 77 | server.run() 78 | -------------------------------------------------------------------------------- /tests/services/multi_route_service.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Test multi-route service.""" 16 | 17 | from typing import Any 18 | 19 | from msgspec import Struct 20 | 21 | from mosec import Runtime, Server, Worker 22 | from mosec.mixin import TypedMsgPackMixin 23 | 24 | 25 | class Request(Struct): 26 | """User request struct.""" 27 | 28 | # pylint: disable=too-few-public-methods 29 | 30 | bin: bytes 31 | name: str = "test" 32 | 33 | 34 | class TypedPreprocess(TypedMsgPackMixin, Worker): 35 | """Dummy preprocess to exit early if the validation failed.""" 36 | 37 | def forward(self, data: Request) -> Any: 38 | """Input will be parsed as the `Request`.""" 39 | print(f"received from {data.name} with {data.bin!r}") 40 | return data.bin 41 | 42 | 43 | class Preprocess(Worker): 44 | """Dummy preprocess worker.""" 45 | 46 | def deserialize(self, data: bytes) -> Any: 47 | return data 48 | 49 | def forward(self, data: Any) -> Any: 50 | return data 51 | 52 | 53 | class Inference(Worker): 54 | """Dummy inference worker.""" 55 | 56 | def forward(self, data: Any) -> Any: 57 | return [{"length": len(datum)} for datum in data] 58 | 59 | 60 | class TypedPostprocess(TypedMsgPackMixin, Worker): 61 | """Dummy postprocess with msgpack.""" 62 | 63 | def forward(self, data: Any) -> Any: 64 | return data 65 | 66 | 67 | if __name__ == "__main__": 68 | server = Server() 69 | typed_pre = Runtime(TypedPreprocess) 70 | pre = Runtime(Preprocess) 71 | inf = Runtime(Inference, max_batch_size=16) 72 | typed_post = Runtime(TypedPostprocess) 73 | server.register_runtime( 74 | { 75 | "/v1/inference": [typed_pre, 
inf, typed_post], 76 | "/inference": [pre, inf], 77 | } 78 | ) 79 | server.run() 80 | -------------------------------------------------------------------------------- /examples/shm_ipc/plasma_legacy.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Example: Using Plasma store with mosec mixin PlasmaShmIPCMixin. 16 | 17 | We start a subprocess for the plasma server, and pass the path 18 | to the plasma client which serves as the shm mixin. 19 | We also register the plasma server process as a daemon, so 20 | that when it exits the service is able to gracefully shut down 21 | and be restarted by the orchestrator. 
22 | """ 23 | 24 | import numpy as np 25 | from pyarrow import plasma # type: ignore 26 | 27 | from mosec import Server, ValidationError, Worker 28 | from mosec.mixin import PlasmaShmIPCMixin 29 | 30 | 31 | class DataProducer(PlasmaShmIPCMixin, Worker): 32 | """Sample Data Producer.""" 33 | 34 | def forward(self, data: dict) -> np.ndarray: 35 | # pylint: disable=duplicate-code 36 | try: 37 | nums = np.random.rand(int(data["size"])) 38 | except KeyError as err: 39 | raise ValidationError(err) from err 40 | return nums 41 | 42 | 43 | class DataConsumer(PlasmaShmIPCMixin, Worker): 44 | """Sample Data Consumer.""" 45 | 46 | def forward(self, data: np.ndarray) -> dict: 47 | return {"ipc test data": data.tolist()} 48 | 49 | 50 | if __name__ == "__main__": 51 | # 200 Mb store, adjust the size according to your requirement 52 | with plasma.start_plasma_store(plasma_store_memory=200 * 1000 * 1000) as ( 53 | shm_path, 54 | shm_process, 55 | ): 56 | # configure the plasma service path 57 | PlasmaShmIPCMixin.set_plasma_path(shm_path) 58 | 59 | server = Server() 60 | # register this process to be monitored 61 | server.register_daemon("plasma_server", shm_process) 62 | server.append_worker(DataProducer, num=2) 63 | server.append_worker(DataConsumer, num=2) 64 | server.run() 65 | -------------------------------------------------------------------------------- /src/apidoc.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 MOSEC Authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use utoipa::openapi::{Components, OpenApi}; 16 | 17 | use crate::config::Route; 18 | 19 | #[derive(Default, Clone)] 20 | pub(crate) struct MosecOpenAPI { 21 | pub api: OpenApi, 22 | } 23 | 24 | impl MosecOpenAPI { 25 | /// Merge the route request_body/response/schemas into the OpenAPI, cloning the reserved SSE or non-SSE template path that matches `route.is_sse`. 26 | pub fn merge_route(&mut self, route: &Route) -> &mut Self { 27 | let reserved = match route.is_sse { 28 | true => "/openapi/reserved/inference_sse", 29 | false => "/openapi/reserved/inference", 30 | }; 31 | let mut path = self.api.paths.paths.get(reserved).unwrap().clone(); 32 | if let Some(mut op) = path.post.clone() { 33 | if let Some(mut user_schemas) = route.schemas.clone() { 34 | if self.api.components.is_none() { 35 | self.api.components = Some(Components::default()); 36 | } 37 | self.api 38 | .components 39 | .as_mut() 40 | .unwrap() 41 | .schemas 42 | .append(&mut user_schemas); 43 | }; 44 | if let Some(req) = route.request_body.clone() { 45 | op.request_body = Some(req); 46 | }; 47 | 48 | if let Some(mut responses) = route.responses.clone() { 49 | op.responses.responses.append(&mut responses); 50 | }; 51 | path.post = Some(op); 52 | } 53 | self.api.paths.paths.insert(route.endpoint.clone(), path); 54 | 55 | self 56 | } 57 | 58 | /// Removes the reserved paths from the OpenAPI spec. 
59 | pub fn clean(&mut self) -> &mut Self { 60 | self.api.paths.paths.remove("/openapi/reserved/inference"); 61 | self.api 62 | .paths 63 | .paths 64 | .remove("/openapi/reserved/inference_sse"); 65 | self 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /mosec/mixin/msgpack_worker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """MOSEC msgpack worker mixin. 16 | 17 | Features: 18 | 19 | * deserialize request body with msgpack 20 | * serialize response body with msgpack 21 | """ 22 | 23 | # pylint: disable=import-outside-toplevel 24 | 25 | from typing import Any 26 | 27 | from mosec.errors import DecodingError, EncodingError 28 | 29 | 30 | class MsgpackMixin: 31 | """Msgpack worker mixin interface.""" 32 | 33 | # pylint: disable=no-self-use 34 | 35 | resp_mime_type = "application/msgpack" 36 | 37 | def serialize(self, data: Any) -> bytes: 38 | """Serialize with msgpack for the last stage (egress). 
39 | 40 | Arguments: 41 | data: the **same type** as returned by 42 | :py:meth:`Worker.forward <mosec.worker.Worker.forward>` 43 | 44 | Returns: 45 | the bytes you want to put into the response body 46 | 47 | Raises: 48 | EncodingError: if the data cannot be serialized with msgpack 49 | 50 | """ 51 | import msgpack # type: ignore 52 | 53 | try: 54 | data_bytes = msgpack.packb(data) 55 | except Exception as err: 56 | raise EncodingError from err 57 | return data_bytes # type: ignore 58 | 59 | def deserialize(self, data: bytes) -> Any: 60 | """Deserialize method for the first stage (ingress). 61 | 62 | Arguments: 63 | data: the raw bytes extracted from the request body 64 | 65 | Returns: 66 | the **same type** as the input of 67 | :py:meth:`Worker.forward <mosec.worker.Worker.forward>` 68 | 69 | Raises: 70 | DecodingError: if the data cannot be deserialized with msgpack 71 | 72 | """ 73 | import msgpack 74 | 75 | try: 76 | data_msg = msgpack.unpackb(data, use_list=False) 77 | except Exception as err: 78 | raise DecodingError from err 79 | return data_msg 80 | -------------------------------------------------------------------------------- /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | name: lint and test 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - '.github/workflows/check.yml' 7 | - 'mosec/**' 8 | - 'src/**' 9 | - 'tests/**' 10 | - 'examples/**' 11 | - 'pyproject.toml' 12 | - 'Cargo.lock' 13 | - 'Cargo.toml' 14 | push: 15 | branches: 16 | - main 17 | paths: 18 | - '.github/workflows/check.yml' 19 | - 'mosec/**' 20 | - 'src/**' 21 | - 'tests/**' 22 | - 'examples/**' 23 | - 'pyproject.toml' 24 | - 'Cargo.lock' 25 | - 'Cargo.toml' 26 | merge_group: 27 | 28 | concurrency: 29 | group: ${{ github.ref }}-${{ github.workflow }} 30 | cancel-in-progress: true 31 | 32 | env: 33 | SCCACHE_GHA_ENABLED: "true" 34 | RUSTC_WRAPPER: "sccache" 35 | 36 | jobs: 37 | lint: 38 | runs-on: ubuntu-latest 39 | 
timeout-minutes: 5 40 | steps: 41 | - uses: actions/checkout@v6 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v7 44 | with: 45 | enable-cache: true 46 | - name: Set up Rust 47 | uses: dtolnay/rust-toolchain@stable 48 | - name: Run sccache-cache 49 | uses: mozilla-actions/sccache-action@v0.0.9 50 | - name: Install dependencies 51 | run: make install 52 | - name: Lint 53 | run: make lint semantic_lint 54 | 55 | test: 56 | runs-on: ${{ matrix.os }} 57 | timeout-minutes: 20 58 | strategy: 59 | fail-fast: false 60 | matrix: 61 | python-version: ["3.10", "3.11", "3.12", "3.13", "3.14", "3.14t"] 62 | os: [ubuntu-24.04, ubuntu-24.04-arm, macos-15-intel, macos-14] 63 | 64 | steps: 65 | - uses: actions/checkout@v6 66 | - name: Install uv 67 | uses: astral-sh/setup-uv@v7 68 | with: 69 | enable-cache: true 70 | python-version: ${{ matrix.python-version }} 71 | - name: Set up Rust 72 | uses: dtolnay/rust-toolchain@stable 73 | - name: Run sccache-cache 74 | uses: mozilla-actions/sccache-action@v0.0.9 75 | - name: Install components 76 | run: make install 77 | - name: Test unit 78 | run: make test_unit 79 | - name: Test 80 | run: make test 81 | - name: Test shm in Linux 82 | # ignore the shm test for Python 3.12 since pyarrow doesn't have py3.12 wheel with version < 12 83 | if: ${{ startsWith(matrix.os, 'ubuntu') && !startsWith(matrix.python-version, '3.12') && !startsWith(matrix.python-version, '3.13') && !startsWith(matrix.python-version, '3.14') }} 84 | run: | 85 | docker run --rm -d --name redis -p 6379:6379 redis 86 | make test_shm 87 | docker stop redis 88 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | import os 7 | import sys 8 | 9 | sys.path.insert(0, os.path.abspath("../..")) 10 | 11 | # -- Project information ----------------------------------------------------- 12 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 13 | 14 | project = "mosec" 15 | copyright = "2023, mosec maintainers" 16 | author = "mosec maintainers" 17 | release = "latest" 18 | 19 | # -- General configuration --------------------------------------------------- 20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 21 | 22 | extensions = [ 23 | "sphinx.ext.viewcode", 24 | "sphinx.ext.autodoc", 25 | "sphinx.ext.githubpages", 26 | "sphinx.ext.napoleon", 27 | "myst_parser", 28 | "sphinx_copybutton", 29 | "sphinxcontrib.programoutput", 30 | "sphinx_autodoc_typehints", 31 | "sphinxext.opengraph", 32 | "sphinx_sitemap", 33 | ] 34 | 35 | templates_path = ["_templates"] 36 | exclude_patterns = [] 37 | source_suffix = [".rst", ".md"] 38 | master_doc = "index" 39 | language = "en" 40 | 41 | # Extension configuration 42 | myst_heading_anchors = 3 43 | autodoc_member_order = "bysource" 44 | # https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html 45 | napoleon_attr_annotations = True 46 | napoleon_include_init_with_doc = True 47 | napoleon_use_admonition_for_references = True 48 | # https://sphinxext-opengraph.readthedocs.io/en/latest/ 49 | ogp_site_url = "https://mosecorg.github.io/mosec/" 50 | ogp_image = "https://user-images.githubusercontent.com/38581401/240117836-f06199ba-c80d-413a-9cb4-5adc76316bda.png" 51 | # https://sphinx-sitemap.readthedocs.io/en/latest/getting-started.html 52 | html_baseurl = "https://mosecorg.github.io/mosec/" 53 | html_extra_path = ['robots.txt'] 54 | # -- Options for HTML output ------------------------------------------------- 55 
| # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 56 | 57 | html_theme = "furo" 58 | html_logo = "https://user-images.githubusercontent.com/38581401/240117836-f06199ba-c80d-413a-9cb4-5adc76316bda.png" 59 | html_static_path = ["_static"] 60 | html_favicon = "https://user-images.githubusercontent.com/38581401/134798617-0104dc12-e0d4-4ed5-a79c-9e2435e99a14.png" 61 | 62 | # Theme 63 | html_theme_options = { 64 | "sidebar_hide_name": True, 65 | "navigation_with_keys": True, 66 | "source_repository": "https://github.com/mosecorg/mosec", 67 | "source_branch": "main", 68 | "source_directory": "docs/source", 69 | } 70 | -------------------------------------------------------------------------------- /mosec/mixin/typed_worker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """MOSEC type validation mixin.""" 16 | 17 | # pylint: disable=import-outside-toplevel 18 | 19 | from typing import Any, Dict, Optional, Tuple 20 | 21 | from mosec import get_logger 22 | from mosec.errors import ValidationError 23 | from mosec.utils import ParseTarget, parse_func_type 24 | from mosec.worker import Worker 25 | 26 | logger = get_logger() 27 | 28 | 29 | class TypedMsgPackMixin(Worker): 30 | """Enable request type validation with `msgspec` and serde with `msgpack`.""" 31 | 32 | # pylint: disable=no-self-use 33 | 34 | resp_mime_type = "application/msgpack" 35 | _input_typ: Optional[type] = None 36 | 37 | def deserialize(self, data: Any) -> Any: 38 | """Deserialize and validate request with msgspec.""" 39 | import msgspec 40 | 41 | if self._input_typ is None: 42 | self._input_typ = parse_func_type(self.forward, ParseTarget.INPUT) 43 | 44 | try: 45 | return msgspec.msgpack.decode(data, type=self._input_typ) 46 | except msgspec.ValidationError as err: 47 | raise ValidationError(err) from err 48 | 49 | def serialize(self, data: Any) -> bytes: 50 | """Serialize with `msgpack`.""" 51 | import msgspec 52 | 53 | return msgspec.msgpack.encode(data) 54 | 55 | @classmethod 56 | def get_forward_json_schema( 57 | cls, target: ParseTarget, ref_template: str 58 | ) -> Tuple[Dict[str, Any], Dict[str, Any]]: 59 | """Get the JSON schema of the forward function.""" 60 | import msgspec 61 | 62 | schema: Dict[str, Any] 63 | comp_schema: Dict[str, Any] 64 | schema, comp_schema = {}, {} 65 | typ = parse_func_type(cls.forward, target) 66 | try: 67 | (schema,), comp_schema = msgspec.json.schema_components( 68 | [typ], ref_template=ref_template 69 | ) 70 | except TypeError as err: 71 | logger.warning( 72 | "Failed to generate JSON schema for %s: %s", cls.__name__, err 73 | ) 74 | return schema, comp_schema 75 | -------------------------------------------------------------------------------- /examples/segment/server.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # refer to https://github.com/facebookresearch/sam2/blob/main/notebooks/image_predictor_example.ipynb 16 | 17 | import numbin 18 | import torch # type: ignore 19 | from sam2.sam2_image_predictor import SAM2ImagePredictor # type: ignore 20 | 21 | from mosec import Server, Worker, get_logger 22 | from mosec.mixin import MsgpackMixin 23 | 24 | logger = get_logger() 25 | MIN_TF32_MAJOR = 8 26 | 27 | 28 | class SegmentAnything(MsgpackMixin, Worker): 29 | def __init__(self): 30 | # select the device for computation 31 | if torch.cuda.is_available(): 32 | device = torch.device("cuda") 33 | elif torch.backends.mps.is_available(): 34 | device = torch.device("mps") 35 | else: 36 | device = torch.device("cpu") 37 | logger.info("using device: %s", device) 38 | 39 | self.predictor = SAM2ImagePredictor.from_pretrained( 40 | "facebook/sam2-hiera-large", device=device 41 | ) 42 | 43 | if device.type == "cuda": 44 | # use bfloat16 45 | torch.autocast("cuda", dtype=torch.bfloat16).__enter__() 46 | # turn on tf32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices) 47 | if torch.cuda.get_device_properties(0).major >= MIN_TF32_MAJOR: 48 | torch.backends.cuda.matmul.allow_tf32 = True 49 | torch.backends.cudnn.allow_tf32 = True 50 
| 51 | def forward(self, data: dict) -> bytes: 52 | with torch.inference_mode(): 53 | self.predictor.set_image(numbin.loads(data["image"])) 54 | masks, _, _ = self.predictor.predict( 55 | point_coords=data["point_coords"], 56 | point_labels=data["labels"], 57 | mask_input=numbin.loads(data["mask"])[None, :, :], 58 | multimask_output=False, 59 | ) 60 | return numbin.dumps(masks[0]) 61 | 62 | 63 | if __name__ == "__main__": 64 | server = Server() 65 | server.append_worker(SegmentAnything, num=1, max_batch_size=1) 66 | server.run() 67 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Self 2 | mosec/bin 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | wheelhouse/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # Added by cargo 136 | debug/ 137 | target/ 138 | **/*.rs.bk 139 | *.pdb 140 | 141 | # IDE 142 | .vscode/* 143 | .idea/* 144 | 145 | # version file generated by setuptools_scm 146 | mosec/_version.py 147 | 148 | # ruff 149 | .ruff_cache/ 150 | -------------------------------------------------------------------------------- /docs/source/reference/concept.md: -------------------------------------------------------------------------------- 1 | # Concept and FAQs 2 | 3 | There are a few terms used in `mosec`. 
4 | 5 | - `worker`: a Python process that executes the `forward` method (inherits from [`mosec.Worker`](mosec.worker.Worker)) 6 | - `stage`: one processing unit in the pipeline, each stage contains several `worker` replicas 7 | - also known as [`Runtime`](mosec.runtime.Runtime) in the code 8 | - each stage retrieves the data from the previous stage and passes the result to the next stage 9 | - retrieved data will be deserialized by the [`Worker.deserialize_ipc`](mosec.worker.Worker.deserialize_ipc) method 10 | - data to be passed will be serialized by the [`Worker.serialize_ipc`](mosec.worker.Worker.serialize_ipc) method 11 | - `ingress/egress`: the first/last stage in the pipeline 12 | - ingress gets data from the client, while egress sends data to the client 13 | - data will be deserialized by the ingress [`Worker.deserialize`](mosec.worker.Worker.deserialize) method and serialized by the egress [`Worker.serialize`](mosec.worker.Worker.serialize) method 14 | - `pipeline`: a chain of processing stages, will be registered to an endpoint (default: `/inference`) 15 | - a server can have multiple pipelines, check the [multi-route](../examples/multi_route.md) example 16 | - `dynamic batching`: batch requests until either the max batch size or the max wait time is reached 17 | - `controller`: a Rust tokio thread that works on: 18 | - read from the previous queue to get new tasks 19 | - send tasks to the ready-to-process worker via the Unix domain socket 20 | - receive results from the worker 21 | - send the tasks to the next queue 22 | 23 | ## FAQs 24 | 25 | ### How to raise an exception? 26 | 27 | Use the `raise` keyword with [mosec.errors](mosec.errors). Raising other exceptions will be treated as a "500 Internal Server Error". 28 | 29 | If a request raises any exception, the error will be returned to the client directly without going through the remaining stages. 30 | 31 | ### How to change the serialization/deserialization methods?
32 | 33 | Just let the ingress/egress worker inherit a suitable mixin like [`MsgpackMixin`](mosec.mixin.MsgpackMixin). 34 | 35 | ```{note} 36 | The inheritance order matters in Python. Check [multiple inheritance](https://docs.python.org/3/tutorial/classes.html#multiple-inheritance) for more information. 37 | ``` 38 | 39 | You can also implement the `serialize/deserialize` method to your `ingress/egress` worker directly. 40 | 41 | ### How to share configurations among different workers? 42 | 43 | If the configuration structure is initialized globally, all the workers should be able to use it directly. 44 | 45 | If you want to assign different workers with different configurations, the best way is to use the `env` (ref [`append_worker`](mosec.server.Server.append_worker)). 46 | -------------------------------------------------------------------------------- /mosec/mixin/plasma_worker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """MOSEC plasma worker mixin. 16 | 17 | Provide another data transfer way between workers. 18 | 19 | The data will be stored in plasma shared memory, while the object ID will be 20 | sent via the original way. 21 | 22 | use case: large image tensors 23 | benefits: more stable P99 latency 24 | 25 | ```{warning} 26 | The plasma is deprecated in `pyarrow`. 
Please use Redis instead. 27 | ``` 28 | """ 29 | 30 | # pylint: disable=import-outside-toplevel 31 | 32 | from os import environ 33 | from typing import Any 34 | 35 | from mosec.worker import Worker 36 | 37 | _PLASMA_PATH_ENV = "MOSEC_INTERNAL_PLASMA_PATH" 38 | 39 | 40 | class PlasmaShmIPCMixin(Worker): 41 | """Plasma shared memory worker mixin interface.""" 42 | 43 | _plasma_client = None 44 | 45 | @classmethod 46 | def set_plasma_path(cls, path: str): 47 | """Set the plasma service path.""" 48 | environ[_PLASMA_PATH_ENV] = path 49 | 50 | def _get_client(self): 51 | """Get the plasma client. This will create a new one if not exist.""" 52 | from pyarrow import plasma # type: ignore 53 | 54 | if not self._plasma_client: 55 | path = environ.get(_PLASMA_PATH_ENV) 56 | if not path: 57 | raise RuntimeError( 58 | "please set the plasma path with " 59 | "`PlasmaShmIPCMixin.set_plasma_path()`" 60 | ) 61 | self._plasma_client = plasma.connect(path) 62 | return self._plasma_client 63 | 64 | def serialize_ipc(self, data: Any) -> bytes: 65 | """Save the data to the plasma server and return the id.""" 66 | client = self._get_client() 67 | object_id = client.put(super().serialize_ipc(data)) 68 | return object_id.binary() 69 | 70 | def deserialize_ipc(self, data: bytes) -> Any: 71 | """Get the data from the plasma server and delete it.""" 72 | from pyarrow import plasma # type: ignore 73 | 74 | client = self._get_client() 75 | object_id = plasma.ObjectID(bytes(data)) 76 | obj = super().deserialize_ipc(client.get(object_id)) 77 | client.delete((object_id,)) 78 | return obj 79 | -------------------------------------------------------------------------------- /examples/monitor/python_side_metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Example: Adding metrics service.""" 15 | 16 | import os 17 | import pathlib 18 | import tempfile 19 | from typing import List 20 | 21 | from prometheus_client import ( # type: ignore 22 | CollectorRegistry, 23 | Counter, 24 | multiprocess, 25 | start_http_server, 26 | ) 27 | 28 | from mosec import Server, ValidationError, Worker, get_logger 29 | 30 | logger = get_logger() 31 | 32 | 33 | # check the PROMETHEUS_MULTIPROC_DIR environment variable before import Prometheus 34 | if not os.getenv("PROMETHEUS_MULTIPROC_DIR"): 35 | metric_dir_path = os.path.join(tempfile.gettempdir(), "prometheus_multiproc_dir") 36 | pathlib.Path(metric_dir_path).mkdir(parents=True, exist_ok=True) 37 | os.environ["PROMETHEUS_MULTIPROC_DIR"] = metric_dir_path 38 | 39 | 40 | metric_registry = CollectorRegistry() 41 | multiprocess.MultiProcessCollector(metric_registry) 42 | counter = Counter( 43 | "inference_result", 44 | "statistic of result", 45 | ("status", "worker_id"), 46 | registry=metric_registry, 47 | ) 48 | 49 | 50 | class Inference(Worker): 51 | """Sample Inference Worker.""" 52 | 53 | def __init__(self): 54 | super().__init__() 55 | self.worker_id = str(self.worker_id) 56 | 57 | def deserialize(self, data: bytes) -> int: 58 | json_data = super().deserialize(data) 59 | try: 60 | res = int(json_data.get("num")) 61 | except Exception as err: 62 | raise ValidationError(err) from err 63 | return res 64 | 65 | def forward(self, data: List[int]) -> List[bool]: 66 | avg = sum(data) / len(data) 67 | ans = [x >= avg for x in data] 68 | 
counter.labels(status="true", worker_id=self.worker_id).inc(sum(ans)) 69 | counter.labels(status="false", worker_id=self.worker_id).inc( 70 | len(ans) - sum(ans) 71 | ) 72 | return ans 73 | 74 | 75 | if __name__ == "__main__": 76 | # Run the metrics server in another thread. 77 | start_http_server(5000, registry=metric_registry) 78 | 79 | # Run the inference server 80 | server = Server() 81 | server.append_worker(Inference, num=2, max_batch_size=8) 82 | server.run() 83 | -------------------------------------------------------------------------------- /tests/bad_req.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """A chaos test that contains: 16 | 17 | - normal request 18 | - early disconnection 19 | - client bad request data 20 | - service internal error 21 | """ 22 | 23 | import concurrent.futures 24 | import os 25 | import shlex 26 | import subprocess 27 | from http import HTTPStatus 28 | from random import random 29 | 30 | import httpx 31 | 32 | from tests.utils import wait_for_port_free, wait_for_port_open 33 | 34 | PORT = 5934 35 | URL = f"http://127.0.0.1:{PORT}/inference" 36 | REQ_NUM = int(os.getenv("CHAOS_REQUEST", "10000")) 37 | # set the thread number in case the CI server cannot get the real CPU number. 
38 | THREAD = 8 39 | NORMAL_RATE = 0.3 40 | 41 | 42 | def random_req(params, timeout): 43 | resp = httpx.post(URL, json=params, timeout=timeout) 44 | return resp 45 | 46 | 47 | def main(): 48 | with concurrent.futures.ThreadPoolExecutor(max_workers=THREAD) as executor: 49 | futures = [ 50 | executor.submit( 51 | random_req, 52 | {"time": 0.1} if random() > NORMAL_RATE else {"hey": 0}, 53 | random() / 3.0, 54 | ) 55 | for _ in range(REQ_NUM) 56 | ] 57 | count = 0 58 | for future in concurrent.futures.as_completed(futures): 59 | try: 60 | data = future.result() 61 | except Exception as err: # pylint: disable=broad-exception-caught 62 | print("[x]", err) 63 | else: 64 | print("[~]", data) 65 | count += 1 66 | 67 | print(f">> {count}/{REQ_NUM} requests received before disconnection") 68 | 69 | # re-try to check if the service is still alive 70 | resp = httpx.post(URL, json={"time": 0}) 71 | if resp.status_code != HTTPStatus.OK: 72 | print(resp) 73 | raise RuntimeError() 74 | 75 | 76 | if __name__ == "__main__": 77 | service = subprocess.Popen( 78 | shlex.split( 79 | f"python tests/services/bad_service.py --debug --timeout 500 --port {PORT}" 80 | ) 81 | ) 82 | assert wait_for_port_open(port=PORT) 83 | try: 84 | main() 85 | finally: 86 | service.terminate() 87 | assert wait_for_port_free(port=PORT) 88 | -------------------------------------------------------------------------------- /tests/services/openapi_service.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Test OpenAPI generated spec.""" 16 | 17 | import sys 18 | from typing import Any, Dict, List, Type 19 | 20 | from msgspec import Struct 21 | 22 | from mosec import Server, Worker 23 | from mosec.mixin import TypedMsgPackMixin 24 | 25 | 26 | class Request(Struct): 27 | """User request struct.""" 28 | 29 | # pylint: disable=too-few-public-methods 30 | 31 | bin: bytes 32 | name: str = "test" 33 | 34 | 35 | class TypedPreprocess(TypedMsgPackMixin, Worker): 36 | """Dummy preprocess to exit early if the validation failed.""" 37 | 38 | def forward(self, data: Request) -> Any: 39 | """Input will be parse as the `Request`.""" 40 | print(f"received {data}") 41 | return data.bin 42 | 43 | 44 | class UntypedPreprocess(TypedMsgPackMixin, Worker): 45 | """Dummy preprocess to exit early if the validation failed.""" 46 | 47 | def forward(self, data): 48 | """Input will be parse as the `Request`.""" 49 | print(f"received {data}") 50 | return data.bin 51 | 52 | 53 | class TypedInference(TypedMsgPackMixin, Worker): 54 | """Dummy batch inference.""" 55 | 56 | def forward(self, data: List[bytes]) -> List[int]: 57 | return [len(buf) for buf in data] 58 | 59 | 60 | class UntypedInference(TypedMsgPackMixin, Worker): 61 | """Dummy batch inference.""" 62 | 63 | def forward(self, data): 64 | return [len(buf) for buf in data] 65 | 66 | 67 | if __name__ == "__main__": 68 | if len(sys.argv) <= 1: 69 | print("Please specify the worker mapping. e.g. 
TypedPreprocess/TypedInference") 70 | sys.exit(1) 71 | 72 | worker_mapping: Dict[str, Type[Worker]] = { 73 | "TypedPreprocess": TypedPreprocess, 74 | "UntypedPreprocess": UntypedPreprocess, 75 | "TypedInference": TypedInference, 76 | "UntypedInference": UntypedInference, 77 | } 78 | 79 | server = Server() 80 | preprocess_worker, inference_worker = sys.argv[1].split("/") 81 | server.append_worker(worker_mapping[preprocess_worker], route="/v1/inference") 82 | server.append_worker( 83 | worker_mapping[inference_worker], max_batch_size=16, route="/v1/inference" 84 | ) 85 | server.run() 86 | -------------------------------------------------------------------------------- /tests/test_protocol.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Test protocol related logic.""" 16 | 17 | import json 18 | import pickle 19 | import random 20 | import struct 21 | from typing import List 22 | 23 | import pytest 24 | 25 | from mosec.coordinator import State 26 | from mosec.protocol import Protocol 27 | from tests.mock_socket import Socket 28 | from tests.utils import imitate_controller_send 29 | 30 | 31 | def echo(protocol: Protocol, data: List[bytes]): 32 | sent_flag = random.choice([1, 2, 4, 8]) 33 | 34 | sent_ids, sent_payloads = imitate_controller_send(protocol.socket, data) 35 | 36 | _, got_ids, got_states, got_payloads = protocol.receive() # client recv 37 | assert len(protocol.socket.buffer) == 0 # type: ignore 38 | assert got_ids == sent_ids 39 | assert all( 40 | bytes(got_payloads[i]) == sent_payloads[i] for i in range(len(sent_payloads)) 41 | ) 42 | got_payload_bytes = [bytes(x) for x in got_payloads] 43 | # client echo 44 | protocol.send(sent_flag, got_ids, got_states, got_payload_bytes) 45 | # server recv (symmetric protocol) 46 | got_flag, got_ids, got_states, got_payloads = protocol.receive() 47 | 48 | assert len(protocol.socket.buffer) == 0 # type: ignore 49 | assert struct.unpack("!H", got_flag)[0] == sent_flag 50 | assert got_states == [State.INGRESS | State.EGRESS] * len(sent_ids) 51 | assert got_ids == sent_ids 52 | assert all( 53 | bytes(got_payloads[i]) == sent_payloads[i] for i in range(len(sent_payloads)) 54 | ) 55 | 56 | 57 | @pytest.fixture 58 | def mock_protocol(mocker): 59 | mocker.patch("mosec.protocol.socket", Socket) 60 | protocol = Protocol(name="test", addr="mock.uds") 61 | return protocol 62 | 63 | 64 | @pytest.mark.parametrize( 65 | "test_data", 66 | [ 67 | [], 68 | ["test"], 69 | [1, 2, 3], 70 | [ 71 | json.dumps({"rid": "147982364", "data": "im_b64_str"}), 72 | json.dumps({"rid": "147982365", "data": "another_im_b64_str"}), 73 | ] 74 | * random.randint(1, 20), 75 | ], 76 | ) 77 | def test_echo(mock_protocol, test_data): 78 | mock_protocol.open() 79 | 
echo(mock_protocol, [pickle.dumps(x) for x in test_data]) 80 | mock_protocol.close() 81 | -------------------------------------------------------------------------------- /mosec/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Provide useful utils to inspect function type.""" 16 | 17 | import inspect 18 | import os 19 | import sysconfig 20 | from enum import Enum 21 | from pathlib import Path 22 | from typing import Any, List, Optional 23 | 24 | 25 | # adopted from https://github.com/PyO3/maturin/blob/main/maturin/__main__.py 26 | # License: Apache-2.0 or MIT 27 | def get_mosec_path() -> Optional[Path]: 28 | """Get `mosec` binary path.""" 29 | SCRIPT_NAME = "mosec" 30 | 31 | def script_dir(scheme: str) -> str: 32 | return sysconfig.get_path("scripts", scheme) 33 | 34 | def script_exists(dir: str) -> bool: 35 | for _, _, files in os.walk(dir): 36 | for f in files: 37 | name, *_ = os.path.splitext(f) 38 | if name == SCRIPT_NAME: 39 | return True 40 | 41 | return False 42 | 43 | paths = list( 44 | filter( 45 | script_exists, 46 | filter(os.path.exists, map(script_dir, sysconfig.get_scheme_names())), 47 | ) 48 | ) 49 | 50 | if paths: 51 | return Path(paths[0]) / SCRIPT_NAME 52 | 53 | return None 54 | 55 | 56 | class ParseTarget(Enum): 57 | """Enum to specify the target of parsing func type.""" 58 | 
59 | INPUT = "INPUT" 60 | RETURN = "RETURN" 61 | 62 | 63 | def parse_func_type(func, target: ParseTarget) -> type: 64 | """Parse the input type of the target function. 65 | 66 | - single request: return the type 67 | - batch request: return the list item type 68 | """ 69 | annotations = inspect.get_annotations(func, eval_str=True) 70 | name = func.__name__ 71 | typ = Any 72 | if target == ParseTarget.INPUT: 73 | for key in annotations: 74 | if key != "return": 75 | typ = annotations[key] 76 | break 77 | else: 78 | typ = annotations.get("return", Any) 79 | 80 | origin = getattr(typ, "__origin__", None) 81 | if origin is None: 82 | return typ # type: ignore 83 | # GenericAlias, `func` could be batch inference 84 | if origin is list or origin is List: 85 | if not hasattr(typ, "__args__") or len(typ.__args__) != 1: # type: ignore 86 | raise TypeError( 87 | f"`{name}` with dynamic batch should use " 88 | "`List[Struct]` as the input annotation" 89 | ) 90 | return typ.__args__[0] # type: ignore 91 | raise TypeError(f"unsupported type {typ}") 92 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "main" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "main" ] 20 | schedule: 21 | - cron: '29 17 * * 1' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 27 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 28 | permissions: 29 | actions: read 30 | contents: read 31 | security-events: write 32 | 33 | strategy: 34 | fail-fast: false 35 | matrix: 36 | language: [ 'python' ] 37 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ] 38 | # Use only 'java' to analyze code written in Java, Kotlin or both 39 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both 40 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 41 | 42 | steps: 43 | - name: Checkout repository 44 | uses: actions/checkout@v6 45 | 46 | # Initializes the CodeQL tools for scanning. 47 | - name: Initialize CodeQL 48 | uses: github/codeql-action/init@v4 49 | with: 50 | languages: ${{ matrix.language }} 51 | # If you wish to specify custom queries, you can do so here or in a config file. 52 | # By default, queries listed here will override any specified in a config file. 53 | # Prefix the list here with "+" to use these queries and those in the config file. 54 | 55 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 56 | # queries: security-extended,security-and-quality 57 | 58 | 59 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). 
60 | # If this step fails, then you should remove it and run the build manually (see below) 61 | - name: Autobuild 62 | uses: github/codeql-action/autobuild@v4 63 | 64 | # ℹ️ Command-line programs to run using the OS shell. 65 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 66 | 67 | # If the Autobuild fails above, remove it and uncomment the following three lines. 68 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 69 | 70 | # - run: | 71 | # echo "Run, Build Application using script" 72 | # ./location_of_script_within_repo/buildscript.sh 73 | 74 | - name: Perform CodeQL Analysis 75 | uses: github/codeql-action/analyze@v4 76 | with: 77 | category: "/language:${{matrix.language}}" 78 | -------------------------------------------------------------------------------- /mosec/mixin/redis_worker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """MOSEC redis worker mixin. 16 | 17 | Provide another data transfer way between workers. 18 | 19 | The data will be stored in redis shared memory, while the object ID will be 20 | sent via the original way. 
21 | 22 | use case: large image tensors, cluster-shared data 23 | benefits: more stable P99 latency 24 | 25 | """ 26 | 27 | # pylint: disable=import-outside-toplevel 28 | 29 | from os import environ 30 | from typing import Any 31 | 32 | from mosec.worker import Worker 33 | 34 | _REDIS_URL_ENV = "MOSEC_INTERNAL_REDIS_URL" 35 | _DEFAULT_KEY = "REDIS_SHM_IPC_KEY" 36 | 37 | 38 | class RedisShmIPCMixin(Worker): 39 | """Redis shared memory worker mixin interface.""" 40 | 41 | _redis_client = None 42 | _redis_key = _DEFAULT_KEY 43 | _next_id = None 44 | 45 | @classmethod 46 | def set_redis_url(cls, url: str): 47 | """Set the redis service url.""" 48 | environ[_REDIS_URL_ENV] = url 49 | 50 | def _get_client(self) -> Any: 51 | """Get the redis client. This will create a new one if not exist.""" 52 | import redis 53 | 54 | if self._redis_client is None: 55 | url = environ.get(_REDIS_URL_ENV) 56 | if not url: 57 | raise RuntimeError( 58 | "please set the redis url with `RedisShmIPCMixin.set_redis_url()`" 59 | ) 60 | self._redis_client = redis.from_url(url) 61 | return self._redis_client 62 | 63 | def _prepare_next_id(self) -> None: 64 | """Make sure the next id exists. 
This will create a new one if not exist.""" 65 | if self._next_id is None: 66 | client = self._get_client() 67 | key = self._redis_key 68 | self._next_id = bytes(str(client.incr(key)), encoding="utf-8") 69 | 70 | def serialize_ipc(self, data: Any) -> bytes: 71 | """Save the data to the redis server and return the id.""" 72 | self._prepare_next_id() 73 | client = self._get_client() 74 | with client.pipeline() as pipe: 75 | current_id = self._next_id 76 | pipe.set(current_id, super().serialize_ipc(data)) # type: ignore 77 | pipe.incr(self._redis_key) 78 | _id = pipe.execute()[-1] 79 | self._next_id = bytes(str(_id), encoding="utf-8") 80 | return current_id # type: ignore 81 | 82 | def deserialize_ipc(self, data: bytes) -> Any: 83 | """Get the data from the redis server and delete it.""" 84 | client = self._get_client() 85 | object_id = bytes(data) 86 | with client.pipeline() as pipe: 87 | pipe.get(object_id) 88 | pipe.delete(object_id) 89 | obj = pipe.execute()[0] 90 | return super().deserialize_ipc(obj) 91 | -------------------------------------------------------------------------------- /examples/jax_single_layer/server.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class JittedInference(Worker):
    """Two-layer dense classifier evaluated with jax, optionally jit-compiled.

    The weights are drawn once from a fixed PRNG seed in the constructor;
    ``forward`` returns the arg-max category for every request of a batch.
    """

    def __init__(self):
        super().__init__()
        first_key, second_key = jax.random.split(jax.random.PRNGKey(42))
        self._layer1_w = jax.random.normal(first_key, (INPUT_SIZE, LATENT_SIZE))
        self._layer1_b = jnp.zeros(LATENT_SIZE)
        self._layer2_w = jax.random.normal(second_key, (LATENT_SIZE, OUTPUT_SIZE))
        self._layer2_b = jnp.zeros(OUTPUT_SIZE)

        # One example batch per size 1..MAX_BATCH_SIZE, so every batch size
        # is enumerated for caching.
        sample = list(range(INPUT_SIZE))
        self.multi_examples = [
            [{"array": sample}] * batch_size
            for batch_size in range(1, MAX_BATCH_SIZE + 1)
        ]

        # Only wrap the batched function with `jax.jit` when explicitly enabled.
        self.batch_forward = (
            jax.jit(self._batch_forward) if USE_JIT == "true" else self._batch_forward
        )

    def _forward(self, x_single: jnp.ndarray) -> jnp.ndarray:  # type: ignore
        """Classify a single rank-1 input and return the arg-max index."""
        chex.assert_rank([x_single], [1])
        hidden = jax.nn.relu(jnp.dot(self._layer1_w.T, x_single) + self._layer1_b)
        logits = jnp.dot(self._layer2_w.T, hidden) + self._layer2_b
        return jnp.argmax(jax.nn.softmax(logits), axis=-1)

    def _batch_forward(self, x_batch: jnp.ndarray) -> jnp.ndarray:  # type: ignore
        """Apply ``_forward`` to every row of a rank-2 batch via ``jax.vmap``."""
        chex.assert_rank([x_batch], [2])
        vectorized = jax.vmap(self._forward)
        return vectorized(x_batch)

    def forward(self, data: List[dict]) -> List[dict]:
        """Run the classifier on a batch of ``{"array": ...}`` requests.

        Raises:
            ValidationError: if a request does not contain the ``"array"`` key.
        """
        started = time.perf_counter()
        try:
            rows = [item["array"] for item in data]
        except KeyError as err:
            raise ValidationError(f"cannot find key {err}") from err
        categories = self.batch_forward(jnp.array(rows)).tolist()
        # the same elapsed time is reported for every item of the batch
        elapse = time.perf_counter() - started
        return [{"category": category, "elapse": elapse} for category in categories]
/// Batching settings of one worker stage, deserialized from the service config.
#[derive(Deserialize, Debug)]
pub(crate) struct Runtime {
    // batch size setting of this stage
    pub max_batch_size: usize,
    // NOTE(review): the unit is not visible in this file — confirm against the consumer
    pub max_wait_time: u64,
    // worker name; the default config uses the same name in `Route::workers`
    pub worker: String,
}

/// One HTTP route together with its worker list and OpenAPI description parts.
#[derive(Deserialize)]
pub(crate) struct Route {
    // URL path of the route, e.g. "/inference" in the default config
    pub endpoint: String,
    // names of the workers attached to this route
    pub workers: Vec<String>,
    // MIME type string, printed as `resp(...)` by the `Debug` impl
    pub mime: String,
    // server-sent events flag (presumably enables SSE responses — confirm in the router)
    pub is_sse: bool,
    // optional OpenAPI pieces for this route
    pub request_body: Option<RequestBody>,
    pub responses: Option<BTreeMap<String, RefOr<Response>>>,
    pub schemas: Option<BTreeMap<String, RefOr<Schema>>>,
}

/// Compact debug output: `(endpoint: [worker, ...], resp(mime))`.
/// The OpenAPI fields and `is_sse` are intentionally left out of the output.
impl fmt::Debug for Route {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "({}: [{}], resp({}))",
            self.endpoint,
            self.workers.join(", "),
            self.mime
        )
    }
}

/// Top-level service configuration.
#[derive(Deserialize, Debug)]
pub(crate) struct Config {
    // socket dir
    pub path: String,
    // channel capacity
    pub capacity: usize,
    // service timeout (ms)
    pub timeout: u64,
    // service address
    pub address: String,
    // service port
    pub port: u16,
    // metrics namespace
    pub namespace: String,
    // log level: (debug, info, warning, error)
    pub log_level: String,
    // `zstd` & `gzip` compression
    pub compression: bool,
    // one entry per worker stage
    pub runtimes: Vec<Runtime>,
    // one entry per HTTP route
    pub routes: Vec<Route>,
}

/// Default configuration: a single `Inference_1` stage served on
/// `0.0.0.0:8000/inference` with JSON responses and compression disabled.
impl Default for Config {
    fn default() -> Self {
        Self {
            path: String::from("/tmp/mosec"),
            capacity: 1024,
            timeout: 3000,
            address: String::from("0.0.0.0"),
            port: 8000,
            namespace: String::from("mosec_service"),
            log_level: String::from("info"),
            compression: false,
            runtimes: vec![Runtime {
                max_batch_size: 64,
                max_wait_time: 3000,
                worker: String::from("Inference_1"),
            }],
            routes: vec![Route {
                endpoint: String::from("/inference"),
                workers: vec![String::from("Inference_1")],
                mime: String::from("application/json"),
                is_sse: false,
                request_body: None,
                responses: None,
                schemas: None,
            }],
        }
    }
}
def imitate_controller_send(
    sock: "Union[mock_socket, socket.socket]", data: List[bytes]
) -> Tuple[List[bytes], List[bytes]]:
    """Send a batch of payloads over ``sock`` the way the controller would.

    The message is a ``!HH`` header (status flag, number of tasks) followed,
    for every payload, by a random 4-byte task id, a 2-byte task state, the
    4-byte payload length and the payload itself.

    Args:
        sock: real or mock socket used to send the message.
        data: payloads to send, one per task.

    Returns:
        The packed task ids and the payloads, both in sending order.
    """
    # explicit byte format here for sanity check
    # placeholder flag, should be discarded by receiver
    header = struct.pack("!HH", HTTPStatus.OK, len(data))
    buf = BytesIO()
    buf.write(header)
    sent_ids = []
    sent_payloads = []
    for datum in data:
        tid = struct.pack("!I", random.randint(1, 100))
        sent_ids.append(tid)
        sent_payloads.append(datum)
        length = struct.pack("!I", len(datum))
        buf.write(tid)
        buf.write(struct.pack("!H", State.INGRESS | State.EGRESS))  # task state
        buf.write(length)
        buf.write(datum)

    sock.sendall(buf.getbuffer())  # type: ignore
    return sent_ids, sent_payloads


def wait_for_port_open(
    host: str = "127.0.0.1", port: int = 8000, timeout: int = 10
) -> bool:
    """Poll until a TCP connection to ``host:port`` succeeds.

    Returns:
        True once a connection was established, False if ``timeout`` seconds
        passed without success.
    """
    start_time = time.monotonic()
    while time.monotonic() - start_time < timeout:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((host, port))
            sock.shutdown(socket.SHUT_RDWR)
            return True
        except OSError:  # includes ConnectionRefusedError
            pass
        finally:
            sock.close()
        time.sleep(0.1)
    return False


def wait_for_port_free(
    host: str = "127.0.0.1", port: int = 8000, timeout: int = 5
) -> bool:
    """Poll until a TCP connection to ``host:port`` fails.

    Returns:
        True once a connection attempt raised ``OSError``, False if the port
        still accepted connections after ``timeout`` seconds.
    """
    start_time = time.monotonic()
    while time.monotonic() - start_time < timeout:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((host, port))
            sock.shutdown(socket.SHUT_RDWR)
        except OSError:  # includes ConnectionRefusedError
            return True
        finally:
            sock.close()
        time.sleep(0.1)
    return False


@contextlib.contextmanager
def env_context(**kwargs):
    """Temporarily set environment variables for testing.

    On exit the environment snapshot taken on entry is written back, and
    variables introduced by ``kwargs`` that did not exist before are removed.
    The restoration also runs when the managed block raises.
    """
    old_env = os.environ.copy()
    os.environ.update(kwargs)
    try:
        yield
    finally:
        # `update` alone can only overwrite, it never unsets the added keys
        for key in kwargs:
            if key not in old_env:
                os.environ.pop(key, None)
        os.environ.update(old_env)
class Preprocess(Worker):
    """Tokenize the request text with the DistilBERT tokenizer."""

    def __init__(self):
        super().__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(
            "distilbert-base-uncased-finetuned-sst-2-english"
        )

    def deserialize(self, data: bytes) -> str:
        # This is the *first* stage, so `deserialize` is overridden here:
        # `data` holds the raw bytes of the request body.
        text = data.decode()
        return text

    def forward(self, data: str) -> Returns:
        return self.tokenizer.encode(data, add_special_tokens=True)


class Inference(Worker):
    """Run batched DistilBERT sentiment classification with PyTorch."""

    resp_mime_type = "text/plain"

    def __init__(self):
        super().__init__()
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)
        logger.info("using computing device: %s", self.device)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "distilbert-base-uncased-finetuned-sst-2-english"
        )
        self.model.eval()
        self.model.to(self.device)

        # Overwrite self.example for warmup
        warmup_tokens = [101, 2023, 2003, 1037, 8403, 4937, 999, 102] * 5  # make sentence longer
        self.example = [warmup_tokens] * INFERENCE_BATCH_SIZE

    def forward(self, data: List[Returns]) -> List[str]:
        # token lists differ in length, so pad them into one batch tensor
        padded = torch.nn.utils.rnn.pad_sequence(
            [torch.tensor(token) for token in data], batch_first=True
        )
        with torch.no_grad():
            logits = self.model(padded.to(self.device))[0]
        scores = logits.softmax(dim=1).cpu().tolist()
        # each score row is a pair; the second entry is reported as "positive"
        return [f"positive={p}" for (_, p) in scores]

    def serialize(self, data: str) -> bytes:
        # This is the *last* stage, so `serialize` is overridden here:
        # `data` is one string produced by `forward`.
        encoded = data.encode()
        return encoded
class PlasmaRandomService(PlasmaShmIPCMixin, Worker):
    """Batched stage (plasma IPC mixin) returning a random array per request."""

    def forward(self, data: List[dict]) -> List[dict]:
        try:
            result = [{"x": np.random.rand(int(req["size"]))} for req in data]
        except KeyError as err:
            raise ValidationError(err) from err
        return result


class PlasmaDummyPostprocess(PlasmaShmIPCMixin, Worker):
    """This dummy stage is added to test the shm IPC"""

    def forward(self, data: dict) -> dict:
        # the array must arrive from the previous stage as a numpy array
        assert isinstance(data.get("x"), np.ndarray), f"wrong data type: {data}"
        data["x"] = data["x"].tolist()
        return data


class RedisRandomService(RedisShmIPCMixin, Worker):
    """Batched stage (redis IPC mixin) returning a random array per request."""

    def forward(self, data: List[dict]) -> List[dict]:
        try:
            result = [{"x": np.random.rand(int(req["size"]))} for req in data]
        except KeyError as err:
            raise ValidationError(err) from err
        return result


class RedisDummyPostprocess(RedisShmIPCMixin, Worker):
    """This dummy stage is added to test the shm IPC"""

    def forward(self, data: dict) -> dict:
        # the array must arrive from the previous stage as a numpy array
        assert isinstance(data.get("x"), np.ndarray), f"wrong data type: {data}"
        data["x"] = data["x"].tolist()
        return data


def start_redis_shm_mosec():
    """Run the two-stage test service with redis as the IPC storage."""
    # configure the redis server URL
    # this assumes the redis server is running at `localhost:6379`
    RedisShmIPCMixin.set_redis_url("redis://localhost:6379/0")

    server = Server()
    server.append_worker(RedisRandomService, max_batch_size=8)
    server.append_worker(RedisDummyPostprocess, num=2)
    server.run()


def start_plasma_shm_mosec():
    """Run the two-stage test service with a plasma store as the IPC storage."""
    from pyarrow import plasma  # type: ignore

    # initialize a 20 MB object store as shared memory
    with plasma.start_plasma_store(plasma_store_memory=20 * 1000 * 1000) as (
        shm_path,
        shm_process,
    ):
        # configure the plasma shm path
        PlasmaShmIPCMixin.set_plasma_path(shm_path)

        server = Server()
        server.register_daemon("plasma_server", shm_process)
        server.append_worker(PlasmaRandomService, max_batch_size=8)
        server.append_worker(PlasmaDummyPostprocess, num=2)
        server.run()


if __name__ == "__main__":
    if len(sys.argv) <= 1:
        print("Please specify a shm storage service to run: plasma or redis")
        sys.exit(1)

    SERVICE = sys.argv[1]
    if SERVICE == "plasma":
        start_plasma_shm_mosec()
    elif SERVICE == "redis":
        start_redis_shm_mosec()
    else:
        # fail loudly instead of exiting successfully without starting a server
        print(f"Unknown shm storage service {SERVICE!r}: expected plasma or redis")
        sys.exit(1)
/// Layout that renders every log record as one JSON object with the keys
/// `timestamp`, `level`, `target` and (when non-empty) `fields`.
#[derive(Debug)]
pub(crate) struct JsonLayout;

impl logforth::Layout for JsonLayout {
    fn format(&self, record: &Record, diags: &[Box<dyn Diagnostic>]) -> Result<Vec<u8>, Error> {
        let diagnostics = diags;

        // panics if the record time cannot be converted into a `jiff::Timestamp`
        let ts = jiff::Timestamp::try_from(record.time()).unwrap();

        // Collects key/value pairs into a JSON map.
        struct FieldsVisitor(Map<String, serde_json::Value>);

        impl Visitor for FieldsVisitor {
            fn visit(&mut self, key: Key, value: Value) -> Result<(), Error> {
                let key = key.to_string();
                // fall back to the string form when the value cannot be
                // converted into a JSON value
                match serde_json::to_value(&value) {
                    Ok(value) => self.0.insert(key, value),
                    Err(_) => self.0.insert(key, value.to_string().into()),
                };
                Ok(())
            }
        }

        let mut visitor = FieldsVisitor(Map::new());
        // the message is stored first under "message"; the record's own
        // key/values and then the diagnostics are visited afterwards and
        // insert into the same map
        visitor.visit(Key::new("message"), record.payload().into())?;
        record.key_values().visit(&mut visitor)?;
        for d in diagnostics {
            d.visit(&mut visitor)?;
        }

        // Shape of the emitted JSON line.
        #[derive(Debug, Clone, Serialize)]
        struct RecordLine<'a> {
            timestamp: String,
            level: &'a str,
            target: String,
            #[serde(skip_serializing_if = "Map::is_empty")]
            fields: Map<String, serde_json::Value>,
        }

        let record_line = RecordLine {
            timestamp: format!("{ts:.6}"),
            level: record.level().name(),
            // "<target>:<line>", the line falls back to its default when unknown
            target: format!("{}:{}", record.target(), record.line().unwrap_or_default(),),
            fields: visitor.0,
        };

        Ok(serde_json::to_vec(&record_line).unwrap())
    }
}
MOSEC_ENV_PREFIX = "MOSEC_"
MOSEC_ENV_CONFIG = {
    "path": str,
    "capacity": int,
    "timeout": int,
    "address": str,
    "port": int,
    "namespace": str,
    "debug": bool,
    "dry_run": bool,
    "log_level": str,
}

# Spellings of a boolean environment variable that mean "disabled".
_FALSY_ENV_VALUES = frozenset({"0", "false", "no", "off"})


def _parse_bool(value: str) -> bool:
    """Interpret an environment string as a flag; only falsy spellings give False."""
    return value.strip().lower() not in _FALSY_ENV_VALUES


@contextlib.contextmanager
def env_var_context(env: Union[None, List[Dict[str, str]]], index: int):
    """Manage the environment variables for a worker process.

    Args:
        env: one dictionary of environment variables per worker, or None to
            leave the environment untouched.
        index: position of the current worker in ``env``.

    On exit every overridden variable gets its previous value back; variables
    that were not set before are removed again instead of being left behind
    as empty strings.
    """
    previous: Dict[str, Union[str, None]] = {}
    try:
        if env is not None:
            for key, value in env[index].items():
                # None marks "was not set", which differs from an empty value
                previous[key] = os.environ.get(key)
                os.environ[key] = value
        yield None
    finally:
        for key, value in previous.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value


def get_env_namespace(prefix: str = MOSEC_ENV_PREFIX) -> Namespace:
    """Read the config from environment variables before the argument parsing.

    Priority: CLI > env > default value.

    Unset or empty variables are skipped. Values that fail the type
    conversion are skipped with a ``RuntimeWarning``. Boolean options are
    False for ``0``/``false``/``no``/``off`` (case-insensitive) and True for
    any other non-empty value.
    """
    namespace = Namespace()
    for name, converter in MOSEC_ENV_CONFIG.items():
        var = f"{prefix}{name.upper()}"
        value = os.getenv(var)
        if not value:
            continue
        # `bool("false")` is True, so flags need explicit parsing
        parse = _parse_bool if converter is bool else converter
        try:
            val = parse(value)
        except ValueError as err:
            warnings.warn(
                f"failed to convert env {var}={value} to type {converter} {err}, "
                "will skip this one",
                RuntimeWarning,
                stacklevel=2,
            )
        else:
            setattr(namespace, name, val)

    return namespace


def validate_int_ge(number, name, threshold=1):
    """Validate that the int number is no less than the threshold."""
    assert isinstance(number, int), (
        f"{name} must be integer but you give {type(number)}"
    )
    assert number >= threshold, f"{name} must be no less than {threshold}"


def validate_float_ge(number, name, threshold=0.0):
    """Validate that the float number is no less than the threshold."""
    assert isinstance(number, float), (
        f"{name} must be float but you give {type(number)}"
    )
    assert number >= threshold, f"{name} must be no less than {threshold}"


def validate_str_dict(dictionary: Dict):
    """Return True if every key and every value of the dictionary is a string."""
    return all(
        isinstance(key, str) and isinstance(value, str)
        for key, value in dictionary.items()
    )


def validate_env(env: Union[Any, List[Dict[str, str]]], num: int):
    """Validate that env is None or a list of ``num`` string dictionaries.

    Raises:
        AssertionError: if the length differs from ``num`` or an element is
            not a dictionary with string keys and string values.
    """
    if env is None:
        return
    assert len(env) == num, "len(env) must equal to num"
    valid = isinstance(env, list) and all(
        isinstance(x, dict) and validate_str_dict(x) for x in env
    )
    assert valid, "env must be a list of string dictionary"
-------------------------------------------------------------------------------- 1 | # PyTorch Examples 2 | 3 | Here are some out-of-the-box model servers powered by mosec for [PyTorch](https://pytorch.org/) users. We use the version 1.9.0 in the following examples. 4 | 5 | ## Natural Language Processing 6 | 7 | Natural language processing model servers usually receive text data and make predictions ranging from text classification, question answering to translation and text generation. 8 | 9 | ### Sentiment Analysis 10 | 11 | This server receives a string and predicts how positive its content is. We build the model server based on [Transformers](https://github.com/huggingface/transformers) of version 4.11.0. 12 | 13 | We show how to customize the `deserialize` method of the ingress stage (`Preprocess`) and the `serialize` method of the egress stage (`Inference`). In this way, we can enjoy the high flexibility, directly reading data bytes from request body and writing the results into response body. 14 | 15 | Note that in a stage that enables batching (e.g. `Inference` in this example), its worker's `forward` method deals with a list of data, while its `serialize` and `deserialize` methods only need to manipulate individual datum. 16 | 17 | #### Server 18 | 19 | ```shell 20 | python distil_bert_server_pytorch.py 21 | ``` 22 | 23 | <details> 24 | <summary>distil_bert_server_pytorch.py</summary> 25 | 26 | ```{include} ../../../examples/distil_bert_server_pytorch.py 27 | :code: python 28 | ``` 29 | 30 | </details> 31 | 32 | #### Client 33 | 34 | ```shell 35 | echo 'i bought this product for many times, highly recommend' | http POST :8000/inference 36 | ``` 37 | 38 | ## Computer Vision 39 | 40 | Computer vision model servers usually receive images or links to the images (downloading from the link becomes an I/O workload then), feed the preprocessed image data into the model and extract information like categories, bounding boxes and pixel labels as results. 
41 | 42 | ### Image Recognition 43 | 44 | This server receives an image and classifies it according to the [ImageNet](https://www.image-net.org/) categorization. We specifically use [ResNet](https://arxiv.org/abs/1512.03385) as an image classifier and build a model service based on it. Nevertheless, this file serves as the starter code for any kind of image recognition model server. 45 | 46 | We enable multiprocessing for the `Preprocess` stage, so that it can produce enough tasks for the `Inference` stage to do **batch inference**, which better exploits the GPU computing power. More interestingly, we also start multiple models by setting the number of workers for the `Inference` stage to 2. This is because a single model can hardly use up the GPU's memory or compute capacity. Multiple models running on the same device in parallel can further increase our service throughput. 47 | 48 | When instantiating the `Server`, we enable `plasma_shm`, which utilizes [`pyarrow.plasma`](https://arrow.apache.org/docs/11.0/python/plasma.html) as a shared memory data store for IPC. This can speed up the data transfer, especially when the data is large (preprocessed image data in this case). Note that you need to run `pip install -U pyarrow==11` to install the necessary dependencies. 49 | 50 | We also demonstrate how to customize **validation** of the data content through this example. In the `forward` method of the `Preprocess` worker, we first check the key of the input, then try to decode the str and load it into an array. If any of these steps fails, we raise a `ValidationError`. The status is finally returned to our clients as [HTTP 422](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/422). 
51 | 52 | #### Server 53 | 54 | ```shell 55 | python examples/resnet50_msgpack/server.py 56 | ``` 57 | 58 | <details> 59 | <summary>resnet50_server_msgpack.py</summary> 60 | 61 | ```{include} ../../../examples/resnet50_msgpack/server.py 62 | :code: python 63 | ``` 64 | 65 | </details> 66 | 67 | #### Client 68 | 69 | ```shell 70 | python examples/resnet50_msgpack/client.py 71 | ``` 72 | 73 | <details> 74 | <summary>resnet50_client_msgpack.py</summary> 75 | 76 | ```{include} ../../../examples/resnet50_msgpack/client.py 77 | :code: python 78 | ``` 79 | 80 | </details> 81 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # https://peps.python.org/pep-0621/ 2 | [project] 3 | name = "mosec" 4 | description = "Model Serving made Efficient in the Cloud" 5 | readme = "README.md" 6 | authors = [ 7 | { name = "Keming", email = "kemingy94@gmail.com" }, 8 | { name = "Zichen", email = "lkevinzc@gmail.com" }, 9 | ] 10 | license = { text = "Apache-2.0" } 11 | keywords = ["machine learning", "deep learning", "model serving"] 12 | dynamic = ["version"] 13 | requires-python = ">=3.10" 14 | dependencies = [] 15 | classifiers = [ 16 | "Environment :: GPU", 17 | "Intended Audience :: Developers", 18 | "Intended Audience :: Science/Research", 19 | "License :: OSI Approved :: Apache Software License", 20 | "Programming Language :: Python :: 3 :: Only", 21 | "Programming Language :: Python :: 3.10", 22 | "Programming Language :: Python :: 3.11", 23 | "Programming Language :: Python :: 3.12", 24 | "Programming Language :: Python :: 3.13", 25 | "Programming Language :: Python :: 3.14", 26 | "Programming Language :: Python :: Implementation :: CPython", 27 | "Programming Language :: Python :: Implementation :: PyPy", 28 | "Programming Language :: Rust", 29 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 30 | "Topic :: Software Development :: 
Libraries :: Python Modules", 31 | "Topic :: Software Development :: Build Tools", 32 | ] 33 | 34 | [project.urls] 35 | homepage = "https://mosecorg.github.io/" 36 | documentation = "https://mosecorg.github.io/mosec/" 37 | repository = "https://github.com/mosecorg/mosec" 38 | changelog = "https://github.com/mosecorg/mosec/releases" 39 | [project.scripts] 40 | [project.optional-dependencies] 41 | validation = ["msgspec>=0.20"] 42 | redis = ["redis>=5.2.1"] 43 | msgpack = ["msgpack>=1.1.0"] 44 | numbin = ["numbin>=0.5.0"] 45 | 46 | [build-system] 47 | requires = ["maturin>=1.8,<2.0"] 48 | build-backend = "maturin" 49 | 50 | [tool.maturin] 51 | bindings = "bin" 52 | python-packages = ["mosec"] 53 | strip = true 54 | exclude = ["tests/**/*", "docs/**/*", "examples/**/*"] 55 | 56 | [tool.uv] 57 | cache-keys = [ 58 | { file = "pyproject.toml" }, 59 | { file = "Cargo.toml" }, 60 | { file = "src/**/*.rs" }, 61 | ] 62 | 63 | [tool.mypy] 64 | python_version = "3.10" 65 | warn_redundant_casts = true 66 | warn_unreachable = true 67 | pretty = true 68 | 69 | [[tool.mypy.overrides]] 70 | module = [ 71 | "torch.*", 72 | "transformers", 73 | "sentence_transformers", 74 | "llmspec", 75 | "openai", 76 | ] 77 | ignore_missing_imports = true 78 | 79 | [tool.pyright] 80 | venvPath = "." 
81 | venv = ".venv" 82 | pythonPlatform = "Linux" 83 | pythonVersion = "3.10" 84 | include = ["mosec", "tests", "examples"] 85 | reportMissingImports = "warning" 86 | 87 | [tool.pytest.ini_options] 88 | markers = ["shm: mark a test is related to shared memory"] 89 | 90 | [tool.ruff.lint] 91 | select = ["E", "F", "G", "B", "I", "SIM", "TID", "PL", "RUF", "D"] 92 | ignore = ["E501", "D203", "D213", "PLC0415"] 93 | [tool.ruff.lint.isort] 94 | known-first-party = ["mosec"] 95 | [tool.ruff.lint.pylint] 96 | max-args = 10 97 | [tool.ruff.lint.per-file-ignores] 98 | "tests/*" = ["D"] 99 | "examples/*" = ["D"] 100 | [tool.ruff.lint.pydocstyle] 101 | convention = "google" 102 | 103 | [tool.typos] 104 | [tool.typos.default.extend-words] 105 | typ = "typ" 106 | 107 | [dependency-groups] 108 | doc = [ 109 | "furo>=2022.12.7", 110 | "myst-parser>=0.18", 111 | "sphinx>=7.4.7", 112 | "sphinx-autodoc-typehints>=1.22", 113 | "sphinx-copybutton>=0.5", 114 | "sphinx-sitemap>=2.6.0", 115 | "sphinxcontrib-napoleon>=0.7", 116 | "sphinxcontrib-programoutput>=0.17", 117 | "sphinxext-opengraph>=0.8", 118 | ] 119 | dev = [ 120 | "httpx-sse==0.4.3", 121 | "httpx[http2]==0.28.1", 122 | "maturin>=1.8,<2.0", 123 | "mypy~=1.15", 124 | "prek>=0.1.2", 125 | "pyright~=1.1", 126 | "pytest>=8", 127 | "pytest-mock>=3.5", 128 | "ruff>=0.11.11", 129 | "zstandard~=0.23", 130 | ] 131 | mixin = [ 132 | "msgpack>=1.1.0", 133 | "msgspec>=0.20", 134 | "numbin>=0.5.0", 135 | "numpy<2 ; python_full_version < '3.12'", 136 | "numpy>=2 ; python_full_version >= '3.12'", 137 | "pyarrow>=0.6.1,<12 ; python_full_version < '3.12'", # pyarrow legacy dependency 138 | "redis>=4.0.0", 139 | ] 140 | -------------------------------------------------------------------------------- /examples/resnet50_msgpack/server.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not 
class Preprocess(MsgpackMixin, Worker):
    """Sample Preprocess worker.

    Decodes the image bytes carried in the request and converts them into a
    normalized tensor for the next stage.
    """

    def __init__(self) -> None:
        super().__init__()
        # Scripted pipeline: resize -> center crop to 224x224 -> normalize
        # with three per-channel mean/std values.
        trans = torch.nn.Sequential(
            transforms.Resize((256, 256)),
            transforms.CenterCrop(224),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        )
        self.transform = torch.jit.script(trans)  # type: ignore

    def forward(self, data: dict):
        """Decode ``data["image"]`` and return the transformed tensor.

        Args:
            data: decoded request body; must hold the raw image bytes under
                the ``"image"`` key.

        Returns:
            The output of the scripted transform pipeline.

        Raises:
            ValidationError: if the key is missing or the bytes cannot be
                decoded as an image, so that the client gets 422 as the
                http status.
        """
        try:
            # `Image.open` only parses the header; `convert` forces the full
            # decode to happen inside this `try` (so corrupted data is
            # reported as a validation error) and guarantees the three
            # channels that `Normalize` is configured for.
            image = Image.open(BytesIO(data["image"])).convert("RGB")
        except KeyError as err:
            raise ValidationError(f"cannot find key {err}") from err
        except Exception as err:
            raise ValidationError(f"cannot decode as image data: {err}") from err

        tensor = transforms.ToTensor()(image)
        return self.transform(tensor)  # type: ignore
class Inference(Worker):
    """Sample Inference worker.

    Runs a pretrained ResNet-50 on batches of preprocessed images.
    """

    def __init__(self):
        super().__init__()
        self.device = (
            torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        )
        logger.info("using computing device: %s", self.device)
        self.model = torchvision.models.resnet50(pretrained=True)
        self.model.eval()
        self.model.to(self.device)

        # Overwrite self.example for warmup. The shape matches what the
        # preprocess stage emits after `CenterCrop(224)`, so warmup exercises
        # the same input size as real traffic.
        self.example = [
            np.zeros((3, 224, 224), dtype=np.float32)
        ] * INFERENCE_BATCH_SIZE

    def forward(self, data: List[np.ndarray]) -> List[int]:
        """Classify a batch and return the top-1 class index of each item."""
        logger.info("processing batch with size: %d", len(data))
        with torch.no_grad():
            batch = torch.stack([torch.tensor(arr, device=self.device) for arr in data])
            output = self.model(batch)
            top1 = torch.argmax(output, dim=1)
        return top1.cpu().tolist()


class Postprocess(MsgpackMixin, Worker):
    """Sample Postprocess worker.

    Maps a class index to its human readable ImageNet category.
    """

    def __init__(self):
        super().__init__()
        logger.info("loading categories file...")
        local_filename, _ = urlretrieve(
            "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
        )

        # One category name per line; the line number is the class index.
        with open(local_filename, encoding="utf8") as file:
            self.categories = [line.strip() for line in file]

    def forward(self, data: int) -> dict:
        """Return the category name for the class index ``data``."""
        return {"category": self.categories[data]}


if __name__ == "__main__":
    server = Server()
    server.append_worker(Preprocess, num=4)
    server.append_worker(Inference, num=2, max_batch_size=INFERENCE_BATCH_SIZE)
    server.append_worker(Postprocess, num=1)
    server.run()
"License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Exceptions used in the Worker. 16 | 17 | Suppose the input dataflow of our model server is as follows: 18 | 19 | **bytes** ``->`` *deserialize* ``->`` **data** ``->`` *parse* ``->`` **valid data** 20 | 21 | If the raw bytes cannot be successfully deserialized, the `DecodingError` 22 | is raised; if the decoded data cannot pass the validation check (usually 23 | implemented by users), the `ValidationError` should be raised. 24 | """ 25 | 26 | from mosec.protocol import HTTPStatusCode 27 | 28 | 29 | class MosecError(Exception): 30 | """Mosec basic exception.""" 31 | 32 | code: HTTPStatusCode = HTTPStatusCode.INTERNAL_ERROR 33 | msg: str = "mosec error" 34 | 35 | 36 | class ClientError(MosecError): 37 | """Client side error. 38 | 39 | This error indicates that the server cannot or will not process the request 40 | due to something that is perceived to be a client error. It will return the 41 | details to the client side with 42 | `HTTP 400 <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400>`__. 43 | """ 44 | 45 | code = HTTPStatusCode.BAD_REQUEST 46 | msg = "bad request" 47 | 48 | 49 | class ServerError(MosecError): 50 | """Server side error. 51 | 52 | This error indicates that the server encountered an unexpected condition 53 | that prevented it from fulfilling the request. It will return the details 54 | to the client side with 55 | `HTTP 500 <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/500>`__. 
56 | 57 | Attention: be careful about the returned message since it may contain some 58 | sensitive information. If you don't want to return the details, just raise 59 | an exception that is not inherited from `mosec.errors.MosecError`. 60 | """ 61 | 62 | code = HTTPStatusCode.INTERNAL_ERROR 63 | msg = "internal error" 64 | 65 | 66 | class EncodingError(ServerError): 67 | """Serialization error. 68 | 69 | The `EncodingError` should be raised in user-implemented codes when 70 | the serialization for the response bytes fails. This error will set 71 | to status code to 72 | `HTTP 500 <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/500>`__ 73 | and show the details in the response. 74 | """ 75 | 76 | msg = "encoding error" 77 | 78 | 79 | class DecodingError(ClientError): 80 | """De-serialization error. 81 | 82 | The `DecodingError` should be raised in user-implemented codes 83 | when the de-serialization for the request bytes fails. This error 84 | will set the status code to 85 | `HTTP 400 <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400>`__ 86 | in the response. 87 | """ 88 | 89 | msg = "decoding error" 90 | 91 | 92 | class ValidationError(MosecError): 93 | """Request data validation error. 94 | 95 | The `ValidationError` should be raised in user-implemented codes, 96 | where the validation for the input data fails. Usually, it should be 97 | put after the data de-serialization, which converts the raw bytes 98 | into structured data. This error will set the status code to 99 | `HTTP 422 <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/422>`__ 100 | in the response. 101 | """ 102 | 103 | code = HTTPStatusCode.VALIDATION_ERROR 104 | msg = "request validation error" 105 | 106 | 107 | class MosecTimeoutError(BaseException): 108 | """Exception raised when a MOSEC worker operation times out. 
109 | 110 | If a bug in the forward code causes the worker to hang indefinitely, a timeout 111 | can be used to ensure that the worker eventually returns control to the main 112 | thread program. When a timeout occurs, the `MosecTimeout` exception is raised. 113 | This exception can be caught and handled appropriately to perform any necessary 114 | cleanup tasks or return a response indicating that the operation timed out. 115 | 116 | Note that `MosecTimeout` is a subclass of `BaseException`, not `Exception`. 117 | This is because timeouts should not be caught and handled in the same way as 118 | other exceptions. Instead, they should be handled in a separate `except` block 119 | which isn't designed to break the working loop. 120 | """ 121 | 122 | code = HTTPStatusCode.TIMEOUT_ERROR 123 | msg = "mosec timeout error" 124 | -------------------------------------------------------------------------------- /examples/embedding/server.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 MOSEC Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class Embedding(Worker):
    """OpenAI compatible sentence embedding worker.

    The model name is read from the ``EMB_MODEL`` environment variable and
    falls back to ``DEFAULT_MODEL``.
    """

    def __init__(self):
        super().__init__()
        self.model_name = os.getenv("EMB_MODEL", DEFAULT_MODEL)
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name)
        self.model = transformers.AutoModel.from_pretrained(self.model_name)
        self.device = (
            torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
        )

        self.model = self.model.to(self.device)
        self.model.eval()

    def get_embedding_with_token_count(
        self, sentences: Union[str, List[Union[str, List[int]]]]
    ):
        """Embed ``sentences`` and count the tokens that were consumed.

        Args:
            sentences: a single sentence or a list of sentences.

        Returns:
            A ``(token_count, sentence_embeddings)`` tuple where
            ``token_count`` is the number of non-padding tokens summed over
            every input sentence and ``sentence_embeddings`` holds one
            L2-normalized row per sentence.
        """

        # Mean Pooling - Take attention mask into account for correct averaging
        def mean_pooling(model_output, attention_mask):
            # First element of model_output contains all token embeddings
            token_embeddings = model_output[0]
            input_mask_expanded = (
                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            )
            return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
                input_mask_expanded.sum(1), min=1e-9
            )

        # Tokenize sentences
        # TODO: support `List[List[int]]` input
        encoded_input = self.tokenizer(
            sentences, padding=True, truncation=True, return_tensors="pt"
        )
        inputs = encoded_input.to(self.device)
        attention_mask = inputs["attention_mask"]
        # Sum over the whole mask so that every sentence of a batched request
        # contributes to the usage, not only the first one.
        token_count = int(attention_mask.sum().item())
        # Inference only: skip autograd bookkeeping while computing the
        # token embeddings, pooling and normalization.
        with torch.no_grad():
            model_output = self.model(**inputs)
            sentence_embeddings = mean_pooling(model_output, attention_mask)
            sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

        return token_count, sentence_embeddings

    def deserialize(self, data: bytes) -> EmbeddingRequest:
        """Parse the request bytes into an `EmbeddingRequest`."""
        return EmbeddingRequest.from_bytes(data)

    def serialize(self, data: EmbeddingResponse) -> bytes:
        """Encode the `EmbeddingResponse` as JSON."""
        return data.to_json()

    def forward(self, data: EmbeddingRequest) -> EmbeddingResponse:
        """Compute the embeddings for one request.

        Raises:
            ClientError: if the request asks for a model other than the one
                loaded by this worker.
        """
        if data.model != self.model_name:
            raise ClientError(
                f"the requested model {data.model} is not supported by "
                f"this worker {self.model_name}"
            )
        token_count, embeddings = self.get_embedding_with_token_count(data.input)
        # `.cpu()` is a no-op when the tensor already lives on the CPU.
        embeddings = embeddings.detach().cpu().numpy()
        if data.encoding_format == "base64":
            embeddings = [
                base64.b64encode(emb.astype(np.float32).tobytes()).decode("utf-8")
                for emb in embeddings
            ]
        else:
            embeddings = [emb.tolist() for emb in embeddings]

        resp = EmbeddingResponse(
            data=[
                EmbeddingData(embedding=emb, index=i)
                for i, emb in enumerate(embeddings)
            ],
            model=self.model_name,
            usage=TokenUsage(
                prompt_tokens=token_count,
                # No completions performed, only embeddings generated.
                completion_tokens=0,
                total_tokens=token_count,
            ),
        )
        return resp


if __name__ == "__main__":
    server = Server()
    emb = Runtime(Embedding)
    server.register_runtime(
        {
            "/v1/embeddings": [emb],
            "/embeddings": [emb],
        }
    )
    server.run()
/// Label set of the throughput counter: one series per status code and endpoint.
#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
pub struct CodeLabel {
    pub code: u16,
    pub endpoint: String,
}

/// Label set shared by the histograms: one series per stage and connection.
#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
pub struct StageConnectionLabel {
    pub stage: String,
    pub connection: String,
}

/// All metric families exposed by the service.
#[derive(Debug)]
pub(crate) struct Metrics {
    pub(crate) throughput: Family<CodeLabel, Counter>,
    pub(crate) duration: Family<StageConnectionLabel, Histogram, CustomHistogramBuilder>,
    pub(crate) batch_size: Family<StageConnectionLabel, Histogram>,
    pub(crate) batch_duration: Family<StageConnectionLabel, Histogram>,
    pub(crate) remaining_task: Gauge,
}

/// Histogram constructor whose number of buckets is chosen at runtime.
#[derive(Clone)]
pub(crate) struct CustomHistogramBuilder {
    // number of exponential buckets to create
    length: u16,
}

impl MetricConstructor<Histogram> for CustomHistogramBuilder {
    fn new_metric(&self) -> Histogram {
        // When a new histogram is created, this function will be called.
        // Buckets start at 1e-3 and double `length` times.
        Histogram::new(exponential_buckets(1e-3f64, 2f64, self.length))
    }
}

impl Metrics {
    /// Return the process-wide metrics.
    ///
    /// Panics if `METRICS` has not been set yet.
    pub(crate) fn global() -> &'static Metrics {
        METRICS.get().expect("Metrics is not initialized")
    }

    /// Build the metric families.
    ///
    /// The duration histogram gets `ceil(log2(timeout)) + 1` buckets, so its
    /// largest bucket is at least `timeout * 1e-3`.
    /// NOTE(review): this presumably means `timeout` is in milliseconds while
    /// the buckets are in seconds -- confirm against the config definition.
    pub(crate) fn new(timeout: u64) -> Self {
        let builder = CustomHistogramBuilder {
            length: (timeout as f64).log2().ceil() as u16 + 1,
        };
        Self {
            throughput: Family::<CodeLabel, Counter>::default(),
            duration:
                Family::<StageConnectionLabel, Histogram, CustomHistogramBuilder>::new_with_constructor(
                    builder,
                ), // 1ms ~ 4.096s (default)
            batch_size: Family::<StageConnectionLabel, Histogram>::new_with_constructor(|| {
                Histogram::new(exponential_buckets(1f64, 2f64, 10)) // 1 ~ 512
            }),
            batch_duration: Family::<StageConnectionLabel, Histogram>::new_with_constructor(|| {
                Histogram::new(exponential_buckets(1e-3f64, 2f64, 13)) // 1ms ~ 4.096s
            }),
            remaining_task: Gauge::default(),
        }
    }

    /// Create the metrics, register them under `namespace` and fill the
    /// `DURATION_LABEL` and `REGISTRY` globals.
    ///
    /// The returned value is not stored in `METRICS` here; that is left to
    /// the caller. Panics if `DURATION_LABEL` or `REGISTRY` was already set.
    pub(crate) fn init_with_namespace(namespace: &str, timeout: u64) -> Self {
        DURATION_LABEL
            .set(StageConnectionLabel {
                stage: "total".to_string(),
                connection: "total".to_string(),
            })
            .unwrap();
        let mut registry = <Registry>::default();
        let metrics = Metrics::new(timeout);
        registry.register(
            format!("{namespace}_throughput"),
            "service inference endpoint throughput",
            metrics.throughput.clone(),
        );
        registry.register(
            format!("{namespace}_process_duration_second"),
            "process duration for each connection in each stage",
            metrics.duration.clone(),
        );
        registry.register(
            format!("{namespace}_batch_size"),
            "batch size for each connection in each stage",
            metrics.batch_size.clone(),
        );
        registry.register(
            format!("{namespace}_batch_duration_second"),
            "dynamic batching duration for each connection in each stage",
            metrics.batch_duration.clone(),
        );
        registry.register(
            format!("{namespace}_remaining_task"),
            "remaining tasks for the whole service",
            metrics.remaining_task.clone(),
        );
        REGISTRY.set(registry).unwrap();
        metrics
    }
}

/// Process-wide metrics, set once at startup.
pub(crate) static METRICS: OnceLock<Metrics> = OnceLock::new();
/// Registry holding every metric registered in `init_with_namespace`.
pub(crate) static REGISTRY: OnceLock<Registry> = OnceLock::new();
/// The `total`/`total` label, set in `init_with_namespace`.
pub(crate) static DURATION_LABEL: OnceLock<StageConnectionLabel> = OnceLock::new();
14 | 15 | #![forbid(unsafe_code)] 16 | 17 | mod apidoc; 18 | mod config; 19 | mod errors; 20 | mod layouts; 21 | mod metrics; 22 | mod protocol; 23 | mod routes; 24 | mod tasks; 25 | 26 | use std::env; 27 | use std::fs::read_to_string; 28 | use std::net::SocketAddr; 29 | 30 | use axum::Router; 31 | use axum::routing::{get, post}; 32 | use log::{debug, info}; 33 | use logforth::append; 34 | use logforth::record::{Level, LevelFilter}; 35 | use tokio::signal::unix::{SignalKind, signal}; 36 | use tower::ServiceBuilder; 37 | use tower_http::compression::CompressionLayer; 38 | use tower_http::decompression::RequestDecompressionLayer; 39 | use utoipa::OpenApi; 40 | use utoipa_swagger_ui::SwaggerUi; 41 | 42 | use crate::apidoc::MosecOpenAPI; 43 | use crate::config::Config; 44 | use crate::layouts::{ColoredLayout, JsonLayout}; 45 | use crate::metrics::{METRICS, Metrics}; 46 | use crate::routes::{RustAPIDoc, index, inference, metrics, sse_inference}; 47 | use crate::tasks::{TASK_MANAGER, TaskManager}; 48 | 49 | async fn shutdown_signal() { 50 | let mut interrupt = signal(SignalKind::interrupt()).unwrap(); 51 | let mut terminate = signal(SignalKind::terminate()).unwrap(); 52 | loop { 53 | tokio::select! 
{ 54 | _ = interrupt.recv() => { 55 | info!("service received interrupt signal, will ignore it here \ 56 | since it should be controlled by the main process (send SIGTERM \ 57 | to `mosec` if you really want to kill it manually)"); 58 | }, 59 | _ = terminate.recv() => { 60 | info!("service received terminate signal"); 61 | let task_manager = TaskManager::global(); 62 | task_manager.shutdown().await; 63 | info!("service shutdown complete"); 64 | break; 65 | }, 66 | } 67 | } 68 | } 69 | 70 | #[tokio::main] 71 | async fn run(conf: &Config) { 72 | let mut doc = MosecOpenAPI { 73 | api: RustAPIDoc::openapi(), 74 | }; 75 | for route in &conf.routes { 76 | doc.merge_route(route); 77 | } 78 | doc.clean(); 79 | 80 | let metrics_instance = Metrics::init_with_namespace(&conf.namespace, conf.timeout); 81 | METRICS.set(metrics_instance).unwrap(); 82 | let mut task_manager = TaskManager::new(conf.timeout); 83 | let barrier = task_manager.init_from_config(conf); 84 | TASK_MANAGER.set(task_manager).unwrap(); 85 | 86 | let mut router = Router::new() 87 | .merge(SwaggerUi::new("/openapi/swagger").url("/openapi/metadata.json", doc.api)) 88 | .route("/", get(index)) 89 | .route("/metrics", get(metrics)); 90 | 91 | for route in &conf.routes { 92 | if route.is_sse { 93 | router = router.route(&route.endpoint, post(sse_inference)); 94 | } else { 95 | router = router.route(&route.endpoint, post(inference)); 96 | } 97 | } 98 | 99 | if conf.compression { 100 | router = router.layer( 101 | ServiceBuilder::new() 102 | .layer(RequestDecompressionLayer::new()) 103 | .layer(CompressionLayer::new()), 104 | ); 105 | } 106 | 107 | // wait until each stage has at least one worker alive 108 | barrier.wait().await; 109 | let addr: SocketAddr = format!("{}:{}", conf.address, conf.port).parse().unwrap(); 110 | let listener = tokio::net::TcpListener::bind(addr).await.unwrap(); 111 | info!(addr:?; "http service is running"); 112 | axum::serve(listener, router) 113 | 
.with_graceful_shutdown(shutdown_signal()) 114 | .await 115 | .unwrap(); 116 | } 117 | 118 | fn main() { 119 | // let opts: Opts = argh::from_env(); 120 | let cmd_args: Vec<String> = env::args().collect(); 121 | if cmd_args.len() != 2 { 122 | println!("expect one argument as the config path but got {cmd_args:?}"); 123 | return; 124 | } 125 | let config_str = read_to_string(&cmd_args[1]).expect("read config file failure"); 126 | let conf: Config = serde_json::from_str(&config_str).expect("parse config failure"); 127 | 128 | if conf.log_level == "debug" { 129 | // use colorful log for debug 130 | logforth::starter_log::builder() 131 | .dispatch(|d| { 132 | d.filter(LevelFilter::MoreSevereEqual(Level::Debug)) 133 | .append(append::Stderr::default().with_layout(ColoredLayout)) 134 | }) 135 | .apply(); 136 | } else { 137 | // use JSON format for production 138 | let level_filter = 139 | LevelFilter::MoreSevereEqual(match conf.log_level.to_ascii_lowercase().as_str() { 140 | "error" => Level::Error, 141 | "warning" => Level::Warn, 142 | _ => Level::Info, 143 | }); 144 | logforth::starter_log::builder() 145 | .dispatch(|d| { 146 | d.filter(level_filter) 147 | .append(append::Stderr::default().with_layout(JsonLayout)) 148 | }) 149 | .apply(); 150 | } 151 | 152 | debug!(conf:?; "parse service arguments"); 153 | run(&conf); 154 | } 155 | -------------------------------------------------------------------------------- /.github/workflows/package.yml: -------------------------------------------------------------------------------- 1 | # This file added mautrin autogenerated ci file by maturin v1.8.1 2 | # DO NOT OVERWRITE THIS FILE by `maturin generate-ci github` directly 3 | 4 | name: PyPI Publish 5 | 6 | on: 7 | release: 8 | types: [created] 9 | workflow_dispatch: 10 | 11 | concurrency: 12 | group: ${{ github.ref }}-${{ github.workflow }} 13 | cancel-in-progress: true 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | linux: 20 | runs-on: ${{ 
matrix.platform.runner }} 21 | strategy: 22 | matrix: 23 | platform: 24 | - runner: ubuntu-22.04 25 | target: x86_64 26 | - runner: ubuntu-22.04 27 | target: x86 28 | - runner: ubuntu-22.04 29 | target: aarch64 30 | - runner: ubuntu-22.04 31 | target: armv7 32 | - runner: ubuntu-22.04 33 | target: s390x 34 | - runner: ubuntu-22.04 35 | target: ppc64le 36 | steps: 37 | - uses: actions/checkout@v6 38 | - name: Build wheels 39 | uses: PyO3/maturin-action@v1 40 | with: 41 | target: ${{ matrix.platform.target }} 42 | args: --release --out dist 43 | sccache: 'true' 44 | manylinux: auto 45 | - name: Upload wheels 46 | uses: actions/upload-artifact@v5 47 | with: 48 | name: wheels-linux-${{ matrix.platform.target }} 49 | path: dist 50 | 51 | musllinux: 52 | runs-on: ${{ matrix.platform.runner }} 53 | strategy: 54 | matrix: 55 | platform: 56 | - runner: ubuntu-22.04 57 | target: x86_64 58 | - runner: ubuntu-22.04 59 | target: x86 60 | - runner: ubuntu-22.04 61 | target: aarch64 62 | - runner: ubuntu-22.04 63 | target: armv7 64 | steps: 65 | - uses: actions/checkout@v6 66 | - name: Build wheels 67 | uses: PyO3/maturin-action@v1 68 | with: 69 | target: ${{ matrix.platform.target }} 70 | args: --release --out dist 71 | sccache: 'true' 72 | manylinux: musllinux_1_2 73 | - name: Upload wheels 74 | uses: actions/upload-artifact@v5 75 | with: 76 | name: wheels-musllinux-${{ matrix.platform.target }} 77 | path: dist 78 | 79 | macos: 80 | runs-on: ${{ matrix.platform.runner }} 81 | strategy: 82 | matrix: 83 | platform: 84 | - runner: macos-15-intel 85 | target: x86_64 86 | - runner: macos-14 87 | target: aarch64 88 | steps: 89 | - uses: actions/checkout@v6 90 | - name: Build wheels 91 | uses: PyO3/maturin-action@v1 92 | with: 93 | target: ${{ matrix.platform.target }} 94 | args: --release --out dist 95 | sccache: 'true' 96 | - name: Upload wheels 97 | uses: actions/upload-artifact@v5 98 | with: 99 | name: wheels-macos-${{ matrix.platform.target }} 100 | path: dist 101 | 102 | sdist: 
103 | runs-on: ubuntu-latest 104 | steps: 105 | - uses: actions/checkout@v6 106 | - name: Build sdist 107 | uses: PyO3/maturin-action@v1 108 | with: 109 | command: sdist 110 | args: --out dist 111 | - name: Upload sdist 112 | uses: actions/upload-artifact@v5 113 | with: 114 | name: wheels-sdist 115 | path: dist 116 | 117 | release: 118 | name: Release 119 | runs-on: ubuntu-latest 120 | if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }} 121 | needs: [linux, musllinux, macos, sdist] 122 | permissions: 123 | # Use to sign the release artifacts 124 | id-token: write 125 | # Used to upload release artifacts 126 | contents: write 127 | # Used to generate artifact attestation 128 | attestations: write 129 | steps: 130 | - uses: actions/download-artifact@v6 131 | - name: Generate artifact attestation 132 | uses: actions/attest-build-provenance@v3 133 | with: 134 | subject-path: 'wheels-*/*' 135 | - name: Publish to PyPI 136 | if: ${{ startsWith(github.ref, 'refs/tags/') }} 137 | uses: PyO3/maturin-action@v1 138 | with: 139 | command: upload 140 | args: --non-interactive --skip-existing wheels-*/* 141 | 142 | image: 143 | name: "Build Docker Image" 144 | runs-on: ubuntu-latest 145 | needs: [release] 146 | permissions: 147 | contents: read 148 | # for GitHub Container Registry 149 | packages: write 150 | steps: 151 | - uses: actions/checkout@v6 152 | - name: Docker meta 153 | id: meta 154 | uses: docker/metadata-action@v5 155 | with: 156 | images: | 157 | ${{ github.repository_owner }}/mosec 158 | ghcr.io/${{ github.repository_owner }}/mosec 159 | - name: Docker Setup QEMU 160 | uses: docker/setup-qemu-action@v3 161 | - name: Set up Docker Buildx 162 | uses: docker/setup-buildx-action@v3 163 | - name: Login to Docker Hub 164 | uses: docker/login-action@v3 165 | with: 166 | username: ${{ secrets.DOCKER_USERNAME }} 167 | password: ${{ secrets.DOCKER_TOKEN }} 168 | - name: Login to ghcr.io 169 | uses: docker/login-action@v3 170 | with: 
class HTTPStatusCode(IntFlag):
    """HTTP status code flag."""

    OK = 1  # 200
    BAD_REQUEST = 2  # 400
    VALIDATION_ERROR = 4  # 422
    INTERNAL_ERROR = 8  # 500
    TIMEOUT_ERROR = 16  # 408

    # special one, indicate that it's a SSE
    STREAM_EVENT = 32768


class Protocol:
    """IPC protocol.

    This private class implements the client-side protocol through Unix domain socket
    to communicate with the server.

    A frame is ``flag | batch size`` followed by ``batch size`` tasks, each
    encoded as ``id | state | body length | body``.
    """

    # byte formats (https://docs.python.org/3/library/struct.html#format-characters)
    FORMAT_FLAG = "!H"
    FORMAT_BATCH = "!H"
    FORMAT_ID = "!I"
    FORMAT_LENGTH = "!I"
    FORMAT_STATE = "!H"

    # lengths
    LENGTH_TASK_FLAG = 2
    LENGTH_TASK_BATCH = 2
    LENGTH_TASK_ID = 4
    LENGTH_TASK_STATE = 2
    LENGTH_TASK_BODY_LEN = 4

    def __init__(
        self,
        name: str,
        addr: str,
        timeout: float = 2.0,
    ):
        """Initialize the protocol client.

        Args:
            name (str): name of its belonging coordinator.
            addr (str): Unix domain socket address in file system's namespace.
            timeout (float, optional): socket timeout. Defaults to 2.0 seconds.

        """
        self.socket = socket.socket(
            socket.AF_UNIX,
            socket.SOCK_STREAM,
        )
        self.socket.settimeout(timeout)
        self.name = name
        self.addr = addr

    def receive(self) -> Tuple[bytes, Sequence[bytes], Sequence[int], Sequence[bytes]]:
        """Receive tasks from the server.

        Returns:
            A ``(flag, ids, states, payloads)`` tuple: the raw flag bytes,
            the raw id bytes of each task, the decoded state of each task and
            the body of each task.

        Raises:
            ConnectionError: if the peer closes the socket in the middle of a
                task body.
        """
        # NOTE(review): the fixed-size header fields are read with single
        # `recv` calls, so a short read surfaces as `struct.error` from the
        # following `unpack` -- confirm whether callers rely on that before
        # switching these to exact reads.
        flag = self.socket.recv(self.LENGTH_TASK_FLAG)
        batch_size_bytes = self.socket.recv(self.LENGTH_TASK_BATCH)
        batch_size = struct.unpack(self.FORMAT_BATCH, batch_size_bytes)[0]
        ids, states, payloads = [], [], []
        total_bytes = 0

        for _ in range(batch_size):
            id_bytes = self.socket.recv(self.LENGTH_TASK_ID)
            state_bytes = self.socket.recv(self.LENGTH_TASK_STATE)
            length_bytes = self.socket.recv(self.LENGTH_TASK_BODY_LEN)
            length = struct.unpack(self.FORMAT_LENGTH, length_bytes)[0]
            payload = _recv_all(self.socket, length)
            ids.append(id_bytes)
            states.append(struct.unpack(self.FORMAT_STATE, state_bytes)[0])
            payloads.append(payload)
            total_bytes += length

        # Decoding the ids is only worth the cost when it will be logged.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                "%s received %d tasks with ids: %s",
                self.name,
                len(ids),
                struct.unpack("!" + "I" * len(ids), b"".join(ids)),
            )

        if total_bytes > IPC_LARGE_DATA_SIZE:
            warnings.warn(
                f"IPC data ({total_bytes} bytes) is large, "
                "which may affect performance",
                RuntimeWarning,
                stacklevel=2,
            )
        return flag, ids, states, payloads

    def send(
        self,
        flag: int,
        ids: Sequence[bytes],
        states: Sequence[int],
        payloads: Sequence[bytes],
    ):
        """Send results to the server.

        Args:
            flag: frame flag, packed with ``FORMAT_FLAG``.
            ids: raw id bytes of each task, written as is.
            states: state of each task, packed with ``FORMAT_STATE``.
            payloads: body of each task.

        Raises:
            ValueError: if ``ids``, ``states`` and ``payloads`` do not have
                the same length.
        """
        # Validate before building anything so that a malformed call never
        # produces a partial frame.
        if len(ids) != len(payloads):
            raise ValueError("`ids` have different length with `payloads`")
        if len(ids) != len(states):
            raise ValueError("`ids` have different length with `states`")

        data = BytesIO()
        data.write(struct.pack(self.FORMAT_FLAG, flag))
        data.write(struct.pack(self.FORMAT_BATCH, len(ids)))
        for task_id, state, payload in zip(ids, states, payloads, strict=True):
            data.write(task_id)
            data.write(struct.pack(self.FORMAT_STATE, state))
            data.write(struct.pack(self.FORMAT_LENGTH, len(payload)))
            data.write(payload)
        # The whole frame goes out in one call.
        self.socket.sendall(data.getbuffer())
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                "%s sent %d(%d) tasks with ids: %s",
                self.name,
                len(ids),
                flag,
                struct.unpack("!" + "I" * len(ids), b"".join(ids)),
            )

    def open(self):
        """Open the socket connection."""
        self.socket.connect(self.addr)
        logger.info("%s socket connected to %s", self.name, self.addr)

    def close(self):
        """Close the socket connection."""
        self.socket.close()
        logger.info("%s socket closed", self.name)


def _recv_all(conn: socket.socket, length: int) -> bytearray:
    """Read exactly ``length`` bytes from ``conn``.

    Args:
        conn: a connected stream socket.
        length: number of bytes to read; ``0`` returns an empty buffer
            without touching the socket.

    Returns:
        A ``bytearray`` holding the ``length`` bytes.

    Raises:
        ConnectionError: if the peer closes the connection before
            ``length`` bytes arrive.
    """
    buffer = bytearray(length)
    view = memoryview(buffer)
    size = 0
    while size < length:
        packet = conn.recv_into(view)
        if packet == 0:
            # `recv_into` returns 0 once the peer has closed the connection;
            # without this check the loop would spin forever.
            raise ConnectionError(
                f"socket closed after {size} of {length} expected bytes"
            )
        view = view[packet:]
        size += packet
    return buffer