├── mosec
    ├── py.typed
    ├── mixin
    │   ├── __init__.py
    │   ├── numbin_worker.py
    │   ├── msgpack_worker.py
    │   ├── typed_worker.py
    │   ├── plasma_worker.py
    │   └── redis_worker.py
    ├── __init__.py
    ├── utils.py
    ├── env.py
    ├── errors.py
    └── protocol.py
├── docs
    ├── source
    │   ├── _static
    │   │   └── .gitkeep
    │   ├── development
    │   │   ├── contributing.md
    │   │   └── index.md
    │   ├── robots.txt
    │   ├── license.md
    │   ├── reference
    │   │   ├── arguments.md
    │   │   ├── index.md
    │   │   ├── interface.md
    │   │   ├── migration.md
    │   │   └── concept.md
    │   ├── examples
    │   │   ├── rerank.md
    │   │   ├── embedding.md
    │   │   ├── index.md
    │   │   ├── stable_diffusion.md
    │   │   ├── echo.md
    │   │   ├── compression.md
    │   │   ├── env.md
    │   │   ├── jax.md
    │   │   ├── validate.md
    │   │   ├── ipc.md
    │   │   ├── metric.md
    │   │   ├── multi_route.md
    │   │   └── pytorch.md
    │   ├── index.md
    │   └── conf.py
    ├── Makefile
    └── make.bat
├── .github
    ├── CODEOWNERS
    ├── ISSUE_TEMPLATE
    │   ├── config.yml
    │   ├── feature_request.yaml
    │   └── bug_report.yaml
    ├── release.yml
    ├── workflows
    │   ├── links.yml
    │   ├── deny.yml
    │   ├── label.yml
    │   ├── nightly.yml
    │   ├── page.yml
    │   ├── check.yml
    │   ├── codeql.yml
    │   └── package.yml
    └── dependabot.yml
├── rust-toolchain.toml
├── lychee.toml
├── examples
    ├── monitor
    │   ├── mosec_datasource.yml
    │   ├── mosec_dashboard.yml
    │   ├── dockerfile
    │   ├── prometheus.yml
    │   ├── docker-compose.yml
    │   ├── README.md
    │   └── python_side_metrics.py
    ├── README.md
    ├── __init__.py
    ├── stable_diffusion
    │   ├── build.envd
    │   ├── client.py
    │   └── server.py
    ├── embedding
    │   ├── client.py
    │   └── server.py
    ├── type_validation
    │   ├── client.py
    │   └── server.py
    ├── jax_single_layer
    │   ├── client.py
    │   └── server.py
    ├── rerank
    │   ├── client.py
    │   └── server.py
    ├── resnet50_msgpack
    │   ├── client.py
    │   └── server.py
    ├── server_side_event
    │   ├── client.py
    │   └── server.py
    ├── multi_route
    │   ├── client.py
    │   └── server.py
    ├── custom_env.py
    ├── segment
    │   ├── client.py
    │   └── server.py
    ├── echo.py
    ├── shm_ipc
    │   ├── redis.py
    │   └── plasma_legacy.py
    └── distil_bert_server_pytorch.py
├── rustfmt.toml
├── .devcontainer
    ├── devcontainer.json
    └── Dockerfile
├── CITATION.cff
├── tests
    ├── __init__.py
    ├── services
    │   ├── __init__.py
    │   ├── mixin_typed_service.py
    │   ├── square_service.py
    │   ├── mixin_numbin_service.py
    │   ├── sse_service.py
    │   ├── bad_service.py
    │   ├── timeout_service.py
    │   ├── multi_route_service.py
    │   ├── openapi_service.py
    │   └── mixin_ipc_shm_service.py
    ├── test_utils.py
    ├── test_log.py
    ├── mock_socket.py
    ├── bad_req.py
    ├── test_protocol.py
    └── utils.py
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── .readthedocs.yaml
├── src
    ├── errors.rs
    ├── apidoc.rs
    ├── config.rs
    ├── layouts.rs
    ├── metrics.rs
    └── main.rs
├── Cargo.toml
├── Dockerfile
├── Makefile
├── .gitignore
└── pyproject.toml


/mosec/py.typed:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/source/_static/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @kemingy
2 | 


--------------------------------------------------------------------------------
/docs/source/development/contributing.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../../CONTRIBUTING.md
2 | ```
3 | 


--------------------------------------------------------------------------------
/rust-toolchain.toml:
--------------------------------------------------------------------------------
1 | [toolchain]
2 | channel = "stable"
3 | components = ["rustfmt", "clippy"]
4 | 


--------------------------------------------------------------------------------
/docs/source/robots.txt:
--------------------------------------------------------------------------------
1 | User-agent: *
2 | 
3 | Sitemap: https://mosecorg.github.io/mosec/sitemap.xml
4 | 


--------------------------------------------------------------------------------
/docs/source/license.md:
--------------------------------------------------------------------------------
1 | ---
2 | orphan: true
3 | ---
4 | 
5 | # License
6 | 
7 | ```{include} ../../LICENSE
8 | ```
9 | 


--------------------------------------------------------------------------------
/lychee.toml:
--------------------------------------------------------------------------------
1 | accept = "100..=103, 200..=208, 403"
2 | scheme = ["https", "http", "mailto"]
3 | exclude_loopback = true
4 | 


--------------------------------------------------------------------------------
/docs/source/development/index.md:
--------------------------------------------------------------------------------
 1 | # Development
 2 | 
 3 | ```{toctree}
 4 | ---
 5 | hidden:
 6 | ---
 7 | 
 8 | contributing
 9 | ```
10 | 
11 | - {doc}`contributing`
12 | 


--------------------------------------------------------------------------------
/examples/monitor/mosec_datasource.yml:
--------------------------------------------------------------------------------
1 | apiVersion: 1
2 | 
3 | datasources:
4 |   - name: 'Prometheus'
5 |     type: prometheus
6 |     access: proxy
7 |     url: prometheus:9090


--------------------------------------------------------------------------------
/docs/source/reference/arguments.md:
--------------------------------------------------------------------------------
1 | # CLI Arguments
2 | 
3 | ```shell
4 | python echo.py --help
5 | ```
6 | 
7 | ```{program-output} python ../../examples/echo.py --help
8 | ```
9 | 


--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | All the examples in this section are self-contained and tested.
2 | 
3 | See https://mosecorg.github.io/mosec/examples/index.html for detailed explanations.
4 | 


--------------------------------------------------------------------------------
/examples/monitor/mosec_dashboard.yml:
--------------------------------------------------------------------------------
1 | apiVersion: 1
2 |  
3 | providers:
4 |   - name: 'Mosec Dashboards'
5 |     type: file
6 |     options:
7 |       path: /etc/grafana/provisioning/dashboards
8 | 


--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | reorder_imports = true
2 | 
3 | # unstable
4 | unstable_features = true
5 | format_strings = true
6 | imports_granularity = "Module"
7 | group_imports = "StdExternalCrate"
8 | reorder_impl_items = true
9 | 


--------------------------------------------------------------------------------
/docs/source/reference/index.md:
--------------------------------------------------------------------------------
 1 | # Reference
 2 | 
 3 | ```{toctree}
 4 | ---
 5 | hidden:
 6 | ---
 7 | 
 8 | arguments
 9 | interface
10 | concept
11 | migration
12 | ```
13 | 
14 | - {doc}`arguments`
15 | - {doc}`interface`
16 | - {doc}`concept`
17 | - {doc}`migration`
18 | 


--------------------------------------------------------------------------------
/examples/monitor/dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.9-slim-buster
 2 | 
 3 | COPY python_side_metrics.py /root/python_side_metrics.py
 4 | 
 5 | RUN pip install -U mosec prometheus_client
 6 | 
 7 | ENTRYPOINT ["python", "/root/python_side_metrics.py"]
 8 | 
 9 | EXPOSE 8000 5000
10 | 


--------------------------------------------------------------------------------
/examples/monitor/prometheus.yml:
--------------------------------------------------------------------------------
 1 | global:
 2 |   scrape_interval:     1s
 3 |   evaluation_interval: 1s
 4 | 
 5 | scrape_configs:
 6 |   - job_name: mosec_rust
 7 |     static_configs:
 8 |       - targets: ['mosec:8000']
 9 |   - job_name: mosec_python
10 |     static_configs:
11 |       - targets: ['mosec:5000']
12 |   - job_name: prometheus
13 |     static_configs:
14 |       - targets: ['prometheus:9090']
15 | 


--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"name": "mosec",
 3 |     "build": { "dockerfile": "Dockerfile" },
 4 | 	"extensions": [
 5 | 		"rust-lang.rust-analyzer",
 6 | 		"ms-python.python"
 7 | 	],
 8 | 	"remoteUser": "dev",
 9 | 	"updateRemoteUserUID": true,
10 | 	"remoteEnv": { "PATH": "${containerEnv:PATH}:/home/dev/.local/bin" },
11 | 	"settings": {
12 | 		"editor.formatOnSave": true,
13 | 		"files.exclude": {
14 | 			"**/LICENSE": true
15 | 		}
16 | 	}
17 | }


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
 1 | blank_issues_enabled: false
 2 | 
 3 | contact_links:
 4 |   - name: Have you read the docs?
 5 |     url: https://mosecorg.github.io/mosec/
 6 |     about: Much help can be found in the docs
 7 |   - name: Ask a question
 8 |     url: https://github.com/mosecorg/mosec/discussions/new/choose
 9 |     about: Ask a question or start a discussion
10 |   - name: Chat on Discord
11 |     url: https://discord.gg/Jq5vxuH69W
12 |     about: Maybe chatting with the community can help
13 | 


--------------------------------------------------------------------------------
/.devcontainer/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.11
 2 | 
 3 | ENV TERM xterm-256color
 4 | 
 5 | RUN useradd -p "" -u 1000 -s /bin/bash -m dev
 6 | 
 7 | RUN apt-get update && apt-get install -y git ripgrep && \
 8 |     curl --proto '=https' --tlsv1.2 -sSf https://starship.rs/install.sh | sh -s -- -y && \
 9 |     printf 'eval "$(starship init bash)"' >> /etc/bash.bashrc && \
10 |     rm -rf /tmp/* /var/lib/apt/lists/*
11 | 
12 | USER dev
13 | 
14 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
15 | 
16 | WORKDIR /home/dev
17 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | message: "If you use this software, please cite it as below."
 3 | authors:
 4 | - family-names: "Yang"
 5 |   given-names: "Keming"
 6 |   orcid: "https://orcid.org/0000-0002-1351-2342"
 7 | - family-names: "Liu"
 8 |   given-names: "Zichen"
 9 |   orcid: "https://orcid.org/0000-0001-9451-8625"
10 | - family-names: "Cheng"
11 |   given-names: "Philip"
12 | title: "MOSEC: Model Serving made Efficient in the Cloud"
13 | url: "https://github.com/mosecorg/mosec"
14 | type: software
15 | date-released: 2021-09-27
16 | 


--------------------------------------------------------------------------------
/docs/source/examples/rerank.md:
--------------------------------------------------------------------------------
 1 | # Cross-Encoder model for reranking
 2 | 
 3 | This example shows how to use a cross-encoder model to rerank a list of passages based on a query. This is useful for hybrid search that combines multiple retrieval results.
 4 | 
 5 | 
 6 | ## Server
 7 | 
 8 | ```bash
 9 | python examples/rerank/server.py
10 | ```
11 | 
12 | ```{include} ../../../examples/rerank/server.py
13 | :code: python
14 | ```
15 | 
16 | ## Client
17 | 
18 | ```bash
19 | python examples/rerank/client.py
20 | ```
21 | 
22 | ```{include} ../../../examples/rerank/client.py
23 | :code: python
24 | ```
25 | 


--------------------------------------------------------------------------------
/.github/release.yml:
--------------------------------------------------------------------------------
 1 | changelog:
 2 |   exclude:
 3 |     labels:
 4 |       - dependencies
 5 |     authors:
 6 |       - dependabot
 7 |   categories:
 8 |     - title: Changes 🛠
 9 |       labels:
10 |         - bug_fix
11 |     - title: Exciting New Features 🎉
12 |       labels:
13 |         - enhancement
14 |     - title: More Documentation 📚
15 |       labels:
16 |         - documentation
17 |     - title: Refactoring 🧬
18 |       labels:
19 |         - refactoring
20 |     - title: Minor changes 🧹
21 |       labels:
22 |         - chore
23 |     - title: Others 🔔
24 |       labels:
25 |         - "*"
26 | 


--------------------------------------------------------------------------------
/docs/source/reference/interface.md:
--------------------------------------------------------------------------------
 1 | # Interface
 2 | 
 3 | ## Server
 4 | 
 5 | ```{eval-rst}
 6 | .. automodule:: mosec.server
 7 |     :members:
 8 | ```
 9 | 
10 | ## Worker
11 | 
12 | ```{eval-rst}
13 | .. automodule:: mosec.worker
14 |     :members:
15 | ```
16 | 
17 | ## Runtime
18 | 
19 | ```{eval-rst}
20 | .. automodule:: mosec.runtime
21 |     :members: Runtime
22 | ```
23 | 
24 | ## Errors
25 | 
26 | ```{eval-rst}
27 | .. automodule:: mosec.errors
28 |     :members:
29 |     :show-inheritance:
30 | ```
31 | 
32 | ## Mixins
33 | 
34 | ```{eval-rst}
35 | .. automodule:: mosec.mixin
36 |     :members:
37 |     :show-inheritance:
38 | ```
39 | 


--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/tests/services/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/.github/workflows/links.yml:
--------------------------------------------------------------------------------
 1 | name: Links
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |     paths:
 8 |       - '.github/workflows/links.yml'
 9 |       - '**.md'
10 |       - '**.html'
11 |   pull_request:
12 |     paths:
13 |       - '.github/workflows/links.yml'
14 |       - '**.md'
15 |       - '**.html'
16 |   workflow_dispatch:
17 | 
18 | jobs:
19 |   linkChecker:
20 |     runs-on: ubuntu-latest
21 |     steps:
22 |       - uses: actions/checkout@v6
23 | 
24 |       - name: Link Checker
25 |         uses: lycheeverse/lychee-action@v2
26 |         with:
27 |           fail: true
28 |           args: --verbose --no-progress --format detailed .
29 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v6.0.0
 4 |     hooks:
 5 |       - id: check-docstring-first
 6 |       - id: check-merge-conflict
 7 |       - id: check-yaml
 8 |       - id: end-of-file-fixer
 9 |       - id: trailing-whitespace
10 |   - repo: local
11 |     hooks:
12 |       - id: make-lint
13 |         name: Lint
14 |         entry: make lint semantic_lint
15 |         language: system
16 |         types: [python, rust]
17 |         pass_filenames: false
18 |         always_run: true
19 |   - repo: https://github.com/crate-ci/typos
20 |     rev: v1.35.5
21 |     hooks:
22 |       - id: typos
23 | 


--------------------------------------------------------------------------------
/examples/stable_diffusion/build.envd:
--------------------------------------------------------------------------------
 1 | # syntax=v1
 2 | 
 3 | 
 4 | def basic():  # Shared setup called by both build() and serving() below.
 5 |     install.cuda(version="11.6.2")
 6 |     install.python()
 7 |     install.python_packages(
 8 |         name=[
 9 |             "torch --extra-index-url https://download.pytorch.org/whl/cu116",  # cu116 wheel index, in line with the CUDA 11.6.2 install above
10 |             "diffusers[torch]",
11 |             "transformers",
12 |             "accelerate",
13 |             "msgpack",
14 |             "mosec",
15 |         ]
16 |     )
17 | 
18 | 
19 | def build():  # Build target: base(dev=True), the shared basic() setup, and an exposed port.
20 |     base(dev=True)
21 |     basic()
22 |     runtime.expose(envd_port=8000, host_port=8000, service="sd")  # port 8000 on both sides, registered under the service name "sd"
23 | 
24 | 
25 | def serving():  # Serving target: shared basic() setup, then server.py as the entrypoint.
26 |     basic()
27 |     io.copy("server.py", "/")  # NOTE(review): the entrypoint names server.py relatively, so it presumably runs from "/" - confirm
28 |     config.entrypoint(["python", "server.py", "--timeout", "30000"])
29 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?= -W --keep-going
 7 | SPHINXBUILD   ?= uv run sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/source/examples/embedding.md:
--------------------------------------------------------------------------------
 1 | # OpenAI compatible embedding service
 2 | 
 3 | This example shows how to create an embedding service that is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).
 4 | 
 5 | In this example, we use the embedding model from [Hugging Face LeaderBoard](https://huggingface.co/spaces/mteb/leaderboard).
 6 | 
 7 | 
 8 | ## Server
 9 | 
10 | ```bash
11 | EMB_MODEL=thenlper/gte-base python examples/embedding/server.py
12 | ```
13 | 
14 | ```{include} ../../../examples/embedding/server.py
15 | :code: python
16 | ```
17 | 
18 | ## Client
19 | 
20 | ```bash
21 | EMB_MODEL=thenlper/gte-base python examples/embedding/client.py
22 | ```
23 | 
24 | ```{include} ../../../examples/embedding/client.py
25 | :code: python
26 | ```
27 | 


--------------------------------------------------------------------------------
/docs/source/examples/index.md:
--------------------------------------------------------------------------------
 1 | # Examples
 2 | 
 3 | ```{toctree}
 4 | ---
 5 | hidden:
 6 | ---
 7 | 
 8 | echo
 9 | embedding
10 | env
11 | jax
12 | ipc
13 | metric
14 | multi_route
15 | pytorch
16 | rerank
17 | stable_diffusion
18 | validate
19 | compression
20 | ```
21 | 
22 | We provide examples across different ML frameworks and for various tasks in this section.
23 | 
24 | ## Requirements
25 | 
26 | All the examples in this section are self-contained and tested. Feel free to grab one and run:
27 | 
28 | ```shell
29 | python model_server.py
30 | ```
31 | 
32 | To test the server, we use [`httpie`](https://github.com/httpie/httpie) and [`httpx`](https://github.com/encode/httpx) by default. You can use other tools, but if you want to install these:
33 | 
34 | ```shell
35 | pip install httpie httpx
36 | ```
37 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to `Mosec`
 2 | 
 3 | Before contributing to this repository, please first discuss the change you wish to make with the owners of this repository via an issue, email, or any other method.
 4 | 
 5 | ## Pull Request Process
 6 | 
 7 | 1. After you have forked this repository, you could use `make install` for *the first time* to install the local development dependencies.
 8 | 2. Before committing your changes, you can use `make format && make lint` to ensure the code follows our style standards.
 9 | 3. Please add corresponding tests for your change if it relates to a new feature or API, and ensure that `make test` passes.
10 | 4. Submit your pull request.
11 | 
12 | ## Contacts
13 | 
14 | - [Keming](mailto:kemingy94@gmail.com)
15 | - [zclzc](mailto:lkevinzc@gmail.com)
16 | 


--------------------------------------------------------------------------------
/docs/source/examples/stable_diffusion.md:
--------------------------------------------------------------------------------
 1 | # Stable Diffusion
 2 | 
 3 | This example provides a demo service for stable diffusion. You can develop this in the container environment by using [envd](https://github.com/tensorchord/envd): `envd up -p examples/stable_diffusion`.
 4 | 
 5 | You should be able to try this demo under the `mosec/examples/stable_diffusion/` directory.
 6 | 
 7 | ## Server
 8 | 
 9 | ```shell
10 | envd build -t sd:serving
11 | docker run --rm --gpus all -p 8000:8000 sd:serving
12 | ```
13 | 
14 | ```{include} ../../../examples/stable_diffusion/server.py
15 | :code: python
16 | ```
17 | 
18 | ```shell
19 | python server.py --timeout 30000
20 | ```
21 | 
22 | ## Client
23 | 
24 | ```shell
25 | python client.py --prompt "a cute cat site on the basketball"
26 | ```
27 | 
28 | ```{include} ../../../examples/stable_diffusion/client.py
29 | :code: python
30 | ```
31 | 


--------------------------------------------------------------------------------
/docs/source/examples/echo.md:
--------------------------------------------------------------------------------
 1 | # Echo Example
 2 | 
 3 | An echo server is usually the very first server you want to implement to get familiar with the framework.
 4 | 
 5 | This server sleeps for a given period and returns. It is a simple illustration of how a **multi-stage workload** is implemented. It also shows how to write a simple **validation** for input data.
 6 | 
 7 | The default JSON protocol will be used since the (de)serialization methods are not overridden in this demo. In particular, the input `data` of `Preprocess`'s `forward` is a dictionary decoded by JSON from the request body's bytes; and the output dictionary of `Postprocess`'s `forward` will be JSON-encoded as a mirrored process.
 8 | 
 9 | ## **`echo.py`**
10 | 
11 | ```{include} ../../../examples/echo.py
12 | :code: python
13 | ```
14 | 
15 | ## Start
16 | 
17 | ```shell
18 | python echo.py
19 | ```
20 | 
21 | ## Test
22 | 
23 | ```shell
24 | http :8000/inference time=1.5
25 | ```
26 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/source/index.md:
--------------------------------------------------------------------------------
 1 | # Machine Learning Model Serving made Efficient in the Cloud
 2 | 
 3 | ```{eval-rst}
 4 | .. meta::
 5 |     :description lang=en:
 6 |         MOSEC is a high-performance serving framework for ML models that offers dynamic batching and multi-stage pipelines to fully exploit your compute machine.
 7 | ```
 8 | 
 9 | ```{note}
10 | {doc}`mosec is licensed under the Apache-2 <license>`.
11 | ```
12 | 
13 | ```{include} ../../README.md
14 | ```
15 | 
16 | ```{toctree}
17 | ---
18 | maxdepth: 2
19 | hidden:
20 | caption: User Guide
21 | ---
22 | reference/index
23 | examples/index
24 | ```
25 | 
26 | ```{toctree}
27 | ---
28 | maxdepth: 2
29 | hidden:
30 | caption: Dev Guide
31 | ---
32 | development/index
33 | ```
34 | 
35 | ```{toctree}
36 | ---
37 | hidden:
38 | caption: Project Links
39 | ---
40 | 
41 | GitHub <https://github.com/mosecorg/mosec>
42 | Discord <https://discord.gg/Jq5vxuH69W>
43 | ```
44 | 
45 | ## Indices and tables
46 | 
47 | - {ref}`genindex`
48 | 


--------------------------------------------------------------------------------
/docs/source/examples/compression.md:
--------------------------------------------------------------------------------
 1 | # Compression
 2 | 
 3 | This example demonstrates how to use the `--compression` feature for segmentation tasks. We use the example from the [Segment Anything Model 2](https://github.com/facebookresearch/sam2/blob/main/notebooks/image_predictor_example.ipynb). The request includes an image and its low-resolution mask; the response is the final mask. Since there are lots of duplicate values in the mask, we can use `gzip` or `zstd` to compress it.
 4 | 
 5 | ## Server
 6 | 
 7 | ```shell
 8 | python examples/segment/server.py --compression
 9 | ```
10 | 
11 | <details>
12 | <summary>server.py</summary>
13 | 
14 | ```{include} ../../../examples/segment/server.py
15 | :code: python
16 | ```
17 | 
18 | </details>
19 | 
20 | ## Client
21 | 
22 | ```shell
23 | python examples/segment/client.py
24 | ```
25 | 
26 | <details>
27 | <summary>client.py</summary>
28 | 
29 | ```{include} ../../../examples/segment/client.py
30 | :code: python
31 | ```
32 | 
33 | </details>
34 | 


--------------------------------------------------------------------------------
/examples/monitor/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 | 
 5 |   mosec:
 6 |     build: .
 7 |     container_name: mosec
 8 |     ports:
 9 |       - "8000:8000"
10 |       - "5000:5000"
11 | 
12 |   prometheus:
13 |     image: prom/prometheus:v2.30.0
14 |     container_name: prometheus
15 |     ports:
16 |       - "9090:9090"
17 |     volumes:
18 |       - ./prometheus.yml:/etc/prometheus/prometheus.yml
19 |     command:
20 |       - --config.file=/etc/prometheus/prometheus.yml
21 |     restart: always
22 |     depends_on:
23 |       - mosec
24 | 
25 |   grafana:
26 |     image: grafana/grafana:8.2.2
27 |     container_name: grafana
28 |     ports:
29 |       - "3000:3000"
30 |     volumes:
31 |       - ./mosec_datasource.yml:/etc/grafana/provisioning/datasources/mosec_datasource.yml
32 |       - ./mosec_dashboard.yml:/etc/grafana/provisioning/dashboards/mosec_dashboard.yml
33 |       - ./mosec_dashboard.json:/etc/grafana/provisioning/dashboards/mosec_dashboard.json
34 |     restart: always
35 |     depends_on:
36 |       - prometheus
37 | 


--------------------------------------------------------------------------------
/examples/embedding/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """OpenAI embedding client example."""
16 | 
17 | import os
18 | 
19 | from openai import Client
20 | 
21 | DEFAULT_MODEL = "thenlper/gte-base"  # fallback when the EMB_MODEL environment variable is not set
22 | 
23 | client = Client(api_key="fake", base_url="http://127.0.0.1:8000/")  # targets a server on localhost:8000; the API key is a dummy value
24 | emb = client.embeddings.create(
25 |     model=os.getenv("EMB_MODEL", DEFAULT_MODEL),
26 |     input="Hello world!",
27 | )
28 | print(emb.data[0].embedding)  # type: ignore
29 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yaml:
--------------------------------------------------------------------------------
 1 | name: Feature Request
 2 | description: Feature request for mosec
 3 | labels: ["enhancement"]
 4 | title: "feat: <title>"
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         Thanks for taking the time to fill out this feature request!
10 |   - type: textarea
11 |     attributes:
12 |       label: Describe the feature
13 |       description: A clear and concise description of what the feature is.
14 |     validations:
15 |       required: true
16 |   - type: textarea
17 |     attributes:
18 |       label: Why do you need this feature?
19 |       description: A clear and concise description of why you need this feature.
20 |     validations:
21 |       required: false
22 |   - type: textarea
23 |     attributes:
24 |       label: Additional context
25 |       description: Add any other context about the feature request here.
26 |     validations:
27 |       required: false
28 |   - type: markdown
29 |     attributes:
30 |       value: |
31 |         Love this enhancement proposal? Give it a 👍. We prioritise the proposals with the most 👍.
32 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Set the OS, Python version and other tools you might need
 8 | build:
 9 |   os: ubuntu-22.04
10 |   tools:
11 |     python: "3.11"
12 |     # You can also specify other tool versions:
13 |     # rust: "1.70"
14 | 
15 | # Build documentation in the "docs/" directory with Sphinx
16 | sphinx:
17 |    configuration: docs/source/conf.py
18 | 
19 | # Optionally build your docs in additional formats such as PDF and ePub
20 | formats:
21 |    - pdf
22 |    - epub
23 | 
24 | # Optional but recommended, declare the Python requirements required
25 | # to build your documentation
26 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
27 | # https://docs.readthedocs.io/en/stable/config-file/v2.html#python-install
28 | python:
29 |    install:
30 |    - requirements: requirements/doc.txt
31 |    - requirements: requirements/mixin.txt
32 |    - requirements: requirements/dev.txt
33 |    - method: pip
34 |      path: .
35 | 


--------------------------------------------------------------------------------
/examples/type_validation/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from http import HTTPStatus
16 | 
17 | import httpx
18 | import msgspec
19 | 
20 | req = {
21 |     "bin": b"hello mosec",
22 |     "name": "type check",
23 | }
24 | 
25 | resp = httpx.post(
26 |     "http://127.0.0.1:8000/inference", content=msgspec.msgpack.encode(req)
27 | )
28 | if resp.status_code == HTTPStatus.OK:
29 |     print(f"OK: {msgspec.msgpack.decode(resp.content)}")
30 | else:
31 |     print(f"err[{resp.status_code}] {resp.text}")
32 | 


--------------------------------------------------------------------------------
/src/errors.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 MOSEC Authors
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | use crate::tasks::TaskCode;
16 | 
17 | #[derive(Debug, derive_more::From, derive_more::Display, derive_more::Error)]
18 | pub(crate) enum ServiceError { // crate-private errors; `#[display]` sets each message
19 |     #[display("inference timeout")]
20 |     Timeout, // the inference timed out
21 | 
22 |     #[display("too many request: task queue is full")]
23 |     TooManyRequests, // the task queue is full
24 | 
25 |     #[display("mosec unknown error")]
26 |     UnknownError, // presumably a catch-all; confirm at the call sites
27 | 
28 |     #[display("SSE inference error: {_0}")]
29 |     SSEError(TaskCode), // the wrapped `TaskCode` fills `{_0}` in the message
30 | }
31 | 


--------------------------------------------------------------------------------
/mosec/mixin/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Provide useful mixin to extend MOSEC."""
16 | 
17 | from mosec.mixin.msgpack_worker import MsgpackMixin
18 | from mosec.mixin.numbin_worker import NumBinIPCMixin
19 | from mosec.mixin.plasma_worker import PlasmaShmIPCMixin
20 | from mosec.mixin.redis_worker import RedisShmIPCMixin
21 | from mosec.mixin.typed_worker import TypedMsgPackMixin
22 | 
23 | __all__ = [
24 |     "MsgpackMixin",
25 |     "NumBinIPCMixin",
26 |     "PlasmaShmIPCMixin",
27 |     "RedisShmIPCMixin",
28 |     "TypedMsgPackMixin",
29 | ]
30 | 


--------------------------------------------------------------------------------
/.github/workflows/deny.yml:
--------------------------------------------------------------------------------
 1 | name: Cargo Deny
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   schedule:
 6 |     - cron: '30 10 * * 1'  # Every Monday at 10:30 AM UTC
 7 |   pull_request:
 8 |     paths:
 9 |       - 'Cargo.toml'
10 |       - 'Cargo.lock'
11 |       - '.github/workflows/deny.yml'
12 |       - 'deny.toml'
13 |   merge_group:
14 | 
15 | concurrency:
16 |   group: ${{ github.ref }}-${{ github.workflow }}
17 |   cancel-in-progress: true
18 | 
19 | jobs:
20 |   deny:
21 |     runs-on: ubuntu-latest
22 |     env:
23 |       CARGO_TERM_COLOR: always
24 |       VERSION: 0.18.9
25 |       CMD: "cargo-deny"
26 |       DIR: "/tmp/cargo-deny"
27 |     steps:
28 |       - uses: actions/checkout@v6
29 |       - name: Set up Rust
30 |         uses: dtolnay/rust-toolchain@stable
31 |       - name: Install Deny
32 |         run: |
33 |           mkdir -p $DIR
34 |           curl -L -o $DIR/archive.tar.gz https://github.com/EmbarkStudios/$CMD/releases/download/$VERSION/$CMD-$VERSION-x86_64-unknown-linux-musl.tar.gz
35 |           tar -xzvf $DIR/archive.tar.gz --strip-components=1 -C $DIR
36 |       - name: Deny
37 |         run: $DIR/$CMD -L warn check bans licenses advisories --show-stats
38 | 


--------------------------------------------------------------------------------
/examples/jax_single_layer/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Example: Client of the Jax server."""
15 | 
16 | import random
17 | from http import HTTPStatus
18 | 
19 | import httpx
20 | 
21 | input_data = [random.randint(-99, 99), random.randint(-99, 99), random.randint(-99, 99)]
22 | print("Client : sending data : ", input_data)
23 | 
24 | prediction = httpx.post(
25 |     "http://127.0.0.1:8000/inference",
26 |     json={"array": input_data},
27 | )
28 | if prediction.status_code == HTTPStatus.OK:
29 |     print(prediction.json())
30 | else:
31 |     print(prediction.status_code, prediction.json())
32 | 


--------------------------------------------------------------------------------
/docs/source/examples/env.md:
--------------------------------------------------------------------------------
 1 | # Customized GPU Allocation
 2 | 
 3 | This is an example demonstrating how to give different worker processes customized environment variables to control things like GPU device allocation, etc.
 4 | 
 5 | Assume your machine has 4 GPUs, and you hope to deploy your model to all of them to handle inference requests in parallel, maximizing your service's throughput. With MOSEC, we provide parallel workers with customized environment variables to satisfy the needs.
 6 | 
 7 | As shown in the code below, we can define our inference worker together with a list of environment variable dictionaries, each of which will be passed to the corresponding worker process. For example, if we set `CUDA_VISIBLE_DEVICES` to `0` through `3` (one value per worker), a copy of our model will be deployed on each of the 4 GPUs and queried in parallel, greatly improving the system's throughput. You can verify this from either the server logs or the client response.
 8 | 
 9 | ## **`custom_env.py`**
10 | 
11 | ```{include} ../../../examples/custom_env.py
12 | :code: python
13 | ```
14 | 
15 | ## Start
16 | 
17 | ```shell
18 | python custom_env.py
19 | ```
20 | 
21 | ## Test
22 | 
23 | ```shell
24 | http :8000/inference dummy=0
25 | ```
26 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "cargo" # See documentation for possible values
 9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "monthly"
12 |     commit-message:
13 |       prefix: "chore(cargo)"
14 |     groups:
15 |       all-crates:
16 |         patterns:
17 |           - "*"
18 | 
19 |   - package-ecosystem: "pip"
20 |     directory: "/"
21 |     schedule:
22 |       interval: "monthly"
23 |     commit-message:
24 |       prefix: "chore(pip)"
25 |     groups:
26 |       all-pips:
27 |         patterns:
28 |           - "*"
29 | 
30 |   - package-ecosystem: "github-actions"
31 |     directory: "/"
32 |     schedule:
33 |       interval: "monthly"
34 |     commit-message:
35 |       prefix: "chore(actions)"
36 |     groups:
37 |       all-actions:
38 |         patterns:
39 |           - "*"
40 | 


--------------------------------------------------------------------------------
/examples/rerank/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from http import HTTPStatus
16 | 
17 | import httpx
18 | import msgspec
19 | 
20 | req = {
21 |     "query": "talk is cheap, show me the code",
22 |     "docs": [
23 |         "what a nice day",
24 |         "life is short, use python",
25 |         "early bird catches the worm",
26 |     ],
27 | }
28 | 
29 | resp = httpx.post(
30 |     "http://127.0.0.1:8000/inference", content=msgspec.msgpack.encode(req)
31 | )
32 | if resp.status_code == HTTPStatus.OK:
33 |     print(f"OK: {msgspec.msgpack.decode(resp.content)}")
34 | else:
35 |     print(f"err[{resp.status_code}] {resp.text}")
36 | 


--------------------------------------------------------------------------------
/examples/resnet50_msgpack/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Example: Sample Resnet client."""
15 | 
16 | from http import HTTPStatus
17 | 
18 | import httpx
19 | import msgpack  # type: ignore
20 | 
21 | dog_bytes = httpx.get(
22 |     "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg"
23 | ).content
24 | 
25 | 
26 | prediction = httpx.post(
27 |     "http://127.0.0.1:8000/inference",
28 |     content=msgpack.packb({"image": dog_bytes}),
29 | )
30 | if prediction.status_code == HTTPStatus.OK:
31 |     print(msgpack.unpackb(prediction.content))
32 | else:
33 |     print(prediction.status_code, prediction.content)
34 | 


--------------------------------------------------------------------------------
/tests/services/mixin_typed_service.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Test typed MsgPack mixin."""
16 | 
17 | from typing import Any, List
18 | 
19 | from msgspec import Struct
20 | 
21 | from mosec import Server, Worker
22 | from mosec.mixin import TypedMsgPackMixin
23 | 
24 | 
25 | class Request(Struct):  # schema named in the `forward` annotation of `Inference`
26 |     media: str
27 |     binary: bytes  # `Inference.forward` returns the length of this field
28 | 
29 | 
30 | class Inference(TypedMsgPackMixin, Worker):
31 |     def forward(self, data: List[Request]) -> Any:
32 |         return [len(req.binary) for req in data]
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     server = Server()
37 |     server.append_worker(Inference, max_batch_size=4)
38 |     server.run()
39 | 


--------------------------------------------------------------------------------
/mosec/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """MOSEC is a machine learning model serving framework."""
16 | 
17 | from mosec.errors import (
18 |     ClientError,
19 |     DecodingError,
20 |     EncodingError,
21 |     ServerError,
22 |     ValidationError,
23 | )
24 | from mosec.log import get_logger
25 | from mosec.runtime import Runtime
26 | from mosec.server import Server
27 | from mosec.worker import SSEWorker, Worker
28 | 
29 | __all__ = [
30 |     "ClientError",
31 |     "DecodingError",
32 |     "EncodingError",
33 |     "Runtime",
34 |     "SSEWorker",
35 |     "Server",
36 |     "ServerError",
37 |     "ValidationError",
38 |     "Worker",
39 |     "get_logger",
40 | ]
41 | 


--------------------------------------------------------------------------------
/docs/source/examples/jax.md:
--------------------------------------------------------------------------------
 1 | # Jax jitted inference
 2 | 
 3 | This example shows how to utilize the [Jax framework](https://github.com/google/jax) to build a just-in-time (JIT) compiled inference server. You could install Jax following their official guide and you also need `chex` to run this example (`pip install -U chex`).
 4 | 
 5 | We use a single-layer neural network for this minimal example. You can also measure the speedup from JIT by setting the environment variable `USE_JIT=true` and observing the latency difference. Note that in the `__init__` of the worker we set `self.multi_examples` to a list of example inputs for warmup, because each new batch size triggers re-jitting the first time it is traced.
 6 | 
 7 | ## Server
 8 | 
 9 | ```shell
10 | USE_JIT=true python examples/jax_single_layer/server.py
11 | ```
12 | 
13 | <details>
14 | <summary>examples/jax_single_layer/server.py</summary>
15 | 
16 | ```{include} ../../../examples/jax_single_layer/server.py
17 | :code: python
18 | ```
19 | 
20 | </details>
21 | 
22 | ## Client
23 | 
24 | ```shell
25 | python examples/jax_single_layer/client.py
26 | ```
27 | 
28 | <details>
29 | <summary>examples/jax_single_layer/client.py</summary>
30 | 
31 | ```{include} ../../../examples/jax_single_layer/client.py
32 | :code: python
33 | ```
34 | 
35 | </details>
36 | 


--------------------------------------------------------------------------------
/tests/services/square_service.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Test basic `forward` logic for single/concurrency request."""
16 | 
17 | from typing import List
18 | 
19 | from mosec import Server, Worker
20 | from mosec.errors import ValidationError
21 | 
22 | 
23 | class SquareService(Worker):
24 |     def forward(self, data: List[dict]) -> List[dict]:
25 |         try:
26 |             result = [{"x": int(req["x"]) ** 2} for req in data]
27 |         except KeyError as err:
28 |             raise ValidationError(err) from err
29 |         return result
30 | 
31 | 
32 | if __name__ == "__main__":
33 |     server = Server()
34 |     server.append_worker(SquareService, max_batch_size=8)
35 |     server.run()
36 | 


--------------------------------------------------------------------------------
/examples/server_side_event/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import httpx
16 | from httpx_sse import connect_sse
17 | 
18 | with (
19 |     httpx.Client() as client,
20 |     connect_sse(
21 |         client, "POST", "http://127.0.0.1:8000/inference", json={"text": "mosec"}
22 |     ) as event_source,
23 | ):
24 |     for sse in event_source.iter_sse():
25 |         print(f"Event({sse.event}): {sse.data}")
26 | 
27 | # error handling
28 | with (
29 |     httpx.Client() as client,
30 |     connect_sse(
31 |         client, "POST", "http://127.0.0.1:8000/inference", json={"error": "mosec"}
32 |     ) as event_source,
33 | ):
34 |     for sse in event_source.iter_sse():
35 |         print(f"Event({sse.event}): {sse.data}")
36 | 


--------------------------------------------------------------------------------
/.github/workflows/label.yml:
--------------------------------------------------------------------------------
 1 | name: Label
 2 | on:
 3 |   pull_request_target:
 4 |     types: [opened, edited]
 5 | 
 6 | # make sure you have the following labels:
 7 | #   [documentation, enhancement, bug_fix, refactoring, chore]
 8 | jobs:
 9 |   pr_label:
10 |     name: PR label
11 |     permissions:
12 |       pull-requests: write
13 |       contents: read
14 |     runs-on: ubuntu-latest
15 |     env:
16 |       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
17 |       PR: ${{ github.event.number }}
18 |     steps:
19 |       - uses: actions/checkout@v6
20 |       - run: gh pr edit $PR --remove-label documentation --remove-label enhancement  --remove-label bug_fix  --remove-label refactoring --remove-label chore
21 |       - run: gh pr edit $PR --add-label documentation
22 |         if: ${{ startsWith(github.event.pull_request.title, 'doc') }}
23 |       - run: gh pr edit $PR --add-label enhancement
24 |         if: ${{ startsWith(github.event.pull_request.title, 'feat') }}
25 |       - run: gh pr edit $PR --add-label bug_fix
26 |         if: ${{ startsWith(github.event.pull_request.title, 'fix') }}
27 |       - run: gh pr edit $PR --add-label refactoring
28 |         if: ${{ startsWith(github.event.pull_request.title, 'refact') }}
29 |       - run: gh pr edit $PR --add-label chore
30 |         if: ${{ startsWith(github.event.pull_request.title, 'chore') }}
31 | 
32 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Test util functions."""
16 | 
17 | from typing import List
18 | 
19 | from msgspec import Struct
20 | 
21 | from mosec import Worker
22 | from mosec.utils import ParseTarget, parse_func_type
23 | 
24 | 
25 | class Request(Struct):  # minimal struct used as the annotation under test
26 |     name: str
27 | 
28 | 
29 | class Demo(Worker):
30 |     def forward(self, data: Request):
31 |         pass
32 | 
33 |     def batch_forward(self, data: List[Request]):
34 |         pass
35 | 
36 | 
37 | def test_parse_forward_input_type():
38 |     demo = Demo()
39 | 
40 |     single = parse_func_type(demo.forward, ParseTarget.INPUT)
41 |     assert single is Request, single
42 | 
43 |     batch = parse_func_type(demo.batch_forward, ParseTarget.INPUT)
44 |     assert batch is Request, batch
45 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yaml:
--------------------------------------------------------------------------------
 1 | name: Bug Report
 2 | description: Bug report for mosec
 3 | labels: ["bug"]
 4 | title: "bug: <title>"
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         Thanks for taking the time to fill out this bug report!
10 |   - type: textarea
11 |     attributes:
12 |       label: Describe the bug
13 |       description: A clear and concise description of what the bug is.
14 |     validations:
15 |       required: true
16 |   - type: textarea
17 |     attributes:
18 |       label: To Reproduce
19 |       description: Steps to reproduce the behavior.
20 |     validations:
21 |       required: true
22 |   - type: textarea
23 |     attributes:
24 |       label: Expected behavior
25 |       description: A clear and concise description of what you expected to happen.
26 |     validations:
27 |       required: false
28 |   - type: textarea
29 |     attributes:
30 |       label: The mosec version
31 |       description: The output of `pip show mosec` command.
32 |     validations:
33 |       required: true
34 |   - type: textarea
35 |     attributes:
36 |       label: Additional context
37 |       description: Add any other context about the problem here.
38 |     validations:
39 |       required: false
40 |   - type: markdown
41 |     attributes:
42 |       value: |
43 |         Impacted by this bug? Give it a 👍. We prioritise the issues with the most 👍.
44 | 


--------------------------------------------------------------------------------
/tests/test_log.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Test multiprocessing logging configuration."""
16 | 
17 | import logging
18 | 
19 | from mosec.log import get_log_level, get_logger, set_logger
20 | from tests.utils import env_context
21 | 
22 | 
23 | def test_get_logger():
24 |     with env_context():
25 |         logger = get_logger()
26 |         assert logger.level == logging.INFO
27 | 
28 |     with env_context(MOSEC_LOG_LEVEL="warning"):
29 |         set_logger(get_log_level())
30 |         logger = get_logger()
31 |         assert logger.level == logging.WARNING
32 | 
33 |     # `--debug` has higher priority
34 |     with env_context(MOSEC_DEBUG="true", MOSEC_LOG_LEVEL="warning"):
35 |         set_logger(get_log_level())
36 |         logger = get_logger()
37 |         assert logger.level == logging.DEBUG
38 | 


--------------------------------------------------------------------------------
/docs/source/examples/validate.md:
--------------------------------------------------------------------------------
 1 | # Validate Request
 2 | 
 3 | This example shows how to use the `TypedMsgPackMixin` to validate the request with the help of [`msgspec`](https://github.com/jcrist/msgspec).
 4 | 
 5 | Request validation can provide the following benefits:
 6 | 
 7 | - The client can know the exact expected data schema from the type definition.
 8 | - Validation failure will return the details of the failure reason to help the client debug.
 9 | - Ensure that the service is working on the correct data without fear.
10 | 
11 | First of all, define the request type with `msgspec.Struct` like:
12 | 
13 | ```python
14 | class Request(msgspec.Struct):
15 |     media: str
16 |     binary: bytes
17 | ```
18 | 
19 | Then, apply the `TypedMsgPackMixin` mixin and add the type you defined to the annotation of `forward(self, data)`:
20 | 
21 | ```python
22 | class Inference(TypedMsgPackMixin, Worker):
23 |     def forward(self, data: Request):
24 |         pass
25 | ```
26 | 
27 | ```{note}
28 | If you are using dynamic **batch** inference as the first stage, use `List[Request]` as the annotation instead.
29 | ```
30 | 
31 | You can check the full demo code below.
32 | 
33 | ## Server
34 | 
35 | ```{include} ../../../examples/type_validation/server.py
36 | :code: python
37 | ```
38 | 
39 | ## Client
40 | 
41 | ```{include} ../../../examples/type_validation/client.py
42 | :code: python
43 | ```
44 | 
45 | ## Test
46 | 
47 | ```shell
48 | python client.py
49 | ```
50 | 


--------------------------------------------------------------------------------
/tests/services/mixin_numbin_service.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Test NumBin IPC mixin."""
16 | 
17 | from typing import Dict, List
18 | 
19 | import numpy as np
20 | 
21 | from mosec import Server, Worker
22 | from mosec.mixin import NumBinIPCMixin
23 | 
24 | 
25 | class Preprocess(NumBinIPCMixin, Worker):
26 |     def forward(self, data: Dict[str, str]) -> np.ndarray:
27 |         num = int(data.get("num", 10))
28 |         arr = np.ones(num) * (1 / num)
29 |         return arr
30 | 
31 | 
32 | class Inference(NumBinIPCMixin, Worker):
33 |     def forward(self, data: List[np.ndarray]) -> List[str]:
34 |         res = ["equal" if np.equal(1, arr.sum()) else "unequal" for arr in data]
35 |         return res
36 | 
37 | 
38 | if __name__ == "__main__":
39 |     server = Server()
40 |     server.append_worker(Preprocess)
41 |     server.append_worker(Inference, max_batch_size=8)
42 |     server.run()
43 | 


--------------------------------------------------------------------------------
/.github/workflows/nightly.yml:
--------------------------------------------------------------------------------
 1 | name: Nightly Test
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron: '30 7 * * *'
 6 |   pull_request:
 7 |     paths:
 8 |       - '.github/workflows/nightly.yml'
 9 |       - 'mosec/**'
10 |       - 'src/**'
11 |       - 'pyproject.toml'
12 |       - 'Cargo.lock'
13 |       - 'Cargo.toml'
14 |       - 'tests/bad_req.py'
15 |   push:
16 |     branches:
17 |       - main
18 |     paths:
19 |       - '.github/workflows/nightly.yml'
20 |       - 'mosec/**'
21 |       - 'src/**'
22 |       - 'pyproject.toml'
23 |       - 'Cargo.lock'
24 |       - 'Cargo.toml'
25 |       - 'tests/bad_req.py'
26 |   workflow_dispatch:
27 | 
28 | concurrency:
29 |   group: ${{ github.ref }}-${{ github.workflow }}
30 |   cancel-in-progress: true
31 | 
32 | env:
33 |   SCCACHE_GHA_ENABLED: "true"
34 |   RUSTC_WRAPPER: "sccache"
35 | 
36 | jobs:
37 |   test:
38 |     name: "stressful bad requests test"
39 |     runs-on: ${{ matrix.os }}
40 |     timeout-minutes: 45
41 |     strategy:
42 |       matrix:
43 |         os: [ubuntu-latest]
44 | 
45 |     steps:
46 |       - uses: actions/checkout@v6
47 |       - name: Install uv
48 |         uses: astral-sh/setup-uv@v7
49 |         with:
50 |           enable-cache: true
51 |       - name: Set up Rust
52 |         uses: dtolnay/rust-toolchain@stable
53 |       - name: Run sccache-cache
54 |         uses: mozilla-actions/sccache-action@v0.0.9
55 |       - name: Install dependencies
56 |         run: make install_py
57 |       - name: Test
58 |         run: make test_chaos
59 | 


--------------------------------------------------------------------------------
/examples/multi_route/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import json
16 | from http import HTTPStatus
17 | 
18 | import httpx
19 | import msgpack  # type: ignore
20 | 
21 | typed_req = {
22 |     "bin": b"hello mosec with type check",
23 |     "name": "type check",
24 | }
25 | 
26 | print(">> requesting for the typed route with msgpack serde")
27 | resp = httpx.post(
28 |     "http://127.0.0.1:8000/v1/inference", content=msgpack.packb(typed_req)
29 | )
30 | if resp.status_code == HTTPStatus.OK:
31 |     print(f"OK: {msgpack.unpackb(resp.content)}")
32 | else:
33 |     print(f"err[{resp.status_code}] {resp.text}")
34 | 
35 | print(">> requesting for the untyped route with json serde")
36 | resp = httpx.post("http://127.0.0.1:8000/inference", content=b"hello mosec")
37 | if resp.status_code == HTTPStatus.OK:
38 |     print(f"OK: {json.loads(resp.content)}")
39 | else:
40 |     print(f"err[{resp.status_code}] {resp.text}")
41 | 


--------------------------------------------------------------------------------
/examples/server_side_event/server.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from mosec import Server, SSEWorker, ValidationError, Worker, get_logger
16 | 
17 | logger = get_logger()
18 | 
19 | 
20 | class Preprocess(Worker):
21 |     def forward(self, data):
22 |         text = data.get("text")
23 |         if text is None:
24 |             raise ValidationError("text is required")
25 |         return text
26 | 
27 | 
28 | class Inference(SSEWorker):
29 |     def forward(self, data):
30 |         epoch = 5
31 |         for i in range(epoch):
32 |             for j in range(len(data)):
33 |                 self.send_stream_event(
34 |                     f"inference: ({i + 1}/{epoch}) {data[j]}", index=j
35 |                 )
36 | 
37 |         # this return value will be ignored
38 |         return data
39 | 
40 | 
41 | if __name__ == "__main__":
42 |     server = Server()
43 |     server.append_worker(Preprocess)
44 |     server.append_worker(Inference, max_batch_size=2)
45 |     server.run()
46 | 


--------------------------------------------------------------------------------
/tests/services/sse_service.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Test Server-Sent Event."""
16 | 
17 | from mosec import Server, SSEWorker, ValidationError, Worker, get_logger
18 | 
19 | logger = get_logger()
20 | EPOCH = 5
21 | 
22 | 
23 | class Preprocess(Worker):
24 |     def forward(self, data):
25 |         text = data.get("text")
26 |         if text is None:
27 |             raise ValidationError("text is required")
28 |         return text
29 | 
30 | 
31 | class Inference(SSEWorker):
32 |     def forward(self, data):
33 |         for _ in range(EPOCH):
34 |             # pylint: disable=consider-using-enumerate
35 |             for j in range(len(data)):
36 |                 self.send_stream_event(f"{data[j]}", index=j)
37 | 
38 |         # this return value will be ignored
39 |         return data
40 | 
41 | 
42 | if __name__ == "__main__":
43 |     server = Server()
44 |     server.append_worker(Preprocess)
45 |     server.append_worker(Inference, max_batch_size=2)
46 |     server.run()
47 | 


--------------------------------------------------------------------------------
/examples/stable_diffusion/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import argparse
16 | from http import HTTPStatus
17 | 
18 | import httpx
19 | import msgpack  # type: ignore
20 | 
21 | parser = argparse.ArgumentParser(
22 |     prog="stable diffusion client demo",
23 | )
24 | parser.add_argument(
25 |     "-p", "--prompt", default="a photo of an astronaut riding a horse on mars"
26 | )
27 | parser.add_argument(
28 |     "-o", "--output", default="stable_diffusion_result.jpg", help="output filename"
29 | )
30 | parser.add_argument(
31 |     "--port",
32 |     default=8000,
33 |     type=int,
34 |     help="service port",
35 | )
36 | 
37 | 
38 | args = parser.parse_args()
39 | resp = httpx.post(
40 |     f"http://127.0.0.1:{args.port}/inference",
41 |     content=msgpack.packb(args.prompt),
42 |     timeout=httpx.Timeout(20),
43 | )
44 | if resp.status_code == HTTPStatus.OK:
45 |     data = msgpack.unpackb(resp.content)
46 |     with open(args.output, "wb") as f:
47 |         f.write(data)
48 | else:
49 |     print(f"ERROR: <{resp.status_code}> {resp.text}")
50 | 


--------------------------------------------------------------------------------
/docs/source/examples/ipc.md:
--------------------------------------------------------------------------------
 1 | # Shared Memory IPC
 2 | 
 3 | This is an example demonstrating how you can enable the plasma shared memory store or customize your own IPC wrapper.
 4 | 
 5 | Mosec's multi-stage pipeline requires the output data from the previous stage to be transferred to the next stage across python processes. This is coordinated via Unix domain socket between every Python worker process from all stages and the Rust controller process.
 6 | 
 7 | By default, we serialize the data and directly transfer the bytes over the socket. However, users may find wrapping this IPC useful or more efficient for specific use cases. Therefore, we provide two example implementations: `PlasmaShmIPCMixin` based on [`pyarrow.plasma`](https://arrow.apache.org/docs/11.0/python/plasma.html) and `RedisShmIPCMixin` based on [`redis`](https://pypi.org/project/redis). We recommend using `RedisShmIPCMixin` for better performance and long-term support.
 8 | 
 9 | ```{warning}
10 | `plasma` is deprecated. Please use Redis instead.
11 | ```
12 | 
13 | The additional subprocess can be registered as a daemon, so that mosec checks it regularly and triggers a graceful shutdown when the daemon exits.
14 | 
15 | ## **`plasma_legacy.py`**
16 | 
17 | ```{include} ../../../examples/shm_ipc/plasma_legacy.py
18 | :code: python
19 | ```
20 | ## **`redis.py`**
21 | 
22 | ```{include} ../../../examples/shm_ipc/redis.py
23 | :code: python
24 | ```
25 | 
26 | ## Start
27 | 
28 | ```shell
29 | python examples/shm_ipc/plasma_legacy.py
30 | ```
31 | 
32 | or
33 | 
34 | ```shell
35 | python examples/shm_ipc/redis.py
36 | ```
37 | ## Test
38 | 
39 | ```shell
40 | http :8000/inference size=100
41 | ```
42 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "mosec"
 3 | version = "0.9.6"
 4 | authors = ["Keming <kemingy94@gmail.com>", "Zichen <lkevinzc@gmail.com>"]
 5 | edition = "2024"
 6 | license = "Apache-2.0"
 7 | readme = "README.md"
 8 | repository = "https://github.com/mosecorg/mosec"
 9 | description = "Model Serving made Efficient in the Cloud."
10 | documentation = "https://docs.rs/mosec"
11 | categories = ["science"]
12 | keywords = [
13 |   "machine-learning",
14 |   "deep-learning",
15 |   "cloud",
16 |   "model-serving",
17 |   "service",
18 | ]
19 | exclude = ["target", "examples", "tests", "scripts"]
20 | rust-version = "1.85"
21 | 
22 | [dependencies]
23 | bytes = "1.11"
24 | tokio = { version = "1.48", features = [
25 |   "rt",
26 |   "rt-multi-thread",
27 |   "time",
28 |   "macros",
29 |   "sync",
30 |   "signal",
31 |   "io-util",
32 | ] }
33 | derive_more = { version = "2.0.1", features = ["display", "error", "from"] }
34 | # async MPMC channel in which each message is received by only one consumer
35 | async-channel = "2.5"
36 | prometheus-client = "0.24.0"
37 | axum = { version = "0.8.7", default-features = false, features = [
38 |   "matched-path",
39 |   "original-uri",
40 |   "query",
41 |   "tokio",
42 |   "http1",
43 |   "http2",
44 | ] }
45 | async-stream = "0.3.6"
46 | serde = "1.0"
47 | serde_json = "1.0"
48 | utoipa = "5.4"
49 | utoipa-swagger-ui = { version = "9", features = ["axum"] }
50 | tower = "0.5.2"
51 | tower-http = { version = "0.6.7", features = [
52 |   "compression-zstd",
53 |   "decompression-zstd",
54 |   "compression-gzip",
55 |   "decompression-gzip",
56 | ] }
57 | log = { version = "0.4.28", features = ["kv"] }
58 | logforth = { version = "0.29.1", features = ["starter-log"] }
59 | jiff = "0.2.15"
60 | 


--------------------------------------------------------------------------------
/docs/source/examples/metric.md:
--------------------------------------------------------------------------------
 1 | # Customized Metrics
 2 | 
 3 | This is an example demonstrating how to add your customized Python side Prometheus metrics.
 4 | 
 5 | Mosec already has the Rust side metrics, including:
 6 | 
 7 | * throughput for the inference endpoint
 8 | * duration for each stage (including the IPC time)
 9 | * batch size (only for the `max_batch_size > 1` workers)
10 | * number of remaining tasks to be processed
11 | 
12 | If you need to monitor more details about the inference process, you can add some Python side metrics, e.g., the inference result distribution, the duration of some CPU-bound or GPU-bound processing, or the IPC time (obtained from `rust_step_duration - python_step_duration`).
13 | 
14 | This example has a simple WSGI app as the monitoring metrics service. In each worker process, the `Counter` will collect the inference results and export them to the metrics service. For the inference part, it parses the batch data and compares them with the average value.
15 | 
16 | For more information about the multiprocess mode for the metrics, check the [Prometheus doc](https://github.com/prometheus/client_python#multiprocess-mode-eg-gunicorn).
17 | 
18 | ## **`python_side_metrics.py`**
19 | 
20 | ```{include} ../../../examples/monitor/python_side_metrics.py
21 | :code: python
22 | ```
23 | 
24 | ## Start
25 | 
26 | ```shell
27 | python python_side_metrics.py
28 | ```
29 | 
30 | ## Test
31 | 
32 | ```shell
33 | http POST :8000/inference num=1
34 | ```
35 | 
36 | ## Check the Python side metrics
37 | 
38 | ```shell
39 | http :8080
40 | ```
41 | 
42 | ## Check the Rust side metrics
43 | 
44 | ```shell
45 | http :8000/metrics
46 | ```
47 | 
48 | ```{include} ../../../examples/monitor/README.md
49 | ```
50 | 


--------------------------------------------------------------------------------
/examples/rerank/server.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from os import getenv
16 | from typing import List
17 | 
18 | from msgspec import Struct
19 | from sentence_transformers import CrossEncoder  # type: ignore
20 | 
21 | from mosec import Server, Worker
22 | from mosec.mixin import TypedMsgPackMixin
23 | 
24 | DEFAULT_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
25 | WORKER_NUM = int(getenv("WORKER_NUM", default="1"))
26 | 
27 | 
28 | class Request(Struct, kw_only=True):  # request body: a query and the docs to score
29 |     query: str
30 |     docs: List[str]
31 | 
32 | 
33 | class Response(Struct, kw_only=True):  # response body returned by Encoder.forward
34 |     scores: List[float]  # presumably one score per doc, in request order
35 | 
36 | 
37 | class Encoder(TypedMsgPackMixin, Worker):
38 |     def __init__(self):
39 |         self.model_name = getenv("MODEL_NAME", default=DEFAULT_MODEL)
40 |         self.model = CrossEncoder(self.model_name)
41 | 
42 |     def forward(self, data: Request) -> Response:
43 |         scores = self.model.predict([[data.query, doc] for doc in data.docs])
44 |         return Response(scores=scores.tolist())
45 | 
46 | 
47 | if __name__ == "__main__":
48 |     server = Server()
49 |     server.append_worker(Encoder, num=WORKER_NUM)
50 |     server.run()
51 | 


--------------------------------------------------------------------------------
/examples/type_validation/server.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Request validation example."""
16 | 
17 | from typing import Any, List
18 | 
19 | from msgspec import Struct
20 | 
21 | from mosec import Server, Worker
22 | from mosec.mixin import TypedMsgPackMixin
23 | 
24 | 
25 | class Request(Struct):
26 |     """User request: a required binary payload and an optional name."""
27 | 
28 |     # pylint: disable=too-few-public-methods
29 | 
30 |     bin: bytes
31 |     name: str = "test"
32 | 
33 | 
34 | class Preprocess(TypedMsgPackMixin, Worker):
35 |     """Dummy preprocess to exit early if the validation fails."""
36 | 
37 |     def forward(self, data: Request) -> Any:
38 |         """Input will be parsed as the `Request`; only its `bin` field is passed on."""
39 |         print(f"received {data}")
40 |         return data.bin
41 | 
42 | 
43 | class Inference(TypedMsgPackMixin, Worker):
44 |     """Dummy batch inference."""
45 | 
46 |     def forward(self, data: List[bytes]) -> List[int]:
47 |         return [len(buf) for buf in data]
48 | 
49 | 
50 | if __name__ == "__main__":
51 |     server = Server()
52 |     server.append_worker(Preprocess)
53 |     server.append_worker(Inference, max_batch_size=16)
54 |     server.run()
55 | 


--------------------------------------------------------------------------------
/docs/source/reference/migration.md:
--------------------------------------------------------------------------------
 1 | # Migration Guide
 2 | 
 3 | This guide will help you migrate from other frameworks to `mosec`.
 4 | 
 5 | ## From the `Triton Inference Server`
 6 | 
 7 | Both [`PyTriton`](https://github.com/triton-inference-server/pytriton) and [`Triton Python Backend`](https://github.com/triton-inference-server/python_backend) are using [`Triton Inference Server`](https://github.com/triton-inference-server).
 8 | 
 9 | - `mosec` doesn't require a specific client, you can use any HTTP client library
10 | - dynamic batching is configured when calling the [`append_worker`](mosec.server.Server.append_worker)
11 | - `mosec` doesn't need to declare the `inputs` and `outputs`. If you want to validate the request, you can use the [`TypedMsgPackMixin`](mosec.mixin.typed_worker.TypedMsgPackMixin) (ref [Validate Request](https://mosecorg.github.io/mosec/examples/validate.html))
12 | 
13 | ### `Triton Python Backend`
14 | 
15 | - change the `TritonPythonModel` class to a worker class that inherits [`mosec.Worker`](mosec.worker.Worker)
16 | - move the `initialize` method to the `__init__` method in the new class
17 | - move the `execute` method to the `forward` method in the new class
18 | - if you still prefer to use the `auto_complete_config` method, you can merge it into the `__init__` method
19 | - `mosec` doesn't have the corresponding `finalize` method as an unloading handler
20 | - `mosec` doesn't require any special model directories or configurations
21 | - to run multiple replicas, configure the `num` in [`append_worker`](mosec.server.Server.append_worker)
22 | 
23 | ### `PyTriton`
24 | 
25 | - move the model loading logic to the `__init__` method, since this happens in a different process
26 | - move the `infer_func` function to the `forward` method
27 | 


--------------------------------------------------------------------------------
/docs/source/examples/multi_route.md:
--------------------------------------------------------------------------------
 1 | # Multi-Route
 2 | 
 3 | This example shows how to use the multi-route feature.
 4 | 
 5 | You will need this feature if you want to:
 6 | 
 7 | - Serve multiple models in one service on different endpoints.
 8 |   - e.g. register `/embedding` & `/classify` with different models
 9 | - Serve one model to multiple different endpoints in one service.
10 |   - e.g. register LLaMA with `/inference` and `/v1/chat/completions` to make it compatible with the OpenAI API
11 | - Share a worker in different routes
12 |   - The shared worker will collect the dynamic batch from multiple previous stages.
13 |   - If you want to have multiple runtimes with sharing, you can declare multiple runtime instances with the same worker class.
14 | 
15 | The worker definition part is the same as for a single route. The only difference is how you register the worker with the server.
16 | 
17 | Here we expose a new [concept](../reference/concept.md) called [`Runtime`](mosec.runtime.Runtime).
18 | 
19 | You can create the `Runtime` and register on the server with a `{endpoint: [Runtime]}` dictionary.
20 | 
21 | See the complete demo code below. This will run a service with two endpoints:
22 | 
23 | - `/inference` with `Preprocess` and `Inference`
24 | - `/v1/inference` with `TypedProcess`, `Inference` and `TypedPostprocess`
25 | 
26 | And the `Inference` worker is shared between the two routes.
27 | 
28 | ## Server
29 | 
30 | <details>
31 | <summary>multi_route_server.py</summary>
32 | 
33 | ```{include} ../../../examples/multi_route/server.py
34 | :code: python
35 | ```
36 | 
37 | </details>
38 | 
39 | ## Client
40 | 
41 | <details>
42 | <summary>multi_route_client.py</summary>
43 | 
44 | ```{include} ../../../examples/multi_route/client.py
45 | :code: python
46 | ```
47 | 
48 | </details>
49 | 


--------------------------------------------------------------------------------
/examples/custom_env.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Example: Custom Environment setup"""
15 | 
16 | import os
17 | 
18 | from mosec import Server, Worker, get_logger
19 | 
20 | logger = get_logger()
21 | 
22 | 
23 | class Inference(Worker):
24 |     """Customisable inference class."""
25 | 
26 |     def __init__(self):
27 |         super().__init__()
28 |         # initialize your models here and allocate dedicated device to it
29 |         device = os.getenv("CUDA_VISIBLE_DEVICES")
30 |         logger.info("initializing model on device=%s", device)
31 | 
32 |     def forward(self, data: dict) -> dict:
33 |         device = os.getenv("CUDA_VISIBLE_DEVICES")
34 |         # NOTE self.worker_id is 1-indexed
35 |         logger.info("worker=%d on device=%s is processing...", self.worker_id, device)
36 |         return {"device": device}
37 | 
38 | 
39 | if __name__ == "__main__":
40 |     NUM_DEVICE = 4
41 | 
42 |     def _get_cuda_device(cid: int) -> dict:
43 |         return {"CUDA_VISIBLE_DEVICES": str(cid)}
44 | 
45 |     server = Server()
46 | 
47 |     server.append_worker(
48 |         Inference, num=NUM_DEVICE, env=[_get_cuda_device(x) for x in range(NUM_DEVICE)]
49 |     )
50 |     server.run()
51 | 


--------------------------------------------------------------------------------
/mosec/mixin/numbin_worker.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """MOSEC NumBin IPC worker mixin.
16 | 
17 | Features:
18 | 
19 |     * deserialize IPC data with numbin
20 |     * serialize IPC data with numbin
21 | 
22 | Attention: numbin only supports NumPy ndarray types.
23 | """
24 | 
25 | # pylint: disable=import-outside-toplevel
26 | 
27 | from typing import Any
28 | 
29 | from mosec.errors import DecodingError, EncodingError
30 | 
31 | 
32 | class NumBinIPCMixin:
33 |     """NumBin IPC worker mixin interface."""
34 | 
35 |     # pylint: disable=no-self-use
36 | 
37 |     def serialize_ipc(self, data: Any) -> bytes:
38 |         """Serialize with NumBin for the IPC."""
39 |         import numbin
40 | 
41 |         try:
42 |             data_bytes = numbin.dumps(data)
43 |         except Exception as err:
44 |             raise EncodingError from err
45 |         return data_bytes
46 | 
47 |     def deserialize_ipc(self, data: bytes) -> Any:
48 |         """Deserialize with NumBin for the IPC."""
49 |         import numbin
50 | 
51 |         try:
52 |             array = numbin.loads(data)
53 |         except Exception as err:
54 |             raise DecodingError from err
55 |         return array
56 | 


--------------------------------------------------------------------------------
/examples/monitor/README.md:
--------------------------------------------------------------------------------
 1 | ## How to build a monitoring system for Mosec
 2 | In this tutorial, we will explain how to build a monitoring system for Mosec, which includes Prometheus and Grafana.
 3 | 
 4 | ### Prerequisites
 5 | Before starting, you need to have Docker and Docker Compose installed on your machine. If you don't have them installed, you can follow the [get-docker](https://docs.docker.com/get-docker/) and [compose](https://docs.docker.com/compose/install/) instructions to install them.
 6 | 
 7 | ## Starting the monitoring system
 8 | Clone the repository containing the docker-compose.yml file:
 9 | ```bash
10 | git clone https://github.com/mosecorg/mosec.git
11 | ```
12 | 
13 | Navigate to the directory containing the docker-compose.yml file:
14 | ```bash
15 | cd mosec/examples/monitor
16 | ```
17 | 
18 | Start the monitoring system by running the following command:
19 | ```bash
20 | docker-compose up -d
21 | ```
22 | This command will start three containers: Mosec, Prometheus, and Grafana.
23 | 
24 | 
25 | ## Test
26 | Send a test request to feed metrics to Prometheus.
27 | ```shell
28 | http POST :8000/inference num=1
29 | ```
30 | 
31 | ## Accessing Prometheus
32 | Prometheus is a monitoring and alerting system that collects metrics from Mosec. You can access the Prometheus UI by visiting http://127.0.0.1:9090 in your web browser.
33 | 
34 | ## Accessing Grafana
35 | Grafana is a visualization tool for monitoring and analyzing metrics. You can access the Grafana UI by visiting http://127.0.0.1:3000 in your web browser. The default username and password are both admin.
36 | 
37 | ## Stopping the monitoring system
38 | To stop the monitoring system, run the following command:
39 | 
40 | ```bash
41 | docker-compose down
42 | ```
43 | This command will stop and remove the containers created by Docker Compose.
44 | 


--------------------------------------------------------------------------------
/examples/segment/client.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import gzip
16 | from http import HTTPStatus
17 | from io import BytesIO
18 | 
19 | import httpx
20 | import msgpack  # type: ignore
21 | import numbin
22 | import numpy as np
23 | from PIL import Image  # type: ignore
24 | 
25 | truck_image = Image.open(
26 |     BytesIO(
27 |         httpx.get(
28 |             "https://raw.githubusercontent.com/facebookresearch/sam2/main/notebooks/images/truck.jpg"
29 |         ).content
30 |     )
31 | )
32 | array = np.array(truck_image.convert("RGB"))
33 | # assume we have obtains the low resolution mask from the previous step
34 | mask = np.zeros((256, 256))
35 | 
36 | resp = httpx.post(
37 |     "http://127.0.0.1:8000/inference",
38 |     content=gzip.compress(
39 |         msgpack.packb(  # type: ignore
40 |             {
41 |                 "image": numbin.dumps(array),
42 |                 "mask": numbin.dumps(mask),
43 |                 "labels": [1, 1],
44 |                 "point_coords": [[500, 375], [1125, 625]],
45 |             }
46 |         )
47 |     ),
48 |     headers={"Accept-Encoding": "gzip", "Content-Encoding": "gzip"},
49 | )
50 | assert resp.status_code == HTTPStatus.OK, resp.status_code
51 | res = numbin.loads(msgpack.loads(resp.content))
52 | assert res.shape == array.shape[:2], f"expect {array.shape[:2]}, got {res.shape}"
53 | 


--------------------------------------------------------------------------------
/.github/workflows/page.yml:
--------------------------------------------------------------------------------
 1 | name: Pages
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     paths:
 6 |       - 'mosec/**'
 7 |       - 'docs/**'
 8 |       - '.github/workflows/page.yml'
 9 |       - 'examples/**'
10 |       - '**.md'
11 |   push:
12 |     branches: [ main ]
13 |     paths:
14 |       - 'mosec/**'
15 |       - 'docs/**'
16 |       - '.github/workflows/page.yml'
17 |       - 'examples/**'
18 |       - '**.md'
19 |   # Allows you to run this workflow manually from the Actions tab
20 |   workflow_dispatch:
21 | 
22 | concurrency:
23 |   group: ${{ github.ref }}-${{ github.workflow }}
24 |   cancel-in-progress: true
25 | 
26 | env:
27 |   SCCACHE_GHA_ENABLED: "true"
28 |   RUSTC_WRAPPER: "sccache"
29 | 
30 | jobs:
31 |   build:
32 |     runs-on: ubuntu-latest
33 |     steps:
34 |     - uses: actions/checkout@v6
35 |     - name: Setup Pages
36 |       uses: actions/configure-pages@v5
37 |     - name: Install uv
38 |       uses: astral-sh/setup-uv@v7
39 |       with:
40 |         enable-cache: true
41 |     - name: Set up Rust
42 |       uses: dtolnay/rust-toolchain@stable
43 |     - name: Run sccache-cache
44 |       uses: mozilla-actions/sccache-action@v0.0.9
45 |     - name: Install dependencies
46 |       run: |
47 |         make install_py
48 |     - name: Generate docs
49 |       run: |
50 |         cd docs && make html
51 |     - name: Upload artifact
52 |       uses: actions/upload-pages-artifact@v4
53 |       with:
        # Upload only the generated HTML documentation
55 |         path: 'docs/build/html'
56 | 
57 |   deploy:
58 |     runs-on: ubuntu-latest
59 |     needs: build
60 |     if: ${{ github.event_name == 'push' }}
61 |     # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
62 |     permissions:
63 |       pages: write
64 |       id-token: write
65 |     environment:
66 |       name: github-pages
67 |       url: ${{ steps.deployment.outputs.page_url }}
68 |     steps:
69 |     - name: Deploy to GitHub Pages
70 |       id: deployment
71 |       uses: actions/deploy-pages@v4
72 | 


--------------------------------------------------------------------------------
/examples/stable_diffusion/server.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from io import BytesIO
16 | from typing import List
17 | 
18 | import torch  # type: ignore
19 | from diffusers import StableDiffusionPipeline  # type: ignore
20 | 
21 | from mosec import Server, Worker, get_logger
22 | from mosec.mixin import MsgpackMixin
23 | 
24 | logger = get_logger()
25 | 
26 | 
class StableDiffusion(MsgpackMixin, Worker):
    """Worker that turns a batch of text prompts into JPEG-encoded images."""

    def __init__(self):
        """Load the float16 pipeline and set the warm-up example batch."""
        self.pipe = StableDiffusionPipeline.from_pretrained(
            "sd-legacy/stable-diffusion-v1-5",
            torch_dtype=torch.float16,
        )
        self.pipe.enable_model_cpu_offload()
        warmup_prompt = "useless example prompt"
        self.example = [warmup_prompt] * 4  # warmup (bs=4)

    def forward(self, data: List[str]) -> List[memoryview]:
        """Run the pipeline on the prompts and return one JPEG buffer each.

        Arguments:
            data: the batch of prompts

        Returns:
            a buffer view per generated image, in pipeline output order

        """
        logger.debug("generate images for %s", data)
        output = self.pipe(data)  # type: ignore
        # the second element of the pipeline output is logged as the NSFW info
        logger.debug("NSFW: %s", output[1])
        encoded = []
        for picture in output[0]:  # type: ignore
            sink = BytesIO()
            picture.save(sink, format="JPEG")  # type: ignore
            encoded.append(sink.getbuffer())
        return encoded
46 | 
47 | 
if __name__ == "__main__":
    # one worker replica, batching up to 4 prompts per forward call
    server = Server()
    server.append_worker(
        StableDiffusion,
        num=1,
        max_batch_size=4,
        max_wait_time=10,
    )
    server.run()
52 | 


--------------------------------------------------------------------------------
/examples/echo.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Example: Sample structures for using mosec server."""
15 | 
16 | import time
17 | from types import MappingProxyType as ImmutableDict
18 | from typing import List
19 | 
20 | from mosec import Server, ValidationError, Worker, get_logger
21 | 
22 | logger = get_logger()
23 | 
24 | 
class Preprocess(Worker):
    """Extract the requested sleep time from the request payload."""

    example = ImmutableDict({"time": 0})

    def forward(self, data: dict) -> float:
        """Return ``data["time"]`` as a float.

        Raises:
            ValidationError: if the ``"time"`` key is missing

        """
        logger.debug("pre received %s", data)
        # Customized, simple input validation
        try:
            return float(data["time"])
        except KeyError as err:
            raise ValidationError(f"cannot find key {err}") from err
38 | 
39 | 
class Inference(Worker):
    """Sleep for the longest duration in the batch, then echo the batch."""

    example = (0, 1e-5, 2e-4)

    def forward(self, data: List[float]) -> List[float]:
        """Block for ``max(data)`` seconds and return ``data`` unchanged."""
        longest = max(data)
        logger.info("sleeping for %s seconds", longest)
        time.sleep(longest)
        return data
49 | 
50 | 
class Postprocess(Worker):
    """Wrap the sleep duration into the response message."""

    def forward(self, data: float) -> dict:
        """Return a ``{"msg": ...}`` dict describing the sleep duration."""
        logger.debug("post received %f", data)
        message = f"sleep {data} seconds"
        return {"msg": message}
57 | 
58 | 
if __name__ == "__main__":
    # three-stage pipeline: only the inference stage is batched
    server = Server()
    server.append_worker(Preprocess)
    server.append_worker(
        Inference,
        max_batch_size=32,
    )
    server.append_worker(Postprocess)
    server.run()
65 | 


--------------------------------------------------------------------------------
/tests/services/bad_service.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Simulate bad requests:
16 | 
17 | - Preprocess: raise ValidationError
18 | - Inference: raise random ServerError
19 | - client: disconnection
20 | """
21 | 
22 | import time
23 | from random import random
24 | from typing import List
25 | 
26 | from mosec import Server, ServerError, ValidationError, Worker, get_logger
27 | 
28 | logger = get_logger()
29 | LUCKY_THRESHOLD = 0.5
30 | 
31 | 
class Preprocess(Worker):
    """Extract the requested sleep time, rejecting requests without it."""

    def forward(self, data: dict) -> float:
        """Return ``data["time"]`` as a float.

        Raises:
            ValidationError: if the ``"time"`` key is missing

        """
        logger.debug("pre received %s", data)
        try:
            return float(data["time"])
        except KeyError as err:
            raise ValidationError(f"cannot find key {err}") from err
42 | 
43 | 
class Inference(Worker):
    """Randomly fail batches; otherwise sleep and echo the batch."""

    def forward(self, data: List[float]) -> List[float]:
        """Return ``data`` after sleeping, or raise a random ``ServerError``.

        A batch holding the single value ``0`` is always returned untouched.
        """
        # special case: {"time": 0}
        is_zero_probe = len(data) == 1 and data[0] == 0
        if is_zero_probe:
            return data
        # chaos
        unlucky = random() < LUCKY_THRESHOLD
        if unlucky:
            logger.info("bad luck, this batch will be drop")
            raise ServerError("no way")
        longest = max(data)
        logger.info("sleeping for %s seconds", longest)
        time.sleep(longest)
        return data
58 | 
59 | 
if __name__ == "__main__":
    # two preprocess replicas feeding one batched inference stage
    server = Server()
    server.append_worker(
        Preprocess,
        num=2,
    )
    server.append_worker(
        Inference,
        max_batch_size=32,
    )
    server.run()
65 | 


--------------------------------------------------------------------------------
/tests/mock_socket.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Mock socket interface."""
16 | 
17 | 
class MockSocket:
    """Mock socket object used to test protocol.

    Everything passed to :py:meth:`sendall` is appended to one in-memory
    bytes buffer, and :py:meth:`recv` / :py:meth:`recv_into` consume that
    buffer from the front.
    """

    def __init__(self, family=None):
        self.family = family
        self.buffer = b""
        self.timeout = None

    def recv(self, bufsize, flags=None) -> bytes:
        """Receive data from buffer with size=bufsize.

        Returns fewer bytes (possibly none) when the buffer holds less than
        ``bufsize``. ``flags`` is accepted and ignored.
        """
        data = self.buffer[:bufsize]
        self.buffer = self.buffer[bufsize:]
        return data

    def recv_into(self, buf: memoryview, nbytes=1):
        """Copy up to ``nbytes`` buffered bytes into ``buf``.

        Set nbytes=1 to avoid boundary condition.

        Returns:
            the number of bytes actually copied, which is smaller than
            ``nbytes`` (down to 0) when the buffer holds less data

        """
        chunk = self.buffer[:nbytes]
        received = len(chunk)
        # Assign exactly as many bytes as were available: a slice assignment
        # with mismatched lengths fails on a memoryview.
        if received:
            buf[:received] = chunk
        self.buffer = self.buffer[received:]
        return received

    def settimeout(self, timeout):
        """Record the timeout; it is stored but never enforced."""
        self.timeout = timeout

    def setblocking(self, flag):
        """Accept and ignore the blocking flag."""

    def listen(self, backlog):
        """Accept and ignore the backlog."""

    def sendall(self, data, flags=None):
        """Append ``data`` to the buffer and return its length."""
        self.buffer += data
        return len(data)

    # pylint: disable=no-self-use
    def getpeername(self):
        """Return a fixed placeholder peer address."""
        return ("peer-address", "peer-port")

    def close(self):
        """Do nothing; there is no underlying resource to release."""

    def connect(self, host):
        """Accept and ignore the host."""
61 | 
62 | 
class Socket:
    """Namespace exposing the constants and factory the tests need."""

    AF_UNIX = "AF_UNIX"
    SOCK_STREAM = "SOCK_STREAM"

    @staticmethod
    def socket(family=None, typ=None, protocol=None):
        """Build a ``MockSocket``; ``typ`` and ``protocol`` are ignored."""
        mock = MockSocket(family)
        return mock
70 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG base=nvidia/cuda:13.0.2-cudnn-runtime-ubuntu22.04
 2 | 
 3 | FROM ${base}
 4 | 
 5 | ENV DEBIAN_FRONTEND=noninteractive LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8
 6 | ENV PATH /opt/conda/bin:$PATH
 7 | 
 8 | ARG CONDA_VERSION=py311_25.9.1-1
 9 | 
10 | RUN apt update && \
11 |     apt install -y --no-install-recommends \
12 |         wget \
13 |         git \
14 |         ca-certificates && \
15 |     rm -rf /var/lib/apt/lists/*
16 | 
17 | RUN set -x && \
18 |     UNAME_M="$(uname -m)" && \
19 |     if [ "${UNAME_M}" = "x86_64" ]; then \
20 |         MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \
21 |         SHA256SUM="238abad23f8d4d8ba89dd05df0b0079e278909a36e06955f12bbef4aa94e6131"; \
22 |     elif [ "${UNAME_M}" = "aarch64" ]; then \
23 |         MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \
24 |         SHA256SUM="4e0723b9d76aa491cf22511dac36f4fdec373e41d2a243ff875e19b8df39bf94"; \
25 |     fi && \
26 |     wget "${MINICONDA_URL}" -O miniconda.sh -q && \
27 |     echo "${SHA256SUM} miniconda.sh" > shasum && \
28 |     if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status shasum; fi && \
29 |     mkdir -p /opt && \
30 |     bash miniconda.sh -b -p /opt/conda && \
31 |     rm miniconda.sh shasum && \
32 |     ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
33 |     echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
34 |     echo "conda activate base" >> ~/.bashrc && \
35 |     find /opt/conda/ -follow -type f -name '*.a' -delete && \
36 |     find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
37 |     /opt/conda/bin/conda clean -afy
38 | 
39 | ENV PYTHON_PREFIX=/opt/conda/bin
40 | ENV PATH="$PATH:/opt/conda/bin"
41 | 
42 | RUN update-alternatives --install /usr/bin/python python ${PYTHON_PREFIX}/python 1 && \
43 |     update-alternatives --install /usr/bin/python3 python3 ${PYTHON_PREFIX}/python3 1 && \
44 |     update-alternatives --install /usr/bin/pip pip ${PYTHON_PREFIX}/pip 1 && \
45 |     update-alternatives --install /usr/bin/pip3 pip3 ${PYTHON_PREFIX}/pip3 1
46 | 
47 | RUN pip install mosec
48 | 
49 | RUN mkdir -p /workspace
50 | WORKDIR /workspace
51 | 
52 | CMD [ "/bin/bash" ]
53 | 


--------------------------------------------------------------------------------
/examples/shm_ipc/redis.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Example: Using Redis store with mosec mixin RedisShmIPCMixin.
16 | 
17 | We start a subprocess for the Redis server, and pass the url
18 | to the redis client which serves as the shm mixin.
19 | We also register the redis server process as a daemon, so
20 | that when it exits the service is able to gracefully shut down
21 | and be restarted by the orchestrator.
22 | """
23 | 
24 | import subprocess
25 | 
26 | import numpy as np
27 | 
28 | from mosec import Server, ValidationError, Worker
29 | from mosec.mixin import RedisShmIPCMixin
30 | 
31 | 
class DataProducer(RedisShmIPCMixin, Worker):
    """Generate a random array whose length is given by the request."""

    def forward(self, data: dict) -> np.ndarray:
        """Return ``int(data["size"])`` random numbers.

        Raises:
            ValidationError: if the ``"size"`` key is missing

        """
        # pylint: disable=duplicate-code
        try:
            return np.random.rand(int(data["size"]))
        except KeyError as err:
            raise ValidationError(err) from err
42 | 
43 | 
class DataConsumer(RedisShmIPCMixin, Worker):
    """Turn the received array into a JSON-friendly response."""

    def forward(self, data: np.ndarray) -> dict:
        """Return the array as a list under the ``"ipc test data"`` key."""
        values = data.tolist()
        return {"ipc test data": values}
49 | 
50 | 
if __name__ == "__main__":
    # start the redis server
    with subprocess.Popen(["redis-server"]) as redis_process:
        # configure the redis url
        RedisShmIPCMixin.set_redis_url("redis://localhost:6379/0")

        server = Server()
        # register this process to be monitored
        server.register_daemon("redis-server", redis_process)
        server.append_worker(
            DataProducer,
            num=2,
        )
        server.append_worker(
            DataConsumer,
            num=2,
        )
        server.run()
62 | 


--------------------------------------------------------------------------------
/tests/services/timeout_service.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Test worker timeout configuration."""
16 | 
17 | import os
18 | import time
19 | from typing import Any
20 | 
21 | from mosec import Runtime, Server, Worker, get_logger
22 | 
23 | logger = get_logger()
24 | 
25 | 
class SleepyInference(Worker):
    """Echo worker that sleeps before answering."""

    def forward(self, data: Any) -> Any:
        """Sleep for ``SLEEP_DURATION`` seconds (default 0), then echo ``data``.

        The duration is read from the environment on every call.
        """
        pause = float(os.getenv("SLEEP_DURATION", default="0"))
        logger.info("sleep_duration %s", pause)
        time.sleep(pause)
        return data
34 | 
35 | 
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument("--sleep-duration", type=float, help="worker sleep duration")
    parser.add_argument("--worker-timeout", type=float, help="worker timeout")
    parser.add_argument("--port", type=int, help="port")
    parser.add_argument(
        "--runtime",
        action="store_true",
        help="use runtime register instead of append worker",
    )

    args = parser.parse_args()

    # both registration styles share the same timeout and worker environment
    worker_timeout = args.worker_timeout
    worker_env = [{"SLEEP_DURATION": str(args.sleep_duration)}]

    server = Server()
    if not args.runtime:
        server.append_worker(
            SleepyInference,
            timeout=worker_timeout,
            env=worker_env,
        )
    else:
        sleepy = Runtime(
            SleepyInference,
            timeout=worker_timeout,
            env=worker_env,
        )
        server.register_runtime({"/inference": [sleepy]})
    server.run()
69 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PY_SOURCE_FILES=mosec tests examples
 2 | RUST_SOURCE_FILES=src/*
 3 | RUST_BACKTRACE=1
 4 | 
 5 | install_py:
 6 | 	uv venv
 7 | 	uv sync --all-groups --all-extras
 8 | 	uv run -- prek install
 9 | 
10 | install_rs:
11 | 	rustup toolchain install nightly --no-self-update
12 | 	rustup component add rustfmt clippy --toolchain nightly
13 | 
14 | install: install_py install_rs
15 | 
16 | test:
17 | 	echo "Running tests for the main logic and mixin(!shm)"
18 | 	uv run -- pytest tests -vv -s -m "not shm"
19 | 	cargo test -vv
20 | 
21 | test_unit:
22 | 	echo "Running tests for the main logic"
23 | 	uv run -- pytest -vv -s tests/test_log.py tests/test_protocol.py tests/test_coordinator.py
24 | 	cargo test -vv
25 | 
26 | test_shm:
27 | 	echo "Running tests for the shm mixin"
28 | 	uv run -- pytest tests -vv -s -m "shm"
29 | 
30 | test_all:
31 | 	echo "Running tests for the all features"
32 | 	uv run -- pytest tests -vv -s
33 | 	cargo test -vv
34 | 
35 | test_chaos:
36 | 	@uv run -m tests.bad_req
37 | 
38 | doc:
39 | 	@cd docs && make html && cd ../
40 | 	@uv run -m http.server -d docs/build/html 7291 -b 127.0.0.1
41 | 
42 | clean:
43 | 	@cargo clean
44 | 	@uv cache clean
45 | 	@-rm -rf build/ dist/ .eggs/ site/ *.egg-info .pytest_cache .mypy_cache .ruff_cache
46 | 	@-find . -name '*.pyc' -type f -exec rm -rf {} +
47 | 	@-find . -name '__pycache__' -exec rm -rf {} +
48 | 
49 | package: clean
50 | 	uv run -- maturin build --release --out dist
51 | 
52 | publish: package
53 | 	uv run -- twine upload dist/*
54 | 
55 | format:
56 | 	@uv run -- ruff check --fix ${PY_SOURCE_FILES}
57 | 	@uv run -- ruff format ${PY_SOURCE_FILES}
58 | 	@cargo +nightly fmt --all
59 | 
60 | lint:
61 | 	@uv run -- ruff check ${PY_SOURCE_FILES}
62 | 	@uv run -- ruff format --check ${PY_SOURCE_FILES}
63 | 	@-rm mosec/_version.py
64 | 	@uv run -- pyright --stats
65 | 	@uv run -- mypy --non-interactive --install-types ${PY_SOURCE_FILES}
66 | 	@cargo +nightly fmt -- --check
67 | 
68 | semantic_lint:
69 | 	@cargo clippy -- -D warnings
70 | 
71 | version:
72 | 	@cargo metadata --format-version 1 | jq -r '.packages[] | select(.name == "mosec") | .version'
73 | 
74 | add_license:
75 | 	@addlicense -c "MOSEC Authors" **/*.py **/*.rs **/**/*.py
76 | 
77 | dep_license:
78 | 	@cargo license --direct-deps-only --authors --avoid-build-deps --avoid-dev-deps --do-not-bundle --all-features --json > license.json
79 | 
80 | .PHONY: test doc
81 | 


--------------------------------------------------------------------------------
/examples/multi_route/server.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Any
16 | 
17 | from msgspec import Struct
18 | 
19 | from mosec import Runtime, Server, Worker
20 | from mosec.mixin import TypedMsgPackMixin
21 | 
22 | 
class Request(Struct):
    """User request struct.

    Used as the input annotation of ``TypedPreprocess.forward``.
    """

    # pylint: disable=too-few-public-methods

    # binary payload; has no default value
    bin: bytes
    # sender name; defaults to "test"
    name: str = "test"
30 | 
31 | 
class TypedPreprocess(TypedMsgPackMixin, Worker):
    """Dummy preprocess to exit early if the validation failed."""

    def forward(self, data: Request) -> Any:
        """Print the sender and payload, then pass the payload on.

        Input will be parsed as the `Request`.
        """
        sender = data.name
        payload = data.bin
        print(f"received from {sender} with {payload!r}")
        return payload
39 | 
40 | 
class Preprocess(Worker):
    """Dummy preprocess worker that passes the raw request bytes through."""

    def deserialize(self, data: bytes) -> Any:
        """Skip decoding and hand the request bytes over untouched."""
        return data

    def forward(self, data: Any) -> Any:
        """Return the input unchanged."""
        return data
49 | 
50 | 
class Inference(Worker):
    """Dummy inference worker."""

    def forward(self, data: Any) -> Any:
        """Return one ``{"length": ...}`` dict per item of the batch."""
        lengths = []
        for item in data:
            lengths.append({"length": len(item)})
        return lengths
56 | 
57 | 
class TypedPostprocess(TypedMsgPackMixin, Worker):
    """Dummy postprocess with msgpack."""

    def forward(self, data: Any) -> Any:
        """Return the input unchanged."""
        return data
63 | 
64 | 
if __name__ == "__main__":
    typed_pre = Runtime(TypedPreprocess)
    pre = Runtime(Preprocess)
    # the same inference runtime is shared by both routes
    inf = Runtime(Inference, max_batch_size=16)
    typed_post = Runtime(TypedPostprocess)

    routes = {
        "/v1/inference": [typed_pre, inf, typed_post],
        "/inference": [pre, inf],
    }

    server = Server()
    server.register_runtime(routes)
    server.run()
78 | 


--------------------------------------------------------------------------------
/tests/services/multi_route_service.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Test multi-route service."""
16 | 
17 | from typing import Any
18 | 
19 | from msgspec import Struct
20 | 
21 | from mosec import Runtime, Server, Worker
22 | from mosec.mixin import TypedMsgPackMixin
23 | 
24 | 
class Request(Struct):
    """User request struct.

    Used as the input annotation of ``TypedPreprocess.forward``.
    """

    # pylint: disable=too-few-public-methods

    # binary payload; has no default value
    bin: bytes
    # sender name; defaults to "test"
    name: str = "test"
32 | 
33 | 
class TypedPreprocess(TypedMsgPackMixin, Worker):
    """Dummy preprocess to exit early if the validation failed."""

    def forward(self, data: Request) -> Any:
        """Print the sender and payload, then pass the payload on.

        Input will be parsed as the `Request`.
        """
        sender = data.name
        payload = data.bin
        print(f"received from {sender} with {payload!r}")
        return payload
41 | 
42 | 
class Preprocess(Worker):
    """Dummy preprocess worker that passes the raw request bytes through."""

    def deserialize(self, data: bytes) -> Any:
        """Skip decoding and hand the request bytes over untouched."""
        return data

    def forward(self, data: Any) -> Any:
        """Return the input unchanged."""
        return data
51 | 
52 | 
class Inference(Worker):
    """Dummy inference worker."""

    def forward(self, data: Any) -> Any:
        """Return one ``{"length": ...}`` dict per item of the batch."""
        lengths = []
        for item in data:
            lengths.append({"length": len(item)})
        return lengths
58 | 
59 | 
class TypedPostprocess(TypedMsgPackMixin, Worker):
    """Dummy postprocess with msgpack."""

    def forward(self, data: Any) -> Any:
        """Return the input unchanged."""
        return data
65 | 
66 | 
if __name__ == "__main__":
    typed_pre = Runtime(TypedPreprocess)
    pre = Runtime(Preprocess)
    # the same inference runtime is shared by both routes
    inf = Runtime(Inference, max_batch_size=16)
    typed_post = Runtime(TypedPostprocess)

    routes = {
        "/v1/inference": [typed_pre, inf, typed_post],
        "/inference": [pre, inf],
    }

    server = Server()
    server.register_runtime(routes)
    server.run()
80 | 


--------------------------------------------------------------------------------
/examples/shm_ipc/plasma_legacy.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Example: Using Plasma store with mosec mixin PlasmaShmIPCMixin.
16 | 
17 | We start a subprocess for the plasma server, and pass the path
18 | to the plasma client which serves as the shm mixin.
19 | We also register the plasma server process as a daemon, so
that when it exits the service is able to gracefully shut down
and be restarted by the orchestrator.
22 | """
23 | 
24 | import numpy as np
25 | from pyarrow import plasma  # type: ignore
26 | 
27 | from mosec import Server, ValidationError, Worker
28 | from mosec.mixin import PlasmaShmIPCMixin
29 | 
30 | 
class DataProducer(PlasmaShmIPCMixin, Worker):
    """Generate a random array whose length is given by the request."""

    def forward(self, data: dict) -> np.ndarray:
        """Return ``int(data["size"])`` random numbers.

        Raises:
            ValidationError: if the ``"size"`` key is missing

        """
        # pylint: disable=duplicate-code
        try:
            return np.random.rand(int(data["size"]))
        except KeyError as err:
            raise ValidationError(err) from err
41 | 
42 | 
class DataConsumer(PlasmaShmIPCMixin, Worker):
    """Turn the received array into a JSON-friendly response."""

    def forward(self, data: np.ndarray) -> dict:
        """Return the array as a list under the ``"ipc test data"`` key."""
        values = data.tolist()
        return {"ipc test data": values}
48 | 
49 | 
if __name__ == "__main__":
    # 200 Mb store, adjust the size according to your requirement
    store_bytes = 200 * 1000 * 1000
    with plasma.start_plasma_store(plasma_store_memory=store_bytes) as (
        shm_path,
        shm_process,
    ):
        # configure the plasma service path
        PlasmaShmIPCMixin.set_plasma_path(shm_path)

        server = Server()
        # register this process to be monitored
        server.register_daemon("plasma_server", shm_process)
        server.append_worker(
            DataProducer,
            num=2,
        )
        server.append_worker(
            DataConsumer,
            num=2,
        )
        server.run()
65 | 


--------------------------------------------------------------------------------
/src/apidoc.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 MOSEC Authors
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | use utoipa::openapi::{Components, OpenApi};
16 | 
17 | use crate::config::Route;
18 | 
/// Holder of the OpenAPI document that user routes are merged into.
#[derive(Default, Clone)]
pub(crate) struct MosecOpenAPI {
    /// The OpenAPI document being assembled.
    pub api: OpenApi,
}
23 | 
24 | impl MosecOpenAPI {
25 |     /// Merge the route request_body/response/schemas into the OpenAPI.
26 |     pub fn merge_route(&mut self, route: &Route) -> &mut Self {
27 |         let reserved = match route.is_sse {
28 |             true => "/openapi/reserved/inference",
29 |             false => "/openapi/reserved/inference_sse",
30 |         };
31 |         let mut path = self.api.paths.paths.get(reserved).unwrap().clone();
32 |         if let Some(mut op) = path.post.clone() {
33 |             if let Some(mut user_schemas) = route.schemas.clone() {
34 |                 if self.api.components.is_none() {
35 |                     self.api.components = Some(Components::default());
36 |                 }
37 |                 self.api
38 |                     .components
39 |                     .as_mut()
40 |                     .unwrap()
41 |                     .schemas
42 |                     .append(&mut user_schemas);
43 |             };
44 |             if let Some(req) = route.request_body.clone() {
45 |                 op.request_body = Some(req);
46 |             };
47 | 
48 |             if let Some(mut responses) = route.responses.clone() {
49 |                 op.responses.responses.append(&mut responses);
50 |             };
51 |             path.post = Some(op);
52 |         }
53 |         self.api.paths.paths.insert(route.endpoint.clone(), path);
54 | 
55 |         self
56 |     }
57 | 
58 |     /// Removes the reserved paths from the OpenAPI spec.
59 |     pub fn clean(&mut self) -> &mut Self {
60 |         self.api.paths.paths.remove("/openapi/reserved/inference");
61 |         self.api
62 |             .paths
63 |             .paths
64 |             .remove("/openapi/reserved/inference_sse");
65 |         self
66 |     }
67 | }
68 | 


--------------------------------------------------------------------------------
/mosec/mixin/msgpack_worker.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """MOSEC msgpack worker mixin.
16 | 
17 | Features:
18 | 
19 |     * deserialize request body with msgpack
20 |     * serialize response body with msgpack
21 | """
22 | 
23 | # pylint: disable=import-outside-toplevel
24 | 
25 | from typing import Any
26 | 
27 | from mosec.errors import DecodingError, EncodingError
28 | 
29 | 
class MsgpackMixin:
    """Worker mixin that (de)serializes request/response bodies with msgpack."""

    # pylint: disable=no-self-use

    resp_mime_type = "application/msgpack"

    def serialize(self, data: Any) -> bytes:
        """Pack the result of the last stage (egress) into msgpack bytes.

        Arguments:
            data: the **same type** as returned by
                :py:meth:`Worker.forward <mosec.worker.Worker.forward>`

        Returns:
            the msgpack-encoded bytes to put into the response body

        Raises:
            EncodingError: if the data cannot be serialized with msgpack

        """
        import msgpack  # type: ignore

        try:
            return msgpack.packb(data)  # type: ignore
        except Exception as err:
            raise EncodingError from err

    def deserialize(self, data: bytes) -> Any:
        """Unpack the msgpack request body for the first stage (ingress).

        Arguments:
            data: the raw bytes extracted from the request body

        Returns:
            the **same type** as the input of
            :py:meth:`Worker.forward <mosec.worker.Worker.forward>`;
            msgpack arrays are unpacked with ``use_list=False``

        Raises:
            DecodingError: if the data cannot be deserialized with msgpack

        """
        import msgpack

        try:
            return msgpack.unpackb(data, use_list=False)
        except Exception as err:
            raise DecodingError from err
80 | 


--------------------------------------------------------------------------------
/.github/workflows/check.yml:
--------------------------------------------------------------------------------
 1 | name: lint and test
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     paths:
 6 |       - '.github/workflows/check.yml'
 7 |       - 'mosec/**'
 8 |       - 'src/**'
 9 |       - 'tests/**'
10 |       - 'examples/**'
11 |       - 'pyproject.toml'
12 |       - 'Cargo.lock'
13 |       - 'Cargo.toml'
14 |   push:
15 |     branches:
16 |       - main
17 |     paths:
18 |       - '.github/workflows/check.yml'
19 |       - 'mosec/**'
20 |       - 'src/**'
21 |       - 'tests/**'
22 |       - 'examples/**'
23 |       - 'pyproject.toml'
24 |       - 'Cargo.lock'
25 |       - 'Cargo.toml'
26 |   merge_group:
27 | 
28 | concurrency:
29 |   group: ${{ github.ref }}-${{ github.workflow }}
30 |   cancel-in-progress: true
31 | 
32 | env:
33 |   SCCACHE_GHA_ENABLED: "true"
34 |   RUSTC_WRAPPER: "sccache"
35 | 
36 | jobs:
37 |   lint:
38 |     runs-on: ubuntu-latest
39 |     timeout-minutes: 5
40 |     steps:
41 |       - uses: actions/checkout@v6
42 |       - name: Install uv
43 |         uses: astral-sh/setup-uv@v7
44 |         with:
45 |           enable-cache: true
46 |       - name: Set up Rust
47 |         uses: dtolnay/rust-toolchain@stable
48 |       - name: Run sccache-cache
49 |         uses: mozilla-actions/sccache-action@v0.0.9
50 |       - name: Install dependencies
51 |         run: make install
52 |       - name: Lint
53 |         run: make lint semantic_lint
54 | 
55 |   test:
56 |     runs-on: ${{ matrix.os }}
57 |     timeout-minutes: 20
58 |     strategy:
59 |       fail-fast: false
60 |       matrix:
61 |         python-version: ["3.10", "3.11", "3.12", "3.13", "3.14", "3.14t"]
62 |         os: [ubuntu-24.04, ubuntu-24.04-arm, macos-15-intel, macos-14]
63 | 
64 |     steps:
65 |       - uses: actions/checkout@v6
66 |       - name: Install uv
67 |         uses: astral-sh/setup-uv@v7
68 |         with:
69 |           enable-cache: true
70 |           python-version: ${{ matrix.python-version }}
71 |       - name: Set up Rust
72 |         uses: dtolnay/rust-toolchain@stable
73 |       - name: Run sccache-cache
74 |         uses: mozilla-actions/sccache-action@v0.0.9
75 |       - name: Install components
76 |         run: make install
77 |       - name: Test unit
78 |         run: make test_unit
79 |       - name: Test
80 |         run: make test
81 |       - name: Test shm in Linux
82 |         # skip the shm test for Python >= 3.12 since pyarrow < 12 doesn't provide wheels for those versions
83 |         if: ${{ startsWith(matrix.os, 'ubuntu') && !startsWith(matrix.python-version, '3.12') && !startsWith(matrix.python-version, '3.13') && !startsWith(matrix.python-version, '3.14') }}
84 |         run: |
85 |           docker run --rm -d --name redis -p 6379:6379 redis
86 |           make test_shm
87 |           docker stop redis
88 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # For the full list of built-in configuration values, see the documentation:
 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 5 | 
 6 | import os
 7 | import sys
 8 | 
# Put the directory two levels up at the front of the import path
# (presumably the repository root, so autodoc can import `mosec` -- confirm).
sys.path.insert(0, os.path.abspath("../.."))

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = "mosec"
copyright = "2023, mosec maintainers"
author = "mosec maintainers"
release = "latest"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

# Sphinx extensions to load (built-in `sphinx.ext.*` plus third-party ones).
extensions = [
    "sphinx.ext.viewcode",
    "sphinx.ext.autodoc",
    "sphinx.ext.githubpages",
    "sphinx.ext.napoleon",
    "myst_parser",
    "sphinx_copybutton",
    "sphinxcontrib.programoutput",
    "sphinx_autodoc_typehints",
    "sphinxext.opengraph",
    "sphinx_sitemap",
]

templates_path = ["_templates"]
exclude_patterns = []
# Both reStructuredText and Markdown sources are accepted.
source_suffix = [".rst", ".md"]
master_doc = "index"
language = "en"

# Extension configuration
myst_heading_anchors = 3
autodoc_member_order = "bysource"
# https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html
napoleon_attr_annotations = True
napoleon_include_init_with_doc = True
napoleon_use_admonition_for_references = True
# https://sphinxext-opengraph.readthedocs.io/en/latest/
ogp_site_url = "https://mosecorg.github.io/mosec/"
ogp_image = "https://user-images.githubusercontent.com/38581401/240117836-f06199ba-c80d-413a-9cb4-5adc76316bda.png"
# https://sphinx-sitemap.readthedocs.io/en/latest/getting-started.html
html_baseurl = "https://mosecorg.github.io/mosec/"
# Extra files listed here for the HTML output (only `robots.txt`).
html_extra_path = ['robots.txt']
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = "furo"
html_logo = "https://user-images.githubusercontent.com/38581401/240117836-f06199ba-c80d-413a-9cb4-5adc76316bda.png"
html_static_path = ["_static"]
html_favicon = "https://user-images.githubusercontent.com/38581401/134798617-0104dc12-e0d4-4ed5-a79c-9e2435e99a14.png"

# Theme options passed to the `furo` theme selected above.
html_theme_options = {
    "sidebar_hide_name": True,
    "navigation_with_keys": True,
    "source_repository": "https://github.com/mosecorg/mosec",
    "source_branch": "main",
    "source_directory": "docs/source",
}
70 | 


--------------------------------------------------------------------------------
/mosec/mixin/typed_worker.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """MOSEC type validation mixin."""
16 | 
17 | # pylint: disable=import-outside-toplevel
18 | 
19 | from typing import Any, Dict, Optional, Tuple
20 | 
21 | from mosec import get_logger
22 | from mosec.errors import ValidationError
23 | from mosec.utils import ParseTarget, parse_func_type
24 | from mosec.worker import Worker
25 | 
26 | logger = get_logger()
27 | 
28 | 
class TypedMsgPackMixin(Worker):
    """Enable request type validation with `msgspec` and serde with `msgpack`.

    The expected request type is parsed from the `forward` signature.
    """

    # pylint: disable=no-self-use

    resp_mime_type = "application/msgpack"
    _input_typ: Optional[type] = None

    def deserialize(self, data: Any) -> Any:
        """Decode the msgpack request and validate it against the input type.

        Raises:
            ValidationError: if msgspec rejects the decoded data for the
                input type parsed from `forward`
        """
        import msgspec

        # parse the input type on first use and keep it on the instance
        if self._input_typ is None:
            self._input_typ = parse_func_type(self.forward, ParseTarget.INPUT)
        expected_type = self._input_typ

        try:
            decoded = msgspec.msgpack.decode(data, type=expected_type)
        except msgspec.ValidationError as err:
            raise ValidationError(err) from err
        return decoded

    def serialize(self, data: Any) -> bytes:
        """Encode the data into `msgpack` bytes with msgspec."""
        import msgspec

        encoded = msgspec.msgpack.encode(data)
        return encoded

    @classmethod
    def get_forward_json_schema(
        cls, target: ParseTarget, ref_template: str
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Build the JSON schema for the `forward` input or output type.

        Returns:
            a `(schema, component schemas)` pair; both are empty dicts when
            msgspec raises a `TypeError` for the parsed type (a warning is
            logged in that case)
        """
        import msgspec

        typ = parse_func_type(cls.forward, target)
        schema: Dict[str, Any]
        components: Dict[str, Any]
        try:
            (schema,), components = msgspec.json.schema_components(
                [typ], ref_template=ref_template
            )
        except TypeError as err:
            logger.warning(
                "Failed to generate JSON schema for %s: %s", cls.__name__, err
            )
            return {}, {}
        return schema, components
75 | 


--------------------------------------------------------------------------------
/examples/segment/server.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # refer to https://github.com/facebookresearch/sam2/blob/main/notebooks/image_predictor_example.ipynb
16 | 
17 | import numbin
18 | import torch  # type: ignore
19 | from sam2.sam2_image_predictor import SAM2ImagePredictor  # type: ignore
20 | 
21 | from mosec import Server, Worker, get_logger
22 | from mosec.mixin import MsgpackMixin
23 | 
logger = get_logger()
# Minimum major version reported by `torch.cuda.get_device_properties` for
# which TF32 is switched on in `SegmentAnything.__init__`.
MIN_TF32_MAJOR = 8
26 | 
27 | 
class SegmentAnything(MsgpackMixin, Worker):
    """SAM2 image segmentation worker with msgpack request/response bodies."""

    def __init__(self):
        # select the device for computation: CUDA first, then MPS, else CPU
        if torch.cuda.is_available():
            device_name = "cuda"
        elif torch.backends.mps.is_available():
            device_name = "mps"
        else:
            device_name = "cpu"
        device = torch.device(device_name)
        logger.info("using device: %s", device)

        self.predictor = SAM2ImagePredictor.from_pretrained(
            "facebook/sam2-hiera-large", device=device
        )

        # the remaining tweaks only apply to CUDA devices
        if device.type != "cuda":
            return

        # use bfloat16 (the autocast context is entered here and never exited)
        torch.autocast("cuda", dtype=torch.bfloat16).__enter__()
        # turn on tf32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
        properties = torch.cuda.get_device_properties(0)
        if properties.major >= MIN_TF32_MAJOR:
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True

    def forward(self, data: dict) -> bytes:
        """Predict one mask for the request and return it as numbin bytes.

        Reads the `image`, `point_coords`, `labels` and `mask` keys of `data`;
        `image` and `mask` are decoded with `numbin.loads`.
        """
        with torch.inference_mode():
            image = numbin.loads(data["image"])
            self.predictor.set_image(image)
            coords = data["point_coords"]
            labels = data["labels"]
            # add a leading axis to the decoded mask
            mask = numbin.loads(data["mask"])[None, :, :]
            masks, _, _ = self.predictor.predict(
                point_coords=coords,
                point_labels=labels,
                mask_input=mask,
                multimask_output=False,
            )
        return numbin.dumps(masks[0])
61 | 
62 | 
if __name__ == "__main__":
    # register one `SegmentAnything` worker (`num=1`) with `max_batch_size=1`
    # and start the server
    server = Server()
    server.append_worker(SegmentAnything, num=1, max_batch_size=1)
    server.run()
67 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Self
  2 | mosec/bin
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | wheelhouse/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | pip-wheel-metadata/
 28 | share/python-wheels/
 29 | *.egg-info/
 30 | .installed.cfg
 31 | *.egg
 32 | MANIFEST
 33 | 
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .nox/
 48 | .coverage
 49 | .coverage.*
 50 | .cache
 51 | nosetests.xml
 52 | coverage.xml
 53 | *.cover
 54 | *.py,cover
 55 | .hypothesis/
 56 | .pytest_cache/
 57 | 
 58 | # Translations
 59 | *.mo
 60 | *.pot
 61 | 
 62 | # Django stuff:
 63 | *.log
 64 | local_settings.py
 65 | db.sqlite3
 66 | db.sqlite3-journal
 67 | 
 68 | # Flask stuff:
 69 | instance/
 70 | .webassets-cache
 71 | 
 72 | # Scrapy stuff:
 73 | .scrapy
 74 | 
 75 | # Sphinx documentation
 76 | docs/_build/
 77 | 
 78 | # PyBuilder
 79 | target/
 80 | 
 81 | # Jupyter Notebook
 82 | .ipynb_checkpoints
 83 | 
 84 | # IPython
 85 | profile_default/
 86 | ipython_config.py
 87 | 
 88 | # pyenv
 89 | .python-version
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 99 | __pypackages__/
100 | 
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 | 
105 | # SageMath parsed files
106 | *.sage.py
107 | 
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 | 
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 | 
121 | # Rope project settings
122 | .ropeproject
123 | 
124 | # mkdocs documentation
125 | /site
126 | 
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 | 
132 | # Pyre type checker
133 | .pyre/
134 | 
135 | # Added by cargo
136 | debug/
137 | target/
138 | **/*.rs.bk
139 | *.pdb
140 | 
141 | # IDE
142 | .vscode/*
143 | .idea/*
144 | 
145 | # version file generated by setuptools_scm
146 | mosec/_version.py
147 | 
148 | # ruff
149 | .ruff_cache/
150 | 


--------------------------------------------------------------------------------
/docs/source/reference/concept.md:
--------------------------------------------------------------------------------
 1 | # Concept and FAQs
 2 | 
 3 | There are a few terms used in `mosec`.
 4 | 
 5 | - `worker`: a Python process that executes the `forward` method (inherit from [`mosec.Worker`](mosec.worker.Worker))
 6 | - `stage`: one processing unit in the pipeline, each stage contains several `worker` replicas
 7 |   - also known as [`Runtime`](mosec.runtime.Runtime) in the code
 8 |   - each stage retrieves the data from the previous stage and passes the result to the next stage
 9 |   - retrieved data will be deserialized by the [`Worker.deserialize_ipc`](mosec.worker.Worker.deserialize_ipc) method
10 |   - data to be passed will be serialized by the [`Worker.serialize_ipc`](mosec.worker.Worker.serialize_ipc) method
11 | - `ingress/egress`: the first/last stage in the pipeline
12 |   - ingress gets data from the client, while egress sends data to the client
13 |   - data will be deserialized by the ingress [`Worker.deserialize`](mosec.worker.Worker.deserialize) method and serialized by the egress [`Worker.serialize`](mosec.worker.Worker.serialize) method
14 | - `pipeline`: a chain of processing stages, will be registered to an endpoint (default: `/inference`)
15 |   - a server can have multiple pipelines, check the [multi-route](../examples/multi_route.md) example
16 | - `dynamic batching`: batch requests until either the max batch size or the max wait time is reached
17 | - `controller`: a Rust tokio thread that works on:
18 |   - read from the previous queue to get new tasks
19 |   - send tasks to the ready-to-process worker via the Unix domain socket
20 |   - receive results from the worker
21 |   - send the tasks to the next queue
22 | 
23 | ## FAQs
24 | 
25 | ### How to raise an exception?
26 | 
27 | Use the `raise` keyword with [mosec.errors](mosec.errors). Any other exception will be treated as a "500 Internal Server Error".
28 | 
29 | If a request raises any exception, the error will be returned to the client directly without going through the remaining stages.
30 | 
31 | ### How to change the serialization/deserialization methods?
32 | 
33 | Just let the ingress/egress worker inherit a suitable mixin like [`MsgpackMixin`](mosec.mixin.MsgpackMixin).
34 | 
35 | ```{note}
36 | The inheritance order matters in Python. Check [multiple inheritance](https://docs.python.org/3/tutorial/classes.html#multiple-inheritance) for more information.
37 | ```
38 | 
39 | You can also implement the `serialize`/`deserialize` methods directly in your ingress/egress worker.
40 | 
41 | ### How to share configurations among different workers?
42 | 
43 | If the configuration structure is initialized globally, all the workers should be able to use it directly.
44 | 
45 | If you want to assign different workers with different configurations, the best way is to use the `env` (ref [`append_worker`](mosec.server.Server.append_worker)).
46 | 


--------------------------------------------------------------------------------
/mosec/mixin/plasma_worker.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """MOSEC plasma worker mixin.
16 | 
17 | Provide another data transfer way between workers.
18 | 
19 | The data will be stored in plasma shared memory, while the object ID will be
20 | sent via the original way.
21 | 
22 |     use case: large image tensors
23 |     benefits: more stable P99 latency
24 | 
25 | ```{warning}
26 | The plasma is deprecated in `pyarrow`. Please use Redis instead.
27 | ```
28 | """
29 | 
30 | # pylint: disable=import-outside-toplevel
31 | 
32 | from os import environ
33 | from typing import Any
34 | 
35 | from mosec.worker import Worker
36 | 
37 | _PLASMA_PATH_ENV = "MOSEC_INTERNAL_PLASMA_PATH"
38 | 
39 | 
class PlasmaShmIPCMixin(Worker):
    """Plasma shared memory worker mixin interface.

    The payload is stored in the plasma store and only the object ID is
    returned from `serialize_ipc` / accepted by `deserialize_ipc`.
    """

    # lazily created plasma client, see `_get_client`
    _plasma_client = None

    @classmethod
    def set_plasma_path(cls, path: str):
        """Set the plasma service path.

        The path is stored in an environment variable and read back by
        `_get_client` when the client is created.
        """
        environ[_PLASMA_PATH_ENV] = path

    def _get_client(self):
        """Get the plasma client. This will create a new one if not exist.

        Raises:
            RuntimeError: if the plasma path has not been set
        """
        from pyarrow import plasma  # type: ignore

        if self._plasma_client is None:
            path = environ.get(_PLASMA_PATH_ENV)
            if not path:
                raise RuntimeError(
                    "please set the plasma path with "
                    "`PlasmaShmIPCMixin.set_plasma_path()`"
                )
            self._plasma_client = plasma.connect(path)
        return self._plasma_client

    def serialize_ipc(self, data: Any) -> bytes:
        """Save the data to the plasma server and return the id."""
        client = self._get_client()
        object_id = client.put(super().serialize_ipc(data))
        return object_id.binary()

    def deserialize_ipc(self, data: bytes) -> Any:
        """Get the data from the plasma server and delete it.

        The object is deleted from the store even when deserialization
        fails, so a bad payload does not stay in shared memory.
        """
        from pyarrow import plasma  # type: ignore

        client = self._get_client()
        object_id = plasma.ObjectID(bytes(data))
        stored = client.get(object_id)
        try:
            return super().deserialize_ipc(stored)
        finally:
            # always release the object once it has been fetched
            client.delete((object_id,))
79 | 


--------------------------------------------------------------------------------
/examples/monitor/python_side_metrics.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Example: Adding metrics service."""
15 | 
16 | import os
17 | import pathlib
18 | import tempfile
19 | from typing import List
20 | 
21 | from prometheus_client import (  # type: ignore
22 |     CollectorRegistry,
23 |     Counter,
24 |     multiprocess,
25 |     start_http_server,
26 | )
27 | 
28 | from mosec import Server, ValidationError, Worker, get_logger
29 | 
30 | logger = get_logger()
31 | 
32 | 
# Make sure PROMETHEUS_MULTIPROC_DIR is set: when it is unset or empty, fall
# back to a `prometheus_multiproc_dir` directory under the system temp dir.
# NOTE(review): `prometheus_client` is already imported at the top of this
# file; if the library reads this variable at import time, setting it here may
# be too late -- confirm.
if not os.getenv("PROMETHEUS_MULTIPROC_DIR"):
    metric_dir_path = os.path.join(tempfile.gettempdir(), "prometheus_multiproc_dir")
    pathlib.Path(metric_dir_path).mkdir(parents=True, exist_ok=True)
    os.environ["PROMETHEUS_MULTIPROC_DIR"] = metric_dir_path


# Dedicated registry with a multiprocess collector attached to it.
metric_registry = CollectorRegistry()
multiprocess.MultiProcessCollector(metric_registry)
# Counter labelled by result status and by the id of the reporting worker.
counter = Counter(
    "inference_result",
    "statistic of result",
    ("status", "worker_id"),
    registry=metric_registry,
)
48 | 
49 | 
class Inference(Worker):
    """Sample Inference Worker."""

    def __init__(self):
        super().__init__()
        # keep the worker id as a string so it can be used as a metric label
        self.worker_id = str(self.worker_id)

    def deserialize(self, data: bytes) -> int:
        """Decode the request and return its `num` field as an integer.

        Raises:
            ValidationError: if `num` cannot be read or converted to `int`
        """
        payload = super().deserialize(data)
        try:
            return int(payload.get("num"))
        except Exception as err:
            raise ValidationError(err) from err

    def forward(self, data: List[int]) -> List[bool]:
        """Flag each number that is at least the batch mean and count results."""
        mean = sum(data) / len(data)
        flags = [value >= mean for value in data]
        positives = sum(flags)
        counter.labels(status="true", worker_id=self.worker_id).inc(positives)
        counter.labels(status="false", worker_id=self.worker_id).inc(
            len(flags) - positives
        )
        return flags
73 | 
74 | 
if __name__ == "__main__":
    # Run the metrics server in another thread; it serves `metric_registry`
    # on port 5000.
    start_http_server(5000, registry=metric_registry)

    # Run the inference server with two `Inference` workers and a maximum
    # batch size of 8.
    server = Server()
    server.append_worker(Inference, num=2, max_batch_size=8)
    server.run()
83 | 


--------------------------------------------------------------------------------
/tests/bad_req.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """A chaos test that contains:
16 | 
17 | - normal request
18 | - early disconnection
19 | - client bad request data
20 | - service internal error
21 | """
22 | 
23 | import concurrent.futures
24 | import os
25 | import shlex
26 | import subprocess
27 | from http import HTTPStatus
28 | from random import random
29 | 
30 | import httpx
31 | 
32 | from tests.utils import wait_for_port_free, wait_for_port_open
33 | 
PORT = 5934
URL = f"http://127.0.0.1:{PORT}/inference"
# number of requests to send; can be overridden with the CHAOS_REQUEST env var
REQ_NUM = int(os.getenv("CHAOS_REQUEST", "10000"))
# set the thread number in case the CI server cannot get the real CPU number.
THREAD = 8
# payload threshold used in `main`: `random() > NORMAL_RATE` sends
# {"time": 0.1}, otherwise {"hey": 0} is sent
NORMAL_RATE = 0.3
40 | 
41 | 
def random_req(params, timeout):
    """POST `params` as JSON to `URL` with the given timeout; return the response."""
    return httpx.post(URL, json=params, timeout=timeout)
45 | 
46 | 
def main():
    """Fire `REQ_NUM` randomized requests, then check the service still answers.

    Raises:
        RuntimeError: if the final health-check request does not return 200
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=THREAD) as pool:
        pending = []
        for _ in range(REQ_NUM):
            # pick the payload first, then a random timeout below 1/3 second
            payload = {"time": 0.1} if random() > NORMAL_RATE else {"hey": 0}
            pending.append(pool.submit(random_req, payload, random() / 3.0))

        count = 0
        for finished in concurrent.futures.as_completed(pending):
            try:
                data = finished.result()
            except Exception as err:  # pylint: disable=broad-exception-caught
                print("[x]", err)
                continue
            print("[~]", data)
            count += 1

    print(f">> {count}/{REQ_NUM} requests received before disconnection")

    # re-try to check if the service is still alive
    resp = httpx.post(URL, json={"time": 0})
    if resp.status_code == HTTPStatus.OK:
        return
    print(resp)
    raise RuntimeError()
74 | 
75 | 
if __name__ == "__main__":
    # start the service under test as a child process listening on PORT
    service = subprocess.Popen(
        shlex.split(
            f"python tests/services/bad_service.py --debug --timeout 500 --port {PORT}"
        )
    )
    assert wait_for_port_open(port=PORT)
    try:
        main()
    finally:
        # always stop the service, even when the chaos run raised
        service.terminate()
    assert wait_for_port_free(port=PORT)
88 | 


--------------------------------------------------------------------------------
/tests/services/openapi_service.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Test OpenAPI generated spec."""
16 | 
17 | import sys
18 | from typing import Any, Dict, List, Type
19 | 
20 | from msgspec import Struct
21 | 
22 | from mosec import Server, Worker
23 | from mosec.mixin import TypedMsgPackMixin
24 | 
25 | 
class Request(Struct):
    """User request struct."""

    # NOTE(review): this docstring may surface in the generated JSON schema --
    # confirm before rewording it.
    # pylint: disable=too-few-public-methods

    # required binary payload
    bin: bytes
    # optional field, defaults to "test"
    name: str = "test"
33 | 
34 | 
class TypedPreprocess(TypedMsgPackMixin, Worker):
    """Dummy preprocess to exit early if the validation failed."""

    def forward(self, data: Request) -> Any:
        """Print the input, which is parsed as a `Request`, and return its `bin`."""
        print(f"received {data}")
        return data.bin
42 | 
43 | 
class UntypedPreprocess(TypedMsgPackMixin, Worker):
    """Dummy preprocess whose `forward` carries no type annotations."""

    def forward(self, data):
        """Print the input and return its `bin` attribute.

        Unlike `TypedPreprocess`, `data` is not annotated as `Request` here.
        """
        print(f"received {data}")
        return data.bin
51 | 
52 | 
class TypedInference(TypedMsgPackMixin, Worker):
    """Dummy batch inference with an annotated `forward`."""

    def forward(self, data: List[bytes]) -> List[int]:
        """Return the length of every buffer in the batch."""
        return list(map(len, data))
58 | 
59 | 
class UntypedInference(TypedMsgPackMixin, Worker):
    """Dummy batch inference without type annotations on `forward`."""

    def forward(self, data):
        """Return the length of every item in the batch."""
        return list(map(len, data))
65 | 
66 | 
if __name__ == "__main__":
    # the first CLI argument selects the two workers as "<preprocess>/<inference>"
    if len(sys.argv) <= 1:
        print("Please specify the worker mapping. e.g. TypedPreprocess/TypedInference")
        sys.exit(1)

    # worker classes that can be selected by name from the command line
    worker_mapping: Dict[str, Type[Worker]] = {
        "TypedPreprocess": TypedPreprocess,
        "UntypedPreprocess": UntypedPreprocess,
        "TypedInference": TypedInference,
        "UntypedInference": UntypedInference,
    }

    server = Server()
    preprocess_worker, inference_worker = sys.argv[1].split("/")
    # both workers are appended to the same "/v1/inference" route;
    # only the inference worker sets `max_batch_size`
    server.append_worker(worker_mapping[preprocess_worker], route="/v1/inference")
    server.append_worker(
        worker_mapping[inference_worker], max_batch_size=16, route="/v1/inference"
    )
    server.run()
86 | 


--------------------------------------------------------------------------------
/tests/test_protocol.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Test protocol related logic."""
16 | 
17 | import json
18 | import pickle
19 | import random
20 | import struct
21 | from typing import List
22 | 
23 | import pytest
24 | 
25 | from mosec.coordinator import State
26 | from mosec.protocol import Protocol
27 | from tests.mock_socket import Socket
28 | from tests.utils import imitate_controller_send
29 | 
30 | 
def echo(protocol: Protocol, data: List[bytes]):
    """Round-trip `data` through `protocol` and check nothing is lost.

    The payloads are first written to the socket the way the controller would
    send them, received through `protocol`, echoed back with a random flag and
    received once more (the protocol is symmetric).

    Payloads are compared as whole lists so that a surplus or missing payload
    fails the check instead of being silently ignored.
    """
    sent_flag = random.choice([1, 2, 4, 8])

    sent_ids, sent_payloads = imitate_controller_send(protocol.socket, data)

    _, got_ids, got_states, got_payloads = protocol.receive()  # client recv
    assert len(protocol.socket.buffer) == 0  # type: ignore
    assert got_ids == sent_ids
    got_payload_bytes = [bytes(x) for x in got_payloads]
    assert got_payload_bytes == sent_payloads

    # client echo
    protocol.send(sent_flag, got_ids, got_states, got_payload_bytes)
    # server recv (symmetric protocol)
    got_flag, got_ids, got_states, got_payloads = protocol.receive()

    assert len(protocol.socket.buffer) == 0  # type: ignore
    assert struct.unpack("!H", got_flag)[0] == sent_flag
    assert got_states == [State.INGRESS | State.EGRESS] * len(sent_ids)
    assert got_ids == sent_ids
    assert [bytes(x) for x in got_payloads] == sent_payloads
55 | 
56 | 
@pytest.fixture
def mock_protocol(mocker):
    """Provide a `Protocol` whose `socket` module is replaced by the mock."""
    mocker.patch("mosec.protocol.socket", Socket)
    return Protocol(name="test", addr="mock.uds")
62 | 
63 | 
@pytest.mark.parametrize(
    "test_data",
    [
        [],
        ["test"],
        [1, 2, 3],
        [
            json.dumps({"rid": "147982364", "data": "im_b64_str"}),
            json.dumps({"rid": "147982365", "data": "another_im_b64_str"}),
        ]
        * random.randint(1, 20),
    ],
)
def test_echo(mock_protocol, test_data):
    """Echo pickled payloads of various shapes through the mocked protocol."""
    mock_protocol.open()
    try:
        echo(mock_protocol, [pickle.dumps(x) for x in test_data])
    finally:
        # always release the socket, even when an assertion in `echo` fails
        mock_protocol.close()
81 | 


--------------------------------------------------------------------------------
/mosec/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Provide useful utils to inspect function type."""
16 | 
17 | import inspect
18 | import os
19 | import sysconfig
20 | from enum import Enum
21 | from pathlib import Path
22 | from typing import Any, List, Optional
23 | 
24 | 
25 | # adopted from https://github.com/PyO3/maturin/blob/main/maturin/__main__.py
26 | # License: Apache-2.0 or MIT
def get_mosec_path() -> Optional[Path]:
    """Locate the `mosec` binary in the interpreter's script directories.

    Every install scheme known to `sysconfig` is inspected in order. The first
    existing scripts directory that contains a file named `mosec` (any
    extension, searched recursively) wins.

    Returns:
        `<scripts dir>/mosec` for the first matching directory, or `None` when
        no scheme contains the script.
    """
    script_name = "mosec"

    def contains_script(directory: str) -> bool:
        # compare the file stem so that e.g. `mosec.exe` also matches
        return any(
            os.path.splitext(filename)[0] == script_name
            for _, _, filenames in os.walk(directory)
            for filename in filenames
        )

    script_dirs = (
        sysconfig.get_path("scripts", scheme)
        for scheme in sysconfig.get_scheme_names()
    )
    matches = [
        directory
        for directory in script_dirs
        if os.path.exists(directory) and contains_script(directory)
    ]

    if not matches:
        return None
    return Path(matches[0]) / script_name
54 | 
55 | 
56 | class ParseTarget(Enum):
57 |     """Enum to specify the target of parsing func type."""
58 | 
59 |     INPUT = "INPUT"
60 |     RETURN = "RETURN"
61 | 
62 | 
63 | def parse_func_type(func, target: ParseTarget) -> type:
64 |     """Parse the input type of the target function.
65 | 
66 |     - single request: return the type
67 |     - batch request: return the list item type
68 |     """
69 |     annotations = inspect.get_annotations(func, eval_str=True)
70 |     name = func.__name__
71 |     typ = Any
72 |     if target == ParseTarget.INPUT:
73 |         for key in annotations:
74 |             if key != "return":
75 |                 typ = annotations[key]
76 |                 break
77 |     else:
78 |         typ = annotations.get("return", Any)
79 | 
80 |     origin = getattr(typ, "__origin__", None)
81 |     if origin is None:
82 |         return typ  # type: ignore
83 |     # GenericAlias, `func` could be batch inference
84 |     if origin is list or origin is List:
85 |         if not hasattr(typ, "__args__") or len(typ.__args__) != 1:  # type: ignore
86 |             raise TypeError(
87 |                 f"`{name}` with dynamic batch should use "
88 |                 "`List[Struct]` as the input annotation"
89 |             )
90 |         return typ.__args__[0]  # type: ignore
91 |     raise TypeError(f"unsupported type {typ}")
92 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
 1 | # For most projects, this workflow file will not need changing; you simply need
 2 | # to commit it to your repository.
 3 | #
 4 | # You may wish to alter this file to override the set of languages analyzed,
 5 | # or to provide custom queries or build logic.
 6 | #
 7 | # ******** NOTE ********
 8 | # We have attempted to detect the languages in your repository. Please check
 9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 | 
14 | on:
15 |   push:
16 |     branches: [ "main" ]
17 |   pull_request:
18 |     # The branches below must be a subset of the branches above
19 |     branches: [ "main" ]
20 |   schedule:
21 |     - cron: '29 17 * * 1'
22 | 
23 | jobs:
24 |   analyze:
25 |     name: Analyze
26 |     runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
27 |     timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
28 |     permissions:
29 |       actions: read
30 |       contents: read
31 |       security-events: write
32 | 
33 |     strategy:
34 |       fail-fast: false
35 |       matrix:
36 |         language: [ 'python' ]
37 |         # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ]
38 |         # Use only 'java' to analyze code written in Java, Kotlin or both
39 |         # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
40 |         # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
41 | 
42 |     steps:
43 |     - name: Checkout repository
44 |       uses: actions/checkout@v6
45 | 
46 |     # Initializes the CodeQL tools for scanning.
47 |     - name: Initialize CodeQL
48 |       uses: github/codeql-action/init@v4
49 |       with:
50 |         languages: ${{ matrix.language }}
51 |         # If you wish to specify custom queries, you can do so here or in a config file.
52 |         # By default, queries listed here will override any specified in a config file.
53 |         # Prefix the list here with "+" to use these queries and those in the config file.
54 | 
55 |         # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
56 |         # queries: security-extended,security-and-quality
57 | 
58 | 
59 |     # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
60 |     # If this step fails, then you should remove it and run the build manually (see below)
61 |     - name: Autobuild
62 |       uses: github/codeql-action/autobuild@v4
63 | 
64 |     # ℹ️ Command-line programs to run using the OS shell.
65 |     # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
66 | 
67 |     #   If the Autobuild fails above, remove it and uncomment the following three lines.
68 |     #   modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
69 | 
70 |     # - run: |
71 |     #     echo "Run, Build Application using script"
72 |     #     ./location_of_script_within_repo/buildscript.sh
73 | 
74 |     - name: Perform CodeQL Analysis
75 |       uses: github/codeql-action/analyze@v4
76 |       with:
77 |         category: "/language:${{matrix.language}}"
78 | 


--------------------------------------------------------------------------------
/mosec/mixin/redis_worker.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """MOSEC redis worker mixin.
16 | 
17 | Provide another data transfer way between workers.
18 | 
19 | The data will be stored in redis shared memory, while the object ID will be
20 | sent via the original way.
21 | 
22 |     use case: large image tensors, cluster-shared data
23 |     benefits: more stable P99 latency
24 | 
25 | """
26 | 
27 | # pylint: disable=import-outside-toplevel
28 | 
29 | from os import environ
30 | from typing import Any
31 | 
32 | from mosec.worker import Worker
33 | 
34 | _REDIS_URL_ENV = "MOSEC_INTERNAL_REDIS_URL"
35 | _DEFAULT_KEY = "REDIS_SHM_IPC_KEY"
36 | 
37 | 
class RedisShmIPCMixin(Worker):
    """Worker mixin that passes IPC payloads through a redis server.

    `serialize_ipc` stores the serialized payload in redis and returns only
    the key; `deserialize_ipc` receives such a key, fetches the payload and
    deletes the key.
    """

    _redis_client = None
    _redis_key = _DEFAULT_KEY
    _next_id = None

    @classmethod
    def set_redis_url(cls, url: str):
        """Publish the redis service url through the environment."""
        environ[_REDIS_URL_ENV] = url

    def _get_client(self) -> Any:
        """Return the cached redis client, creating it on first use.

        Raises:
            RuntimeError: if no redis url has been configured.
        """
        import redis

        if self._redis_client is not None:
            return self._redis_client

        url = environ.get(_REDIS_URL_ENV)
        if not url:
            raise RuntimeError(
                "please set the redis url with `RedisShmIPCMixin.set_redis_url()`"
            )
        self._redis_client = redis.from_url(url)
        return self._redis_client

    def _prepare_next_id(self) -> None:
        """Reserve an id from the redis counter unless one is already held."""
        if self._next_id is not None:
            return
        counter = self._get_client().incr(self._redis_key)
        self._next_id = str(counter).encode("utf-8")

    def serialize_ipc(self, data: Any) -> bytes:
        """Store the data in redis and return the id it was stored under."""
        self._prepare_next_id()
        client = self._get_client()
        with client.pipeline() as pipe:
            object_id = self._next_id
            pipe.set(object_id, super().serialize_ipc(data))  # type: ignore
            # reserve the id for the next call in the same round trip
            pipe.incr(self._redis_key)
            reserved = pipe.execute()[-1]
            self._next_id = str(reserved).encode("utf-8")
        return object_id  # type: ignore

    def deserialize_ipc(self, data: bytes) -> Any:
        """Fetch the object stored under the given id and delete the key."""
        client = self._get_client()
        object_id = bytes(data)
        with client.pipeline() as pipe:
            pipe.get(object_id)
            pipe.delete(object_id)
            results = pipe.execute()
        return super().deserialize_ipc(results[0])
91 | 


--------------------------------------------------------------------------------
/examples/jax_single_layer/server.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 MOSEC Authors
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Example: Simple jax jitted inference with a single layer classifier."""
15 | 
16 | import os
17 | import time
18 | from typing import List
19 | 
20 | import chex  # type: ignore
21 | import jax  # type: ignore
22 | import jax.numpy as jnp  # type: ignore
23 | 
24 | from mosec import Server, ValidationError, Worker, get_logger
25 | 
26 | logger = get_logger()
27 | 
28 | INPUT_SIZE = 3
29 | LATENT_SIZE = 16
30 | OUTPUT_SIZE = 2
31 | 
32 | MAX_BATCH_SIZE = 8
33 | USE_JIT = os.getenv("USE_JIT", default="false")
34 | 
35 | 
class JittedInference(Worker):
    """Two-layer classifier evaluated with jax, optionally jitted."""

    def __init__(self):
        super().__init__()
        k_1, k_2 = jax.random.split(jax.random.PRNGKey(42))
        self._layer1_w = jax.random.normal(k_1, (INPUT_SIZE, LATENT_SIZE))
        self._layer1_b = jnp.zeros(LATENT_SIZE)
        self._layer2_w = jax.random.normal(k_2, (LATENT_SIZE, OUTPUT_SIZE))
        self._layer2_b = jnp.zeros(OUTPUT_SIZE)

        # Enumerate all batch sizes (1..MAX_BATCH_SIZE) for caching.
        dummy_array = list(range(INPUT_SIZE))
        self.multi_examples = [
            [{"array": dummy_array}] * batch_size
            for batch_size in range(1, MAX_BATCH_SIZE + 1)
        ]

        # USE_JIT comes from the environment; only the exact string "true"
        # switches jit compilation on.
        self.batch_forward = (
            jax.jit(self._batch_forward) if USE_JIT == "true" else self._batch_forward
        )

    def _forward(self, x_single: jnp.ndarray) -> jnp.ndarray:  # type: ignore
        """Classify one rank-1 input and return the argmax category."""
        chex.assert_rank([x_single], [1])
        hidden = jax.nn.relu(jnp.dot(self._layer1_w.T, x_single) + self._layer1_b)
        logits = jnp.dot(self._layer2_w.T, hidden) + self._layer2_b
        return jnp.argmax(jax.nn.softmax(logits), axis=-1)

    def _batch_forward(self, x_batch: jnp.ndarray) -> jnp.ndarray:  # type: ignore
        """Apply `_forward` to every row of a rank-2 batch via `jax.vmap`."""
        chex.assert_rank([x_batch], [2])
        return jax.vmap(self._forward)(x_batch)

    def forward(self, data: List[dict]) -> List[dict]:
        """Run the batch and report category plus elapsed time per request.

        Raises:
            ValidationError: if a request has no "array" key.
        """
        time_start = time.perf_counter()
        try:
            raw_inputs = [request["array"] for request in data]
        except KeyError as err:
            raise ValidationError(f"cannot find key {err}") from err
        categories = self.batch_forward(jnp.array(raw_inputs)).tolist()
        elapse = time.perf_counter() - time_start
        return [{"category": category, "elapse": elapse} for category in categories]
82 | 
83 | 
if __name__ == "__main__":
    # Single-stage service: requests are batched up to MAX_BATCH_SIZE before
    # they reach `JittedInference.forward`.
    server = Server()
    server.append_worker(JittedInference, max_batch_size=MAX_BATCH_SIZE)
    server.run()
88 | 


--------------------------------------------------------------------------------
/src/config.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 MOSEC Authors
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | use std::collections::BTreeMap;
 16 | use std::fmt;
 17 | 
 18 | use serde::Deserialize;
 19 | use utoipa::openapi::request_body::RequestBody;
 20 | use utoipa::openapi::{RefOr, Response, Schema};
 21 | 
/// Batching settings for one worker, deserialized from the service config.
#[derive(Deserialize, Debug)]
pub(crate) struct Runtime {
    /// upper bound on the number of requests in one batch
    pub max_batch_size: usize,
    /// batching wait time (NOTE(review): unit is presumably ms — confirm)
    pub max_wait_time: u64,
    /// name of the worker these settings belong to
    pub worker: String,
}
 28 | 
/// One HTTP route and the workers serving it, plus optional OpenAPI pieces.
///
/// `Debug` is implemented by hand and prints only the endpoint, the worker
/// names and the mime type.
#[derive(Deserialize)]
pub(crate) struct Route {
    /// route path, e.g. `/inference`
    pub endpoint: String,
    /// names of the workers attached to this route
    pub workers: Vec<String>,
    /// mime type, shown as `resp(...)` in the `Debug` output
    pub mime: String,
    /// server-sent events flag
    pub is_sse: bool,
    /// optional OpenAPI request body description
    pub request_body: Option<RequestBody>,
    /// optional OpenAPI responses
    pub responses: Option<BTreeMap<String, RefOr<Response>>>,
    /// optional OpenAPI schemas
    pub schemas: Option<BTreeMap<String, RefOr<Schema>>>,
}
 39 | 
 40 | impl fmt::Debug for Route {
 41 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 42 |         write!(
 43 |             f,
 44 |             "({}: [{}], resp({}))",
 45 |             self.endpoint,
 46 |             self.workers.join(", "),
 47 |             self.mime
 48 |         )
 49 |     }
 50 | }
 51 | 
/// Service configuration, deserialized as a whole.
#[derive(Deserialize, Debug)]
pub(crate) struct Config {
    /// socket dir
    pub path: String,
    /// channel capacity
    pub capacity: usize,
    /// service timeout (ms)
    pub timeout: u64,
    /// service address
    pub address: String,
    /// service port
    pub port: u16,
    /// metrics namespace
    pub namespace: String,
    /// log level: (debug, info, warning, error)
    pub log_level: String,
    /// `zstd` & `gzip` compression
    pub compression: bool,
    /// per-worker batching settings
    pub runtimes: Vec<Runtime>,
    /// routes exposed by the service
    pub routes: Vec<Route>,
}
 73 | 
 74 | impl Default for Config {
 75 |     fn default() -> Self {
 76 |         Self {
 77 |             path: String::from("/tmp/mosec"),
 78 |             capacity: 1024,
 79 |             timeout: 3000,
 80 |             address: String::from("0.0.0.0"),
 81 |             port: 8000,
 82 |             namespace: String::from("mosec_service"),
 83 |             log_level: String::from("info"),
 84 |             compression: false,
 85 |             runtimes: vec![Runtime {
 86 |                 max_batch_size: 64,
 87 |                 max_wait_time: 3000,
 88 |                 worker: String::from("Inference_1"),
 89 |             }],
 90 |             routes: vec![Route {
 91 |                 endpoint: String::from("/inference"),
 92 |                 workers: vec![String::from("Inference_1")],
 93 |                 mime: String::from("application/json"),
 94 |                 is_sse: false,
 95 |                 request_body: None,
 96 |                 responses: None,
 97 |                 schemas: None,
 98 |             }],
 99 |         }
100 |     }
101 | }
102 | 


--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 MOSEC Authors
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Useful functions for test."""
 16 | 
 17 | from __future__ import annotations
 18 | 
 19 | import contextlib
 20 | import os
 21 | import random
 22 | import socket
 23 | import struct
 24 | import time
 25 | from http import HTTPStatus
 26 | from io import BytesIO
 27 | from typing import TYPE_CHECKING, List, Tuple, Union
 28 | 
 29 | from mosec.coordinator import State
 30 | 
 31 | if TYPE_CHECKING:
 32 |     from tests.mock_socket import Socket as mock_socket
 33 | 
 34 | 
 35 | def imitate_controller_send(
 36 |     sock: Union[mock_socket, socket.socket], data: List[bytes]
 37 | ) -> Tuple[List[bytes], List[bytes]]:
 38 |     # explicit byte format here for sanity check
 39 |     # placeholder flag, should be discarded by receiver
 40 |     header = struct.pack("!HH", HTTPStatus.OK, len(data))
 41 |     buf = BytesIO()
 42 |     buf.write(header)
 43 |     sent_ids = []
 44 |     sent_payloads = []
 45 |     for datum in data:
 46 |         tid = struct.pack("!I", random.randint(1, 100))
 47 |         sent_ids.append(tid)
 48 |         sent_payloads.append(datum)
 49 |         length = struct.pack("!I", len(datum))
 50 |         buf.write(tid)
 51 |         buf.write(struct.pack("!H", State.INGRESS | State.EGRESS))  # task state
 52 |         buf.write(length)
 53 |         buf.write(datum)
 54 | 
 55 |     sock.sendall(buf.getbuffer())  # type: ignore
 56 |     return sent_ids, sent_payloads
 57 | 
 58 | 
 59 | def wait_for_port_open(
 60 |     host: str = "127.0.0.1", port: int = 8000, timeout: int = 10
 61 | ) -> bool:
 62 |     start_time = time.monotonic()
 63 |     while time.monotonic() - start_time < timeout:
 64 |         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 65 |         try:
 66 |             sock.connect((host, port))
 67 |             sock.shutdown(socket.SHUT_RDWR)
 68 |             return True
 69 |         except (ConnectionRefusedError, OSError):
 70 |             pass
 71 |         finally:
 72 |             sock.close()
 73 |         time.sleep(0.1)
 74 |     return False
 75 | 
 76 | 
 77 | def wait_for_port_free(
 78 |     host: str = "127.0.0.1", port: int = 8000, timeout: int = 5
 79 | ) -> bool:
 80 |     start_time = time.monotonic()
 81 |     while time.monotonic() - start_time < timeout:
 82 |         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 83 |         try:
 84 |             sock.connect((host, port))
 85 |             sock.shutdown(socket.SHUT_RDWR)
 86 |         except (ConnectionRefusedError, OSError):
 87 |             return True
 88 |         finally:
 89 |             sock.close()
 90 |         time.sleep(0.1)
 91 |     return False
 92 | 
 93 | 
 94 | @contextlib.contextmanager
 95 | def env_context(**kwargs):
 96 |     """Set environment variables for testing."""
 97 |     old_env = os.environ.copy()
 98 |     os.environ.update(kwargs)
 99 |     yield
100 |     os.environ.update(old_env)
101 | 


--------------------------------------------------------------------------------
/examples/distil_bert_server_pytorch.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 MOSEC Authors
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """Example: Mosec with Pytorch Distil BERT."""
 15 | 
 16 | from typing import Any, List
 17 | 
 18 | import torch  # type: ignore
 19 | from transformers import (  # type: ignore
 20 |     AutoModelForSequenceClassification,
 21 |     AutoTokenizer,
 22 | )
 23 | 
 24 | from mosec import Server, Worker, get_logger
 25 | 
 26 | logger = get_logger()
 27 | 
 28 | # type alias
 29 | Returns = Any
 30 | 
 31 | INFERENCE_BATCH_SIZE = 32
 32 | INFERENCE_WORKER_NUM = 1
 33 | 
 34 | 
 35 | class Preprocess(Worker):
 36 |     """Preprocess BERT on current setup."""
 37 | 
 38 |     def __init__(self):
 39 |         super().__init__()
 40 |         self.tokenizer = AutoTokenizer.from_pretrained(
 41 |             "distilbert-base-uncased-finetuned-sst-2-english"
 42 |         )
 43 | 
 44 |     def deserialize(self, data: bytes) -> str:
 45 |         # Override `deserialize` for the *first* stage;
 46 |         # `data` is the raw bytes from the request body
 47 |         return data.decode()
 48 | 
 49 |     def forward(self, data: str) -> Returns:
 50 |         tokens = self.tokenizer.encode(data, add_special_tokens=True)
 51 |         return tokens
 52 | 
 53 | 
 54 | class Inference(Worker):
 55 |     """Pytorch Inference class"""
 56 | 
 57 |     resp_mime_type = "text/plain"
 58 | 
 59 |     def __init__(self):
 60 |         super().__init__()
 61 |         self.device = (
 62 |             torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 63 |         )
 64 |         logger.info("using computing device: %s", self.device)
 65 |         self.model = AutoModelForSequenceClassification.from_pretrained(
 66 |             "distilbert-base-uncased-finetuned-sst-2-english"
 67 |         )
 68 |         self.model.eval()
 69 |         self.model.to(self.device)
 70 | 
 71 |         # Overwrite self.example for warmup
 72 |         self.example = [
 73 |             [101, 2023, 2003, 1037, 8403, 4937, 999, 102] * 5  # make sentence longer
 74 |         ] * INFERENCE_BATCH_SIZE
 75 | 
 76 |     def forward(self, data: List[Returns]) -> List[str]:
 77 |         tensors = [torch.tensor(token) for token in data]
 78 |         with torch.no_grad():
 79 |             result = self.model(
 80 |                 torch.nn.utils.rnn.pad_sequence(tensors, batch_first=True).to(
 81 |                     self.device
 82 |                 )
 83 |             )[0]
 84 |         scores = result.softmax(dim=1).cpu().tolist()
 85 |         return [f"positive={p}" for (_, p) in scores]
 86 | 
 87 |     def serialize(self, data: str) -> bytes:
 88 |         # Override `serialize` for the *last* stage;
 89 |         # `data` is the string from the `forward` output
 90 |         return data.encode()
 91 | 
 92 | 
if __name__ == "__main__":
    # Two-stage pipeline: tokenization followed by batched inference.
    server = Server()
    # run twice as many preprocess workers as inference workers
    server.append_worker(Preprocess, num=2 * INFERENCE_WORKER_NUM)
    server.append_worker(
        Inference, max_batch_size=INFERENCE_BATCH_SIZE, num=INFERENCE_WORKER_NUM
    )
    server.run()
100 | 


--------------------------------------------------------------------------------
/tests/services/mixin_ipc_shm_service.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 MOSEC Authors
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Test IPC shared memory storage."""
 16 | 
 17 | import sys
 18 | from typing import List
 19 | 
 20 | import numpy as np
 21 | 
 22 | from mosec import Server, Worker
 23 | from mosec.errors import ValidationError
 24 | from mosec.mixin import PlasmaShmIPCMixin, RedisShmIPCMixin
 25 | 
 26 | 
 27 | class PlasmaRandomService(PlasmaShmIPCMixin, Worker):
 28 |     def forward(self, data: List[dict]) -> List[dict]:
 29 |         try:
 30 |             result = [{"x": np.random.rand(int(req["size"]))} for req in data]
 31 |         except KeyError as err:
 32 |             raise ValidationError(err) from err
 33 |         return result
 34 | 
 35 | 
 36 | class PlasmaDummyPostprocess(PlasmaShmIPCMixin, Worker):
 37 |     """This dummy stage is added to test the shm IPC"""
 38 | 
 39 |     def forward(self, data: dict) -> dict:
 40 |         assert isinstance(data.get("x"), np.ndarray), f"wrong data type: {data}"
 41 |         data["x"] = data["x"].tolist()
 42 |         return data
 43 | 
 44 | 
 45 | class RedisRandomService(RedisShmIPCMixin, Worker):
 46 |     def forward(self, data: List[dict]) -> List[dict]:
 47 |         try:
 48 |             result = [{"x": np.random.rand(int(req["size"]))} for req in data]
 49 |         except KeyError as err:
 50 |             raise ValidationError(err) from err
 51 |         return result
 52 | 
 53 | 
 54 | class RedisDummyPostprocess(RedisShmIPCMixin, Worker):
 55 |     """This dummy stage is added to test the shm IPC"""
 56 | 
 57 |     def forward(self, data: dict) -> dict:
 58 |         assert isinstance(data.get("x"), np.ndarray), f"wrong data type: {data}"
 59 |         data["x"] = data["x"].tolist()
 60 |         return data
 61 | 
 62 | 
 63 | def start_redis_shm_mosec():
 64 |     # configure the plasma service path
 65 |     # this assumes the redis server is running at `localhost:6379`
 66 |     RedisShmIPCMixin.set_redis_url("redis://localhost:6379/0")
 67 | 
 68 |     server = Server()
 69 |     server.append_worker(RedisRandomService, max_batch_size=8)
 70 |     server.append_worker(RedisDummyPostprocess, num=2)
 71 |     server.run()
 72 | 
 73 | 
 74 | def start_plasma_shm_mosec():
 75 |     from pyarrow import plasma  # type: ignore
 76 | 
 77 |     # initialize a 20Mb object store as shared memory
 78 |     with plasma.start_plasma_store(plasma_store_memory=20 * 1000 * 1000) as (
 79 |         shm_path,
 80 |         shm_process,
 81 |     ):
 82 |         # configure the plasma shm path
 83 |         PlasmaShmIPCMixin.set_plasma_path(shm_path)
 84 | 
 85 |         server = Server()
 86 |         server.register_daemon("plasma_server", shm_process)
 87 |         server.append_worker(PlasmaRandomService, max_batch_size=8)
 88 |         server.append_worker(PlasmaDummyPostprocess, num=2)
 89 |         server.run()
 90 | 
 91 | 
 92 | if __name__ == "__main__":
 93 |     if len(sys.argv) <= 1:
 94 |         print("Please specify a shm storage service to run: plasma or redis")
 95 |         sys.exit(1)
 96 | 
 97 |     SERVICE = sys.argv[1]
 98 |     if SERVICE == "plasma":
 99 |         start_plasma_shm_mosec()
100 |     elif SERVICE == "redis":
101 |         start_redis_shm_mosec()
102 | 


--------------------------------------------------------------------------------
/src/layouts.rs:
--------------------------------------------------------------------------------
 1 | use logforth::kv::{Key, Value, Visitor};
 2 | use logforth::layout::text::colored::{Color, ColoredString, Colorize};
 3 | use logforth::record::{Level, Record};
 4 | use logforth::{Diagnostic, Error};
 5 | use serde::Serialize;
 6 | use serde_json::Map;
 7 | 
 8 | #[derive(Debug)]
 9 | pub(crate) struct ColoredLayout;
10 | 
11 | impl logforth::Layout for ColoredLayout {
12 |     fn format(&self, record: &Record, diags: &[Box<dyn Diagnostic>]) -> Result<Vec<u8>, Error> {
13 |         let ts = jiff::Timestamp::try_from(record.time()).unwrap();
14 | 
15 |         let level = ColoredString::from(record.level().to_string()).color(match record.level() {
16 |             Level::Fatal | Level::Fatal2 | Level::Fatal3 | Level::Fatal4 => Color::BrightRed,
17 |             Level::Error | Level::Error2 | Level::Error3 | Level::Error4 => Color::Red,
18 |             Level::Warn | Level::Warn2 | Level::Warn3 | Level::Warn4 => Color::Yellow,
19 |             Level::Info | Level::Info2 | Level::Info3 | Level::Info4 => Color::Green,
20 |             Level::Debug | Level::Debug2 | Level::Debug3 | Level::Debug4 => Color::Blue,
21 |             Level::Trace | Level::Trace2 | Level::Trace3 | Level::Trace4 => Color::Magenta,
22 |         });
23 | 
24 |         let target = record.target();
25 |         let line = record.line().unwrap_or_default();
26 |         let message = record.payload();
27 | 
28 |         struct KvWriter(String);
29 | 
30 |         impl Visitor for KvWriter {
31 |             fn visit(&mut self, key: Key, value: Value) -> Result<(), Error> {
32 |                 use std::fmt::Write;
33 |                 // SAFETY: write to a string always succeeds
34 |                 write!(&mut self.0, " {key}={value}").unwrap();
35 |                 Ok(())
36 |             }
37 |         }
38 | 
39 |         let mut visitor = KvWriter(format!("{ts:.6} {level:>6} {target}:{line} {message}"));
40 |         record.key_values().visit(&mut visitor)?;
41 |         for d in diags {
42 |             d.visit(&mut visitor)?;
43 |         }
44 | 
45 |         Ok(visitor.0.into_bytes())
46 |     }
47 | }
48 | 
49 | #[derive(Debug)]
50 | pub(crate) struct JsonLayout;
51 | 
52 | impl logforth::Layout for JsonLayout {
53 |     fn format(&self, record: &Record, diags: &[Box<dyn Diagnostic>]) -> Result<Vec<u8>, Error> {
54 |         let diagnostics = diags;
55 | 
56 |         let ts = jiff::Timestamp::try_from(record.time()).unwrap();
57 | 
58 |         struct FieldsVisitor(Map<String, serde_json::Value>);
59 | 
60 |         impl Visitor for FieldsVisitor {
61 |             fn visit(&mut self, key: Key, value: Value) -> Result<(), Error> {
62 |                 let key = key.to_string();
63 |                 match serde_json::to_value(&value) {
64 |                     Ok(value) => self.0.insert(key, value),
65 |                     Err(_) => self.0.insert(key, value.to_string().into()),
66 |                 };
67 |                 Ok(())
68 |             }
69 |         }
70 | 
71 |         let mut visitor = FieldsVisitor(Map::new());
72 |         visitor.visit(Key::new("message"), record.payload().into())?;
73 |         record.key_values().visit(&mut visitor)?;
74 |         for d in diagnostics {
75 |             d.visit(&mut visitor)?;
76 |         }
77 | 
78 |         #[derive(Debug, Clone, Serialize)]
79 |         struct RecordLine<'a> {
80 |             timestamp: String,
81 |             level: &'a str,
82 |             target: String,
83 |             #[serde(skip_serializing_if = "Map::is_empty")]
84 |             fields: Map<String, serde_json::Value>,
85 |         }
86 | 
87 |         let record_line = RecordLine {
88 |             timestamp: format!("{ts:.6}"),
89 |             level: record.level().name(),
90 |             target: format!("{}:{}", record.target(), record.line().unwrap_or_default(),),
91 |             fields: visitor.0,
92 |         };
93 | 
94 |         Ok(serde_json::to_vec(&record_line).unwrap())
95 |     }
96 | }
97 | 


--------------------------------------------------------------------------------
/mosec/env.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 MOSEC Authors
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Environment variables related functions."""
 16 | 
 17 | from __future__ import annotations
 18 | 
 19 | import contextlib
 20 | import os
 21 | import warnings
 22 | from argparse import Namespace
 23 | from typing import Any, Dict, List, Union
 24 | 
# Default prefix of the environment variables read by `get_env_namespace`.
MOSEC_ENV_PREFIX = "MOSEC_"
# Option name -> callable applied to the raw environment string.
# The variable looked up for an option is the prefix followed by the
# upper-cased option name, e.g. ``MOSEC_PORT`` for "port".
MOSEC_ENV_CONFIG = {
    "path": str,
    "capacity": int,
    "timeout": int,
    "address": str,
    "port": int,
    "namespace": str,
    "debug": bool,
    "dry_run": bool,
    "log_level": str,
}
 37 | 
 38 | 
 39 | @contextlib.contextmanager
 40 | def env_var_context(env: None | List[Dict[str, str]], index: int):
 41 |     """Manage the environment variables for a worker process."""
 42 |     default: Dict = {}
 43 |     try:
 44 |         if env is not None:
 45 |             for key, value in env[index].items():
 46 |                 default[key] = os.getenv(key, "")
 47 |                 os.environ[key] = value
 48 |         yield None
 49 |     finally:
 50 |         for key, value in default.items():
 51 |             os.environ[key] = value
 52 | 
 53 | 
 54 | def get_env_namespace(prefix: str = MOSEC_ENV_PREFIX) -> Namespace:
 55 |     """Read the config from environment variables before the argument parsing.
 56 | 
 57 |     Priority: CLI > env > default value.
 58 |     """
 59 |     namespace = Namespace()
 60 |     for name, converter in MOSEC_ENV_CONFIG.items():
 61 |         var = f"{prefix}{name.upper()}"
 62 |         value = os.getenv(var)
 63 |         if not value:
 64 |             continue
 65 |         try:
 66 |             val = converter(value)
 67 |         except ValueError as err:
 68 |             warnings.warn(
 69 |                 f"failed to convert env {var}={value} to type {converter} {err}, "
 70 |                 "will skip this one",
 71 |                 RuntimeWarning,
 72 |                 stacklevel=2,
 73 |             )
 74 |         else:
 75 |             setattr(namespace, name, val)
 76 | 
 77 |     return namespace
 78 | 
 79 | 
 80 | def validate_int_ge(number, name, threshold=1):
 81 |     """Validate int number is greater than threshold."""
 82 |     assert isinstance(number, int), (
 83 |         f"{name} must be integer but you give {type(number)}"
 84 |     )
 85 |     assert number >= threshold, f"{name} must be no less than {threshold}"
 86 | 
 87 | 
 88 | def validate_float_ge(number, name, threshold=0.0):
 89 |     """Validate float number is greater than threshold."""
 90 |     assert isinstance(number, float), (
 91 |         f"{name} must be float but you give {type(number)}"
 92 |     )
 93 |     assert number >= threshold, f"{name} must be no less than {threshold}"
 94 | 
 95 | 
 96 | def validate_str_dict(dictionary: Dict):
 97 |     """Validate keys and values of the dictionary is string type."""
 98 |     for key, value in dictionary.items():
 99 |         if not (isinstance(key, str) and isinstance(value, str)):
100 |             return False
101 |     return True
102 | 
103 | 
def validate_env(env: Union[Any, List[Dict[str, str]]], num: int):
    """Validate the per-worker environment variable settings.

    ``None`` is accepted and means "no extra environment variables".
    Otherwise ``env`` must be a list with exactly ``num`` items, each of them
    a dictionary whose keys and values are all strings.

    Args:
        env: the value to validate.
        num: expected number of dictionaries.

    Raises:
        AssertionError: if ``env`` does not have the shape described above.
    """
    if env is None:
        return
    type_hint = "env must be a list of string dictionary"
    # Check the container type before calling len(): a value without a length
    # would otherwise raise TypeError instead of the intended assertion.
    assert isinstance(env, list), type_hint
    assert len(env) == num, "len(env) must equal to num"
    assert all(isinstance(x, dict) and validate_str_dict(x) for x in env), type_hint
115 | 


--------------------------------------------------------------------------------
/docs/source/examples/pytorch.md:
--------------------------------------------------------------------------------
 1 | # PyTorch Examples
 2 | 
 3 | Here are some out-of-the-box model servers powered by mosec for [PyTorch](https://pytorch.org/) users. We use the version 1.9.0 in the following examples.
 4 | 
 5 | ## Natural Language Processing
 6 | 
 7 | Natural language processing model servers usually receive text data and make predictions ranging from text classification, question answering to translation and text generation.
 8 | 
 9 | ### Sentiment Analysis
10 | 
11 | This server receives a string and predicts how positive its content is. We build the model server based on [Transformers](https://github.com/huggingface/transformers) of version 4.11.0.
12 | 
13 | We show how to customize the `deserialize` method of the ingress stage (`Preprocess`) and the `serialize` method of the egress stage (`Inference`). In this way, we can enjoy the high flexibility, directly reading data bytes from request body and writing the results into response body.
14 | 
15 | Note that in a stage that enables batching (e.g. `Inference` in this example), its worker's `forward` method deals with a list of data, while its `serialize` and `deserialize` methods only need to manipulate individual datum.
16 | 
17 | #### Server
18 | 
19 | ```shell
20 | python distil_bert_server_pytorch.py
21 | ```
22 | 
23 | <details>
24 | <summary>distil_bert_server_pytorch.py</summary>
25 | 
26 | ```{include} ../../../examples/distil_bert_server_pytorch.py
27 | :code: python
28 | ```
29 | 
30 | </details>
31 | 
32 | #### Client
33 | 
34 | ```shell
35 | echo 'i bought this product for many times, highly recommend' | http POST :8000/inference
36 | ```
37 | 
38 | ## Computer Vision
39 | 
40 | Computer vision model servers usually receive images or links to the images (downloading from the link becomes an I/O workload then), feed the preprocessed image data into the model and extract information like categories, bounding boxes and pixel labels as results.
41 | 
42 | ### Image Recognition
43 | 
44 | This server receives an image and classifies it according to the [ImageNet](https://www.image-net.org/) categorization. We specifically use [ResNet](https://arxiv.org/abs/1512.03385) as an image classifier and build a model service based on it. Nevertheless, this file serves as the starter code for any kind of image recognition model server.
45 | 
46 | We enable multiprocessing for the `Preprocess` stage, so that it can produce enough tasks for the `Inference` stage to do **batch inference**, which better exploits the GPU computing power. More interestingly, we also start multiple models by setting the number of workers for the `Inference` stage to 2. This is because a single model can hardly saturate the GPU memory or utilization. Multiple models running on the same device in parallel can further increase our service throughput.
47 | 
48 | When instantiating the `Server`, we enable `plasma_shm`, which utilizes the [`pyarrow.plasma`](https://arrow.apache.org/docs/11.0/python/plasma.html) as a shared memory data store for IPC. This could benefit the data transfer, especially when the data is large (preprocessed image data in this case). Note that you need to use `pip install -U pyarrow==11` to install necessary dependencies.
49 | 
50 | We also demonstrate how to customize **validation** on the data content through this example. In the `forward` method of the `Preprocess` worker, we first check the key of the input, then try to decode the bytes and load them as an image. If any of these steps fails, we raise the `ValidationError`. The status will finally be returned to our clients as [HTTP 422](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/422).
51 | 
52 | #### Server
53 | 
54 | ```shell
55 | python examples/resnet50_msgpack/server.py
56 | ```
57 | 
58 | <details>
59 | <summary>resnet50_server_msgpack.py</summary>
60 | 
61 | ```{include} ../../../examples/resnet50_msgpack/server.py
62 | :code: python
63 | ```
64 | 
65 | </details>
66 | 
67 | #### Client
68 | 
69 | ```shell
70 | python examples/resnet50_msgpack/client.py
71 | ```
72 | 
73 | <details>
74 | <summary>resnet50_client_msgpack.py</summary>
75 | 
76 | ```{include} ../../../examples/resnet50_msgpack/client.py
77 | :code: python
78 | ```
79 | 
80 | </details>
81 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | # https://peps.python.org/pep-0621/
  2 | [project]
  3 | name = "mosec"
  4 | description = "Model Serving made Efficient in the Cloud"
  5 | readme = "README.md"
  6 | authors = [
  7 |   { name = "Keming", email = "kemingy94@gmail.com" },
  8 |   { name = "Zichen", email = "lkevinzc@gmail.com" },
  9 | ]
 10 | license = { text = "Apache-2.0" }
 11 | keywords = ["machine learning", "deep learning", "model serving"]
 12 | dynamic = ["version"]
 13 | requires-python = ">=3.10"
 14 | dependencies = []
 15 | classifiers = [
 16 |   "Environment :: GPU",
 17 |   "Intended Audience :: Developers",
 18 |   "Intended Audience :: Science/Research",
 19 |   "License :: OSI Approved :: Apache Software License",
 20 |   "Programming Language :: Python :: 3 :: Only",
 21 |   "Programming Language :: Python :: 3.10",
 22 |   "Programming Language :: Python :: 3.11",
 23 |   "Programming Language :: Python :: 3.12",
 24 |   "Programming Language :: Python :: 3.13",
 25 |   "Programming Language :: Python :: 3.14",
 26 |   "Programming Language :: Python :: Implementation :: CPython",
 27 |   "Programming Language :: Python :: Implementation :: PyPy",
 28 |   "Programming Language :: Rust",
 29 |   "Topic :: Scientific/Engineering :: Artificial Intelligence",
 30 |   "Topic :: Software Development :: Libraries :: Python Modules",
 31 |   "Topic :: Software Development :: Build Tools",
 32 | ]
 33 | 
 34 | [project.urls]
 35 | homepage = "https://mosecorg.github.io/"
 36 | documentation = "https://mosecorg.github.io/mosec/"
 37 | repository = "https://github.com/mosecorg/mosec"
 38 | changelog = "https://github.com/mosecorg/mosec/releases"
 39 | [project.scripts]
 40 | [project.optional-dependencies]
 41 | validation = ["msgspec>=0.20"]
 42 | redis = ["redis>=5.2.1"]
 43 | msgpack = ["msgpack>=1.1.0"]
 44 | numbin = ["numbin>=0.5.0"]
 45 | 
 46 | [build-system]
 47 | requires = ["maturin>=1.8,<2.0"]
 48 | build-backend = "maturin"
 49 | 
 50 | [tool.maturin]
 51 | bindings = "bin"
 52 | python-packages = ["mosec"]
 53 | strip = true
 54 | exclude = ["tests/**/*", "docs/**/*", "examples/**/*"]
 55 | 
 56 | [tool.uv]
 57 | cache-keys = [
 58 |   { file = "pyproject.toml" },
 59 |   { file = "Cargo.toml" },
 60 |   { file = "src/**/*.rs" },
 61 | ]
 62 | 
 63 | [tool.mypy]
 64 | python_version = "3.10"
 65 | warn_redundant_casts = true
 66 | warn_unreachable = true
 67 | pretty = true
 68 | 
 69 | [[tool.mypy.overrides]]
 70 | module = [
 71 |   "torch.*",
 72 |   "transformers",
 73 |   "sentence_transformers",
 74 |   "llmspec",
 75 |   "openai",
 76 | ]
 77 | ignore_missing_imports = true
 78 | 
 79 | [tool.pyright]
 80 | venvPath = "."
 81 | venv = ".venv"
 82 | pythonPlatform = "Linux"
 83 | pythonVersion = "3.10"
 84 | include = ["mosec", "tests", "examples"]
 85 | reportMissingImports = "warning"
 86 | 
 87 | [tool.pytest.ini_options]
 88 | markers = ["shm: mark a test is related to shared memory"]
 89 | 
 90 | [tool.ruff.lint]
 91 | select = ["E", "F", "G", "B", "I", "SIM", "TID", "PL", "RUF", "D"]
 92 | ignore = ["E501", "D203", "D213", "PLC0415"]
 93 | [tool.ruff.lint.isort]
 94 | known-first-party = ["mosec"]
 95 | [tool.ruff.lint.pylint]
 96 | max-args = 10
 97 | [tool.ruff.lint.per-file-ignores]
 98 | "tests/*" = ["D"]
 99 | "examples/*" = ["D"]
100 | [tool.ruff.lint.pydocstyle]
101 | convention = "google"
102 | 
103 | [tool.typos]
104 | [tool.typos.default.extend-words]
105 | typ = "typ"
106 | 
107 | [dependency-groups]
108 | doc = [
109 |   "furo>=2022.12.7",
110 |   "myst-parser>=0.18",
111 |   "sphinx>=7.4.7",
112 |   "sphinx-autodoc-typehints>=1.22",
113 |   "sphinx-copybutton>=0.5",
114 |   "sphinx-sitemap>=2.6.0",
115 |   "sphinxcontrib-napoleon>=0.7",
116 |   "sphinxcontrib-programoutput>=0.17",
117 |   "sphinxext-opengraph>=0.8",
118 | ]
119 | dev = [
120 |   "httpx-sse==0.4.3",
121 |   "httpx[http2]==0.28.1",
122 |   "maturin>=1.8,<2.0",
123 |   "mypy~=1.15",
124 |   "prek>=0.1.2",
125 |   "pyright~=1.1",
126 |   "pytest>=8",
127 |   "pytest-mock>=3.5",
128 |   "ruff>=0.11.11",
129 |   "zstandard~=0.23",
130 | ]
131 | mixin = [
132 |   "msgpack>=1.1.0",
133 |   "msgspec>=0.20",
134 |   "numbin>=0.5.0",
135 |   "numpy<2 ; python_full_version < '3.12'",
136 |   "numpy>=2 ; python_full_version >= '3.12'",
137 |   "pyarrow>=0.6.1,<12 ; python_full_version < '3.12'", # pyarrow legacy dependency
138 |   "redis>=4.0.0",
139 | ]
140 | 


--------------------------------------------------------------------------------
/examples/resnet50_msgpack/server.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 MOSEC Authors
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """Example: Sample Resnet server."""
 15 | 
 16 | from io import BytesIO
 17 | from typing import List
 18 | from urllib.request import urlretrieve
 19 | 
 20 | import numpy as np  # type: ignore
 21 | import torch  # type: ignore
 22 | import torchvision  # type: ignore
 23 | from PIL import Image  # type: ignore
 24 | from torchvision import transforms  # type: ignore
 25 | 
 26 | from mosec import Server, ValidationError, Worker, get_logger
 27 | from mosec.mixin import MsgpackMixin
 28 | 
 29 | logger = get_logger()
 30 | 
 31 | INFERENCE_BATCH_SIZE = 16
 32 | 
 33 | 
 34 | class Preprocess(MsgpackMixin, Worker):
 35 |     """Sample Preprocess worker"""
 36 | 
 37 |     def __init__(self) -> None:
 38 |         super().__init__()
 39 |         trans = torch.nn.Sequential(
 40 |             transforms.Resize((256, 256)),
 41 |             transforms.CenterCrop(224),
 42 |             transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
 43 |         )
 44 |         self.transform = torch.jit.script(trans)  # type: ignore
 45 | 
 46 |     def forward(self, data: dict):
 47 |         # Customized validation for input key and field content; raise
 48 |         # ValidationError so that the client can get 422 as http status
 49 |         try:
 50 |             image = Image.open(BytesIO(data["image"]))
 51 |         except KeyError as err:
 52 |             raise ValidationError(f"cannot find key {err}") from err
 53 |         except Exception as err:
 54 |             raise ValidationError(f"cannot decode as image data: {err}") from err
 55 | 
 56 |         tensor = transforms.ToTensor()(image)
 57 |         data = self.transform(tensor)  # type: ignore
 58 |         return data
 59 | 
 60 | 
 61 | class Inference(Worker):
 62 |     """Sample Inference worker"""
 63 | 
 64 |     def __init__(self):
 65 |         super().__init__()
 66 |         self.device = (
 67 |             torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 68 |         )
 69 |         logger.info("using computing device: %s", self.device)
 70 |         self.model = torchvision.models.resnet50(pretrained=True)
 71 |         self.model.eval()
 72 |         self.model.to(self.device)
 73 | 
 74 |         # Overwrite self.example for warmup
 75 |         self.example = [
 76 |             np.zeros((3, 244, 244), dtype=np.float32)
 77 |         ] * INFERENCE_BATCH_SIZE
 78 | 
 79 |     def forward(self, data: List[np.ndarray]) -> List[int]:
 80 |         logger.info("processing batch with size: %d", len(data))
 81 |         with torch.no_grad():
 82 |             batch = torch.stack([torch.tensor(arr, device=self.device) for arr in data])
 83 |             output = self.model(batch)
 84 |             top1 = torch.argmax(output, dim=1)
 85 |         return top1.cpu().tolist()
 86 | 
 87 | 
 88 | class Postprocess(MsgpackMixin, Worker):
 89 |     """Sample Postprocess worker"""
 90 | 
 91 |     def __init__(self):
 92 |         super().__init__()
 93 |         logger.info("loading categories file...")
 94 |         local_filename, _ = urlretrieve(
 95 |             "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
 96 |         )
 97 | 
 98 |         with open(local_filename, encoding="utf8") as file:
 99 |             self.categories = list(map(lambda x: x.strip(), file.readlines()))
100 | 
101 |     def forward(self, data: int) -> dict:
102 |         return {"category": self.categories[data]}
103 | 
104 | 
if __name__ == "__main__":
    # Build the pipeline: Preprocess -> Inference -> Postprocess.
    server = Server()
    # several preprocessing workers feed the batched inference stage
    server.append_worker(Preprocess, num=4)
    # two model replicas, each taking batches of up to INFERENCE_BATCH_SIZE
    server.append_worker(Inference, num=2, max_batch_size=INFERENCE_BATCH_SIZE)
    server.append_worker(Postprocess, num=1)
    server.run()
111 | 


--------------------------------------------------------------------------------
/mosec/errors.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 MOSEC Authors
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Exceptions used in the Worker.
 16 | 
 17 | Suppose the input dataflow of our model server is as follows:
 18 | 
 19 | **bytes** ``->`` *deserialize* ``->`` **data** ``->`` *parse* ``->`` **valid data**
 20 | 
 21 | If the raw bytes cannot be successfully deserialized, the `DecodingError`
 22 | is raised; if the decoded data cannot pass the validation check (usually
 23 | implemented by users), the `ValidationError` should be raised.
 24 | """
 25 | 
 26 | from mosec.protocol import HTTPStatusCode
 27 | 
 28 | 
class MosecError(Exception):
    """Mosec basic exception.

    Base class of `ClientError`, `ServerError` and `ValidationError`.
    Subclasses override the class attributes below.
    """

    # HTTP status code associated with this error type
    code: HTTPStatusCode = HTTPStatusCode.INTERNAL_ERROR
    # short description of the error category
    msg: str = "mosec error"
 34 | 
 35 | 
class ClientError(MosecError):
    """Client side error.

    This error indicates that the server cannot or will not process the request
    due to something that is perceived to be a client error. It will return the
    details to the client side with
    `HTTP 400 <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400>`__.
    """

    # status code and short description used for client-side errors
    code = HTTPStatusCode.BAD_REQUEST
    msg = "bad request"
 47 | 
 48 | 
class ServerError(MosecError):
    """Server side error.

    This error indicates that the server encountered an unexpected condition
    that prevented it from fulfilling the request. It will return the details
    to the client side with
    `HTTP 500 <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/500>`__.

    Attention: be careful about the returned message since it may contain some
    sensitive information. If you don't want to return the details, just raise
    an exception that is not inherited from `mosec.errors.MosecError`.
    """

    # status code and short description used for server-side errors
    code = HTTPStatusCode.INTERNAL_ERROR
    msg = "internal error"
 64 | 
 65 | 
class EncodingError(ServerError):
    """Serialization error.

    The `EncodingError` should be raised in user-implemented codes when
    the serialization for the response bytes fails. This error will set
    the status code to
    `HTTP 500 <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/500>`__
    and show the details in the response.
    """

    # the status code is inherited from `ServerError`
    msg = "encoding error"
 77 | 
 78 | 
class DecodingError(ClientError):
    """De-serialization error.

    The `DecodingError` should be raised in user-implemented codes
    when the de-serialization for the request bytes fails. This error
    will set the status code to
    `HTTP 400 <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400>`__
    in the response.
    """

    # the status code is inherited from `ClientError`
    msg = "decoding error"
 90 | 
 91 | 
class ValidationError(MosecError):
    """Request data validation error.

    The `ValidationError` should be raised in user-implemented codes,
    where the validation for the input data fails. Usually, it should be
    put after the data de-serialization, which converts the raw bytes
    into structured data. This error will set the status code to
    `HTTP 422 <https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/422>`__
    in the response.
    """

    # status code and short description used for validation failures
    code = HTTPStatusCode.VALIDATION_ERROR
    msg = "request validation error"
105 | 
106 | 
class MosecTimeoutError(BaseException):
    """Exception raised when a MOSEC worker operation times out.

    If a bug in the forward code causes the worker to hang indefinitely, a timeout
    can be used to ensure that the worker eventually returns control to the main
    thread program. When a timeout occurs, the `MosecTimeoutError` exception is
    raised. This exception can be caught and handled appropriately to perform any
    necessary cleanup tasks or return a response indicating that the operation
    timed out.

    Note that `MosecTimeoutError` is a subclass of `BaseException`, not
    `Exception`. This is because timeouts should not be caught and handled in the
    same way as other exceptions. Instead, they should be handled in a separate
    `except` block which isn't designed to break the working loop.
    """

    # status code and short description used for timeouts
    code = HTTPStatusCode.TIMEOUT_ERROR
    msg = "mosec timeout error"
124 | 


--------------------------------------------------------------------------------
/examples/embedding/server.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 MOSEC Authors
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """OpenAI compatible embedding server."""
 16 | 
 17 | import base64
 18 | import os
 19 | from typing import List, Union
 20 | 
 21 | import numpy as np
 22 | import torch  # type: ignore
 23 | import torch.nn.functional as F  # type: ignore
 24 | import transformers  # type: ignore
 25 | from llmspec import EmbeddingData, EmbeddingRequest, EmbeddingResponse, TokenUsage
 26 | 
 27 | from mosec import ClientError, Runtime, Server, Worker
 28 | 
 29 | DEFAULT_MODEL = "thenlper/gte-base"
 30 | 
 31 | 
 32 | class Embedding(Worker):
 33 |     def __init__(self):
 34 |         self.model_name = os.getenv("EMB_MODEL", DEFAULT_MODEL)
 35 |         self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name)
 36 |         self.model = transformers.AutoModel.from_pretrained(self.model_name)
 37 |         self.device = (
 38 |             torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
 39 |         )
 40 | 
 41 |         self.model = self.model.to(self.device)
 42 |         self.model.eval()
 43 | 
 44 |     def get_embedding_with_token_count(
 45 |         self, sentences: Union[str, List[Union[str, List[int]]]]
 46 |     ):
 47 |         # Mean Pooling - Take attention mask into account for correct averaging
 48 |         def mean_pooling(model_output, attention_mask):
 49 |             # First element of model_output contains all token embeddings
 50 |             token_embeddings = model_output[0]
 51 |             input_mask_expanded = (
 52 |                 attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
 53 |             )
 54 |             return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
 55 |                 input_mask_expanded.sum(1), min=1e-9
 56 |             )
 57 | 
 58 |         # Tokenize sentences
 59 |         # TODO: support `List[List[int]]` input
 60 |         encoded_input = self.tokenizer(
 61 |             sentences, padding=True, truncation=True, return_tensors="pt"
 62 |         )
 63 |         inputs = encoded_input.to(self.device)
 64 |         token_count = inputs["attention_mask"].sum(dim=1).tolist()[0]
 65 |         # Compute token embeddings
 66 |         model_output = self.model(**inputs)
 67 |         # Perform pooling
 68 |         sentence_embeddings = mean_pooling(model_output, inputs["attention_mask"])
 69 |         # Normalize embeddings
 70 |         sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
 71 | 
 72 |         return token_count, sentence_embeddings
 73 | 
 74 |     def deserialize(self, data: bytes) -> EmbeddingRequest:
 75 |         return EmbeddingRequest.from_bytes(data)
 76 | 
 77 |     def serialize(self, data: EmbeddingResponse) -> bytes:
 78 |         return data.to_json()
 79 | 
 80 |     def forward(self, data: EmbeddingRequest) -> EmbeddingResponse:
 81 |         if data.model != self.model_name:
 82 |             raise ClientError(
 83 |                 f"the requested model {data.model} is not supported by "
 84 |                 f"this worker {self.model_name}"
 85 |             )
 86 |         token_count, embeddings = self.get_embedding_with_token_count(data.input)
 87 |         embeddings = embeddings.detach()
 88 |         if self.device != "cpu":
 89 |             embeddings = embeddings.cpu()
 90 |         embeddings = embeddings.numpy()
 91 |         if data.encoding_format == "base64":
 92 |             embeddings = [
 93 |                 base64.b64encode(emb.astype(np.float32).tobytes()).decode("utf-8")
 94 |                 for emb in embeddings
 95 |             ]
 96 |         else:
 97 |             embeddings = [emb.tolist() for emb in embeddings]
 98 | 
 99 |         resp = EmbeddingResponse(
100 |             data=[
101 |                 EmbeddingData(embedding=emb, index=i)
102 |                 for i, emb in enumerate(embeddings)
103 |             ],
104 |             model=self.model_name,
105 |             usage=TokenUsage(
106 |                 prompt_tokens=token_count,
107 |                 # No completions performed, only embeddings generated.
108 |                 completion_tokens=0,
109 |                 total_tokens=token_count,
110 |             ),
111 |         )
112 |         return resp
113 | 
114 | 
if __name__ == "__main__":
    # Serve the same embedding runtime under both the versioned and the
    # unversioned route.
    runtime = Runtime(Embedding)
    routes = {path: [runtime] for path in ("/v1/embeddings", "/embeddings")}
    server = Server()
    server.register_runtime(routes)
    server.run()
125 | 


--------------------------------------------------------------------------------
/src/metrics.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 MOSEC Authors
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | use std::sync::OnceLock;
 16 | 
 17 | use prometheus_client::encoding::EncodeLabelSet;
 18 | use prometheus_client::metrics::counter::Counter;
 19 | use prometheus_client::metrics::family::{Family, MetricConstructor};
 20 | use prometheus_client::metrics::gauge::Gauge;
 21 | use prometheus_client::metrics::histogram::{Histogram, exponential_buckets};
 22 | use prometheus_client::registry::Registry;
 23 | 
/// Label set used by the `throughput` counter family in `Metrics`.
#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
pub struct CodeLabel {
    /// Numeric code label (presumably the HTTP status code; confirm at the call sites).
    pub code: u16,
    /// Endpoint label (presumably the route path of the request; confirm at the call sites).
    pub endpoint: String,
}
 29 | 
/// Label set used by the `duration`, `batch_size` and `batch_duration`
/// histogram families in `Metrics`.
#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
pub struct StageConnectionLabel {
    /// Stage label; `init_with_namespace` uses "total" for `DURATION_LABEL`.
    pub stage: String,
    /// Connection label; `init_with_namespace` uses "total" for `DURATION_LABEL`.
    pub connection: String,
}
 35 | 
/// Collection of the Prometheus metrics exported by the service.
#[derive(Debug)]
pub(crate) struct Metrics {
    /// Service inference endpoint throughput, labelled by code and endpoint.
    pub(crate) throughput: Family<CodeLabel, Counter>,
    /// Process duration for each connection in each stage; the number of
    /// buckets is decided by `CustomHistogramBuilder`.
    pub(crate) duration: Family<StageConnectionLabel, Histogram, CustomHistogramBuilder>,
    /// Batch size for each connection in each stage.
    pub(crate) batch_size: Family<StageConnectionLabel, Histogram>,
    /// Dynamic batching duration for each connection in each stage.
    pub(crate) batch_duration: Family<StageConnectionLabel, Histogram>,
    /// Remaining tasks for the whole service.
    pub(crate) remaining_task: Gauge,
}
 44 | 
/// Histogram constructor whose bucket count is chosen at runtime.
#[derive(Clone)]
pub(crate) struct CustomHistogramBuilder {
    /// Number of exponential buckets given to every histogram it creates.
    length: u16,
}
 49 | 
 50 | impl MetricConstructor<Histogram> for CustomHistogramBuilder {
 51 |     fn new_metric(&self) -> Histogram {
 52 |         // When a new histogram is created, this function will be called.
 53 |         Histogram::new(exponential_buckets(1e-3f64, 2f64, self.length))
 54 |     }
 55 | }
 56 | 
 57 | impl Metrics {
 58 |     pub(crate) fn global() -> &'static Metrics {
 59 |         METRICS.get().expect("Metrics is not initialized")
 60 |     }
 61 | 
 62 |     pub(crate) fn new(timeout: u64) -> Self {
 63 |         let builder = CustomHistogramBuilder {
 64 |             length: (timeout as f64).log2().ceil() as u16 + 1,
 65 |         };
 66 |         Self {
 67 |             throughput: Family::<CodeLabel, Counter>::default(),
 68 |             duration:
 69 |                 Family::<StageConnectionLabel, Histogram, CustomHistogramBuilder>::new_with_constructor(
 70 |                     builder,
 71 |                 ), // 1ms ~ 4.096s (default)
 72 |             batch_size: Family::<StageConnectionLabel, Histogram>::new_with_constructor(|| {
 73 |                 Histogram::new(exponential_buckets(1f64, 2f64, 10)) // 1 ~ 512
 74 |             }),
 75 |             batch_duration: Family::<StageConnectionLabel, Histogram>::new_with_constructor(|| {
 76 |                 Histogram::new(exponential_buckets(1e-3f64, 2f64, 13)) // 1ms ~ 4.096s
 77 |             }),
 78 |             remaining_task: Gauge::default(),
 79 |         }
 80 |     }
 81 | 
 82 |     pub(crate) fn init_with_namespace(namespace: &str, timeout: u64) -> Self {
 83 |         DURATION_LABEL
 84 |             .set(StageConnectionLabel {
 85 |                 stage: "total".to_string(),
 86 |                 connection: "total".to_string(),
 87 |             })
 88 |             .unwrap();
 89 |         let mut registry = <Registry>::default();
 90 |         let metrics = Metrics::new(timeout);
 91 |         registry.register(
 92 |             format!("{namespace}_throughput"),
 93 |             "service inference endpoint throughput",
 94 |             metrics.throughput.clone(),
 95 |         );
 96 |         registry.register(
 97 |             format!("{namespace}_process_duration_second"),
 98 |             "process duration for each connection in each stage",
 99 |             metrics.duration.clone(),
100 |         );
101 |         registry.register(
102 |             format!("{namespace}_batch_size"),
103 |             "batch size for each connection in each stage",
104 |             metrics.batch_size.clone(),
105 |         );
106 |         registry.register(
107 |             format!("{namespace}_batch_duration_second"),
108 |             "dynamic batching duration for each connection in each stage",
109 |             metrics.batch_duration.clone(),
110 |         );
111 |         registry.register(
112 |             format!("{namespace}_remaining_task"),
113 |             "remaining tasks for the whole service",
114 |             metrics.remaining_task.clone(),
115 |         );
116 |         REGISTRY.set(registry).unwrap();
117 |         metrics
118 |     }
119 | }
120 | 
/// Global metrics instance; read through `Metrics::global`.
pub(crate) static METRICS: OnceLock<Metrics> = OnceLock::new();
/// Global Prometheus registry; set by `Metrics::init_with_namespace`.
pub(crate) static REGISTRY: OnceLock<Registry> = OnceLock::new();
/// Label with stage "total" and connection "total"; set by `Metrics::init_with_namespace`.
pub(crate) static DURATION_LABEL: OnceLock<StageConnectionLabel> = OnceLock::new();
124 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 MOSEC Authors
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //      http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #![forbid(unsafe_code)]
 16 | 
 17 | mod apidoc;
 18 | mod config;
 19 | mod errors;
 20 | mod layouts;
 21 | mod metrics;
 22 | mod protocol;
 23 | mod routes;
 24 | mod tasks;
 25 | 
 26 | use std::env;
 27 | use std::fs::read_to_string;
 28 | use std::net::SocketAddr;
 29 | 
 30 | use axum::Router;
 31 | use axum::routing::{get, post};
 32 | use log::{debug, info};
 33 | use logforth::append;
 34 | use logforth::record::{Level, LevelFilter};
 35 | use tokio::signal::unix::{SignalKind, signal};
 36 | use tower::ServiceBuilder;
 37 | use tower_http::compression::CompressionLayer;
 38 | use tower_http::decompression::RequestDecompressionLayer;
 39 | use utoipa::OpenApi;
 40 | use utoipa_swagger_ui::SwaggerUi;
 41 | 
 42 | use crate::apidoc::MosecOpenAPI;
 43 | use crate::config::Config;
 44 | use crate::layouts::{ColoredLayout, JsonLayout};
 45 | use crate::metrics::{METRICS, Metrics};
 46 | use crate::routes::{RustAPIDoc, index, inference, metrics, sse_inference};
 47 | use crate::tasks::{TASK_MANAGER, TaskManager};
 48 | 
 49 | async fn shutdown_signal() {
 50 |     let mut interrupt = signal(SignalKind::interrupt()).unwrap();
 51 |     let mut terminate = signal(SignalKind::terminate()).unwrap();
 52 |     loop {
 53 |         tokio::select! {
 54 |             _ = interrupt.recv() => {
 55 |                 info!("service received interrupt signal, will ignore it here \
 56 |                     since it should be controlled by the main process (send SIGTERM \
 57 |                     to `mosec` if you really want to kill it manually)");
 58 |             },
 59 |             _ = terminate.recv() => {
 60 |                 info!("service received terminate signal");
 61 |                 let task_manager = TaskManager::global();
 62 |                 task_manager.shutdown().await;
 63 |                 info!("service shutdown complete");
 64 |                 break;
 65 |             },
 66 |         }
 67 |     }
 68 | }
 69 | 
/// Builds the HTTP service from `conf` and serves it until `shutdown_signal` completes.
///
/// Steps, in order: merge every configured route into the OpenAPI document,
/// initialize the global metrics and task manager, register the routes,
/// optionally add (de)compression layers, wait on the barrier returned by the
/// task manager, then bind and serve.
///
/// # Panics
///
/// Panics if `METRICS` or `TASK_MANAGER` is already set, if the configured
/// address/port cannot be parsed or bound, or if serving returns an error.
#[tokio::main]
async fn run(conf: &Config) {
    // OpenAPI document: start from the built-in API doc and merge each route.
    let mut doc = MosecOpenAPI {
        api: RustAPIDoc::openapi(),
    };
    for route in &conf.routes {
        doc.merge_route(route);
    }
    doc.clean();

    // Global state is set exactly once here; a second `set` would panic on `unwrap`.
    let metrics_instance = Metrics::init_with_namespace(&conf.namespace, conf.timeout);
    METRICS.set(metrics_instance).unwrap();
    let mut task_manager = TaskManager::new(conf.timeout);
    let barrier = task_manager.init_from_config(conf);
    TASK_MANAGER.set(task_manager).unwrap();

    // Built-in routes: Swagger UI, index and Prometheus metrics.
    let mut router = Router::new()
        .merge(SwaggerUi::new("/openapi/swagger").url("/openapi/metadata.json", doc.api))
        .route("/", get(index))
        .route("/metrics", get(metrics));

    // User routes: SSE endpoints use the streaming handler, others the plain one.
    for route in &conf.routes {
        if route.is_sse {
            router = router.route(&route.endpoint, post(sse_inference));
        } else {
            router = router.route(&route.endpoint, post(inference));
        }
    }

    // Request decompression and response compression are added only when enabled.
    if conf.compression {
        router = router.layer(
            ServiceBuilder::new()
                .layer(RequestDecompressionLayer::new())
                .layer(CompressionLayer::new()),
        );
    }

    // wait until each stage has at least one worker alive
    barrier.wait().await;
    let addr: SocketAddr = format!("{}:{}", conf.address, conf.port).parse().unwrap();
    let listener = tokio::net::TcpListener::bind(addr).await.unwrap();
    info!(addr:?; "http service is running");
    axum::serve(listener, router)
        .with_graceful_shutdown(shutdown_signal())
        .await
        .unwrap();
}
117 | 
118 | fn main() {
119 |     // let opts: Opts = argh::from_env();
120 |     let cmd_args: Vec<String> = env::args().collect();
121 |     if cmd_args.len() != 2 {
122 |         println!("expect one argument as the config path but got {cmd_args:?}");
123 |         return;
124 |     }
125 |     let config_str = read_to_string(&cmd_args[1]).expect("read config file failure");
126 |     let conf: Config = serde_json::from_str(&config_str).expect("parse config failure");
127 | 
128 |     if conf.log_level == "debug" {
129 |         // use colorful log for debug
130 |         logforth::starter_log::builder()
131 |             .dispatch(|d| {
132 |                 d.filter(LevelFilter::MoreSevereEqual(Level::Debug))
133 |                     .append(append::Stderr::default().with_layout(ColoredLayout))
134 |             })
135 |             .apply();
136 |     } else {
137 |         // use JSON format for production
138 |         let level_filter =
139 |             LevelFilter::MoreSevereEqual(match conf.log_level.to_ascii_lowercase().as_str() {
140 |                 "error" => Level::Error,
141 |                 "warning" => Level::Warn,
142 |                 _ => Level::Info,
143 |             });
144 |         logforth::starter_log::builder()
145 |             .dispatch(|d| {
146 |                 d.filter(level_filter)
147 |                     .append(append::Stderr::default().with_layout(JsonLayout))
148 |             })
149 |             .apply();
150 |     }
151 | 
152 |     debug!(conf:?; "parse service arguments");
153 |     run(&conf);
154 | }
155 | 


--------------------------------------------------------------------------------
/.github/workflows/package.yml:
--------------------------------------------------------------------------------
  1 | # This file is adapted from the CI file autogenerated by maturin v1.8.1
  2 | # DO NOT OVERWRITE THIS FILE by `maturin generate-ci github` directly
  3 | 
  4 | name: PyPI Publish
  5 | 
  6 | on:
  7 |   release:
  8 |     types: [created]
  9 |   workflow_dispatch:
 10 | 
 11 | concurrency:
 12 |   group: ${{ github.ref }}-${{ github.workflow }}
 13 |   cancel-in-progress: true
 14 | 
 15 | permissions:
 16 |   contents: read
 17 | 
 18 | jobs:
 19 |   linux:
 20 |     runs-on: ${{ matrix.platform.runner }}
 21 |     strategy:
 22 |       matrix:
 23 |         platform:
 24 |           - runner: ubuntu-22.04
 25 |             target: x86_64
 26 |           - runner: ubuntu-22.04
 27 |             target: x86
 28 |           - runner: ubuntu-22.04
 29 |             target: aarch64
 30 |           - runner: ubuntu-22.04
 31 |             target: armv7
 32 |           - runner: ubuntu-22.04
 33 |             target: s390x
 34 |           - runner: ubuntu-22.04
 35 |             target: ppc64le
 36 |     steps:
 37 |       - uses: actions/checkout@v6
 38 |       - name: Build wheels
 39 |         uses: PyO3/maturin-action@v1
 40 |         with:
 41 |           target: ${{ matrix.platform.target }}
 42 |           args: --release --out dist
 43 |           sccache: 'true'
 44 |           manylinux: auto
 45 |       - name: Upload wheels
 46 |         uses: actions/upload-artifact@v5
 47 |         with:
 48 |           name: wheels-linux-${{ matrix.platform.target }}
 49 |           path: dist
 50 | 
 51 |   musllinux:
 52 |     runs-on: ${{ matrix.platform.runner }}
 53 |     strategy:
 54 |       matrix:
 55 |         platform:
 56 |           - runner: ubuntu-22.04
 57 |             target: x86_64
 58 |           - runner: ubuntu-22.04
 59 |             target: x86
 60 |           - runner: ubuntu-22.04
 61 |             target: aarch64
 62 |           - runner: ubuntu-22.04
 63 |             target: armv7
 64 |     steps:
 65 |       - uses: actions/checkout@v6
 66 |       - name: Build wheels
 67 |         uses: PyO3/maturin-action@v1
 68 |         with:
 69 |           target: ${{ matrix.platform.target }}
 70 |           args: --release --out dist
 71 |           sccache: 'true'
 72 |           manylinux: musllinux_1_2
 73 |       - name: Upload wheels
 74 |         uses: actions/upload-artifact@v5
 75 |         with:
 76 |           name: wheels-musllinux-${{ matrix.platform.target }}
 77 |           path: dist
 78 | 
 79 |   macos:
 80 |     runs-on: ${{ matrix.platform.runner }}
 81 |     strategy:
 82 |       matrix:
 83 |         platform:
 84 |           - runner: macos-15-intel
 85 |             target: x86_64
 86 |           - runner: macos-14
 87 |             target: aarch64
 88 |     steps:
 89 |       - uses: actions/checkout@v6
 90 |       - name: Build wheels
 91 |         uses: PyO3/maturin-action@v1
 92 |         with:
 93 |           target: ${{ matrix.platform.target }}
 94 |           args: --release --out dist
 95 |           sccache: 'true'
 96 |       - name: Upload wheels
 97 |         uses: actions/upload-artifact@v5
 98 |         with:
 99 |           name: wheels-macos-${{ matrix.platform.target }}
100 |           path: dist
101 | 
102 |   sdist:
103 |     runs-on: ubuntu-latest
104 |     steps:
105 |       - uses: actions/checkout@v6
106 |       - name: Build sdist
107 |         uses: PyO3/maturin-action@v1
108 |         with:
109 |           command: sdist
110 |           args: --out dist
111 |       - name: Upload sdist
112 |         uses: actions/upload-artifact@v5
113 |         with:
114 |           name: wheels-sdist
115 |           path: dist
116 | 
117 |   release:
118 |     name: Release
119 |     runs-on: ubuntu-latest
120 |     if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
121 |     needs: [linux, musllinux, macos, sdist]
122 |     permissions:
123 |       # Used to sign the release artifacts
124 |       id-token: write
125 |       # Used to upload release artifacts
126 |       contents: write
127 |       # Used to generate artifact attestation
128 |       attestations: write
129 |     steps:
130 |       - uses: actions/download-artifact@v6
131 |       - name: Generate artifact attestation
132 |         uses: actions/attest-build-provenance@v3
133 |         with:
134 |           subject-path: 'wheels-*/*'
135 |       - name: Publish to PyPI
136 |         if: ${{ startsWith(github.ref, 'refs/tags/') }}
137 |         uses: PyO3/maturin-action@v1
138 |         with:
139 |           command: upload
140 |           args: --non-interactive --skip-existing wheels-*/*
141 | 
142 |   image:
143 |     name: "Build Docker Image"
144 |     runs-on: ubuntu-latest
145 |     needs: [release]
146 |     permissions:
147 |       contents: read
148 |       # for GitHub Container Registry
149 |       packages: write
150 |     steps:
151 |       - uses: actions/checkout@v6
152 |       - name: Docker meta
153 |         id: meta
154 |         uses: docker/metadata-action@v5
155 |         with:
156 |           images: |
157 |             ${{ github.repository_owner }}/mosec
158 |             ghcr.io/${{ github.repository_owner }}/mosec
159 |       - name: Docker Setup QEMU
160 |         uses: docker/setup-qemu-action@v3
161 |       - name: Set up Docker Buildx
162 |         uses: docker/setup-buildx-action@v3
163 |       - name: Login to Docker Hub
164 |         uses: docker/login-action@v3
165 |         with:
166 |           username: ${{ secrets.DOCKER_USERNAME }}
167 |           password: ${{ secrets.DOCKER_TOKEN }}
168 |       - name: Login to ghcr.io
169 |         uses: docker/login-action@v3
170 |         with:
171 |           registry: ghcr.io
172 |           username: ${{ github.actor }}
173 |           password: ${{ secrets.GITHUB_TOKEN }}
174 |       - name: Build and push image
175 |         uses: docker/build-push-action@v6
176 |         with:
177 |           push: true
178 |           tags: ${{ steps.meta.outputs.tags }}
179 |           labels: ${{ steps.meta.outputs.labels }}
180 |           file: Dockerfile
181 |           platforms: linux/amd64,linux/arm64
182 |           cache-from: type=gha
183 |           cache-to: type=gha,mode=max
184 | 


--------------------------------------------------------------------------------
/mosec/protocol.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2022 MOSEC Authors
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """Server-Worker communication protocol."""
 16 | 
 17 | import logging
 18 | import socket
 19 | import struct
 20 | import warnings
 21 | from enum import IntFlag
 22 | from io import BytesIO
 23 | from typing import Sequence, Tuple
 24 | 
 25 | from mosec.log import get_internal_logger
 26 | 
# Internal logger used by the protocol client in this module.
logger = get_internal_logger()

# Total payload size (in bytes) of one received batch above which
# `Protocol.receive` emits a `RuntimeWarning`.
IPC_LARGE_DATA_SIZE = 1024 * 1024  # set as 1 MB
 30 | 
 31 | 
 32 | class HTTPStatusCode(IntFlag):
 33 |     """HTTP status code flag."""
 34 | 
 35 |     OK = 1  # 200
 36 |     BAD_REQUEST = 2  # 400
 37 |     VALIDATION_ERROR = 4  # 422
 38 |     INTERNAL_ERROR = 8  # 500
 39 |     TIMEOUT_ERROR = 16  # 408
 40 | 
 41 |     # special one, indicate that it's a SSE
 42 |     STREAM_EVENT = 32768
 43 | 
 44 | 
 45 | class Protocol:
 46 |     """IPC protocol.
 47 | 
 48 |     This private class implements the client-side protocol through Unix domain socket
 49 |     to communicate with the server.
 50 |     """
 51 | 
 52 |     # byte formats (https://docs.python.org/3/library/struct.html#format-characters)
 53 |     FORMAT_FLAG = "!H"
 54 |     FORMAT_BATCH = "!H"
 55 |     FORMAT_ID = "!I"
 56 |     FORMAT_LENGTH = "!I"
 57 |     FORMAT_STATE = "!H"
 58 | 
 59 |     # lengths
 60 |     LENGTH_TASK_FLAG = 2
 61 |     LENGTH_TASK_BATCH = 2
 62 |     LENGTH_TASK_ID = 4
 63 |     LENGTH_TASK_STATE = 2
 64 |     LENGTH_TASK_BODY_LEN = 4
 65 | 
 66 |     def __init__(
 67 |         self,
 68 |         name: str,
 69 |         addr: str,
 70 |         timeout: float = 2.0,
 71 |     ):
 72 |         """Initialize the protocol client.
 73 | 
 74 |         Args:
 75 |             name (str): name of its belonging coordinator.
 76 |             addr (str): Unix domain socket address in file system's namespace.
 77 |             timeout (float, optional): socket timeout. Defaults to 2.0 seconds.
 78 | 
 79 |         """
 80 |         self.socket = socket.socket(
 81 |             socket.AF_UNIX,
 82 |             socket.SOCK_STREAM,
 83 |         )
 84 |         self.socket.settimeout(timeout)
 85 |         self.name = name
 86 |         self.addr = addr
 87 | 
 88 |     def receive(self) -> Tuple[bytes, Sequence[bytes], Sequence[int], Sequence[bytes]]:
 89 |         """Receive tasks from the server."""
 90 |         flag = self.socket.recv(self.LENGTH_TASK_FLAG)
 91 |         batch_size_bytes = self.socket.recv(self.LENGTH_TASK_BATCH)
 92 |         batch_size = struct.unpack(self.FORMAT_BATCH, batch_size_bytes)[0]
 93 |         ids, states, payloads = [], [], []
 94 |         total_bytes = 0
 95 | 
 96 |         while batch_size > 0:
 97 |             batch_size -= 1
 98 |             id_bytes = self.socket.recv(self.LENGTH_TASK_ID)
 99 |             state_bytes = self.socket.recv(self.LENGTH_TASK_STATE)
100 |             length_bytes = self.socket.recv(self.LENGTH_TASK_BODY_LEN)
101 |             length = struct.unpack(self.FORMAT_LENGTH, length_bytes)[0]
102 |             payload = _recv_all(self.socket, length)
103 |             ids.append(id_bytes)
104 |             states.append(struct.unpack(self.FORMAT_STATE, state_bytes)[0])
105 |             payloads.append(payload)
106 |             total_bytes += length
107 | 
108 |         if logger.isEnabledFor(logging.DEBUG):
109 |             logger.debug(
110 |                 "%s received %d tasks with ids: %s",
111 |                 self.name,
112 |                 len(ids),
113 |                 struct.unpack("!" + "I" * len(ids), b"".join(ids)),
114 |             )
115 | 
116 |         if total_bytes > IPC_LARGE_DATA_SIZE:
117 |             warnings.warn(
118 |                 f"IPC data ({total_bytes} bytes) is large, "
119 |                 "which may affect performance",
120 |                 RuntimeWarning,
121 |                 stacklevel=2,
122 |             )
123 |         return flag, ids, states, payloads
124 | 
125 |     def send(
126 |         self,
127 |         flag: int,
128 |         ids: Sequence[bytes],
129 |         states: Sequence[int],
130 |         payloads: Sequence[bytes],
131 |     ):
132 |         """Send results to the server."""
133 |         data = BytesIO()
134 |         data.write(struct.pack(self.FORMAT_FLAG, flag))
135 |         if len(ids) != len(payloads):
136 |             raise ValueError("`ids` have different length with `payloads`")
137 |         batch_size = len(ids)
138 |         data.write(struct.pack(self.FORMAT_BATCH, batch_size))
139 |         if batch_size > 0:
140 |             for task_id, state, payload in zip(ids, states, payloads, strict=True):
141 |                 data.write(task_id)
142 |                 data.write(struct.pack(self.FORMAT_STATE, state))
143 |                 data.write(struct.pack(self.FORMAT_LENGTH, len(payload)))
144 |                 data.write(payload)
145 |         self.socket.sendall(data.getbuffer())
146 |         if logger.isEnabledFor(logging.DEBUG):
147 |             logger.debug(
148 |                 "%s sent %d(%d) tasks with ids: %s",
149 |                 self.name,
150 |                 len(ids),
151 |                 flag,
152 |                 struct.unpack("!" + "I" * len(ids), b"".join(ids)),
153 |             )
154 | 
155 |     def open(self):
156 |         """Open the socket connection."""
157 |         self.socket.connect(self.addr)
158 |         logger.info("%s socket connected to %s", self.name, self.addr)
159 | 
160 |     def close(self):
161 |         """Close the socket connection."""
162 |         self.socket.close()
163 |         logger.info("%s socket closed", self.name)
164 | 
165 | 
def _recv_all(conn, length):
    """Read exactly ``length`` bytes from ``conn``.

    Args:
        conn: socket-like object providing ``recv_into``.
        length: number of bytes to read.

    Returns:
        A ``bytearray`` of ``length`` bytes.

    Raises:
        ConnectionError: if the peer closes the connection before ``length``
            bytes have arrived.
    """
    buffer = bytearray(length)
    view = memoryview(buffer)
    size = 0
    while size < length:
        packet = conn.recv_into(view)
        if packet == 0:
            # `recv_into` returns 0 once the peer has closed the connection;
            # without this check the loop would never end.
            raise ConnectionError(
                f"socket closed after {size} of {length} expected bytes"
            )
        view = view[packet:]
        size += packet
    return buffer
175 | 


--------------------------------------------------------------------------------