├── python ├── .gitattributes ├── MANIFEST.in ├── xoscar │ ├── backends │ │ ├── indigen │ │ │ ├── tests │ │ │ │ ├── test-logging.conf │ │ │ │ ├── __init__.py │ │ │ │ └── test_allocate_strategy.py │ │ │ ├── __main__.py │ │ │ ├── __init__.py │ │ │ ├── driver.py │ │ │ └── backend.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_messages.py │ │ ├── __init__.py │ │ ├── test │ │ │ ├── tests │ │ │ │ ├── __init__.py │ │ │ │ ├── test_message.py │ │ │ │ └── test_actor_context.py │ │ │ ├── __init__.py │ │ │ └── backend.py │ │ └── communication │ │ │ ├── tests │ │ │ └── __init__.py │ │ │ ├── errors.py │ │ │ ├── __init__.py │ │ │ ├── core.py │ │ │ └── utils.py │ ├── aio │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_aio_file.py │ │ ├── __init__.py │ │ ├── parallelism.py │ │ ├── file.py │ │ └── base.py │ ├── metrics │ │ ├── tests │ │ │ └── __init__.py │ │ ├── backends │ │ │ ├── __init__.py │ │ │ ├── console │ │ │ │ ├── __init__.py │ │ │ │ ├── tests │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── test_console_metric.py │ │ │ │ └── console_metric.py │ │ │ ├── tests │ │ │ │ ├── __init__.py │ │ │ │ └── test_metric.py │ │ │ └── prometheus │ │ │ │ ├── __init__.py │ │ │ │ ├── tests │ │ │ │ ├── __init__.py │ │ │ │ └── test_prometheus_metric.py │ │ │ │ └── prometheus_metric.py │ │ └── __init__.py │ ├── collective │ │ ├── backend │ │ │ └── __init__.py │ │ ├── tests │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── utils.py │ │ └── common.py │ ├── virtualenv │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_utils.py │ │ ├── __init__.py │ │ ├── platform.py │ │ └── utils.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_actorcaller.py │ │ └── core.py │ ├── serialization │ │ ├── tests │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── core.pxd │ │ ├── pyfury.py │ │ ├── exception.py │ │ ├── core.pyi │ │ ├── mlx.py │ │ ├── scipy.py │ │ ├── numpy.py │ │ └── cuda.py │ ├── context.pxd │ ├── constants.py │ ├── libcpp.pxd │ ├── _utils.pxd │ ├── errors.py │ ├── core.pxd │ ├── driver.py │ ├── 
__init__.py │ └── backend.py ├── pyproject.toml └── setup.cfg ├── doc ├── source │ ├── _static │ │ └── architecture.png │ ├── user_guide │ │ ├── index.rst │ │ ├── actor-pool.rst │ │ └── actor.rst │ ├── reference │ │ ├── index.rst │ │ ├── actor.rst │ │ └── actor-pool.rst │ ├── getting_started │ │ ├── index.rst │ │ ├── installation.rst │ │ └── quickstart.rst │ ├── conf.py │ └── index.rst ├── Makefile └── make.bat ├── .github ├── codecov.yml ├── ISSUE_TEMPLATE │ ├── other.md │ ├── feature_request.md │ └── bug_report.md ├── PULL_REQUEST_TEMPLATE.md ├── workflows │ └── assign.yaml └── mergify.yml ├── CPPLINT.cfg ├── .readthedocs.yaml ├── .gitmodules ├── CI ├── conda-environment.yml ├── requirements-wheel.txt └── test_functionality.py ├── .clang-format ├── cpp ├── collective │ ├── gloo │ │ ├── CMakeLists.txt │ │ ├── include │ │ │ ├── rendezvous.h │ │ │ └── transport.h │ │ └── src │ │ │ ├── barrier.cc │ │ │ ├── recv.cc │ │ │ ├── send.cc │ │ │ ├── all_to_all.cc │ │ │ ├── gather.cc │ │ │ ├── scatter.cc │ │ │ ├── broadcast.cc │ │ │ ├── allreduce.cc │ │ │ └── reduce.cc │ └── rendezvous │ │ ├── src │ │ └── exception.cpp │ │ ├── CMakeLists.txt │ │ ├── include │ │ ├── win_sock_utils.hpp │ │ ├── unix_sock_utils.hpp │ │ ├── exception.h │ │ ├── error.h │ │ ├── socket.h │ │ └── call_once.h │ │ └── LICENSE └── CMakeLists.txt ├── .pre-commit-config.yaml ├── .cmake-format.yaml ├── CMakeLists.txt └── .gitignore /python/.gitattributes: -------------------------------------------------------------------------------- 1 | xoscar/_version.py export-subst 2 | -------------------------------------------------------------------------------- /doc/source/_static/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xorbitsai/xoscar/HEAD/doc/source/_static/architecture.png -------------------------------------------------------------------------------- /.github/codecov.yml: 
-------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | target: 90% 6 | threshold: 1% 7 | -------------------------------------------------------------------------------- /CPPLINT.cfg: -------------------------------------------------------------------------------- 1 | filter=-build/c++11,-build/include_subdir,-build/include_order,-build/include_what_you_use,-readability/todo,-readability/nolint,-runtime/int,-runtime/references,-whitespace/indent -------------------------------------------------------------------------------- /doc/source/user_guide/index.rst: -------------------------------------------------------------------------------- 1 | .. _user_guide_index: 2 | 3 | ========== 4 | User Guide 5 | ========== 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | actor 11 | actor-pool -------------------------------------------------------------------------------- /doc/source/reference/index.rst: -------------------------------------------------------------------------------- 1 | .. _reference_index: 2 | 3 | ============= 4 | API Reference 5 | ============= 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | actor-pool 11 | actor 12 | -------------------------------------------------------------------------------- /doc/source/getting_started/index.rst: -------------------------------------------------------------------------------- 1 | .. _getting_started_index: 2 | 3 | =============== 4 | Getting Started 5 | =============== 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | installation 11 | quickstart 12 | llm-inference -------------------------------------------------------------------------------- /doc/source/reference/actor.rst: -------------------------------------------------------------------------------- 1 | .. _ref_actor: 2 | 3 | ===== 4 | Actor 5 | ===== 6 | 7 | .. 
autosummary:: 8 | :toctree: generated/ 9 | 10 | xoscar.create_actor 11 | xoscar.destroy_actor 12 | xoscar.actor_ref 13 | xoscar.kill_actor 14 | -------------------------------------------------------------------------------- /doc/source/reference/actor-pool.rst: -------------------------------------------------------------------------------- 1 | .. _ref_actor-pool: 2 | 3 | ========== 4 | Actor pool 5 | ========== 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | xoscar.create_actor_pool 11 | xoscar.wait_actor_pool_recovered 12 | xoscar.get_pool_config -------------------------------------------------------------------------------- /python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-include *.pyx 2 | global-include *.pxd 3 | global-include xoscar/**/*.yml 4 | global-exclude *.c 5 | global-exclude *.cpp 6 | global-exclude */**/tests/*.yml 7 | include setup.cfg 8 | include pyproject.toml 9 | global-exclude .DS_Store 10 | include versioneer.py 11 | include xoscar/_version.py 12 | global-exclude conftest.py 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/other.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Other 3 | about: Submit other issues here. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Note that the issue tracker is NOT the place for general support. For 11 | discussions about development, questions about usage, or any general questions, 12 | contact us on https://discuss.xorbits.io/. 13 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | ## What do these changes do? 
6 | 7 | 8 | 9 | ## Related issue number 10 | 11 | 12 | Fixes #xxxx 13 | 14 | ## Check code requirements 15 | 16 | - [ ] tests added / passed (if needed) 17 | - [ ] Ensure all linting tests pass 18 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | # Build documentation in the doc/ directory with Sphinx 4 | sphinx: 5 | configuration: doc/source/conf.py 6 | 7 | build: 8 | os: ubuntu-20.04 9 | tools: 10 | python: "3.9" 11 | apt_packages: 12 | - cmake 13 | 14 | python: 15 | install: 16 | - method: pip 17 | path: python 18 | extra_requirements: 19 | - doc 20 | 21 | submodules: 22 | include: all 23 | recursive: true 24 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/fmt"] 2 | path = third_party/fmt 3 | url = https://github.com/fmtlib/fmt.git 4 | [submodule "third_party/pybind11"] 5 | path = third_party/pybind11 6 | url = https://github.com/pybind/pybind11.git 7 | [submodule "third_party/gloo"] 8 | path = third_party/gloo 9 | url = https://github.com/facebookincubator/gloo.git 10 | [submodule "third_party/libuv"] 11 | path = third_party/libuv 12 | url = https://github.com/libuv/libuv.git 13 | -------------------------------------------------------------------------------- /python/xoscar/backends/indigen/tests/test-logging.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root,test_indigen_pool 3 | 4 | [handlers] 5 | keys=stream_handler 6 | 7 | [formatters] 8 | keys=formatter 9 | 10 | [logger_root] 11 | level=WARN 12 | handlers=stream_handler 13 | 14 | [logger_test_indigen_pool] 15 | level=DEBUG 16 | handlers=stream_handler 17 | qualname=xoscar.backends.indigen.tests 18 | propagate=0 19 | 20 | 
[handler_stream_handler] 21 | class=StreamHandler 22 | formatter=formatter 23 | args=(sys.stderr,) 24 | 25 | [formatter_formatter] 26 | format=%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s 27 | -------------------------------------------------------------------------------- /python/xoscar/backends/indigen/__main__.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import click 3 | 4 | @click.group( 5 | invoke_without_command=True, 6 | name="xoscar", 7 | help="Xoscar command-line interface.", 8 | ) 9 | def main(): 10 | pass 11 | 12 | @main.command("start_sub_pool", help="Start a sub pool.") 13 | @click.option("shm_name", "-sn", type=str, help="Shared memory name.") 14 | def start_sub_pool(shm_name): 15 | from xoscar.backends.indigen.pool import MainActorPool 16 | 17 | MainActorPool._start_sub_pool_in_child(shm_name) 18 | 19 | main() 20 | -------------------------------------------------------------------------------- /CI/conda-environment.yml: -------------------------------------------------------------------------------- 1 | name: xoscar-test 2 | channels: 3 | - defaults 4 | dependencies: 5 | - numpy 6 | - cloudpickle 7 | - coverage 8 | - cython 9 | - mock 10 | - pyarrow 11 | - pytest 12 | - pytest-asyncio 13 | - pytest-cov 14 | - pytest-timeout 15 | - pytest-forked 16 | - scipy 17 | - pandas 18 | - numexpr 19 | - scikit-learn 20 | - psutil 21 | - tornado 22 | - defusedxml 23 | - pyyaml 24 | - black 25 | - tqdm 26 | - fsspec 27 | - flake8 28 | - pillow 29 | - lz4 30 | - sqlalchemy 31 | - tqdm 32 | - openpyxl 33 | - pip 34 | 35 | - pip: 36 | - uvloop>=0.14.0; sys.platform!="win32" 37 | -------------------------------------------------------------------------------- /python/xoscar/aio/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/xoscar/backends/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2025 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/xoscar/metrics/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | # See all possible options and defaults with: 2 | # clang-format --style=llvm --dump-config 3 | BasedOnStyle: LLVM 4 | AccessModifierOffset: -4 5 | AllowShortLambdasOnASingleLine: Inline 6 | AlwaysBreakTemplateDeclarations: Yes 7 | BinPackArguments: false 8 | BinPackParameters: false 9 | BreakBeforeBinaryOperators: All 10 | BreakConstructorInitializers: BeforeColon 11 | ColumnLimit: 80 12 | SpacesBeforeTrailingComments: 2 13 | IncludeBlocks: Regroup 14 | IndentCaseLabels: true 15 | IndentPPDirectives: AfterHash 16 | IndentWidth: 4 17 | Language: Cpp 18 | SpaceAfterCStyleCast: true 19 | Standard: c++20 20 | StatementMacros: ['PyObject_HEAD'] 21 | TabWidth: 4 22 | -------------------------------------------------------------------------------- /python/xoscar/collective/backend/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2025 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/xoscar/collective/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/xoscar/metrics/backends/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/xoscar/virtualenv/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2025 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/xoscar/metrics/backends/console/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/xoscar/metrics/backends/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/xoscar/metrics/backends/console/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/xoscar/metrics/backends/prometheus/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/xoscar/metrics/backends/prometheus/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /.github/workflows/assign.yaml: -------------------------------------------------------------------------------- 1 | name: Assign 2 | on: 3 | issue_comment: 4 | types: created 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | issue_assign: 11 | permissions: 12 | issues: write 13 | pull-requests: write 14 | runs-on: ubuntu-22.04 15 | steps: 16 | - if: github.event.comment.body == 'take' 17 | run: | 18 | echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" 19 | curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Is your feature request related to a problem? Please describe 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | ### Describe the solution you'd like 14 | A clear and concise description of what you want to happen. 
15 | 16 | ### Describe alternatives you've considered 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | ### Additional context 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /python/xoscar/backends/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /python/xoscar/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /python/xoscar/backends/indigen/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /python/xoscar/backends/test/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /python/xoscar/serialization/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /python/xoscar/backends/communication/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ### Describe the bug 11 | A clear and concise description of what the bug is. 12 | 13 | ### To Reproduce 14 | To help us reproduce this bug, please provide the information below: 15 | 16 | 1. Your Python version 17 | 2. The version of Xoscar you use 18 | 3. Versions of crucial packages, such as numpy, scipy and pandas 19 | 4. Full stack trace of the error. 20 | 5. Minimized code to reproduce the error. 21 | 22 | ### Expected behavior 23 | A clear and concise description of what you expected to happen. 24 | 25 | ### Additional context 26 | Add any other context about the problem here. 
27 | -------------------------------------------------------------------------------- /python/xoscar/aio/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .file import AioFileObject 15 | from .lru import alru_cache 16 | from .parallelism import AioEvent 17 | -------------------------------------------------------------------------------- /cpp/collective/gloo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.11...3.21) 2 | 3 | project( 4 | XoscarGloo 5 | VERSION 0.0.1 6 | LANGUAGES CXX) 7 | 8 | set(CMAKE_CXX_STANDARD 20) 9 | 10 | include_directories(include) 11 | include_directories(../rendezvous/include) 12 | include_directories(../../../third_party/pybind11/include) 13 | 14 | add_library( 15 | GlooLib 16 | include/collective.h 17 | include/rendezvous.h 18 | include/transport.h 19 | include/config.h 20 | src/allgather.cc 21 | src/allreduce.cc 22 | src/barrier.cc 23 | src/broadcast.cc 24 | src/gather.cc 25 | src/recv.cc 26 | src/reduce_scatter.cc 27 | src/reduce.cc 28 | src/rendezvous.cc 29 | src/scatter.cc 30 | src/send.cc 31 | src/transport.cc 32 | src/all_to_all.cc) 33 | -------------------------------------------------------------------------------- /python/xoscar/backends/test/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .backend import TestActorBackend 17 | -------------------------------------------------------------------------------- /python/xoscar/backends/indigen/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .backend import IndigenActorBackend 17 | -------------------------------------------------------------------------------- /python/xoscar/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .api import ( 16 | Metrics, 17 | Percentile, 18 | init_metrics, 19 | record_time_cost_percentile, 20 | shutdown_metrics, 21 | ) 22 | -------------------------------------------------------------------------------- /python/xoscar/context.pxd: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | cdef class BaseActorContext: 18 | cdef public str _address 19 | 20 | 21 | cpdef get_context() 22 | -------------------------------------------------------------------------------- /cpp/collective/rendezvous/src/exception.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | #include "exception.h" 15 | 16 | namespace xoscar { 17 | 18 | XoscarError::~XoscarError() = default; 19 | 20 | TimeoutError::~TimeoutError() = default; 21 | 22 | } // namespace xoscar 23 | -------------------------------------------------------------------------------- /cpp/collective/rendezvous/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.11...3.21) 2 | 3 | project( 4 | XoscarRendezvous 5 | VERSION 0.0.1 6 | LANGUAGES CXX) 7 | 8 | # there will be an error that 9 | # "Microsoft Visual Studio\2022\BuildTools\VC\Tools\MSVC\14.36.32532\include\format" 10 | # on windows if c++ standard is set to 20 11 | if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") 12 | set(CMAKE_CXX_STANDARD 17) 13 | else() 14 | set(CMAKE_CXX_STANDARD 20) 15 | endif() 16 | 17 | include_directories(include) 18 | 19 | add_library( 20 | StoreLib 21 | include/error.h 22 | include/call_once.h 23 | include/win_sock_utils.hpp 24 | include/exception.h 25 | src/exception.cpp 26 | include/socket.h 27 | 
class ChannelClosed(XoscarError):
    """Communication-layer error type derived from ``XoscarError``.

    NOTE(review): the name suggests it signals use of a channel that has
    already been closed -- confirm against the raise sites.
    """
# Base directory for xoscar's on-disk state.  The ``XOSCAR_DIR`` environment
# variable overrides the user's home directory.  ``Path.home()`` is consulted
# only when the variable is unset, because it raises ``RuntimeError`` when no
# home directory can be resolved -- an eager call would fail even though an
# explicit override was supplied.
_xoscar_base_dir = os.getenv("XOSCAR_DIR")
XOSCAR_TEMP_DIR = (
    Path(_xoscar_base_dir) if _xoscar_base_dir is not None else Path.home()
) / ".xoscar"

# unix socket.
XOSCAR_UNIX_SOCKET_DIR = XOSCAR_TEMP_DIR / "socket"

# NOTE(review): presumably a connection timeout in seconds -- confirm at use sites.
XOSCAR_CONNECT_TIMEOUT = 8
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .core import ( 16 | RankActor, 17 | allgather, 18 | allreduce, 19 | alltoall, 20 | broadcast, 21 | gather, 22 | init_process_group, 23 | new_group, 24 | reduce, 25 | reduce_scatter, 26 | scatter, 27 | ) 28 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /python/xoscar/serialization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from . import cuda, exception, mlx, numpy, scipy 17 | from .aio import AioDeserializer, AioSerializer 18 | from .core import Serializer, deserialize, serialize, serialize_with_spawn 19 | 20 | del cuda, numpy, scipy, mlx, exception 21 | -------------------------------------------------------------------------------- /cpp/collective/gloo/src/barrier.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | 15 | #include 16 | #include 17 | 18 | namespace xoscar { 19 | 20 | void barrier(const std::shared_ptr &context, uint32_t tag) { 21 | gloo::BarrierOptions opts_(context); 22 | 23 | opts_.setTag(tag); 24 | 25 | gloo::barrier(opts_); 26 | } 27 | } // namespace xoscar 28 | -------------------------------------------------------------------------------- /python/xoscar/backends/indigen/driver.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
class IndigenActorDriver(BaseActorDriver):
    """Actor driver for the Indigen backend; performs no cluster setup."""

    @classmethod
    def setup_cluster(cls, address_to_resources: Dict[str, Dict[str, Number]]):
        """Accept the cluster description and do nothing with it.

        ``address_to_resources`` is annotated as a mapping from address
        strings to per-resource numeric amounts; it is not read here.
        """
        # The Indigen backend needs no driver-side preparation.
        return None
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | cdef class Serializer: 17 | cdef int _serializer_id 18 | 19 | cpdef serial(self, object obj, dict context) 20 | cpdef deserial(self, tuple serialized, dict context, list subs) 21 | cpdef on_deserial_error( 22 | self, 23 | tuple serialized, 24 | dict context, 25 | list subs_serialized, 26 | int error_index, 27 | object exc, 28 | ) 29 | -------------------------------------------------------------------------------- /python/xoscar/backends/communication/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .base import Channel, ChannelType, Client, Server 17 | from .core import gen_local_address, get_client_type, get_server_type 18 | from .dummy import DummyChannel, DummyClient, DummyServer 19 | from .socket import ( 20 | SocketChannel, 21 | SocketClient, 22 | SocketServer, 23 | UnixSocketClient, 24 | UnixSocketServer, 25 | ) 26 | from .ucx import ( # noqa: F401 # pylint: disable=unused-import 27 | UCXChannel, 28 | UCXClient, 29 | UCXServer, 30 | ) 31 | -------------------------------------------------------------------------------- /python/xoscar/libcpp.pxd: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2022 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | # complementary header for C++ STL libs not included in Cython 17 | 18 | from libc.stdint cimport uint_fast64_t 19 | 20 | 21 | cdef extern from "" namespace "std" nogil: 22 | cdef cppclass mt19937_64: 23 | ctypedef uint_fast64_t result_type 24 | 25 | mt19937_64() except + 26 | mt19937_64(result_type seed) except + 27 | result_type operator()() except + 28 | result_type min() except + 29 | result_type max() except + 30 | void discard(size_t z) except + 31 | void seed(result_type seed) except + 32 | -------------------------------------------------------------------------------- /python/xoscar/virtualenv/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2025 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def get_virtual_env_manager(env_name: str, env_path: str | Path) -> VirtualEnvManager:
    """Instantiate the virtual-env manager registered under *env_name*.

    Args:
        env_name: Key into ``_name_to_managers``.
        env_path: Location handed to the manager; converted to ``Path`` first.

    Returns:
        An instance of the registered manager class, constructed with the path.

    Raises:
        ValueError: If *env_name* is not a registered manager name.
    """
    try:
        manager_cls = _name_to_managers[env_name]
    except KeyError:
        # ``from None`` hides the internal KeyError: the failed dict lookup is
        # an implementation detail and only clutters the caller's traceback.
        raise ValueError(
            f"Unknown virtualenv manager {env_name}, available: {list(_name_to_managers)}"
        ) from None

    return manager_cls(Path(env_path))
*/ 14 | #pragma once 15 | 16 | #include 17 | 18 | namespace xoscar { 19 | namespace tcputil { 20 | 21 | #define CONNECT_SOCKET_OFFSET 1 22 | 23 | inline int poll(struct pollfd *fdArray, unsigned long fds, int timeout) { 24 | return WSAPoll(fdArray, fds, timeout); 25 | } 26 | 27 | inline void 28 | addPollfd(std::vector &fds, int socket, short events) { 29 | fds.push_back({(SOCKET) socket, events}); 30 | } 31 | 32 | inline struct ::pollfd getPollfd(int socket, short events) { 33 | struct ::pollfd res = {(SOCKET) socket, events}; 34 | return res; 35 | } 36 | 37 | } // namespace tcputil 38 | } // namespace xoscar 39 | -------------------------------------------------------------------------------- /python/xoscar/aio/tests/test_aio_file.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | import tempfile 17 | 18 | import pytest 19 | 20 | from .. 
@pytest.mark.asyncio
async def test_aio_file_object():
    """Write through ``AioFileObject`` and read the file back line by line."""
    with tempfile.TemporaryDirectory() as workdir:
        target = os.path.join(workdir, "test")

        # Write side: a handle opened with "w" must report itself as
        # non-readable and expose the underlying mode.
        writer = AioFileObject(open(target, "w"))
        async with writer:
            assert writer.readable() is False
            assert writer.mode == "w"
            await writer.write("text for test")

        # Read side: asynchronous iteration must yield non-empty lines.
        reader = AioFileObject(open(target))
        async with reader:
            async for line in reader:
                assert len(line) > 0
14 | 15 | 16 | cdef class TypeDispatcher: 17 | cdef dict _handlers 18 | cdef dict _lazy_handlers 19 | cdef dict _inherit_handlers 20 | cdef object __weakref__ 21 | 22 | cpdef void register(self, object type_, object handler) 23 | cpdef void unregister(self, object type_) 24 | cdef _reload_lazy_handlers(self) 25 | cpdef get_handler(self, object type_) 26 | 27 | cpdef str to_str(s, encoding=*) 28 | cpdef bytes to_binary(s, encoding=*) 29 | cpdef bytes new_random_id(int byte_len) 30 | cpdef bytes new_actor_id() 31 | cdef bint is_async_generator(obj) 32 | 33 | 34 | cdef class Timer: 35 | cdef object _start 36 | cdef readonly object duration 37 | -------------------------------------------------------------------------------- /cpp/collective/rendezvous/include/unix_sock_utils.hpp: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
def register_classes(*args):
    """Record *args* and register every recorded class with this thread's Fury.

    Does nothing (and records nothing) when ``get_fury()`` returns ``None``.
    """
    fury = get_fury()
    if fury is None:
        return
    _register_class_list.update(args)
    # Every recorded class is registered again, not only the new ones.
    for klass in _register_class_list:
        fury.register_class(klass)


def get_fury():
    """Return the thread-local ``pyfury.Fury`` instance, or ``None``.

    ``None`` is returned when the ``USE_FURY`` environment variable is not one
    of ``"1"``, ``"true"``, ``"True"``, or when ``pyfury`` cannot be imported.
    The instance is created on first use per thread and cached on ``_fury``;
    a failed import is cached as well, via the ``_fury_not_installed`` sentinel.
    """
    if os.environ.get("USE_FURY") not in ("1", "true", "True"):
        return None

    cached = getattr(_fury, "instance", None)
    if cached is _fury_not_installed:  # pragma: no cover
        return None
    if cached is not None:
        return cached

    fury = None
    try:
        import pyfury

        fury = pyfury.Fury(
            language=pyfury.Language.PYTHON, require_class_registration=False
        )
        _fury.instance = fury
        for klass in _register_class_list:  # pragma: no cover
            fury.register_class(klass)
        print("pyfury is enabled.")
    except ImportError:  # pragma: no cover
        print("pyfury is not installed.")
        # Remember the failure so the import is not retried on this thread.
        _fury.instance = _fury_not_installed
    return fury
def convert_data_to_np_array(data):
    """Return *data* as a NumPy array.

    An ``np.ndarray`` is returned unchanged; anything else is handed to
    ``np.frombuffer`` with dtype ``"u1"``.
    """
    return data if isinstance(data, np.ndarray) else np.frombuffer(data, dtype="u1")


def convert_data_to_cp_array(data):
    """Return *data* as a CuPy array.

    A ``cupy.ndarray`` is returned unchanged; anything else is handed to
    ``cupy.frombuffer`` with dtype ``"u1"``.
    """
    return (
        data if isinstance(data, cupy.ndarray) else cupy.frombuffer(data, dtype="u1")
    )


def get_rank_address_via_env(env_key: str, err_message: str) -> str:
    """Read an address from the environment variable *env_key*.

    Raises:
        RuntimeError: With *err_message* when the variable is not set.
    """
    value = os.environ.get(env_key)
    if value is not None:
        return value
    raise RuntimeError(err_message)
20 | 21 | 22 | Dependencies 23 | ------------ 24 | 25 | ================================================================ ========================== 26 | Package Minimum supported version 27 | ================================================================ ========================== 28 | `NumPy `__ 1.20.3 29 | `pandas `__ 1.0.0 30 | `scipy `__ 1.0.0 31 | `scikit-learn `__ 0.20 32 | cloudpickle 1.5.0 33 | psutil 5.9.0 34 | uvloop (for systems other than win32) 0.14.0 35 | ================================================================ ========================== 36 | 37 | -------------------------------------------------------------------------------- /doc/source/user_guide/actor-pool.rst: -------------------------------------------------------------------------------- 1 | .. _actor-pool: 2 | 3 | ========== 4 | Actor pool 5 | ========== 6 | 7 | An actor pool serves as a container and entry point for managing actors. It is also a 8 | self-contained computational unit that in most cases runs within an individual process. 9 | 10 | Before creating any actor, it is necessary to initialize the actor pools. In scenarios involving 11 | multiple machines, it is recommended to initialize an actor pool on each machine to effectively 12 | utilize the resources of the entire cluster. 13 | 14 | Manual creation of each actor pool is not required. Instead, you can specify the desired number of 15 | actor pools using the ``n_process`` parameter when invoking ``xoscar.create_actor_pool``. Xoscar 16 | will automatically handle the creation of the specified number of actor pools for you. Normally, 17 | ``n_process`` should be set to the number of CPUs. 18 | 19 | .. seealso:: 20 | :ref:`ref_actor-pool` 21 | 22 | 23 | Create an actor pool 24 | -------------------- 25 | 26 | To create an actor pool, you are required to provide the address and specify the desired level of 27 | parallelism. 28 | 29 | .. 
code-block:: python 30 | 31 | import asyncio 32 | import xoscar as xo 33 | 34 | async def _main(): 35 | await xo.create_actor_pool(address="localhost:9999", n_process=4) 36 | 37 | loop = asyncio.get_event_loop() 38 | loop.run_until_complete(_main()) 39 | -------------------------------------------------------------------------------- /python/xoscar/backends/test/backend.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
from __future__ import annotations

from ...backend import register_backend
from ..indigen.backend import IndigenActorBackend
from .pool import TestMainActorPool


@register_backend
class TestActorBackend(IndigenActorBackend):
    """Indigen-derived backend that reports the name ``"test"``.

    It overrides only the reported name and the pool class
    (``TestMainActorPool``) passed to the pool factory.
    """

    @staticmethod
    def name():
        """Return the name this backend reports: ``"test"``."""
        return "test"

    @classmethod
    async def create_actor_pool(
        cls, address: str, n_process: int | None = None, **kwargs
    ):
        """Create an actor pool backed by ``TestMainActorPool``.

        Parameters
        ----------
        address: str
            Address forwarded to the pool factory.
        n_process: int
            Forwarded to the pool factory; must not be ``None``.
        **kwargs
            Forwarded unchanged to the pool factory.

        Returns
        -------
        Whatever the awaited pool factory returns.
        """
        # Resolved at call time rather than at module import.
        from ..pool import create_actor_pool as _make_pool

        assert n_process is not None
        pool = await _make_pool(
            address, pool_cls=TestMainActorPool, n_process=n_process, **kwargs
        )
        return pool


# --------------------------------------------------------------------------------
# /python/xoscar/aio/parallelism.py:
# --------------------------------------------------------------------------------
# Copyright 2022-2023 XProbe Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import asyncio
import multiprocessing
import threading
from concurrent.futures import Executor
from typing import Union

from .base import AioBase, delegate_to_executor, proxy_method_directly

event_types = Union[threading.Event, multiprocessing.Event]  # type: ignore


@delegate_to_executor("wait")
@proxy_method_directly("set", "is_set", "clear")
class AioEvent(AioBase):
    """``AioBase`` wrapper around a threading/multiprocessing event.

    The class decorators name which methods of the wrapped event are
    delegated to the executor (``wait``) and which are proxied directly
    (``set``, ``is_set``, ``clear``).
    """

    def __init__(
        self,
        event: event_types | None = None,
        loop: asyncio.BaseEventLoop | None = None,
        executor: Executor | None = None,
    ):
        """Wrap *event*, or a fresh ``threading.Event`` when none is given.

        ``loop`` and ``executor`` are passed through to ``AioBase``.
        """
        wrapped = threading.Event() if event is None else event
        super().__init__(wrapped, loop=loop, executor=executor)


# --------------------------------------------------------------------------------
# /python/xoscar/errors.py:
# --------------------------------------------------------------------------------
# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class XoscarError(Exception):
    """Root of the xoscar exception hierarchy."""


# The subclasses below add no behaviour of their own; they exist so callers
# can catch a specific error type.


class ReconstructWorkerError(XoscarError):
    pass


class ActorPoolNotStarted(XoscarError):
    pass


class ActorNotExist(XoscarError):
    pass


class ActorAlreadyExist(XoscarError):
    pass


class NoIdleSlot(XoscarError):
    pass


class NoFreeSlot(XoscarError):
    pass


class SlotStateError(XoscarError):
    pass


class ServerClosed(XoscarError):
    pass


class CannotCancelTask(XoscarError):
    pass


class SendMessageFailed(XoscarError):
    pass


class Return(XoscarError):
    """Exception that carries a payload in its ``value`` attribute.

    Parameters
    ----------
    value
        Object stored on the exception as ``self.value``.
    """

    def __init__(self, value):
        # Forward the payload to ``Exception.__init__`` so it lands in
        # ``self.args``. Pickle and copy rebuild exceptions by calling
        # ``cls(*self.args)``; with empty args that call would fail because
        # ``value`` is a required argument.
        super().__init__(value)
        self.value = value


# --------------------------------------------------------------------------------
# /python/xoscar/core.pxd:
# --------------------------------------------------------------------------------
# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Cython declaration file: lists the C-level attributes and methods of the
# extension types below so other Cython modules can cimport them.


cdef class ActorRef:
    # Declaring ``__weakref__`` makes instances weak-referenceable.
    cdef object __weakref__
    # ``public`` attributes are readable and writable from Python code.
    cdef public str address
    cdef public object uid
    cdef public list proxy_addresses
    # Not ``public``: reachable from Cython code only.
    cdef dict _methods


cdef class LocalActorRef(ActorRef):
    # NOTE(review): presumably a weak reference to the local actor object --
    # confirm against the implementation module.
    cdef object _actor_weakref
    cdef _weakref_local_actor(self)


cdef class BufferRef:
    cdef public str address
    cdef public bytes uid


cdef class FileObjectRef:
    cdef public str address
    cdef public bytes uid


cdef class _BaseActor:
    # Declaring ``__weakref__`` makes instances weak-referenceable.
    cdef object __weakref__
    cdef str _address
    cdef object _lock
    cdef object _uid

    # ``cpdef``: callable from both Cython and Python; returns an ActorRef.
    cpdef ActorRef ref(self)


cdef class ActorEnvironment:
    cdef public dict actor_locks
    cdef public object address


# --------------------------------------------------------------------------------
# /python/xoscar/driver.py:
# --------------------------------------------------------------------------------
# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC, abstractmethod
from numbers import Number
from typing import Dict, Type


class BaseActorDriver(ABC):
    """Abstract interface every backend driver must implement."""

    @classmethod
    @abstractmethod
    def setup_cluster(cls, address_to_resources: Dict[str, Dict[str, Number]]):
        """
        Set up the cluster according to the given resources.

        Parameters
        ----------
        address_to_resources: dict
            Maps each node address to the resources required on that node;
            each value is itself a dict, e.g. {'CPU': 3, 'GPU': 1}.
        """


# Registry filled by ``register_backend_driver``: scheme -> driver class.
_backend_driver_cls: Dict[str, Type[BaseActorDriver]] = dict()


def register_backend_driver(scheme: str, cls: Type[BaseActorDriver]):
    """Register *cls* as the driver class for *scheme*.

    An existing entry for the same scheme is overwritten.

    Parameters
    ----------
    scheme: str
        Key under which the driver class is stored.
    cls: type
        A subclass of ``BaseActorDriver``.

    Raises
    ------
    TypeError
        If *cls* is not a class derived from ``BaseActorDriver``.
    """
    # Explicit check instead of ``assert``: assertions are removed under
    # ``python -O``, which would let an unrelated class be registered.
    if not (isinstance(cls, type) and issubclass(cls, BaseActorDriver)):
        raise TypeError(f"cls must be a subclass of BaseActorDriver, got {cls!r}")
    _backend_driver_cls[scheme] = cls


# --------------------------------------------------------------------------------
# /cpp/collective/rendezvous/include/exception.h:
# --------------------------------------------------------------------------------
# /* Copyright 2022-2023 XProbe Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/ 14 | 15 | #pragma once 16 | 17 | #include 18 | 19 | namespace xoscar { 20 | 21 | class XoscarError : public std::runtime_error { 22 | public: 23 | using std::runtime_error::runtime_error; 24 | 25 | XoscarError(const XoscarError &) = default; 26 | 27 | XoscarError &operator=(const XoscarError &) = default; 28 | 29 | XoscarError(XoscarError &&) = default; 30 | 31 | XoscarError &operator=(XoscarError &&) = default; 32 | 33 | ~XoscarError() override; 34 | }; 35 | 36 | class TimeoutError : public XoscarError { 37 | public: 38 | using XoscarError::XoscarError; 39 | 40 | TimeoutError(const TimeoutError &) = default; 41 | 42 | TimeoutError &operator=(const TimeoutError &) = default; 43 | 44 | TimeoutError(TimeoutError &&) = default; 45 | 46 | TimeoutError &operator=(TimeoutError &&) = default; 47 | 48 | ~TimeoutError() override; 49 | }; 50 | 51 | } // namespace xoscar 52 | -------------------------------------------------------------------------------- /CI/requirements-wheel.txt: -------------------------------------------------------------------------------- 1 | oldest-supported-numpy 2 | 3 | numpy 4 | packaging 5 | wheel 6 | 7 | pandas==1.0.4; python_version<'3.9' and platform_machine!='aarch64' and platform_machine!='arm64' 8 | pandas==1.1.3; python_version<'3.9' and platform_machine=='aarch64' 9 | pandas==1.4.0; python_version<'3.9' and platform_machine=='arm64' 10 | pandas==1.2.2; python_version>='3.9' and python_version<'3.10' and platform_machine!='arm64' 11 | pandas==1.4.0; python_version>='3.9' and python_version<'3.10' and platform_machine=='arm64' 12 | pandas==1.3.4; python_version>='3.10' and python_version<'3.11' and platform_machine!='arm64' 13 | pandas==1.4.0; python_version>='3.10' and python_version<'3.11' and platform_machine=='arm64' 14 | pandas==1.5.1; python_version>='3.11' and python_version<'3.12' 15 | pandas>=2.1.1; python_version>'3.11' 16 | 17 | scipy==1.4.1; python_version<'3.9' and platform_machine!='aarch64' and platform_machine!='arm64' 18 
| scipy==1.7.3; python_version<'3.9' and platform_machine=='arm64' 19 | scipy==1.5.3; python_version<'3.9' and platform_machine=='aarch64' 20 | scipy==1.5.4; python_version>='3.9' and python_version<'3.10' and platform_machine!='arm64' 21 | scipy==1.7.2; python_version>='3.10' and python_version<'3.11' and platform_machine!='arm64' 22 | scipy==1.7.3; python_version>='3.10' and python_version<'3.11' and platform_machine=='arm64' 23 | scipy==1.9.2; python_version>='3.11' and python_version<'3.12' 24 | scipy>=1.11.2; python_version>'3.11' 25 | 26 | # see: https://github.com/cython/cython/commit/afc00fc3ba5d43c67151c0039847a526e7b627a5 27 | cython==0.29.33 28 | requests>=2.4.0 29 | cloudpickle>=1.5.0 30 | -------------------------------------------------------------------------------- /cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.11...3.21) 2 | 3 | project(XoscarCollective) 4 | set(CMAKE_CXX_STANDARD 20) 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") 6 | file(GLOB TMP_DIRS "../python/build/lib*") 7 | foreach(TMP_DIR ${TMP_DIRS}) 8 | message(${TMP_DIR}) 9 | set(LIBRARY_OUTPUT_DIRECTORY ${TMP_DIR}/xoscar/collective) 10 | endforeach() 11 | 12 | include_directories(${CMAKE_SOURCE_DIR}/cpp/collective/rendezvous/include) 13 | include_directories(${CMAKE_SOURCE_DIR}/cpp/collective/gloo/include) 14 | include_directories(../third_party/gloo) 15 | include_directories(../third_party/fmt/include) 16 | 17 | add_subdirectory(collective/rendezvous) 18 | add_subdirectory(collective/gloo) 19 | 20 | pybind11_add_module(xoscar_pygloo collective/gloo/main.cc) 21 | if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") 22 | include(FindPkgConfig) 23 | pkg_search_module(libuv REQUIRED libuv>=1.26) 24 | find_file( 25 | libuv_LIBRARY 26 | NAMES libuv.a libuv_a.a 27 | PATHS ${libuv_LIBDIR} 28 | NO_DEFAULT_PATH) 29 | if(NOT EXISTS ${libuv_LIBRARY}) 30 | message(FATAL_ERROR "Unable to find static libuv library 
in " ${libuv_LIBDIR}) 31 | endif() 32 | add_library(uv_s INTERFACE IMPORTED) 33 | set_target_properties(uv_s PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${libuv_INCLUDE_DIRS} 34 | INTERFACE_LINK_LIBRARIES ${libuv_LIBRARY}) 35 | target_link_libraries(xoscar_pygloo PRIVATE GlooLib gloo StoreLib fmt::fmt uv_s) 36 | else() 37 | target_link_libraries(xoscar_pygloo PRIVATE GlooLib gloo StoreLib fmt::fmt) 38 | endif() 39 | set_target_properties(xoscar_pygloo PROPERTIES LIBRARY_OUTPUT_DIRECTORY 40 | ${LIBRARY_OUTPUT_DIRECTORY}) 41 | -------------------------------------------------------------------------------- /python/xoscar/aio/file.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .base import ( 16 | AioBase, 17 | delegate_to_executor, 18 | proxy_method_directly, 19 | proxy_property_directly, 20 | ) 21 | 22 | 23 | @delegate_to_executor( 24 | "close", 25 | "flush", 26 | "isatty", 27 | "read", 28 | "read1", 29 | "readinto", 30 | "readline", 31 | "readlines", 32 | "seek", 33 | "seekable", 34 | "tell", 35 | "truncate", 36 | "writable", 37 | "write", 38 | "writelines", 39 | ) 40 | @proxy_method_directly("fileno", "readable") 41 | @proxy_property_directly("closed", "name", "mode") 42 | class AioFileObject(AioBase): 43 | def __aiter__(self): 44 | return self 45 | 46 | async def __anext__(self): 47 | """Simulate normal file iteration.""" 48 | line = await self.readline() 49 | if line: 50 | return line 51 | else: 52 | raise StopAsyncIteration 53 | 54 | async def __aenter__(self): 55 | return self 56 | 57 | async def __aexit__(self, exc_type, exc_val, exc_tb): 58 | await self.close() 59 | self._file = None 60 | -------------------------------------------------------------------------------- /CI/test_functionality.py: -------------------------------------------------------------------------------- 1 | # Tests for wheel 2 | 3 | import pytest 4 | 5 | import xoscar as mo 6 | 7 | import platform 8 | import sys 9 | 10 | 11 | class MyActor(mo.Actor): 12 | def __init__(self): 13 | self.i = 0 14 | 15 | def add(self, j: int) -> int: 16 | self.i += j 17 | return self.i 18 | 19 | def get(self) -> int: 20 | return self.i 21 | 22 | async def add_from(self, ref: mo.ActorRefType["MyActor"]) -> int: 23 | self.i += await ref.get() 24 | return self.i 25 | 26 | 27 | @pytest.mark.asyncio 28 | @pytest.mark.timeout(60) 29 | async def test_basic_cases(): 30 | pool = await mo.create_actor_pool( 31 | "127.0.0.1", 32 | n_process=2, 33 | ) 34 | 35 | try: 36 | async with pool: 37 | ref1 = await mo.create_actor( 38 | MyActor, 39 | address=pool.external_address, 40 | allocate_strategy=mo.allocate_strategy.ProcessIndex(1), 41 | ) 42 | 43 | ref2 = await 
mo.create_actor( 44 | MyActor, 45 | address=pool.external_address, 46 | allocate_strategy=mo.allocate_strategy.ProcessIndex(2), 47 | ) 48 | 49 | assert await ref1.add(1) == 1 50 | 51 | assert await ref2.add(2) == 2 52 | 53 | assert await ref1.add_from(ref2) == 3 54 | except Exception as e: 55 | raise 56 | finally: 57 | pass 58 | 59 | 60 | def test_pygloo(): 61 | is_windows = sys.platform.startswith("win") 62 | bit_number = platform.architecture()[0] 63 | if not (is_windows and bit_number == "32bit"): 64 | import xoscar.collective.xoscar_pygloo as xp 65 | 66 | print(type(xp.ReduceOp.SUM)) 67 | -------------------------------------------------------------------------------- /python/xoscar/virtualenv/platform.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2025 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional 16 | 17 | 18 | def check_cuda_available() -> bool: 19 | try: 20 | import torch 21 | 22 | return torch.cuda.is_available() 23 | except (ImportError, AttributeError): 24 | return False 25 | 26 | 27 | def get_cuda_version() -> Optional[str]: 28 | try: 29 | import torch 30 | 31 | return torch.version.cuda # e.g. 
'12.1' 32 | except (ImportError, AttributeError): 33 | return None 34 | 35 | 36 | def get_cuda_arch() -> Optional[str]: 37 | try: 38 | import torch 39 | 40 | major, minor = torch.cuda.get_device_capability() 41 | return f"sm_{major}{minor}" # e.g. 'sm_80' 42 | except (ImportError, AttributeError, AssertionError): 43 | # If no cuda available, 44 | # AssertionError("Torch not compiled with CUDA enabled") 45 | # will be raised 46 | return None 47 | 48 | 49 | def check_npu_available() -> bool: 50 | try: 51 | import torch 52 | import torch_npu # noqa: F401 53 | 54 | return torch.npu.is_available() 55 | except ImportError: 56 | return False 57 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 24.10.0 4 | hooks: 5 | - id: black 6 | files: python/xoscar 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v4.4.0 9 | hooks: 10 | - id: end-of-file-fixer 11 | files: python/xoscar 12 | - id: trailing-whitespace 13 | files: python/xoscar 14 | - repo: https://github.com/PyCQA/flake8 15 | rev: 6.0.0 16 | hooks: 17 | - id: flake8 18 | args: [--config, python/setup.cfg] 19 | files: python/xoscar 20 | - repo: https://github.com/pycqa/isort 21 | rev: 5.12.0 22 | hooks: 23 | - id: isort 24 | args: [--sp, python/setup.cfg] 25 | files: python/xoscar 26 | - repo: https://github.com/pre-commit/mirrors-mypy 27 | rev: v1.9.0 28 | hooks: 29 | - id: mypy 30 | additional_dependencies: [tokenize-rt==3.2.0] 31 | args: [--ignore-missing-imports, --follow-imports, skip] 32 | files: python/xoscar 33 | - repo: https://github.com/codespell-project/codespell 34 | rev: v2.2.2 35 | hooks: 36 | - id: codespell 37 | args: [ --config, python/setup.cfg] 38 | files: python/xoscar 39 | 40 | - repo: https://github.com/pre-commit/mirrors-clang-format 41 | rev: "v15.0.7" 42 | hooks: 43 | - id: 
clang-format 44 | files: cpp 45 | 46 | - repo: https://github.com/cheshirekow/cmake-format-precommit 47 | rev: "v0.6.13" 48 | hooks: 49 | - id: cmake-format 50 | additional_dependencies: [ pyyaml ] 51 | types: [ file ] 52 | files: (\.cmake|CMakeLists.txt)(.in)?$ 53 | 54 | - repo: https://github.com/pocc/pre-commit-hooks 55 | rev: v1.3.5 56 | hooks: 57 | - id: cpplint 58 | files: cpp 59 | -------------------------------------------------------------------------------- /python/xoscar/virtualenv/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2025 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | from io import StringIO 17 | from unittest.mock import patch 18 | 19 | import pytest 20 | 21 | from .. 
import utils 22 | 23 | 24 | @pytest.fixture 25 | def patched_logger(): 26 | stream = StringIO() 27 | handler = logging.StreamHandler(stream) 28 | mock_logger = logging.getLogger("pytest_logger") 29 | mock_logger.setLevel(logging.INFO) 30 | mock_logger.addHandler(handler) 31 | 32 | with patch.object(utils, "logger", mock_logger): 33 | yield stream 34 | 35 | mock_logger.removeHandler(handler) 36 | 37 | 38 | def test_stdout_logging(patched_logger): 39 | stream = patched_logger 40 | with utils.run_subprocess_with_logger(["echo", "hello pytest"]) as p: 41 | pass 42 | stream.seek(0) 43 | logs = stream.read() 44 | assert p.returncode == 0 45 | assert "hello pytest" in logs 46 | 47 | 48 | def test_stderr_logging(patched_logger): 49 | stream = patched_logger 50 | with utils.run_subprocess_with_logger(["ls", "non_existent_file"]) as p: 51 | pass 52 | stream.seek(0) 53 | logs = stream.read() 54 | assert p.returncode != 0 55 | assert "non_existent_file" in logs 56 | -------------------------------------------------------------------------------- /python/xoscar/backends/indigen/backend.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
from __future__ import annotations

from ...backend import BaseActorBackend, register_backend
from ..context import IndigenActorContext
from .driver import IndigenActorDriver
from .pool import MainActorPool

__all__ = ["IndigenActorBackend"]


@register_backend
class IndigenActorBackend(BaseActorBackend):
    """Backend tying together the Indigen context, driver and main pool."""

    @staticmethod
    def name():
        """Return the scheme names this backend answers to."""
        # ``None`` marks Indigen as the default scheme; "ucx" addresses are
        # recognised as Indigen too.
        schemes = [None, "ucx"]
        return schemes

    @staticmethod
    def get_context_cls():
        """Return the context class used by this backend."""
        return IndigenActorContext

    @staticmethod
    def get_driver_cls():
        """Return the driver class used by this backend."""
        return IndigenActorDriver

    @classmethod
    async def create_actor_pool(
        cls, address: str, n_process: int | None = None, **kwargs
    ):
        """Create an actor pool backed by ``MainActorPool``.

        Parameters
        ----------
        address: str
            Address forwarded to the pool factory.
        n_process: int
            Forwarded to the pool factory; must not be ``None``.
        **kwargs
            Forwarded unchanged to the pool factory.

        Returns
        -------
        Whatever the awaited pool factory returns.
        """
        # Resolved at call time rather than at module import.
        from ..pool import create_actor_pool as _make_pool

        assert n_process is not None
        pool = await _make_pool(
            address, pool_cls=MainActorPool, n_process=n_process, **kwargs
        )
        return pool


# --------------------------------------------------------------------------------
# /python/xoscar/serialization/exception.py:
# --------------------------------------------------------------------------------
# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15 | 16 | from __future__ import annotations 17 | 18 | import pickle # nosec # pylint: disable=import_pickle 19 | 20 | from .core import Serializer, buffered, pickle_buffers, unpickle_buffers 21 | 22 | 23 | class UnpickleableError(Exception): 24 | def __init__(self, raw_error: str | Exception): 25 | if isinstance(raw_error, str): 26 | super().__init__(raw_error) 27 | else: 28 | super().__init__( 29 | f"Error cannot be pickled, " 30 | f"error type: {type(raw_error)}, " 31 | f"raw error:\n{raw_error}" 32 | ) 33 | 34 | 35 | class ExceptionSerializer(Serializer): 36 | @buffered 37 | def serial(self, obj: Exception, context: dict): 38 | try: 39 | buffers = pickle_buffers(obj) 40 | except (TypeError, pickle.PicklingError): 41 | buffers = pickle_buffers(UnpickleableError(obj)) 42 | return (), buffers, True 43 | 44 | def deserial(self, serialized: tuple, context: dict, subs: list): 45 | return unpickle_buffers(subs) 46 | 47 | 48 | ExceptionSerializer.register(Exception) 49 | -------------------------------------------------------------------------------- /python/xoscar/__init__.py: -------------------------------------------------------------------------------- 1 | # isort: skip_file 2 | # Copyright 2022-2023 XProbe Inc. 3 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from typing import TypeVar, Union 18 | 19 | from . 
import debug 20 | from .api import ( 21 | actor_ref, 22 | create_actor, 23 | has_actor, 24 | destroy_actor, 25 | kill_actor, 26 | buffer_ref, 27 | file_object_ref, 28 | copy_to, 29 | Actor, 30 | StatelessActor, 31 | create_actor_pool, 32 | setup_cluster, 33 | wait_actor_pool_recovered, 34 | get_pool_config, 35 | generator, 36 | wait_for, 37 | ) 38 | from .backends import allocate_strategy 39 | from .backends.pool import MainActorPoolType 40 | from .batch import extensible 41 | from .core import ActorRef, no_lock 42 | from .debug import set_debug_options, get_debug_options, DebugOptions 43 | from .errors import ( 44 | ActorNotExist, 45 | ActorAlreadyExist, 46 | ServerClosed, 47 | SendMessageFailed, 48 | Return, 49 | ) 50 | from ._utils import create_actor_ref 51 | 52 | # make sure methods are registered 53 | from .backends import indigen, test 54 | from . import _version 55 | 56 | del indigen, test 57 | 58 | _T = TypeVar("_T") 59 | ActorRefType = Union[ActorRef, _T] 60 | 61 | __version__ = _version.get_versions()["version"] 62 | -------------------------------------------------------------------------------- /python/xoscar/backends/tests/test_messages.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2025 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def test_serial_forward_message():
    """Round-trip a ``ForwardMessage`` wrapping a ``SendMessage`` through serialize/deserialize."""
    target_ref = create_actor_ref(
        "127.0.0.1:1111", "MyActor", proxy_addresses=["127.0.0.1:1112"]
    )
    inner = SendMessage(
        message_id=new_message_id(),
        actor_ref=target_ref,
        content="sth",
    )
    forward_message = ForwardMessage(
        message_id=new_message_id(),
        address="127.0.0.1:1112",
        raw_message=inner,
    )

    serialized = serialize(forward_message)
    restored = deserialize(*serialized)

    # The round trip must yield distinct objects carrying equal field values.
    assert id(forward_message) != id(restored)
    assert forward_message.message_id == restored.message_id
    assert forward_message.address == restored.address

    sent, received = forward_message.raw_message, restored.raw_message
    assert id(sent) != id(received)
    assert sent.actor_ref == received.actor_ref
    assert sent.actor_ref.proxy_addresses == received.actor_ref.proxy_addresses
    assert sent.content == received.content
# Type stubs for the serialization core: signatures only, bodies are elided with `...`.

from __future__ import annotations

from concurrent.futures import Executor
from typing import Any, Callable

# Decorator; takes a callable and returns a callable.
def buffered(func: Callable) -> Callable: ...
def fast_id(obj: Any) -> int: ...

# Serializer interface: exposes serial/deserial and the register/unregister
# class methods.
class Serializer:
    serializer_id: int
    def serial(self, obj: Any, context: dict): ...
    def deserial(self, serialized: tuple, context: dict, subs: list[Any]): ...
    # NOTE(review): presumably invoked when deserializing a sub-object fails;
    # confirm against the implementation.
    def on_deserial_error(
        self,
        serialized: tuple,
        context: dict,
        subs_serialized: list,
        error_index: int,
        exc: BaseException,
    ): ...
    @classmethod
    def register(cls, obj_type, name: str | None = None): ...
    @classmethod
    def unregister(cls, obj_type): ...

# Hashable, comparable object carrying an integer id and a list of callbacks.
class Placeholder:
    id: int
    callbacks: list[Callable]
    def __init__(self, id_: int): ...
    def __hash__(self): ...
    def __eq__(self, other): ...

def serialize(obj: Any, context: dict | None = None): ...
async def serialize_with_spawn(
    obj: Any,
    context: dict | None = None,
    spawn_threshold: int = 100,
    executor: Executor | None = None,
): ...
def deserialize(headers: list, buffers: list, context: dict | None = None): ...
def pickle_buffers(obj: Any) -> list: ...
def unpickle_buffers(buffers: list) -> Any: ...
2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import annotations 17 | 18 | from abc import ABC, abstractmethod 19 | from typing import Dict, Type 20 | 21 | from .context import register_backend_context 22 | from .driver import register_backend_driver 23 | 24 | __all__ = ["BaseActorBackend", "register_backend", "get_backend"] 25 | 26 | 27 | class BaseActorBackend(ABC): 28 | @staticmethod 29 | @abstractmethod 30 | def name(): 31 | pass 32 | 33 | @staticmethod 34 | @abstractmethod 35 | def get_context_cls(): 36 | pass 37 | 38 | @classmethod 39 | async def create_actor_pool( 40 | cls, address: str, n_process: int | None = None, **kwargs 41 | ): 42 | pass 43 | 44 | @staticmethod 45 | @abstractmethod 46 | def get_driver_cls(): 47 | pass 48 | 49 | 50 | _scheme_to_backend_cls: Dict[str, Type[BaseActorBackend]] = dict() 51 | 52 | 53 | def register_backend(backend_cls: Type[BaseActorBackend]): 54 | name = backend_cls.name() 55 | if isinstance(name, (list, tuple)): 56 | names = name 57 | else: 58 | names = [name] 59 | for name in names: 60 | _scheme_to_backend_cls[name] = backend_cls 61 | register_backend_context(name, backend_cls.get_context_cls()) 62 | register_backend_driver(name, backend_cls.get_driver_cls()) 63 | return backend_cls 64 | 65 | 66 | def get_backend(name): 67 | return _scheme_to_backend_cls[name] 68 | 
def test_counter():
    """A counter exposes its metadata and accumulates recorded values."""
    counter = Counter("test_counter", "A test counter", ("service", "tenant"))
    assert counter.name == "test_counter"
    assert counter.description == "A test counter"
    assert counter.tag_keys == ("service", "tenant")
    assert counter.type == "Counter"
    for amount in (1, 2):
        counter.record(amount, {"service": "indigen", "tenant": "test"})
    assert counter.value == 3


def test_gauge():
    """A gauge reports the most recently recorded value."""
    gauge = Gauge("test_gauge", "A test gauge")
    assert gauge.name == "test_gauge"
    assert gauge.description == "A test gauge"
    assert gauge.tag_keys == ()
    assert gauge.type == "Gauge"
    for reading in (1, 2):
        gauge.record(reading)
        assert gauge.value == reading


def test_meter():
    """A meter reads 0 after recording 1 and a positive value after recording 2001."""
    meter = Meter("test_meter")
    assert meter.name == "test_meter"
    assert meter.description == ""
    assert meter.tag_keys == ()
    assert meter.type == "Meter"
    meter.record(1)
    assert meter.value == 0
    meter.record(2001)
    assert meter.value > 0


def test_histogram():
    """A histogram reads 0 after one record and a positive value after 2002 more."""
    histogram = Histogram("test_histogram")
    assert histogram.name == "test_histogram"
    assert histogram.description == ""
    assert histogram.tag_keys == ()
    assert histogram.type == "Histogram"
    histogram.record(1)
    assert histogram.value == 0
    for _ in range(2002):
        histogram.record(1)
    assert histogram.value > 0
*/ 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | namespace fmt { 22 | 23 | template <> 24 | struct formatter { 25 | constexpr decltype(auto) parse(format_parse_context &ctx) const { 26 | return ctx.begin(); 27 | } 28 | 29 | template 30 | decltype(auto) format(const std::error_category &cat, 31 | FormatContext &ctx) const { 32 | if (std::strcmp(cat.name(), "generic") == 0) { 33 | return format_to(ctx.out(), "errno"); 34 | } else { 35 | return format_to(ctx.out(), "{} error", cat.name()); 36 | } 37 | } 38 | }; 39 | 40 | template <> 41 | struct formatter { 42 | constexpr decltype(auto) parse(format_parse_context &ctx) const { 43 | return ctx.begin(); 44 | } 45 | 46 | template 47 | decltype(auto) format(const std::error_code &err, 48 | FormatContext &ctx) const { 49 | return format_to(ctx.out(), 50 | fmt::runtime("({}: {} - {})"), 51 | err.category(), 52 | err.value(), 53 | err.message()); 54 | } 55 | }; 56 | 57 | } // namespace fmt 58 | 59 | namespace xoscar { 60 | namespace detail { 61 | 62 | inline std::error_code lastError() noexcept { 63 | return std::error_code{errno, std::generic_category()}; 64 | } 65 | 66 | } // namespace detail 67 | } // namespace xoscar 68 | -------------------------------------------------------------------------------- /python/xoscar/metrics/backends/prometheus/prometheus_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
pc = lazy_import("prometheus_client", rename="pc")


class PrometheusMetricMixin(AbstractMetric):
    """Metric mixin that reports values through a ``prometheus_client`` Gauge.

    Every metric kind defined below is backed by a Gauge; when
    ``prometheus_client`` is unavailable, or the Gauge cannot be created,
    recording becomes a no-op.
    """

    def _init(self):
        """Normalise the metric name, add the host/pid tags and create the Gauge."""
        # Prometheus metric name must match the regex `[a-zA-Z_:][a-zA-Z0-9_:]*`
        # `.` is a common character in metrics, so here replace it with `:`
        self._name = self._name.replace(".", ":")
        self._tag_keys = self._tag_keys + (
            "host",
            "pid",
        )
        self._tags = {"host": socket.gethostname(), "pid": os.getpid()}
        try:
            self._metric = (
                pc.Gauge(self._name, self._description, self._tag_keys) if pc else None
            )
        except ValueError:  # pragma: no cover
            self._metric = None

    def _record(self, value=1, tags: Optional[Dict[str, str]] = None):
        """Set the Gauge to ``value`` under ``tags`` plus the host/pid tags.

        The caller's ``tags`` dict is never modified: the labels are built in a
        fresh dict, with the host/pid entries taking precedence.
        """
        if self._metric:
            # Merge into a new dict instead of ``tags.update(...)`` so the
            # caller's mapping does not silently gain host/pid keys.
            labels = {**tags, **self._tags} if tags is not None else self._tags
            self._metric.labels(**labels).set(value)


class Counter(PrometheusMetricMixin, AbstractCounter):
    pass


class Gauge(PrometheusMetricMixin, AbstractGauge):
    pass


class Meter(PrometheusMetricMixin, AbstractMeter):
    pass


class Histogram(PrometheusMetricMixin, AbstractHistogram):
    pass
mx = lazy_import("mlx.core")


# Buffer-protocol format character -> NumPy dtype used to re-wrap a buffer.
dtype_map = {
    "b": np.int8,
    "B": np.uint8,
    "h": np.int16,
    "H": np.uint16,
    "i": np.int32,
    "I": np.uint32,
    "q": np.int64,
    "Q": np.uint64,
    "e": np.float16,
    "f": np.float32,
    "d": np.float64,
}


class MLXSerislizer(Serializer):
    """Serializer for ``mx.array`` that ships the array as one flat byte buffer."""

    @buffered
    def serial(self, obj: "mx.array", context: dict):  # type: ignore
        """Return a header (shape, buffer format, dtype name) and the raw buffer."""
        # Flatten and reinterpret as uint8 so the payload is a plain byte buffer.
        ravel_obj = obj.reshape(-1).view(mx.uint8)
        mv = memoryview(ravel_obj)
        header = dict(
            shape=obj.shape, format=mv.format, dtype=str(obj.dtype).rsplit(".", 1)[-1]
        )
        if not mv.c_contiguous:
            # NOTE: we only consider c contiguous here,
            # MLX has no way to create f contiguous arrays.
            mv = memoryview(bytes(mv))
        return (header,), [mv], True

    def deserial(self, serialized: tuple, context: dict, subs: List[Any]):
        """Rebuild the ``mx.array`` from the header and the single buffer in ``subs``."""
        header = serialized[0]
        shape, buf_format, dtype = header["shape"], header["format"], header["dtype"]
        mv = memoryview(subs[0])
        if mv.format != buf_format:
            # Re-wrap the buffer so its format matches what was recorded on the
            # sending side. The MLX dtype name in ``dtype`` is kept intact for
            # the ``getattr`` below, and the buffer stays flat: it holds raw
            # bytes, so the element shape is only applied after the final view.
            np_dtype = dtype_map.get(buf_format, np.uint8)
            mv = memoryview(np.frombuffer(mv, dtype=np_dtype))
        ravel_array = mx.array(mv)
        return ravel_array.view(getattr(mx, dtype)).reshape(shape)


if mx is not None:
    MLXSerislizer.register(mx.array)
# Registries mapping an address scheme (``None`` for scheme-less addresses)
# to the client / server type that handles it.
_scheme_to_client_types: "dict[str | None, Type[Client]]" = {}
_scheme_to_server_types: "dict[str | None, Type[Server]]" = {}


def register_client(client_type: "Type[Client]"):
    """Register ``client_type`` under its ``scheme`` attribute and return it (decorator-friendly)."""
    _scheme_to_client_types[client_type.scheme] = client_type  # type: ignore
    return client_type


def register_server(server_type: "Type[Server]"):
    """Register ``server_type`` under its ``scheme`` attribute and return it (decorator-friendly)."""
    _scheme_to_server_types[server_type.scheme] = server_type  # type: ignore
    return server_type


def _check_scheme(scheme: "str | None", types: dict):
    """Normalise ``scheme`` and check that it is a key of ``types``.

    An empty scheme is treated as ``None``.

    Raises:
        ValueError: if the scheme is not registered in ``types``.
    """
    if scheme == "":
        scheme = None
    if scheme not in types:  # pragma: no cover
        # Keys may include ``None`` (the default scheme), so they are
        # stringified before joining; joining them raw raises ``TypeError``
        # and hides the real error.
        known = ", ".join(str(key) for key in types)
        raise ValueError(
            f"address illegal, address scheme "
            f"should be one of "
            f"{known}, "
            f"got {scheme}"
        )
    return scheme


def get_scheme(address: str) -> "str | None":
    """Return the scheme of ``address``, or ``None`` when it contains no ``://``."""
    if "://" not in address:
        return None
    return urlparse(address).scheme


def get_client_type(address: str) -> "Type[Client]":
    """Return the client type registered for the scheme of ``address``."""
    scheme = _check_scheme(get_scheme(address), _scheme_to_client_types)
    return _scheme_to_client_types[scheme]


def get_server_type(address: str) -> "Type[Server]":
    """Return the server type registered for the scheme of ``address``."""
    scheme = _check_scheme(get_scheme(address), _scheme_to_server_types)
    return _scheme_to_server_types[scheme]


def gen_local_address(process_index: int) -> str:
    """Return the ``dummy://`` address for the given process index."""
    return f"dummy://{process_index}"
logger = logging.getLogger(__name__)


class SimpleMetric:
    """In-memory metric that remembers the last value and logs every update."""

    _value: int | float

    def __init__(
        self,
        name: str,
        description: str = "",
        # Variable-length tuple of tag names; ``Tuple[str]`` would mean exactly one.
        tag_keys: Optional[Tuple[str, ...]] = None,
    ):
        self._name = name
        self._description = description
        self._tag_keys = tag_keys
        self._value = 0

    def update(self, value: float = 1.0, tags: Optional[Dict[str, str]] = None):
        """Store ``value`` as the current reading and emit a debug log entry."""
        self._value = value
        logger.debug(
            "Reporting metric with name: %s, description: %s, value: %s, tags: %s",
            self._name,
            self._description,
            value,
            tags,
        )

    @property
    def value(self):
        """The most recently stored value (``0`` until the first update)."""
        return self._value


class ConsoleMetricMixin(AbstractMetric):
    """Metric mixin that delegates storage and reporting to a ``SimpleMetric``."""

    @property
    def value(self):
        """Current value held by the underlying ``SimpleMetric``."""
        return self._metric.value

    def _init(self):
        """Create the backing ``SimpleMetric`` from this metric's name, description and tag keys."""
        self._metric = SimpleMetric(self._name, self._description, self._tag_keys)

    def _record(self, value=1, tags: Optional[Dict[str, str]] = None):
        """Forward ``value`` and ``tags`` to the backing ``SimpleMetric``."""
        self._metric.update(value, tags)


class Counter(ConsoleMetricMixin, AbstractCounter):
    pass


class Gauge(ConsoleMetricMixin, AbstractGauge):
    pass


class Meter(ConsoleMetricMixin, AbstractMeter):
    pass


class Histogram(ConsoleMetricMixin, AbstractHistogram):
    pass
45 | max_prefix_chars: 10 46 | 47 | # If a candidate layout is wrapped horizontally but it exceeds 48 | # this many lines, then reject the layout. 49 | max_lines_hwrap: 2 50 | 51 | line_ending: unix 52 | 53 | # Format command names consistently as 'lower' or 'upper' case 54 | command_case: canonical 55 | 56 | # Format keywords consistently as 'lower' or 'upper' case 57 | # unchanged is valid too 58 | keyword_case: 'upper' 59 | 60 | # A list of command names which should always be wrapped 61 | always_wrap: [] 62 | 63 | # If true, the argument lists which are known to be sortable 64 | # will be sorted lexicographically 65 | enable_sort: true 66 | 67 | # If true, the parsers may infer whether or not an argument 68 | # list is sortable (without annotation). 69 | autosort: false 70 | 71 | # Causes a few issues - can be solved later, possibly. 72 | markup: 73 | enable_markup: false -------------------------------------------------------------------------------- /python/xoscar/aio/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from __future__ import annotations 16 | 17 | import asyncio 18 | import functools 19 | from concurrent.futures import Executor 20 | from typing import Any, Type 21 | 22 | 23 | def _make_delegate_method(attr): 24 | async def method(self, *args, **kwargs): 25 | func = functools.partial(getattr(self._file, attr), *args, **kwargs) 26 | return await self._loop.run_in_executor(self._executor, func) 27 | 28 | return method 29 | 30 | 31 | def _make_proxy_method(attr): 32 | def method(self, *args, **kwargs): 33 | return getattr(self._file, attr)(*args, **kwargs) 34 | 35 | return method 36 | 37 | 38 | def _make_proxy_property(attr): 39 | def proxy_property(self): 40 | return getattr(self._file, attr) 41 | 42 | return property(proxy_property) 43 | 44 | 45 | def delegate_to_executor(*attrs): 46 | def wrap_cls(cls: Type): 47 | for attr in attrs: 48 | setattr(cls, attr, _make_delegate_method(attr)) 49 | return cls 50 | 51 | return wrap_cls 52 | 53 | 54 | def proxy_method_directly(*attrs): 55 | def wrap_cls(cls: Type): 56 | for attr in attrs: 57 | setattr(cls, attr, _make_proxy_method(attr)) 58 | return cls 59 | 60 | return wrap_cls 61 | 62 | 63 | def proxy_property_directly(*attrs): 64 | def wrap_cls(cls): 65 | for attr in attrs: 66 | setattr(cls, attr, _make_proxy_property(attr)) 67 | return cls 68 | 69 | return wrap_cls 70 | 71 | 72 | class AioBase: 73 | def __init__( 74 | self, 75 | file: Any, 76 | loop: asyncio.BaseEventLoop | None = None, 77 | executor: Executor | None = None, 78 | ): 79 | if loop is None: 80 | loop = asyncio.get_event_loop() # type: ignore 81 | if isinstance(file, AioBase): 82 | file = file._file # type: ignore 83 | 84 | self._file = file 85 | self._loop = loop 86 | self._executor = executor 87 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.11...3.21) 2 | 3 | 
project(XoscarCollective) 4 | if(NOT DEFINED PYTHON_PATH) 5 | find_package(Python COMPONENTS Interpreter Development) 6 | else() 7 | set(PYTHON_EXECUTABLE ${PYTHON_PATH}) 8 | endif() 9 | 10 | set(CMAKE_CXX_STANDARD 20) 11 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") 12 | 13 | if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") 14 | option(USE_LIBUV "Build tcp transport on linux" OFF) 15 | else() 16 | option(USE_LIBUV "Build libuv transport on others" ON) 17 | endif() 18 | 19 | if(MSVC) 20 | add_compile_options(/utf-8) 21 | message(STATUS "Done setting /utf-8 for MSVC") 22 | endif() 23 | 24 | include_directories(${CMAKE_SOURCE_DIR}) 25 | #find python3 include dir 26 | execute_process(COMMAND python -c "import sysconfig; print(sysconfig.get_path('include'))" 27 | OUTPUT_VARIABLE PYTHON_INCLUDE_PATH) 28 | 29 | # Set include directories 30 | include_directories(${PYTHON_INCLUDE_PATH}) 31 | if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") 32 | set(libuv_dir ${CMAKE_SOURCE_DIR}/third_party/libuv/build/uvlib) 33 | if(NOT EXISTS ${libuv_dir}) 34 | execute_process( 35 | COMMAND 36 | cmd /c 37 | "echo %cd% && cd ..\\..\\..\\..\\..\\third_party\\libuv && mkdir build && cd build && mkdir uvlib && cmake .. 
-DCMAKE_INSTALL_PREFIX=uvlib -DCMAKE_POLICY_VERSION_MINIMUM=3.10 && msbuild.exe INSTALL.vcxproj" 38 | ) 39 | message(STATUS "Done creating libuv_dir = ${libuv_dir}") 40 | endif() 41 | set(libuv_ROOT ${CMAKE_SOURCE_DIR}/third_party/libuv/build/uvlib) 42 | set(uv_HEADER_PATH ${CMAKE_SOURCE_DIR}/third_party/libuv/include) 43 | include_directories(${uv_HEADER_PATH}) 44 | #copy uv.dll to /python/xoscar/collective 45 | file(COPY ${CMAKE_SOURCE_DIR}/third_party/libuv/build/uvlib/bin/uv.dll 46 | DESTINATION ${CMAKE_SOURCE_DIR}/python/xoscar/collective) 47 | add_definitions(-DNOMINMAX) 48 | endif() 49 | 50 | add_subdirectory(third_party/fmt) 51 | add_subdirectory(third_party/pybind11) 52 | add_subdirectory(third_party/gloo) 53 | 54 | # set c++11 for gloo 55 | set_target_properties( 56 | gloo 57 | PROPERTIES CXX_STANDARD 11 58 | CXX_STANDARD_REQUIRED ON 59 | CXX_EXTENSIONS OFF) 60 | if(NOT DEFINED BUILD_TMP_DIR) 61 | file(GLOB TMP_DIRS "python/build/temp*") 62 | foreach(TMP_DIR ${TMP_DIRS}) 63 | set(BUILD_TMP_DIR ${TMP_DIR}/xoscar_pygloo) 64 | endforeach() 65 | else() 66 | set(BUILD_TMP_DIR python/${BUILD_TMP_DIR}) 67 | endif() 68 | # copy config.h to cpp/gloo/include 69 | file(COPY ${BUILD_TMP_DIR}/third_party/gloo/gloo/config.h 70 | DESTINATION ${CMAKE_SOURCE_DIR}/cpp/collective/gloo/include) 71 | 72 | add_subdirectory(cpp) 73 | -------------------------------------------------------------------------------- /python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=64,<70; python_version<'3.12'", 4 | "setuptools>=75; python_version>='3.12'", 5 | "packaging", 6 | "wheel", 7 | "oldest-supported-numpy", 8 | "scipy==1.4.1; python_version<'3.9' and platform_machine!='aarch64' and platform_machine!='arm64'", 9 | "scipy==1.7.3; python_version<'3.9' and platform_machine=='arm64'", 10 | "scipy==1.5.3; python_version<'3.9' and platform_machine=='aarch64'", 11 | "scipy==1.5.4; 
python_version>='3.9' and python_version<'3.10' and platform_machine!='arm64'", 12 | "scipy==1.7.2; python_version>='3.10' and python_version<'3.11' and platform_machine!='arm64'", 13 | "scipy==1.7.3; python_version>='3.10' and python_version<'3.11' and platform_machine=='arm64'", 14 | "scipy==1.9.2; python_version>='3.11' and python_version<'3.12'", 15 | "scipy>=1.11.2; python_version>'3.11'", 16 | "pandas==1.0.4; python_version<'3.9' and platform_machine!='aarch64' and platform_machine!='arm64'", 17 | "pandas==1.1.3; python_version<'3.9' and platform_machine=='aarch64'", 18 | "pandas==1.4.0; python_version<'3.9' and platform_machine=='arm64'", 19 | "pandas==1.2.2; python_version>='3.9' and python_version<'3.10' and platform_machine!='arm64'", 20 | "pandas==1.4.0; python_version>='3.9' and python_version<'3.10' and platform_machine=='arm64'", 21 | "pandas==1.3.4; python_version>='3.10' and python_version<'3.11' and platform_machine!='arm64'", 22 | "pandas==1.4.0; python_version>='3.10' and python_version<'3.11' and platform_machine=='arm64'", 23 | "pandas==1.5.1; python_version>='3.11' and python_version<'3.12'", 24 | "pandas>=2.1.1; python_version>'3.11'", 25 | "numpy", 26 | "cython>=0.29.33", 27 | "requests>=2.4.0", 28 | "cloudpickle>=2.2.1; python_version>='3.11'", 29 | "cloudpickle==1.5.0; python_version<'3.11'", 30 | ] 31 | build-backend = "setuptools.build_meta" 32 | 33 | [tool.black] 34 | include = '\.pyi?$' 35 | force-exclude = ''' 36 | ( 37 | ^/xoscar/(_version.py) 38 | | shared_memory.py 39 | ) 40 | ''' 41 | 42 | [tool.pytest.ini_options] 43 | asyncio_mode = "auto" 44 | asyncio_default_fixture_loop_scope="function" 45 | markers = [ 46 | "cuda: mark a test as a cuda case.", 47 | ] 48 | log_cli = true 49 | log_cli_level = "INFO" 50 | log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)" 51 | log_cli_date_format = "%Y-%m-%d %H:%M:%S" 52 | 53 | [tool.cibuildwheel] 54 | build = ["cp39-*", "cp310-*", "cp311-*", "cp312-*", 
"cp313-*"] 55 | skip = "pp* *musllinux* *i686 cp38-win32 cp39-win32 cp310-win32 cp311-win32 cp312-win32 cp313-win32" 56 | manylinux-x86_64-image = "manylinux2014" 57 | -------------------------------------------------------------------------------- /python/xoscar/serialization/scipy.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
class CsrMatrixSerializer(Serializer):
    """Serializer for CSR sparse matrices.

    A matrix is broken into its ``data``, ``indices`` and ``indptr`` arrays.
    Each array is serialized on its own and the resulting buffers are
    concatenated; the header records how many buffers belong to the first
    two arrays so the concatenation can be split again on the way back.
    """

    @buffered
    def serial(self, obj: Any, context: Dict):
        """Serialize ``obj`` into ``(header, buffers, True)``.

        The header is the tuple ``(data_header, data_buf_num, idx_header,
        idx_buf_num, indptr_header, shape)``. The trailing ``True`` is
        returned unconditionally; its meaning is defined by the
        ``Serializer`` contract, which is not visible in this module.
        """
        values_meta, values_bufs = serialize(obj.data)
        cols_meta, cols_bufs = serialize(obj.indices)
        rowptr_meta, rowptr_bufs = serialize(obj.indptr)

        # Buffer order (data, indices, indptr) must match the slicing
        # performed in ``deserial``.
        all_bufs = values_bufs + cols_bufs + rowptr_bufs
        header = (
            values_meta,
            len(values_bufs),
            cols_meta,
            len(cols_bufs),
            rowptr_meta,
            obj.shape,
        )
        return header, all_bufs, True

    def deserial(self, serialized: Tuple, context: Dict, subs: List):
        """Rebuild a CSR matrix from a header and the buffers in ``subs``."""
        values_meta, n_values, cols_meta, n_cols, rowptr_meta, dims = serialized

        # Everything after the data and indices buffers belongs to indptr.
        boundary = n_values + n_cols
        values_bufs = subs[:n_values]
        cols_bufs = subs[n_values:boundary]
        rowptr_bufs = subs[boundary:]

        values = deserialize(values_meta, values_bufs)
        cols = deserialize(cols_meta, cols_bufs)
        rowptr = deserialize(rowptr_meta, rowptr_bufs)
        dims = tuple(dims)

        # Start from an empty matrix of the right shape and dtype, then swap
        # in the restored arrays (presumably to avoid re-processing them in
        # the csr_matrix constructor -- confirm).
        placeholder = np.zeros(0, dtype=values.dtype)
        skeleton = sps.coo_matrix(
            (placeholder, (placeholder, placeholder)),
            dtype=values.dtype,
            shape=dims,
        )
        restored = skeleton.tocsr()
        restored.data = values
        restored.indices = cols
        restored.indptr = rowptr
        return restored
documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'Xoscar' 21 | copyright = '2023, Xorbits Inc.' 22 | author = 'xorbitsai' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | "sphinx.ext.mathjax", 32 | "sphinx.ext.ifconfig", 33 | "sphinx.ext.intersphinx", 34 | "sphinx.ext.viewcode", 35 | "sphinx.ext.githubpages", 36 | "sphinx.ext.autosummary", 37 | "sphinx.ext.napoleon", 38 | "IPython.sphinxext.ipython_directive", 39 | "IPython.sphinxext.ipython_console_highlighting", 40 | ] 41 | 42 | # Add any paths that contain templates here, relative to this directory. 43 | templates_path = ['_templates'] 44 | 45 | # List of patterns, relative to source directory, that match files and 46 | # directories to ignore when looking for source files. 47 | # This pattern also affects html_static_path and html_extra_path. 48 | exclude_patterns = [] 49 | 50 | # i18n 51 | locale_dirs = ["locale/"] # path is example but recommended. 
52 | gettext_compact = False # optional 53 | 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | html_theme = 'pydata_sphinx_theme' 61 | 62 | # Add any paths that contain custom static files (such as style sheets) here, 63 | # relative to this directory. They are copied after the builtin static files, 64 | # so a file named "default.css" will overwrite the builtin "default.css". 65 | html_static_path = ['_static'] 66 | 67 | html_theme_options = { 68 | "icon_links": [ 69 | { 70 | "name": "GitHub", 71 | "url": "https://github.com/xorbitsai/xoscar", 72 | "icon": "fa-brands fa-github", 73 | "type": "fontawesome", 74 | }, 75 | ] 76 | } 77 | 78 | html_favicon = "_static/favicon.svg" 79 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | .DS_Store 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | generated/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | .idea 133 | 134 | # cython compiled files 135 | python/xoscar/**/*.c* 136 | 137 | # cmake 138 | cmake-* 139 | CMakeFiles 140 | CMakeCache.txt 141 | *.cmake 142 | Makefile 143 | 144 | #config.h 145 | cpp/collective/gloo/include/config.h 146 | 147 | #filestore 148 | python/xoscar/collective/tests/collective 149 | 150 | #libuv 151 | python/xoscar/collective/uv.dll -------------------------------------------------------------------------------- /doc/source/getting_started/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. _quickstart: 2 | 3 | ========== 4 | Quickstart 5 | ========== 6 | 7 | This concise introduction demonstrates how to estimate Pi in a parallel manner 8 | using the Monte Carlo method with Xoscar. 9 | 10 | We import :code:`xoscar` at the very beginning: 11 | 12 | .. code-block:: python 13 | 14 | import asyncio 15 | import xoscar as xo 16 | 17 | 18 | Create actor pools 19 | ------------------ 20 | 21 | To begin, we need to create actor pools, each of which will run within its own individual process. 22 | 23 | .. seealso:: 24 | :ref:`actor-pool` 25 | 26 | 27 | .. 
code-block:: python 28 | 29 | dop = 4 # degree of parallelism 30 | loop = asyncio.get_event_loop() 31 | loop.run_until_complete(xo.create_actor_pool(address="localhost:9999", n_process=dop)) 32 | 33 | 34 | After successfully creating the actor pools, we gather the address of each pool for the following 35 | steps. 36 | 37 | .. code-block:: python 38 | 39 | pool_config = await xo.get_pool_config("localhost:9999") 40 | pool_addresses = pool_config.get_external_addresses() 41 | 42 | Define an actor 43 | --------------- 44 | 45 | Next, we define an actor that will perform the estimation. This actor includes a method called 46 | ``estimate`` that takes the total number of points as input and returns the number of points inside 47 | the circle. Since this actor doesn't have any internal state, it inherits from 48 | ``xo.StatelessActor`` to ensure lock-free execution. 49 | 50 | .. seealso:: 51 | :ref:`actor` 52 | 53 | .. code-block:: python 54 | 55 | class MyActor(xo.StatelessActor): 56 | def estimate(self, n): 57 | import random 58 | from math import sqrt 59 | 60 | inside = 0 61 | for _ in range(n): 62 | x = random.uniform(-1, 1) 63 | y = random.uniform(-1, 1) 64 | if sqrt(x ** 2 + y ** 2) < 1: 65 | inside += 1 66 | return inside 67 | 68 | Create actors 69 | ------------- 70 | 71 | Then, we create an actor within each actor pool. 72 | 73 | .. code-block:: python 74 | 75 | actors = [] 76 | for i, address in enumerate(pool_addresses): 77 | actor = await xo.create_actor( 78 | MyActor, 79 | address=address, 80 | uid=str(i), 81 | ) 82 | actors.append(actor) 83 | 84 | Compute Pi 85 | ---------- 86 | 87 | Finally, we invoke the ``estimate`` method on each actor, leveraging the parallelism provided by Xoscar for efficient computation and distribution of the estimation task, and then gather their individual outputs to calculate the value of Pi. 88 | 89 | .. 
code-block:: python 90 | 91 | N = 10 ** 7 92 | tasks = [] 93 | for actor in actors: 94 | tasks.append(actor.estimate(N)) 95 | 96 | inside = sum(await asyncio.gather(*tasks)) 97 | pi = 4 * inside / (len(actors) * N) 98 | print('pi: %.5f' % pi) 99 | -------------------------------------------------------------------------------- /python/xoscar/tests/test_actorcaller.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
@pytest.mark.asyncio
@mock.patch.object(Router, "get_client")
async def test_send_when_close(fake_get_client):
    """Check how ``ActorCaller`` reports a client whose ``send`` starts failing.

    ``Router.get_client`` is patched to always hand back one ``FakeClient``
    whose third ``send`` raises ``ConnectionError``. Two non-waiting calls are
    issued first; the third call is expected to raise ``ServerClosed``.
    Afterwards the first future must still resolve to its own message
    (``message_id == 0``), while the second future must fail with
    ``ServerClosed``.
    """

    class FakeClient:
        """In-memory client stand-in that echoes sent messages back via ``recv``."""

        def __init__(self):
            self.closed = False
            self.send_num = 0
            self._messages = asyncio.Queue()
            self.dest_address = "test"
            self.channel_type = ChannelType.local

        async def send(self, message):
            # The message is queued before the failure check, so the third
            # message is enqueued as well even though this call raises.
            await self._messages.put(message)
            self.send_num += 1
            if self.send_num >= 3:
                raise ConnectionError("test")

        async def recv(self, *args, **kwargs):
            # Every receive is delayed by 3 seconds, then returns the oldest
            # queued message (presumably so that no reply is delivered before
            # the third send has failed -- confirm).
            await asyncio.sleep(3)
            res = await self._messages.get()
            return res

        async def close(self):
            self.closed = True

    fake_client = FakeClient()
    # Whatever arguments the router passes, it gets the same fake client.
    fake_get_client.side_effect = lambda *args, **kwargs: fake_client

    class FakeMessage:
        """Minimal message object exposing only ``message_id``."""

        def __init__(self, id_num):
            self.message_id = id_num

    caller = ActorCaller()

    router = Router(
        external_addresses=["test1"],
        local_address="test2",
    )
    futures = []
    # With wait=False the awaited call result is itself awaited later on,
    # so it is kept here for the checks at the end of the test.
    for index in range(2):
        futures.append(
            await caller.call(
                router=router,
                dest_address="test1",
                message=FakeMessage(index),
                wait=False,
            )
        )

    with pytest.raises(ServerClosed):
        # Original note: "Just wait _list run." -- presumably this pause lets
        # the caller's background listener start before the failing send;
        # confirm against ActorCaller.
        await asyncio.sleep(1)
        await caller.call(
            router=router, dest_address="test1", message=FakeMessage(2), wait=False
        )

    # The first call still completes with the message that was echoed back.
    res0 = await futures[0]
    assert res0.message_id == 0

    # The second pending call is expected to be failed with ServerClosed.
    with pytest.raises(ServerClosed):
        await futures[1]
43 | cause1 = pickle.loads(pickle.dumps(cause)) 44 | assert type(cause) is type(cause1) 45 | raise cause 46 | except Exception as e1: 47 | e1 = pickle.loads(pickle.dumps(e1)) 48 | # Check cause exception. 49 | assert isinstance(e1, CustomException) 50 | assert e1.i == value 51 | assert e1.address == fake_address 52 | assert e1.pid == fake_pid 53 | assert fake_address in str(e1) 54 | assert "Custom Exception" in str(e1) 55 | assert str(fake_pid) in str(e1) 56 | em1 = ErrorMessage( 57 | b"Fake message id", 58 | fake_address, 59 | fake_pid, 60 | type(e1), 61 | e1, 62 | e1.__traceback__, 63 | ) 64 | try: 65 | raise em1.as_instanceof_cause() 66 | except Exception as e2: 67 | e2 = pickle.loads(pickle.dumps(e2)) 68 | # Check recursive cause exception. 69 | assert isinstance(e2, CustomException) 70 | assert e2.i == value 71 | assert e2.address == fake_address 72 | assert e2.pid == fake_pid 73 | assert str(e2).count("Custom Exception") == 1 74 | assert str(e2).count(fake_address) == 1 75 | assert str(e2).count(str(fake_pid)) == 1 76 | -------------------------------------------------------------------------------- /cpp/collective/gloo/src/recv.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
*/ 14 | 15 | #include 16 | #include 17 | 18 | namespace xoscar { 19 | 20 | template 21 | void recv(const std::shared_ptr &context, 22 | intptr_t recvbuf, 23 | size_t size, 24 | int peer, 25 | uint32_t tag) { 26 | if (context->rank == peer) 27 | throw std::runtime_error( 28 | "peer equals to current rank. Please specify other peer values."); 29 | 30 | auto outputBuffer = context->createUnboundBuffer( 31 | reinterpret_cast(recvbuf), size * sizeof(T)); 32 | 33 | constexpr uint8_t kSendRecvSlotPrefix = 0x09; 34 | gloo::Slot slot = gloo::Slot::build(kSendRecvSlotPrefix, tag); 35 | 36 | outputBuffer->recv(peer, slot); 37 | outputBuffer->waitRecv(context->getTimeout()); 38 | } 39 | 40 | void recv_wrapper(const std::shared_ptr &context, 41 | intptr_t recvbuf, 42 | size_t size, 43 | glooDataType_t datatype, 44 | int peer, 45 | uint32_t tag) { 46 | switch (datatype) { 47 | case glooDataType_t::glooInt8: 48 | recv(context, recvbuf, size, peer, tag); 49 | break; 50 | case glooDataType_t::glooUint8: 51 | recv(context, recvbuf, size, peer, tag); 52 | break; 53 | case glooDataType_t::glooInt32: 54 | recv(context, recvbuf, size, peer, tag); 55 | break; 56 | case glooDataType_t::glooUint32: 57 | recv(context, recvbuf, size, peer, tag); 58 | break; 59 | case glooDataType_t::glooInt64: 60 | recv(context, recvbuf, size, peer, tag); 61 | break; 62 | case glooDataType_t::glooUint64: 63 | recv(context, recvbuf, size, peer, tag); 64 | break; 65 | case glooDataType_t::glooFloat16: 66 | recv(context, recvbuf, size, peer, tag); 67 | break; 68 | case glooDataType_t::glooFloat32: 69 | recv(context, recvbuf, size, peer, tag); 70 | break; 71 | case glooDataType_t::glooFloat64: 72 | recv(context, recvbuf, size, peer, tag); 73 | break; 74 | default: 75 | throw std::runtime_error("Unhandled dataType"); 76 | } 77 | } 78 | } // namespace xoscar 79 | -------------------------------------------------------------------------------- /cpp/collective/gloo/src/send.cc: 
-------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | 15 | #include 16 | #include 17 | #include 18 | namespace xoscar { 19 | 20 | template 21 | void send(const std::shared_ptr &context, 22 | intptr_t sendbuf, 23 | size_t size, 24 | int peer, 25 | uint32_t tag) { 26 | if (context->rank == peer) 27 | throw std::runtime_error( 28 | "peer equals to current rank. 
Please specify other peer values."); 29 | 30 | auto inputBuffer = context->createUnboundBuffer( 31 | reinterpret_cast(sendbuf), size * sizeof(T)); 32 | 33 | constexpr uint8_t kSendRecvSlotPrefix = 0x09; 34 | gloo::Slot slot = gloo::Slot::build(kSendRecvSlotPrefix, tag); 35 | 36 | inputBuffer->send(peer, slot); 37 | inputBuffer->waitSend(context->getTimeout()); 38 | } 39 | 40 | void send_wrapper(const std::shared_ptr &context, 41 | intptr_t sendbuf, 42 | size_t size, 43 | glooDataType_t datatype, 44 | int peer, 45 | uint32_t tag) { 46 | switch (datatype) { 47 | case glooDataType_t::glooInt8: 48 | send(context, sendbuf, size, peer, tag); 49 | break; 50 | case glooDataType_t::glooUint8: 51 | send(context, sendbuf, size, peer, tag); 52 | break; 53 | case glooDataType_t::glooInt32: 54 | send(context, sendbuf, size, peer, tag); 55 | break; 56 | case glooDataType_t::glooUint32: 57 | send(context, sendbuf, size, peer, tag); 58 | break; 59 | case glooDataType_t::glooInt64: 60 | send(context, sendbuf, size, peer, tag); 61 | break; 62 | case glooDataType_t::glooUint64: 63 | send(context, sendbuf, size, peer, tag); 64 | break; 65 | case glooDataType_t::glooFloat16: 66 | send(context, sendbuf, size, peer, tag); 67 | break; 68 | case glooDataType_t::glooFloat32: 69 | send(context, sendbuf, size, peer, tag); 70 | break; 71 | case glooDataType_t::glooFloat64: 72 | send(context, sendbuf, size, peer, tag); 73 | break; 74 | default: 75 | throw std::runtime_error("Unhandled dataType"); 76 | } 77 | } 78 | } // namespace xoscar 79 | -------------------------------------------------------------------------------- /cpp/collective/gloo/src/all_to_all.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace xoscar { 21 | 22 | template 23 | void all_to_all(const std::shared_ptr &context, 24 | intptr_t sendbuf, 25 | intptr_t recvbuf, 26 | size_t size, 27 | uint32_t tag) { 28 | T *input_ptr = reinterpret_cast(sendbuf); 29 | T *output_ptr = reinterpret_cast(recvbuf); 30 | 31 | // Configure AlltoallOptions struct and call alltoall function 32 | gloo::AlltoallOptions opts_(context); 33 | opts_.setInput(input_ptr, size); 34 | opts_.setOutput(output_ptr, size); 35 | opts_.setTag(tag); 36 | 37 | gloo::alltoall(opts_); 38 | } 39 | 40 | void all_to_all_wrapper(const std::shared_ptr &context, 41 | intptr_t sendbuf, 42 | intptr_t recvbuf, 43 | size_t size, 44 | glooDataType_t datatype, 45 | uint32_t tag) { 46 | switch (datatype) { 47 | case glooDataType_t::glooInt8: 48 | all_to_all(context, sendbuf, recvbuf, size, tag); 49 | break; 50 | case glooDataType_t::glooUint8: 51 | all_to_all(context, sendbuf, recvbuf, size, tag); 52 | break; 53 | case glooDataType_t::glooInt32: 54 | all_to_all(context, sendbuf, recvbuf, size, tag); 55 | break; 56 | case glooDataType_t::glooUint32: 57 | all_to_all(context, sendbuf, recvbuf, size, tag); 58 | break; 59 | case glooDataType_t::glooInt64: 60 | all_to_all(context, sendbuf, recvbuf, size, tag); 61 | break; 62 | case glooDataType_t::glooUint64: 63 | all_to_all(context, sendbuf, recvbuf, size, tag); 64 | break; 65 | case glooDataType_t::glooFloat16: 66 | all_to_all(context, sendbuf, recvbuf, size, tag); 67 | break; 68 | case 
glooDataType_t::glooFloat32: 69 | all_to_all(context, sendbuf, recvbuf, size, tag); 70 | break; 71 | case glooDataType_t::glooFloat64: 72 | all_to_all(context, sendbuf, recvbuf, size, tag); 73 | break; 74 | default: 75 | throw std::runtime_error("Unhandled dataType"); 76 | } 77 | } 78 | 79 | } // namespace xoscar 80 | -------------------------------------------------------------------------------- /cpp/collective/rendezvous/include/socket.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | 15 | #pragma once 16 | 17 | #include "exception.h" 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #define XOSCAR_WARNING(...) 
// Tunable settings accepted by Socket::listen() and Socket::connect().
//
// Each setter returns a reference to the object itself so that several
// options can be chained in one expression. A default-constructed instance
// prefers IPv6 and uses a 30 second connect timeout.
class SocketOptions {
 public:
  // Read the current settings.
  bool prefer_ipv6() const noexcept { return prefer_ipv6_; }

  std::chrono::seconds connect_timeout() const noexcept {
    return connect_timeout_;
  }

  // Update a setting and hand back *this for chaining.
  SocketOptions &prefer_ipv6(bool enabled) noexcept {
    prefer_ipv6_ = enabled;
    return *this;
  }

  SocketOptions &connect_timeout(std::chrono::seconds timeout) noexcept {
    connect_timeout_ = timeout;
    return *this;
  }

 private:
  // Member order is kept as in the original so the object layout is unchanged.
  bool prefer_ipv6_ = true;
  std::chrono::seconds connect_timeout_ = std::chrono::seconds(30);
};
-------------------------------------------------------------------------------- /cpp/collective/gloo/src/gather.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | 15 | #include 16 | #include 17 | 18 | namespace xoscar { 19 | 20 | template 21 | void gather(const std::shared_ptr &context, 22 | intptr_t sendbuf, 23 | intptr_t recvbuf, 24 | size_t size, 25 | int root, 26 | uint32_t tag) { 27 | // Configure GatherOptions struct 28 | gloo::GatherOptions opts_(context); 29 | 30 | T *input_ptr = reinterpret_cast(sendbuf); 31 | opts_.setInput(input_ptr, size); 32 | 33 | if (root == context->rank) { 34 | T *output_ptr = reinterpret_cast(recvbuf); 35 | opts_.setOutput(output_ptr, context->size * size); 36 | } 37 | opts_.setRoot(root); 38 | opts_.setTag(tag); 39 | 40 | gloo::gather(opts_); 41 | } 42 | 43 | void gather_wrapper(const std::shared_ptr &context, 44 | intptr_t sendbuf, 45 | intptr_t recvbuf, 46 | size_t size, 47 | glooDataType_t datatype, 48 | int root, 49 | uint32_t tag) { 50 | switch (datatype) { 51 | case glooDataType_t::glooInt8: 52 | gather(context, sendbuf, recvbuf, size, root, tag); 53 | break; 54 | case glooDataType_t::glooUint8: 55 | gather(context, sendbuf, recvbuf, size, root, tag); 56 | break; 57 | case glooDataType_t::glooInt32: 58 | gather(context, sendbuf, recvbuf, size, root, tag); 59 | break; 60 | case 
glooDataType_t::glooUint32: 61 | gather(context, sendbuf, recvbuf, size, root, tag); 62 | break; 63 | case glooDataType_t::glooInt64: 64 | gather(context, sendbuf, recvbuf, size, root, tag); 65 | break; 66 | case glooDataType_t::glooUint64: 67 | gather(context, sendbuf, recvbuf, size, root, tag); 68 | break; 69 | case glooDataType_t::glooFloat16: 70 | gather(context, sendbuf, recvbuf, size, root, tag); 71 | break; 72 | case glooDataType_t::glooFloat32: 73 | gather(context, sendbuf, recvbuf, size, root, tag); 74 | break; 75 | case glooDataType_t::glooFloat64: 76 | gather(context, sendbuf, recvbuf, size, root, tag); 77 | break; 78 | default: 79 | throw std::runtime_error("Unhandled dataType"); 80 | } 81 | } 82 | } // namespace xoscar 83 | -------------------------------------------------------------------------------- /python/xoscar/backends/indigen/tests/test_allocate_strategy.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import pytest 17 | 18 | from .... 
import create_actor_ref 19 | from ....errors import NoIdleSlot 20 | from ...allocate_strategy import ( 21 | AddressSpecified, 22 | IdleLabel, 23 | MainPool, 24 | Random, 25 | RandomLabel, 26 | RandomSubPool, 27 | ) 28 | from ...config import ActorPoolConfig 29 | 30 | config = ActorPoolConfig() 31 | config.add_pool_conf(0, "main", "unixsocket:///0", "127.0.0.1:1111") 32 | config.add_pool_conf(1, "test", "unixsocket:///1", "127.0.0.1:1112") 33 | config.add_pool_conf(2, "test2", "unixsocket:///2", "127.0.0.1:1113") 34 | config.add_pool_conf(3, "test", "unixsocket:///3", "127.0.0.1:1114") 35 | 36 | 37 | def test_address_specified(): 38 | addr = "127.0.0.1:1112" 39 | strategy = AddressSpecified(addr) 40 | assert strategy.get_allocated_address(config, dict()) == addr 41 | 42 | 43 | def test_main_pool(): 44 | strategy = MainPool() 45 | assert strategy.get_allocated_address(config, dict()) == "127.0.0.1:1111" 46 | 47 | 48 | def test_random(): 49 | strategy = Random() 50 | addresses = config.get_external_addresses() 51 | assert strategy.get_allocated_address(config, dict()) in addresses 52 | 53 | 54 | def test_random_sub_pool(): 55 | strategy = RandomSubPool() 56 | addresses = config.get_external_addresses()[1:] 57 | assert strategy.get_allocated_address(config, dict()) in addresses 58 | 59 | 60 | def test_random_label(): 61 | strategy = RandomLabel("test") 62 | addresses = config.get_external_addresses(label="test") 63 | assert len(addresses) == 2 64 | assert strategy.get_allocated_address(config, dict()) in addresses 65 | 66 | 67 | def test_idle_label(): 68 | strategy = IdleLabel("test", "my_mark") 69 | addresses = config.get_external_addresses(label="test") 70 | assert len(addresses) == 2 71 | allocated = { 72 | addresses[0]: {create_actor_ref(addresses[0], b"id1"): (strategy, None)} 73 | } 74 | assert strategy.get_allocated_address(config, allocated) == addresses[1] 75 | 76 | strategy2 = IdleLabel("test", "my_mark") 77 | allocated = { 78 | addresses[0]: { 79 | 
create_actor_ref(addresses[0], b"id1"): (strategy, None), 80 | create_actor_ref(addresses[0], b"id2"): (RandomLabel("test"), None), 81 | }, 82 | addresses[1]: {create_actor_ref(addresses[1], b"id3"): (strategy2, None)}, 83 | } 84 | with pytest.raises(NoIdleSlot): 85 | strategy2.get_allocated_address(config, allocated) 86 | -------------------------------------------------------------------------------- /doc/source/user_guide/actor.rst: -------------------------------------------------------------------------------- 1 | .. _actor: 2 | 3 | ===== 4 | Actor 5 | ===== 6 | 7 | Actors are self-contained computational entities that represent individual units of computation 8 | within the framework. They encapsulate both state and behavior, and communicate through message 9 | passing. 10 | 11 | Xoscar supports both stateful and stateless actors. Stateful actors ensure thread safety for 12 | concurrent systems while stateless actors can handle massive volumes of concurrent messages. 13 | 14 | .. seealso:: 15 | :ref:`ref_actor` 16 | 17 | 18 | Define an actor 19 | --------------- 20 | 21 | To define a stateful actor, your actor should inherit from the base class ``xoscar.Actor``. For 22 | stateless actors, the inheritance should be from ``xoscar.StatelessActor``. 23 | 24 | Two special methods are available for customization. ``__post_create__`` is invoked after the actor 25 | is created, allowing you to run any necessary initialization logic. ``__pre_destroy__`` is called 26 | before the actor is destroyed, providing an opportunity for cleanup or finalization tasks. 27 | 28 | .. code-block:: python 29 | 30 | import xoscar as xo 31 | 32 | # a stateful actor. 33 | # to define a stateless actor, inherit from xo.StatelessActor.
34 | class MyActor(xo.Actor): 35 | def __init__(self, *args, **kwargs): 36 | pass 37 | async def __post_create__(self): 38 | # called after created 39 | pass 40 | async def __pre_destroy__(self): 41 | # called before destroy 42 | pass 43 | def method_a(self, arg_1, arg_2, **kw_1): # user-defined function 44 | pass 45 | async def method_b(self, arg_1, arg_2, **kw_1): # user-defined async function 46 | pass 47 | 48 | 49 | Create an actor 50 | --------------- 51 | 52 | To create an actor, you need to provide the address of the actor pool where you want the actor to 53 | reside, along with a unique ID for the actor. Additionally, you need to provide any required 54 | positional and keyword arguments during the actor's initialization. 55 | 56 | .. code-block:: python 57 | 58 | actor_ref = await xo.create_actor( 59 | MyActor, 1, 2, a=1, b=2, 60 | address='<ip>:<port>', uid='UniqueActorName' 61 | ) 62 | 63 | Create an actor reference 64 | ------------------------- 65 | 66 | To create a reference to a specific actor, you need to provide both the ID of the actor and the 67 | address of the actor pool in which the actor is located. 68 | 69 | .. code-block:: python 70 | 71 | actor_ref = await xo.actor_ref(address, actor_id) 72 | 73 | Check the existence of an actor 74 | ------------------------------- 75 | 76 | To check the existence of an actor, you need to provide a reference to the actor. 77 | 78 | .. code-block:: python 79 | 80 | await xo.has_actor(actor_ref) 81 | 82 | Invoke an actor's method 83 | ------------------------ 84 | 85 | You can invoke an actor's method by its reference. 86 | 87 | .. code-block:: python 88 | 89 | await actor_ref.method_a(1, 2, a=1, b=2) 90 | 91 | Destroy an actor 92 | ---------------- 93 | 94 | You can destroy an actor and release corresponding resources by its reference. 95 | 96 | ..
class NDArraySerializer(Serializer):
    """Serializer for ``numpy.ndarray`` objects.

    Arrays whose dtype contains Python objects are handed to the pickle-based
    buffer helpers. Every other array is described by a small header (dtype
    descriptor, shape, strides, memory order) plus one raw ``uint8`` buffer.
    """

    @buffered
    def serial(self, obj: np.ndarray, context: Dict):
        """Return ``((header,), buffers, True)`` describing ``obj``."""
        if obj.dtype.hasobject:
            # No raw byte layout exists for object arrays; pickle them instead.
            return ({"pickle": True},), pickle_buffers(obj), True

        if obj.flags.f_contiguous:
            order = "F"
        else:
            order = "C"
            if not obj.flags.c_contiguous:
                # Neither layout is contiguous: copy into C order first.
                obj = np.ascontiguousarray(obj)

        dtype_new_order = None
        try:
            desc = np.lib.format.dtype_to_descr(obj.dtype)
        except ValueError:
            # for structured dtype, array[[field2, field1]] will create a view,
            # and dtype_to_descr will fail due to the order; describe the
            # fields sorted by offset and remember the original field order.
            fields = obj.dtype.fields
            fields_by_offset = sorted(fields, key=lambda name: fields[name][1])
            desc = np.lib.format.dtype_to_descr(obj.dtype[fields_by_offset])
            dtype_new_order = list(fields)

        header = {
            "pickle": False,
            "descr": desc,
            "dtype_new_order": dtype_new_order,
            "shape": list(obj.shape),
            "strides": list(obj.strides),
            "order": order,
        }
        raw_bytes = memoryview(obj.ravel(order=order).view("uint8").data)  # type: ignore
        return (header,), [raw_bytes], True

    def deserial(self, serialized: Tuple, context: Dict, subs: List[Any]):
        """Rebuild the array from the header in ``serialized`` and ``subs``."""
        header = serialized[0]
        if header["pickle"]:
            return unpickle_buffers(subs)

        descr = header["descr"]
        try:
            dtype = np.lib.format.descr_to_dtype(descr)
        except AttributeError:  # pragma: no cover
            # for older numpy versions, descr_to_dtype is not implemented
            dtype = np.dtype(descr)

        field_order = header["dtype_new_order"]
        if field_order:
            # Restore the field order the array had before serialization.
            dtype = dtype[field_order]

        return np.ndarray(
            shape=tuple(header["shape"]),
            dtype=dtype,
            buffer=subs[0],
            strides=tuple(header["strides"]),
            order=header["order"],
        )


NDArraySerializer.register(np.ndarray)
*/ 14 | 15 | #include 16 | #include 17 | 18 | namespace xoscar { 19 | 20 | template 21 | void scatter(const std::shared_ptr &context, 22 | std::vector sendbuf, 23 | intptr_t recvbuf, 24 | size_t size, 25 | int root, 26 | uint32_t tag) { 27 | std::vector input_ptr; 28 | for (size_t i = 0; i < sendbuf.size(); ++i) 29 | input_ptr.emplace_back(reinterpret_cast(sendbuf[i])); 30 | 31 | T *output_ptr = reinterpret_cast(recvbuf); 32 | 33 | // Configure ScatterOptions struct 34 | gloo::ScatterOptions opts_(context); 35 | opts_.setInputs(input_ptr, size); 36 | opts_.setOutput(output_ptr, size); 37 | opts_.setTag(tag); 38 | opts_.setRoot(root); 39 | 40 | gloo::scatter(opts_); 41 | } 42 | 43 | void scatter_wrapper(const std::shared_ptr &context, 44 | std::vector sendbuf, 45 | intptr_t recvbuf, 46 | size_t size, 47 | glooDataType_t datatype, 48 | int root, 49 | uint32_t tag) { 50 | switch (datatype) { 51 | case glooDataType_t::glooInt8: 52 | scatter(context, sendbuf, recvbuf, size, root, tag); 53 | break; 54 | case glooDataType_t::glooUint8: 55 | scatter(context, sendbuf, recvbuf, size, root, tag); 56 | break; 57 | case glooDataType_t::glooInt32: 58 | scatter(context, sendbuf, recvbuf, size, root, tag); 59 | break; 60 | case glooDataType_t::glooUint32: 61 | scatter(context, sendbuf, recvbuf, size, root, tag); 62 | break; 63 | case glooDataType_t::glooInt64: 64 | scatter(context, sendbuf, recvbuf, size, root, tag); 65 | break; 66 | case glooDataType_t::glooUint64: 67 | scatter(context, sendbuf, recvbuf, size, root, tag); 68 | break; 69 | case glooDataType_t::glooFloat16: 70 | scatter(context, sendbuf, recvbuf, size, root, tag); 71 | break; 72 | case glooDataType_t::glooFloat32: 73 | scatter(context, sendbuf, recvbuf, size, root, tag); 74 | break; 75 | case glooDataType_t::glooFloat64: 76 | scatter(context, sendbuf, recvbuf, size, root, tag); 77 | break; 78 | default: 79 | throw std::runtime_error("Unhandled dataType"); 80 | } 81 | } 82 | } // namespace xoscar 83 | 
-------------------------------------------------------------------------------- /cpp/collective/gloo/src/broadcast.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | namespace xoscar { 20 | 21 | template 22 | void broadcast(const std::shared_ptr &context, 23 | intptr_t sendbuf, 24 | intptr_t recvbuf, 25 | size_t size, 26 | int root, 27 | uint32_t tag) { 28 | // Configure BroadcastOptions struct and call broadcast function 29 | gloo::BroadcastOptions opts_(context); 30 | 31 | if (context->rank == root) { 32 | T *input_ptr = reinterpret_cast(sendbuf); 33 | opts_.setInput(input_ptr, size); 34 | } 35 | T *output_ptr = reinterpret_cast(recvbuf); 36 | opts_.setOutput(output_ptr, size); 37 | 38 | opts_.setRoot(root); 39 | opts_.setTag(tag); 40 | 41 | gloo::broadcast(opts_); 42 | } 43 | 44 | void broadcast_wrapper(const std::shared_ptr &context, 45 | intptr_t sendbuf, 46 | intptr_t recvbuf, 47 | size_t size, 48 | glooDataType_t datatype, 49 | int root, 50 | uint32_t tag) { 51 | switch (datatype) { 52 | case glooDataType_t::glooInt8: 53 | broadcast(context, sendbuf, recvbuf, size, root, tag); 54 | break; 55 | case glooDataType_t::glooUint8: 56 | broadcast(context, sendbuf, recvbuf, size, root, tag); 57 | break; 58 | case glooDataType_t::glooInt32: 59 | broadcast(context, sendbuf, 
recvbuf, size, root, tag); 60 | break; 61 | case glooDataType_t::glooUint32: 62 | broadcast(context, sendbuf, recvbuf, size, root, tag); 63 | break; 64 | case glooDataType_t::glooInt64: 65 | broadcast(context, sendbuf, recvbuf, size, root, tag); 66 | break; 67 | case glooDataType_t::glooUint64: 68 | broadcast(context, sendbuf, recvbuf, size, root, tag); 69 | break; 70 | case glooDataType_t::glooFloat16: 71 | broadcast( 72 | context, sendbuf, recvbuf, size, root, tag); 73 | break; 74 | case glooDataType_t::glooFloat32: 75 | broadcast(context, sendbuf, recvbuf, size, root, tag); 76 | break; 77 | case glooDataType_t::glooFloat64: 78 | broadcast(context, sendbuf, recvbuf, size, root, tag); 79 | break; 80 | default: 81 | throw std::runtime_error("Unhandled dataType"); 82 | } 83 | } 84 | } // namespace xoscar 85 | -------------------------------------------------------------------------------- /python/xoscar/virtualenv/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2025 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
logger = logging.getLogger(__name__)

# Matches ANSI CSI escape sequences (ESC "[" parameters, intermediates, final byte).
ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")


def clean_ansi(text: str) -> str:
    """Remove ANSI escape sequences from text."""
    return ansi_escape.sub("", text)


def stream_reader(
    stream: BinaryIO, log_func: Callable[[str], None], output_stream: TextIO
) -> None:
    """
    Read from the stream, write to logger, and also write to the terminal.

    :param stream: Binary stream read line by line until EOF
    :param log_func: Called once per line with ANSI escapes and the trailing
        line terminator removed
    :param output_stream: Text stream that receives every decoded line
        unchanged and is flushed after each write
    """
    for line in iter(stream.readline, b""):
        # Undecodable bytes are replaced rather than aborting the reader thread.
        decoded = line.decode(errors="replace")
        output_stream.write(decoded)
        output_stream.flush()
        # Strip "\r" as well so CRLF output does not leave a stray carriage
        # return at the end of every log record.
        log_func(clean_ansi(decoded.rstrip("\r\n")))


@contextmanager
def run_subprocess_with_logger(
    cmd: Union[str, List[str]], cwd: Optional[str] = None, env: Optional[dict] = None
) -> Iterator[subprocess.Popen]:
    """
    Run a subprocess, redirect stdout to logger.info and stderr to logger.error.
    Returns the Popen object as a context manager.

    On exit the context manager waits for the process to finish, joins both
    reader threads and closes the two pipes.

    :param cmd: Command to execute
    :param cwd: Working directory passed to subprocess.Popen
    :param env: Environment mapping passed to subprocess.Popen
    :yield: The subprocess.Popen object
    """

    # The pipes are opened in binary mode, where line buffering (bufsize=1) is
    # not supported and only triggers a RuntimeWarning, so the default
    # buffering is used; stream_reader already consumes the pipes line by line.
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=cwd,
        env=env,
    )

    threads = [
        threading.Thread(
            target=stream_reader, args=(process.stdout, logger.info, sys.stdout)
        ),
        threading.Thread(
            target=stream_reader, args=(process.stderr, logger.error, sys.stderr)
        ),
    ]
    for t in threads:
        t.start()

    try:
        yield process
    finally:
        process.wait()
        for t in threads:
            t.join()
        # The reader threads have hit EOF by now; release the pipe handles
        # instead of leaving them to the garbage collector.
        for pipe in (process.stdout, process.stderr):
            if pipe is not None:
                pipe.close()


def is_vcs_url(spec_str: str) -> bool:
    """
    Check if the given spec string is a VCS URL.

    Supports common VCS schemes like git+, svn+, hg+, bzr+, and HTTP/HTTPS URLs.

    Args:
        spec_str (str): The package spec string.

    Returns:
        bool: True if it's a VCS URL, False otherwise.
    """
    vcs_prefixes = ("git+", "http://", "https://", "svn+", "hg+", "bzr+")
    return spec_str.startswith(vcs_prefixes)
def test_illegal_arguments():
    """Each invalid constructor argument combination must trip an assertion."""

    class DummyMetric(AbstractMetric):
        pass

    # Clearing the abstract-method set makes the abstract class instantiable.
    DummyMetric.__abstractmethods__ = set()
    invalid_argument_sets = [
        (1,),
        ("dummy_metric", 1),
        ("dummy_metric", "A test metric", "service"),
        ("dummy_metric", "A test metric", ("service", 1)),
    ]
    for bad_args in invalid_argument_sets:
        with pytest.raises(AssertionError):
            DummyMetric(*bad_args)


def test_dummy_metric():
    """A bare AbstractMetric subclass exposes its fields and no-op hooks."""

    class DummyMetric(AbstractMetric):
        pass

    DummyMetric.__abstractmethods__ = set()
    metric = DummyMetric("dummy_metric", "A test metric", ("service", "tenant"))
    assert isinstance(metric, AbstractMetric)
    assert metric.name == "dummy_metric"
    assert metric.description == "A test metric"
    assert metric.tag_keys == ("service", "tenant")
    assert metric.type is None
    assert metric._init() is None
    assert metric.record() is None
    assert metric._record() is None


def test_counter():
    """AbstractCounter reports type "Counter" and accepts tagged records."""

    class DummyCounter(AbstractCounter):
        pass

    DummyCounter.__abstractmethods__ = set()
    counter = DummyCounter("test_counter", "A test counter", ("service", "tenant"))
    assert counter.name == "test_counter"
    assert counter.description == "A test counter"
    assert counter.tag_keys == ("service", "tenant")
    assert counter.type == "Counter"
    assert counter.record(1, {"service": "indigen", "tenant": "test"}) is None


def test_gauge():
    """AbstractGauge reports type "Gauge" and defaults to no tag keys."""

    class DummyGauge(AbstractGauge):
        pass

    DummyGauge.__abstractmethods__ = set()
    gauge = DummyGauge("test_gauge", "A test gauge")
    assert gauge.name == "test_gauge"
    assert gauge.description == "A test gauge"
    assert gauge.tag_keys == ()
    assert gauge.type == "Gauge"
    assert gauge.record(1) is None


def test_meter():
    """AbstractMeter reports type "Meter" and defaults to an empty description."""

    class DummyMeter(AbstractMeter):
        pass

    DummyMeter.__abstractmethods__ = set()
    meter = DummyMeter("test_meter")
    assert meter.name == "test_meter"
    assert meter.description == ""
    assert meter.tag_keys == ()
    assert meter.type == "Meter"
    assert meter.record(1) is None


def test_histogram():
    """AbstractHistogram reports type "Histogram" with default description and tags."""

    class DummyHistogram(AbstractHistogram):
        pass

    DummyHistogram.__abstractmethods__ = set()
    histogram = DummyHistogram("test_histogram")
    assert histogram.name == "test_histogram"
    assert histogram.description == ""
    assert histogram.tag_keys == ()
    assert histogram.type == "Histogram"
    assert histogram.record(1) is None
def require_cupy(func):
    """Mark ``func`` as a cuda test that is skipped when cupy is unavailable."""
    marked = pytest.mark.cuda(func) if pytest else func
    return pytest.mark.skipif(cupy is None, reason="cupy not installed")(marked)


def require_cudf(func):
    """Mark ``func`` as a cuda test that is skipped when cudf is unavailable."""
    marked = pytest.mark.cuda(func) if pytest else func
    return pytest.mark.skipif(cudf is None, reason="cudf not installed")(marked)


def require_ucx(func):
    """Mark ``func`` as a ucx test that is skipped when ucx is unavailable."""
    marked = pytest.mark.ucx(func) if pytest else func
    return pytest.mark.skipif(ucx is None, reason="ucx not installed")(marked)


def require_unix(func):
    """Mark ``func`` as a unix test that is skipped on Windows."""
    marked = pytest.mark.unix(func) if pytest else func
    return pytest.mark.skipif(is_windows(), reason="only unix is supported")(marked)


def require_linux(func):
    """Mark ``func`` as a linux test that is skipped everywhere but Linux."""
    marked = pytest.mark.linux(func) if pytest else func
    return pytest.mark.skipif(not is_linux(), reason="only linux is supported")(
        marked
    )


DICT_NOT_EMPTY = type("DICT_NOT_EMPTY", (object,), {})  # is check works for deepcopy


def check_dict_structure_same(a, b, prefix=None):
    """Check that two dicts have the same structure, entry by entry.

    Entries are compared pairwise in iteration order. Keys must be equal
    unless one of them contains ``*``, in which case it is used as an
    ``fnmatch`` pattern for the other. A value of ``DICT_NOT_EMPTY`` on either
    side requires the other side to be a non-empty dict. Otherwise the value
    types must be identical, and nested dicts are checked recursively.

    ``prefix`` is the list of parent keys used to build dotted paths in error
    messages.

    Raises:
        KeyError: keys differ or do not match the wildcard pattern.
        TypeError: value types differ, or a ``DICT_NOT_EMPTY`` placeholder is
            paired with a non-dict or an empty dict.
    """

    def _p(k):
        # Top-level keys are reported as-is; nested keys as a dotted path.
        if prefix is None:
            return k
        return ".".join(str(i) for i in prefix + [k])

    absent = ("_KEY_NOT_EXISTS_", None)
    pairs = itertools.zip_longest(a.items(), b.items(), fillvalue=absent)
    for (key_a, value_a), (key_b, value_b) in pairs:
        if key_a != key_b:
            if "*" in key_a:
                pattern, target = key_a, key_b
            elif "*" in key_b:
                pattern, target = key_b, key_a
            else:
                raise KeyError(f"Key {_p(key_a)} != {_p(key_b)}")
            if not fnmatch.fnmatch(target, pattern):
                raise KeyError(f"Key {_p(target)} not match {_p(pattern)}")

        # A placeholder on one side constrains the value on the other side.
        if value_a is DICT_NOT_EMPTY:
            target = value_b
        elif value_b is DICT_NOT_EMPTY:
            target = value_a
        else:
            target = None

        if target is not None:
            if not isinstance(target, dict):
                raise TypeError(f"Value type of {_p(key_a)} is not a dict.")
            if not target:
                raise TypeError(f"Value of {_p(key_a)} empty.")
            continue

        if type(value_a) is not type(value_b):
            raise TypeError(
                f"Value type of {_p(key_a)} mismatch {value_a} != {value_b}"
            )
        if isinstance(value_a, dict):
            child_prefix = [key_a] if prefix is None else prefix + [key_a]
            check_dict_structure_same(value_a, value_b, child_prefix)
27 | 28 | 29 | Why Xoscar 30 | ---------- 31 | Xoscar implements the actor model in Python and provides user-friendly APIs that offer significant 32 | benefits for building applications on heterogeneous hardware: 33 | 34 | - **Abstraction over low-level communication details**: Xoscar handles all communication between 35 | actors transparently, whether on CPUs, GPUs, or across nodes. Developers focus on application 36 | logic rather than managing hardware resources and optimizing data transfer. 37 | 38 | - **Flexible actor models**: Xoscar supports both stateful and stateless actors. Stateful actors 39 | ensure thread safety for concurrent systems while stateless actors can handle massive volumes of 40 | concurrent messages. Developers choose the appropriate actor model for their needs. 41 | 42 | - **Batch method**: Xoscar provides a batch interface to significantly improve call efficiency 43 | when an actor interface is invoked a large number of times. 44 | 45 | - **Advanced debugging support**: Xoscar can detect potential issues like deadlocks, long-running 46 | calls, and performance bottlenecks that would otherwise be nearly impossible to troubleshoot in a 47 | heterogeneous environment. 48 | 49 | - **Automated recovery**: If an actor fails for any reason, Xoscar will automatically restart it if 50 | you want. It can monitor actors and restart them upon failure, enabling fault-tolerant systems. 51 | 52 | Overview 53 | -------- 54 | .. image:: _static/architecture.png 55 | :alt: architecture 56 | 57 | Xoscar allows you to create multiple actor pools on each worker node, typically binding an actor 58 | pool to a CPU core or a GPU card. Xoscar provides allocation policies so that whenever an actor is 59 | created, it will be instantiated in the appropriate pool based on the specified policy. 60 | 61 | When actors communicate, Xoscar will choose the optimal communication mechanism based on which 62 | pools the actors belong to. 
This allows Xoscar to optimize communication in heterogeneous 63 | environments with multiple processing units and accelerators. 64 | 65 | Where to get it 66 | --------------- 67 | The source code is currently hosted on GitHub at: https://github.com/xorbitsai/xoscar 68 | 69 | Binary installers for the latest released version are available at the 70 | `Python Package Index (PyPI) <https://pypi.org/project/xoscar>`_. 71 | 72 | :: 73 | 74 | # PyPI 75 | pip install xoscar 76 | 77 | License 78 | ------- 79 | `Apache 2 <https://www.apache.org/licenses/LICENSE-2.0>`_ 80 | 81 | .. toctree:: 82 | :maxdepth: 2 83 | :hidden: 84 | 85 | getting_started/index 86 | user_guide/index 87 | reference/index 88 | -------------------------------------------------------------------------------- /cpp/collective/gloo/src/allreduce.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
*/ 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | namespace xoscar { 20 | 21 | template 22 | void allreduce(const std::shared_ptr &context, 23 | intptr_t sendbuf, 24 | intptr_t recvbuf, 25 | size_t size, 26 | ReduceOp reduceop, 27 | gloo::AllreduceOptions::Algorithm algorithm, 28 | uint32_t tag) { 29 | std::vector input_ptr{reinterpret_cast(sendbuf)}; 30 | std::vector output_ptr{reinterpret_cast(recvbuf)}; 31 | 32 | // Configure AllreduceOptions struct and call allreduce function 33 | gloo::AllreduceOptions opts_(context); 34 | opts_.setInputs(input_ptr, size); 35 | opts_.setOutputs(output_ptr, size); 36 | opts_.setAlgorithm(algorithm); 37 | gloo::ReduceOptions::Func fn = toFunction(reduceop); 38 | opts_.setReduceFunction(fn); 39 | opts_.setTag(tag); 40 | 41 | gloo::allreduce(opts_); 42 | } 43 | 44 | void allreduce_wrapper(const std::shared_ptr &context, 45 | intptr_t sendbuf, 46 | intptr_t recvbuf, 47 | size_t size, 48 | glooDataType_t datatype, 49 | ReduceOp reduceop, 50 | gloo::AllreduceOptions::Algorithm algorithm, 51 | uint32_t tag) { 52 | switch (datatype) { 53 | case glooDataType_t::glooInt8: 54 | allreduce( 55 | context, sendbuf, recvbuf, size, reduceop, algorithm, tag); 56 | break; 57 | case glooDataType_t::glooUint8: 58 | allreduce( 59 | context, sendbuf, recvbuf, size, reduceop, algorithm, tag); 60 | break; 61 | case glooDataType_t::glooInt32: 62 | allreduce( 63 | context, sendbuf, recvbuf, size, reduceop, algorithm, tag); 64 | break; 65 | case glooDataType_t::glooUint32: 66 | allreduce( 67 | context, sendbuf, recvbuf, size, reduceop, algorithm, tag); 68 | break; 69 | case glooDataType_t::glooInt64: 70 | allreduce( 71 | context, sendbuf, recvbuf, size, reduceop, algorithm, tag); 72 | break; 73 | case glooDataType_t::glooUint64: 74 | allreduce( 75 | context, sendbuf, recvbuf, size, reduceop, algorithm, tag); 76 | break; 77 | case glooDataType_t::glooFloat16: 78 | allreduce( 79 | context, sendbuf, recvbuf, size, reduceop, algorithm, tag); 80 | 
break; 81 | case glooDataType_t::glooFloat32: 82 | allreduce( 83 | context, sendbuf, recvbuf, size, reduceop, algorithm, tag); 84 | break; 85 | case glooDataType_t::glooFloat64: 86 | allreduce( 87 | context, sendbuf, recvbuf, size, reduceop, algorithm, tag); 88 | break; 89 | default: 90 | throw std::runtime_error("Unhandled dataType"); 91 | } 92 | } 93 | } // namespace xoscar 94 | -------------------------------------------------------------------------------- /cpp/collective/gloo/src/reduce.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
*/ 14 | 15 | #include 16 | #include 17 | 18 | namespace xoscar { 19 | 20 | template 21 | void reduce(const std::shared_ptr &context, 22 | intptr_t sendbuf, 23 | intptr_t recvbuf, 24 | size_t size, 25 | ReduceOp reduceop, 26 | int root, 27 | uint32_t tag) { 28 | T *input_ptr = reinterpret_cast(sendbuf); 29 | 30 | T *output_ptr; 31 | if (context->rank == root) 32 | output_ptr = reinterpret_cast(recvbuf); 33 | else 34 | output_ptr = new T[size]; 35 | 36 | // Configure reduceOptions struct 37 | gloo::ReduceOptions opts_(context); 38 | opts_.setInput(input_ptr, size); 39 | opts_.setOutput(output_ptr, size); 40 | gloo::ReduceOptions::Func fn = toFunction(reduceop); 41 | opts_.setReduceFunction(fn); 42 | opts_.setRoot(root); 43 | opts_.setTag(tag); 44 | 45 | gloo::reduce(opts_); 46 | 47 | if (context->rank != root) 48 | delete output_ptr; 49 | } 50 | 51 | void reduce_wrapper(const std::shared_ptr &context, 52 | intptr_t sendbuf, 53 | intptr_t recvbuf, 54 | size_t size, 55 | glooDataType_t datatype, 56 | ReduceOp reduceop, 57 | int root, 58 | uint32_t tag) { 59 | switch (datatype) { 60 | case glooDataType_t::glooInt8: 61 | reduce( 62 | context, sendbuf, recvbuf, size, reduceop, root, tag); 63 | break; 64 | case glooDataType_t::glooUint8: 65 | reduce( 66 | context, sendbuf, recvbuf, size, reduceop, root, tag); 67 | break; 68 | case glooDataType_t::glooInt32: 69 | reduce( 70 | context, sendbuf, recvbuf, size, reduceop, root, tag); 71 | break; 72 | case glooDataType_t::glooUint32: 73 | reduce( 74 | context, sendbuf, recvbuf, size, reduceop, root, tag); 75 | break; 76 | case glooDataType_t::glooInt64: 77 | reduce( 78 | context, sendbuf, recvbuf, size, reduceop, root, tag); 79 | break; 80 | case glooDataType_t::glooUint64: 81 | reduce( 82 | context, sendbuf, recvbuf, size, reduceop, root, tag); 83 | break; 84 | case glooDataType_t::glooFloat16: 85 | reduce( 86 | context, sendbuf, recvbuf, size, reduceop, root, tag); 87 | break; 88 | case glooDataType_t::glooFloat32: 89 | 
reduce( 90 | context, sendbuf, recvbuf, size, reduceop, root, tag); 91 | break; 92 | case glooDataType_t::glooFloat64: 93 | reduce( 94 | context, sendbuf, recvbuf, size, reduceop, root, tag); 95 | break; 96 | default: 97 | throw std::runtime_error("Unhandled dataType"); 98 | } 99 | } 100 | } // namespace xoscar 101 | -------------------------------------------------------------------------------- /cpp/collective/rendezvous/LICENSE: -------------------------------------------------------------------------------- 1 | Apache 2.0 License with Code from PyTorch Repository 2 | 3 | This software includes code primarily sourced from the PyTorch repository, which is governed by its own original license. 4 | The original license for the PyTorch repository, as well as the additional terms below, apply to the code in this software. 5 | 6 | ----------------------------------------------------------------------------- 7 | From PyTorch: 8 | 9 | Copyright (c) 2016- Facebook, Inc (Adam Paszke) 10 | Copyright (c) 2014- Facebook, Inc (Soumith Chintala) 11 | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) 12 | Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) 13 | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) 14 | Copyright (c) 2011-2013 NYU (Clement Farabet) 15 | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) 16 | Copyright (c) 2006 Idiap Research Institute (Samy Bengio) 17 | Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) 18 | 19 | From Caffe2: 20 | 21 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 22 | 23 | All contributions by Facebook: 24 | Copyright (c) 2016 Facebook Inc. 25 | 26 | All contributions by Google: 27 | Copyright (c) 2015 Google Inc. 28 | All rights reserved. 29 | 30 | All contributions by Yangqing Jia: 31 | Copyright (c) 2015 Yangqing Jia 32 | All rights reserved. 
33 | 34 | All contributions by Kakao Brain: 35 | Copyright 2019-2020 Kakao Brain 36 | 37 | All contributions by Cruise LLC: 38 | Copyright (c) 2022 Cruise LLC. 39 | All rights reserved. 40 | 41 | All contributions from Caffe: 42 | Copyright(c) 2013, 2014, 2015, the respective contributors 43 | All rights reserved. 44 | 45 | All other contributions: 46 | Copyright(c) 2015, 2016 the respective contributors 47 | All rights reserved. 48 | 49 | Caffe2 uses a copyright model similar to Caffe: each contributor holds 50 | copyright over their contributions to Caffe2. The project versioning records 51 | all such contribution and copyright details. If a contributor wants to further 52 | mark their specific copyright on a particular contribution, they should 53 | indicate their copyright solely in the commit message of the change when it is 54 | committed. 55 | 56 | All rights reserved. 57 | 58 | Redistribution and use in source and binary forms, with or without 59 | modification, are permitted provided that the following conditions are met: 60 | 61 | 1. Redistributions of source code must retain the above copyright 62 | notice, this list of conditions and the following disclaimer. 63 | 64 | 2. Redistributions in binary form must reproduce the above copyright 65 | notice, this list of conditions and the following disclaimer in the 66 | documentation and/or other materials provided with the distribution. 67 | 68 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 69 | and IDIAP Research Institute nor the names of its contributors may be 70 | used to endorse or promote products derived from this software without 71 | specific prior written permission. 72 | 73 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 74 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 77 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 78 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 79 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 80 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 81 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 82 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 83 | POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /python/xoscar/metrics/backends/prometheus/tests/test_prometheus_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import time 16 | 17 | import pytest 18 | 19 | try: 20 | import requests # type: ignore 21 | except ImportError: 22 | requests = None # type: ignore 23 | 24 | try: 25 | from prometheus_client import start_http_server 26 | except ImportError: 27 | start_http_server = None 28 | 29 | from .....utils import get_next_port 30 | from ..prometheus_metric import Counter, Gauge, Histogram, Meter 31 | 32 | _PROMETHEUS_CLIENT_PORT = get_next_port() 33 | 34 | # Module-scoped fixture: starts the prometheus_client HTTP endpoint on _PROMETHEUS_CLIENT_PORT when prometheus_client could be imported. 35 | @pytest.fixture(scope="module") 36 | def start_prometheus_http_server(): 37 | if start_http_server: 38 | start_http_server(_PROMETHEUS_CLIENT_PORT) 39 | 40 | # Fetch the metrics page from the local endpoint and assert that every line starting with `name` has exactly two space-separated items whose second item equals `value` within `delta`; returns without checking when requests or prometheus_client is missing. 41 | def verify_metric(name, value, delta=1e-6): 42 | if start_http_server is None or requests is None: 43 | return 44 | resp = requests.get("http://127.0.0.1:{}".format(_PROMETHEUS_CLIENT_PORT)).text 45 | assert name in resp 46 | lines = resp.splitlines() 47 | for line in lines: 48 | if line.startswith(name): 49 | items = line.split(" ") 50 | assert len(items) == 2 51 | assert pytest.approx(float(items[1]), abs=delta) == value 52 | 53 | # Counter: checks metadata and tag keys, then that two records (1 and 2) are reported as 1.0 and 3.0. 54 | def test_counter(start_prometheus_http_server): 55 | c = Counter("test_counter", "A test counter", ("service", "tenant")) 56 | assert c.name == "test_counter" 57 | assert c.description == "A test counter" 58 | assert set(["host", "pid"]).issubset(set(c.tag_keys)) 59 | assert set(["service", "tenant"]).issubset(set(c.tag_keys)) 60 | assert c.type == "Counter" 61 | c.record(1, {"service": "indigen", "tenant": "test"}) 62 | verify_metric("test_counter", 1.0) 63 | c.record(2, {"service": "indigen", "tenant": "test"}) 64 | verify_metric("test_counter", 3.0) 65 | 66 | 67 | def test_gauge(start_prometheus_http_server): 68 | g = Gauge("test_gauge", "A test gauge") 69 | assert g.name == "test_gauge" 70 | assert g.description == "A test gauge" 71 | assert set(["host", "pid"]).issubset(set(g.tag_keys)) 72 | assert g.type == "Gauge" 73 | g.record(0.1) 74 | verify_metric("test_gauge", 0.1) 75 | g.record(1.1) 76 | 
verify_metric("test_gauge", 1.1) 77 | 78 | 79 | def test_meter(start_prometheus_http_server): 80 | m = Meter("test_meter") 81 | assert m.name == "test_meter" 82 | assert m.description == "" 83 | assert set(["host", "pid"]).issubset(set(m.tag_keys)) 84 | assert m.type == "Meter" 85 | num = 3 86 | while num > 0: 87 | m.record(1) 88 | time.sleep(1) 89 | num -= 1 90 | verify_metric("test_meter", 1, 0.05) 91 | 92 | 93 | def test_histogram(start_prometheus_http_server): 94 | h = Histogram("test_histogram") 95 | assert h.name == "test_histogram" 96 | assert h.description == "" 97 | assert set(["host", "pid"]).issubset(set(h.tag_keys)) 98 | assert h.type == "Histogram" 99 | num = 3 100 | while num > 0: 101 | h.record(1) 102 | h.record(2) 103 | time.sleep(1) 104 | num -= 1 105 | verify_metric("test_histogram", 1.5, 0.15) 106 | num = 3 107 | while num > 0: 108 | h.record(3) 109 | time.sleep(1) 110 | num -= 1 111 | verify_metric("test_histogram", 3, 0.1) 112 | -------------------------------------------------------------------------------- /python/xoscar/backends/communication/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from asyncio import StreamReader, StreamWriter 17 | from typing import Dict, List, Union 18 | 19 | import numpy as np 20 | 21 | from ...serialization.aio import BUFFER_SIZES_NAME 22 | from ...utils import lazy_import 23 | 24 | cupy = lazy_import("cupy") 25 | cudf = lazy_import("cudf") 26 | rmm = lazy_import("rmm") 27 | 28 | CUDA_CHUNK_SIZE = 16 * 1024**2 29 | 30 | 31 | def _convert_to_cupy_ndarray( 32 | cuda_buffer: Union["cupy.ndarray", "rmm.DeviceBuffer"] # type: ignore 33 | ) -> "cupy.ndarray": # type: ignore 34 | if isinstance(cuda_buffer, cupy.ndarray): 35 | return cuda_buffer 36 | 37 | size = cuda_buffer.nbytes 38 | data = cuda_buffer.__cuda_array_interface__["data"][0] 39 | memory = cupy.cuda.UnownedMemory(data, size, cuda_buffer) 40 | ptr = cupy.cuda.MemoryPointer(memory, 0) 41 | return cupy.ndarray(shape=size, dtype="u1", memptr=ptr) 42 | 43 | 44 | def write_buffers(writer: StreamWriter, buffers: List): 45 | def _write_cuda_buffer(cuda_buffer: Union["cupy.ndarray", "rmm.DeviceBuffer"]): # type: ignore 46 | # convert cuda buffer to cupy ndarray 47 | cuda_buffer = _convert_to_cupy_ndarray(cuda_buffer) 48 | 49 | chunk_size = CUDA_CHUNK_SIZE 50 | offset = 0 51 | nbytes = buffer.nbytes 52 | while offset < nbytes: 53 | size = chunk_size if (offset + chunk_size) < nbytes else nbytes - offset 54 | # slice on cupy ndarray 55 | chunk_buffer = cuda_buffer[offset : offset + size] 56 | # `get` will return numpy ndarray, 57 | # write its data which is a memoryview into writer 58 | writer.write(chunk_buffer.get().data) 59 | offset += size 60 | 61 | for buffer in buffers: 62 | if hasattr(buffer, "__cuda_array_interface__"): 63 | # GPU buffer 64 | _write_cuda_buffer(buffer) 65 | else: 66 | writer.write(buffer) 67 | 68 | 69 | async def read_buffers(header: Dict, reader: StreamReader): 70 | is_cuda_buffers = header[0].get("is_cuda_buffers") 71 | buffer_sizes = header[0].pop(BUFFER_SIZES_NAME) 72 | 73 | buffers = [] 74 | for is_cuda_buffer, buf_size in 
zip(is_cuda_buffers, buffer_sizes): 75 | if is_cuda_buffer: # pragma: no cover 76 | if buf_size == 0: 77 | # uniformly use rmm.DeviceBuffer for cuda's deserialization 78 | buffers.append(rmm.DeviceBuffer(size=buf_size)) 79 | else: 80 | buffer = rmm.DeviceBuffer(size=buf_size) 81 | arr = _convert_to_cupy_ndarray(buffer) 82 | offset = 0 83 | chunk_size = CUDA_CHUNK_SIZE 84 | while offset < buf_size: 85 | read_size = ( 86 | chunk_size 87 | if (offset + chunk_size) < buf_size 88 | else buf_size - offset 89 | ) 90 | content = await reader.readexactly(read_size) 91 | chunk_arr = np.frombuffer(content, dtype="u1") 92 | arr[offset : offset + len(content)].set(chunk_arr) 93 | offset += read_size 94 | buffers.append(buffer) 95 | else: 96 | buffers.append(await reader.readexactly(buf_size)) 97 | return buffers 98 | -------------------------------------------------------------------------------- /python/xoscar/collective/common.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from enum import IntEnum 15 | from typing import Dict, Type 16 | 17 | import numpy as np 18 | 19 | from ..utils import lazy_import 20 | from . 
import xoscar_pygloo as xp 21 | 22 | ReduceOpMappingGloo: Dict["CollectiveReduceOp", "xp.ReduceOp"] = {} 23 | AllReduceAlgorithmMappingGloo: Dict["AllReduceAlgorithm", "xp.AllreduceAlgorithm"] = {} 24 | 25 | # Class decorator: stores xp.ReduceOp(member) in ReduceOpMappingGloo for every member of the decorated enum and returns the enum unchanged. 26 | def _register_reduce_op(reduce_op): 27 | for op_type in reduce_op: 28 | ReduceOpMappingGloo[op_type] = xp.ReduceOp(op_type) 29 | return reduce_op 30 | 31 | # Class decorator: stores xp.AllreduceAlgorithm(member) in AllReduceAlgorithmMappingGloo for every member of the decorated enum and returns the enum unchanged. 32 | def _register_allreduce_algo(algorithms): 33 | for algo in algorithms: 34 | AllReduceAlgorithmMappingGloo[algo] = xp.AllreduceAlgorithm(algo) 35 | return algorithms 36 | 37 | # Reduction operators; each integer value is passed to xp.ReduceOp by the decorator above. 38 | @_register_reduce_op 39 | class CollectiveReduceOp(IntEnum): 40 | SUM = 0 41 | PRODUCT = 1 42 | MIN = 2 43 | MAX = 3 44 | BAND = 4 45 | BOR = 5 46 | BXOR = 6 47 | UNUSED = 7 48 | 49 | # Allreduce algorithms; each integer value is passed to xp.AllreduceAlgorithm by the decorator above. 50 | @_register_allreduce_algo 51 | class AllReduceAlgorithm(IntEnum): 52 | UNSPECIFIED = 0 53 | RING = 1 54 | BCUBE = 2 55 | 56 | # numpy scalar type -> gloo data type constant. 57 | TypeMappingGloo: Dict[Type[np.dtype], "xp.GlooDataType_t"] = { 58 | np.int8: xp.GlooDataType_t.glooInt8, # type: ignore 59 | np.uint8: xp.GlooDataType_t.glooUint8, # type: ignore 60 | np.int32: xp.GlooDataType_t.glooInt32, # type: ignore 61 | np.uint32: xp.GlooDataType_t.glooUint32, # type: ignore 62 | np.int64: xp.GlooDataType_t.glooInt64, # type: ignore 63 | np.uint64: xp.GlooDataType_t.glooUint64, # type: ignore 64 | np.float16: xp.GlooDataType_t.glooFloat16, # type: ignore 65 | np.float32: xp.GlooDataType_t.glooFloat32, # type: ignore 66 | np.float64: xp.GlooDataType_t.glooFloat64, # type: ignore 67 | } 68 | cupy = lazy_import("cupy") 69 | if cupy is not None: 70 | from cupy.cuda import nccl 71 | 72 | TypeMappingNCCL: Dict[Type[np.dtype], int] = { 73 | np.int8: nccl.NCCL_INT8, # type: ignore 74 | np.uint8: nccl.NCCL_UINT8, # type: ignore 75 | np.int32: nccl.NCCL_INT32, # type: ignore 76 | np.uint32: nccl.NCCL_UINT32, # type: ignore 77 | np.int64: nccl.NCCL_INT64, # type: ignore 78 | np.uint64: nccl.NCCL_UINT64, # type: ignore 79 | np.float16: nccl.NCCL_FLOAT16, # type: ignore 80 | np.float32: 
nccl.NCCL_FLOAT32, # type: ignore 81 | np.float64: nccl.NCCL_FLOAT64, # type: ignore 82 | } 83 | 84 | ReduceOpMappingNCCL: Dict[CollectiveReduceOp, int] = { 85 | CollectiveReduceOp.SUM: nccl.NCCL_SUM, 86 | CollectiveReduceOp.PRODUCT: nccl.NCCL_PROD, 87 | CollectiveReduceOp.MAX: nccl.NCCL_MAX, 88 | CollectiveReduceOp.MIN: nccl.NCCL_MIN, 89 | } 90 | 91 | ReduceOpMappingNCCLStr: Dict[CollectiveReduceOp, str] = { 92 | CollectiveReduceOp.SUM: "sum", 93 | CollectiveReduceOp.PRODUCT: "prod", 94 | CollectiveReduceOp.MAX: "max", 95 | CollectiveReduceOp.MIN: "min", 96 | } 97 | # Some static variables 98 | INVOKE_ERROR_MESSAGE = "Collective-related functions must be called in a process that is involved in collection communication." 99 | RANK_ADDRESS_ENV_KEY = "COLLECTIVE_RANK_ADDRESS" 100 | RENDEZVOUS_MASTER_IP_ENV_KEY = "COLLECTIVE_MASTER_IP" 101 | RENDEZVOUS_MASTER_PORT_ENV_KEY = "COLLECTIVE_MASTER_PORT" 102 | COLLECTIVE_DEVICE_ID_ENV_KEY = "COLLECTIVE_DEVICE_ID_FOR_AN_ACTOR" 103 | -------------------------------------------------------------------------------- /python/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = xoscar 3 | description = Python actor framework for heterogeneous computing. 
4 | author = Qin Xuye 5 | author_email = qinxuye@xprobe.io 6 | maintainer = Qin Xuye 7 | maintainer_email = qinxuye@xprobe.io 8 | license = Apache License 2.0 9 | url = http://github.com/xorbitsai/xoscar 10 | python_requires = >=3.9 11 | classifier = 12 | Operating System :: OS Independent 13 | Programming Language :: Python 14 | Programming Language :: Python :: 3 15 | Programming Language :: Python :: 3.9 16 | Programming Language :: Python :: 3.10 17 | Programming Language :: Python :: 3.11 18 | Programming Language :: Python :: 3.12 19 | Programming Language :: Python :: 3.13 20 | Programming Language :: Python :: Implementation :: CPython 21 | Topic :: Software Development :: Libraries 22 | 23 | [options] 24 | zip_safe = False 25 | include_package_data = True 26 | packages = find: 27 | install_requires = 28 | numpy>=1.14.0 29 | pandas>=1.0.0 30 | scipy>=1.0.0; sys_platform!="win32" or python_version>="3.10" 31 | scipy>=1.0.0,<=1.9.1; sys_platform=="win32" and python_version<"3.10" 32 | cloudpickle>=1.5.0 33 | psutil>=5.9.0 34 | tblib>=1.7.0 35 | uvloop>=0.14.0; sys_platform!="win32" 36 | packaging 37 | click 38 | 39 | [options.packages.find] 40 | exclude = 41 | *.conftest* 42 | *.tests.* 43 | *.tests 44 | 45 | [options.extras_require] 46 | dev = 47 | cython>=0.29 48 | pytest>=3.5.0 49 | pytest-cov>=2.5.0 50 | pytest-timeout>=1.2.0 51 | pytest-forked>=1.0 52 | pytest-asyncio>=0.14.0 53 | ipython>=6.5.0 54 | sphinx 55 | pydata-sphinx-theme>=0.3.0 56 | sphinx-intl>=0.9.9 57 | flake8>=3.8.0 58 | black 59 | uv 60 | click 61 | doc = 62 | ipython>=6.5.0 63 | sphinx 64 | pydata-sphinx-theme>=0.3.0 65 | sphinx-intl>=0.9.9 66 | extra = 67 | pyarrow>=5.0.0 68 | kubernetes = 69 | kubernetes>=10.0.0 70 | ray = 71 | xoscar_ray>=0.0.1 72 | 73 | [coverage:run] 74 | branch = True 75 | relative_files = True 76 | cover_pylib = False 77 | plugins = Cython.Coverage 78 | include = 79 | xoscar/* 80 | omit = 81 | xoscar/_version.py 82 | xoscar/aio/lru.py 83 | xoscar/entrypoints.py 84 
| xoscar/nvutils.py 85 | *.pxd 86 | */tests/* 87 | disable_warnings = 88 | include-ignored 89 | 90 | [coverage:report] 91 | exclude_lines = 92 | pragma: no cover 93 | def __repr__ 94 | raise AssertionError 95 | raise NotImplementedError 96 | return NotImplemented 97 | 98 | [versioneer] 99 | VCS = git 100 | style = pep440 101 | versionfile_source = xoscar/_version.py 102 | versionfile_build = xoscar/_version.py 103 | tag_prefix = v 104 | parentdir_prefix = xoscar- 105 | 106 | [flake8] 107 | max-line-length = 100 108 | select = 109 | E9, 110 | E101, 111 | E111, 112 | E117, 113 | E127, 114 | E201, 115 | E202, 116 | E223, 117 | E224, 118 | E225, 119 | E231, 120 | E242, 121 | E251, 122 | E273, 123 | E274, 124 | E275, 125 | E301, 126 | E302, 127 | E303, 128 | E304, 129 | E305, 130 | E401, 131 | E703, 132 | E901, 133 | E999, 134 | F7, 135 | F63, 136 | F82, 137 | F401, 138 | F811, 139 | F821, 140 | F822, 141 | F823, 142 | F841, 143 | W191, 144 | W291, 145 | W292, 146 | W293, 147 | W391, 148 | W601, 149 | W602, 150 | W603, 151 | W604, 152 | W605 153 | exclude = 154 | __init__.py 155 | __pycache__ 156 | .git/ 157 | .github/ 158 | build/ 159 | ci/ 160 | dist/ 161 | docs/ 162 | shared_memory.py 163 | 164 | [codespell] 165 | ignore-words-list = hist,rcall,fpr,ser,nd,inout,ot,Ba,ba,asend,hart,coo,splitted,datas,fro 166 | skip = .idea,.git,./build,./docs/build,node_modules,static,generated,*.po,*.ts,*.json,*.c,*.cpp,*.cfg 167 | 168 | [isort] 169 | profile = black 170 | skip=shared_memory.py 171 | 172 | [mypy] 173 | ignore_missing_imports=True 174 | follow_imports=skip 175 | exclude = shared_memory.py 176 | -------------------------------------------------------------------------------- /cpp/collective/rendezvous/include/call_once.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 
2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #if defined(__GNUC__) || defined(__ICL) || defined(__clang__) 25 | # ifndef XOSCAR_LIKELY 26 | # define XOSCAR_LIKELY(expr) \ 27 | (__builtin_expect(static_cast(expr), 1)) 28 | # define XOSCAR_UNLIKELY(expr) \ 29 | (__builtin_expect(static_cast(expr), 0)) 30 | # endif 31 | #else 32 | # ifndef XOSCAR_LIKELY 33 | # define XOSCAR_LIKELY(expr) (expr) 34 | # define XOSCAR_UNLIKELY(expr) (expr) 35 | # endif 36 | #endif 37 | 38 | namespace xoscar { 39 | 40 | template 41 | #if defined(__cpp_lib_is_invocable) && __cpp_lib_is_invocable >= 201703L 42 | using invoke_result = typename std::invoke_result; 43 | #else 44 | using invoke_result = typename std::result_of; 45 | #endif 46 | 47 | template 48 | using invoke_result_t = typename invoke_result::type; 49 | 50 | template 51 | typename std::enable_if< 52 | std::is_member_pointer::type>::value, 53 | typename xoscar::invoke_result_t>::type 54 | invoke(Functor &&f, Args &&...args) { 55 | return std::mem_fn(std::forward(f))(std::forward(args)...); 56 | } 57 | 58 | template 59 | typename std::enable_if< 60 | !std::is_member_pointer::type>::value, 61 | typename xoscar::invoke_result_t>::type 62 | invoke(Functor &&f, Args &&...args) { 63 | return std::forward(f)(std::forward(args)...); 64 | } 65 | 66 | // custom xoscar call_once implementation to avoid 
the deadlock in 67 | // std::call_once. The implementation here is a simplified version from folly 68 | // and likely has a much higher memory footprint. 69 | template 70 | inline void call_once(Flag &flag, F &&f, Args &&...args) { 71 | if (XOSCAR_LIKELY(flag.test_once())) { 72 | return; 73 | } 74 | flag.call_once_slow(std::forward(f), std::forward(args)...); 75 | } 76 | /* Flag type for xoscar::call_once: mutex_ serialises the slow path and the atomic init_ records that the callable has completed. Not copyable. */ 77 | class once_flag { 78 | public: 79 | #ifndef _WIN32 80 | constexpr 81 | #endif 82 | 83 | once_flag() noexcept = default; 84 | once_flag(const once_flag &) = delete; 85 | once_flag &operator=(const once_flag &) = delete; 86 | 87 | private: 88 | template 89 | friend void call_once(Flag &flag, F &&f, Args &&...args); 90 | /* Slow path: takes mutex_, re-checks init_ so that only one caller invokes f, then publishes completion with a release store (read by the acquire load in test_once). init_ is set only after f returns, so it stays false if f throws. */ 91 | template 92 | void call_once_slow(F &&f, Args &&...args) { 93 | std::lock_guard guard(mutex_); 94 | if (init_.load(std::memory_order_relaxed)) { 95 | return; 96 | } 97 | invoke(f, std::forward(args)...); 98 | init_.store(true, std::memory_order_release); 99 | } 100 | /* Fast-path check used by call_once. */ 101 | bool test_once() { return init_.load(std::memory_order_acquire); } 102 | /* Clears the completed state, so a later call_once runs its callable again. */ 103 | void reset_once() { init_.store(false, std::memory_order_release); } 104 | 105 | private: 106 | std::mutex mutex_; 107 | std::atomic init_{false}; 108 | }; 109 | 110 | } // namespace xoscar 111 | -------------------------------------------------------------------------------- /python/xoscar/serialization/cuda.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from typing import Any, Dict, List, Tuple 17 | 18 | import pandas as pd 19 | 20 | from ..utils import lazy_import 21 | from .core import Serializer, buffered 22 | 23 | cupy = lazy_import("cupy") 24 | cudf = lazy_import("cudf") 25 | 26 | # Serializer for cupy arrays: the header is a copy of __cuda_array_interface__ extended with "strides" and "lengths"; the single payload buffer is a flat "u1" view of obj.nbytes elements over the array's memory. 27 | class CupySerializer(Serializer): 28 | @buffered 29 | def serial(self, obj: Any, context: Dict): 30 | if not (obj.flags["C_CONTIGUOUS"] or obj.flags["F_CONTIGUOUS"]): 31 | obj = cupy.array(obj, copy=True)  # arrays that are neither C- nor F-contiguous are copied before the byte view is taken 32 | 33 | header = obj.__cuda_array_interface__.copy() 34 | header["strides"] = tuple(obj.strides) 35 | header["lengths"] = [obj.nbytes] 36 | buffer = cupy.ndarray( 37 | shape=(obj.nbytes,), dtype=cupy.dtype("u1"), memptr=obj.data, strides=(1,) 38 | ) 39 | return (header,), [buffer], True 40 | # Rebuild the array over the first received buffer using the shape, typestr and strides stored in the header. 41 | def deserial(self, serialized: Tuple, context: Dict, subs: List): 42 | (header,) = serialized 43 | return cupy.ndarray( 44 | shape=header["shape"], 45 | dtype=header["typestr"], 46 | memptr=cupy.asarray(subs[0]).data, 47 | strides=header["strides"], 48 | ) 49 | 50 | # Serializer for cudf objects; the helper below describes a pandas or cudf MultiIndex as {"index_type", "names"} and returns None for any other index. 51 | class CudfSerializer(Serializer): 52 | @staticmethod 53 | def _get_ext_index_type(index_obj): 54 | import cudf 55 | 56 | multi_index_type = None 57 | if isinstance(index_obj, pd.MultiIndex): 58 | multi_index_type = "pandas" 59 | elif isinstance(index_obj, cudf.MultiIndex): 60 | multi_index_type = "cudf" 61 | 62 | if multi_index_type is None: 63 | return None 64 | return { 65 | "index_type": multi_index_type, 66 | "names": list(index_obj.names), 67 | } 68 | 69 | @staticmethod 70 | def _apply_index_type(obj, attr, 
header): 71 | import cudf 72 | 73 | multi_index_cls = ( 74 | pd.MultiIndex if header["index_type"] == "pandas" else cudf.MultiIndex 75 | ) 76 | original_index = getattr(obj, attr) 77 | if isinstance(original_index, (pd.MultiIndex, cudf.MultiIndex)): 78 | return 79 | new_index = multi_index_cls.from_tuples(original_index, names=header["names"]) 80 | setattr(obj, attr, new_index) 81 | 82 | def serial(self, obj: Any, context: Dict): 83 | header, buffers = obj.device_serialize() 84 | if hasattr(obj, "columns"): 85 | header["_ext_columns"] = self._get_ext_index_type(obj.columns) 86 | if hasattr(obj, "index"): 87 | header["_ext_index"] = self._get_ext_index_type(obj.index) 88 | return (header,), buffers, True 89 | 90 | def deserial(self, serialized: Tuple, context: Dict, buffers: List): 91 | from cudf.core.abc import Serializable 92 | 93 | (header,) = serialized 94 | col_header = header.pop("_ext_columns", None) 95 | index_header = header.pop("_ext_index", None) 96 | 97 | result = Serializable.device_deserialize(header, buffers) 98 | 99 | if col_header is not None: 100 | self._apply_index_type(result, "columns", col_header) 101 | if index_header is not None: 102 | self._apply_index_type(result, "index", index_header) 103 | return result 104 | 105 | 106 | if cupy is not None: 107 | CupySerializer.register("cupy.ndarray") 108 | if cudf is not None: 109 | CudfSerializer.register("cudf.DataFrame") 110 | CudfSerializer.register("cudf.Series") 111 | CudfSerializer.register("cudf.Index") 112 | -------------------------------------------------------------------------------- /cpp/collective/gloo/include/transport.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2022-2023 XProbe Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. */ 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #if GLOO_HAVE_TRANSPORT_TCP 25 | 26 | # include 27 | # include 28 | # include 29 | # include 30 | # include 31 | # include 32 | # include 33 | # include 34 | 35 | #endif 36 | 37 | #if GLOO_HAVE_TRANSPORT_UV 38 | 39 | # include 40 | # include 41 | # include 42 | # include 43 | # include 44 | 45 | #endif 46 | 47 | #if !GLOO_HAVE_TRANSPORT_UV 48 | # if !GLOO_HAVE_TRANSPORT_UV 49 | # include 50 | # include 51 | # include 52 | # include 53 | # include 54 | # endif 55 | #endif 56 | 57 | namespace xoscar { 58 | namespace transport { 59 | class PyDevice : public gloo::transport::Device { 60 | public: 61 | using gloo::transport::Device::Device; 62 | 63 | std::string str() const override { 64 | PYBIND11_OVERRIDE_PURE( 65 | std::string, // Return type 66 | gloo::transport::Device, // Parent class 67 | str, /* Name of function in C++ (must match Python name) */ 68 | /* Argument(s) */); 69 | } 70 | 71 | const std::string &getPCIBusID() const override { 72 | PYBIND11_OVERRIDE_PURE( 73 | const std::string &, /* Return type */ 74 | gloo::transport::Device, /* Parent class */ 75 | getPCIBusID, /* Name of function in C++ (must match Python name) */ 76 | /* Argument(s) */); 77 | } 78 | 79 | int getInterfaceSpeed() const override { 80 | PYBIND11_OVERRIDE(int, /* Return type */ 81 | gloo::transport::Device, // Parent class 82 | getInterfaceSpeed, // Name of function in C++ (must 83 | // match Python name) 84 | /* Argument(s) */); 85 
| } 86 | 87 | bool hasGPUDirect() const override { 88 | PYBIND11_OVERRIDE( 89 | bool, /* Return type */ 90 | gloo::transport::Device, /* Parent class */ 91 | hasGPUDirect, /* Name of function in C++ (must match Python name) */ 92 | /* Argument(s) */); 93 | } 94 | 95 | std::shared_ptr createContext(int rank, 96 | int size) override { 97 | PYBIND11_OVERRIDE_PURE( 98 | std::shared_ptr, /* Return type */ 99 | gloo::transport::Device, /* Parent class */ 100 | createContext, // Name of function in C++ (must match Python name) 101 | rank, 102 | size /* Argument(s) */); 103 | } 104 | }; 105 | 106 | void def_transport_module(pybind11::module &m); 107 | void def_transport_tcp_module(pybind11::module &m); 108 | void def_transport_uv_module(pybind11::module &m); 109 | } // namespace transport 110 | } // namespace xoscar 111 | -------------------------------------------------------------------------------- /python/xoscar/backends/test/tests/test_actor_context.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 XProbe Inc. 2 | # derived from copyright 1999-2021 Alibaba Group Holding Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
import asyncio
import gc
import threading

import pytest

import xoscar as mo

from ...communication.dummy import DummyServer
from ...router import Router

# Upper bound, in seconds, for every wait in this module. A regression should
# fail the test with a clear message instead of hanging the whole test run.
_WAIT_TIMEOUT = 60


class DummyActor(mo.Actor):
    """Minimal actor that holds one integer and can add to it."""

    def __init__(self, value):
        """Store ``value``.

        Raises:
            ValueError: if ``value`` is negative.
        """
        super().__init__()

        if value < 0:
            raise ValueError("value < 0")
        self.value = value

    async def add(self, value):
        """Add ``value`` to the stored number and return the new total.

        Raises:
            TypeError: if ``value`` is not an ``int``.
        """
        if not isinstance(value, int):
            raise TypeError("add number must be int")
        self.value += value
        return self.value


@pytest.fixture
async def actor_pool_context():
    """Yield a ``test://`` actor pool with two sub-processes, closed on exit."""
    pool = await mo.create_actor_pool("test://127.0.0.1", n_process=2)
    async with pool:
        yield pool


@pytest.mark.asyncio
async def test_simple(actor_pool_context):
    """An actor created in a random sub pool answers a plain method call."""
    pool = actor_pool_context
    actor_ref = await mo.create_actor(
        DummyActor,
        100,
        address=pool.external_address,
        allocate_strategy=mo.allocate_strategy.RandomSubPool(),
    )
    assert await actor_ref.add(1) == 101


def _cancel_all_tasks(loop):
    """Cancel every task still attached to ``loop`` and wait for them to end.

    Must be called from the thread that owns ``loop`` while the loop is not
    running, because it drives the loop with ``run_until_complete``. Tasks
    that finished with an exception other than cancellation are reported
    through the loop's exception handler.
    """
    to_cancel = asyncio.all_tasks(loop)
    if not to_cancel:
        return

    for task in to_cancel:
        task.cancel()

    loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))

    for task in to_cancel:
        if task.cancelled():
            continue
        exc = task.exception()
        if exc is not None:
            loop.call_exception_handler(
                {
                    "message": "unhandled exception during asyncio.run() shutdown",
                    "exception": exc,
                    "task": task,
                }
            )


def _run_forever(loop):
    """Thread target: run ``loop`` until stopped, then tear it down.

    After ``loop.stop()`` takes effect the remaining tasks are cancelled and
    the loop is closed, so the loop's resources are released even when the
    teardown itself raises.
    """
    try:
        loop.run_forever()
        _cancel_all_tasks(loop)
    finally:
        loop.close()


@pytest.mark.asyncio
async def test_channel_cleanup(actor_pool_context):
    """Channels opened from short-lived event loops are released afterwards.

    Ten extra threads, each with its own event loop, send messages to one
    actor. While they are alive the dummy server is expected to track 12
    channels/tasks; once the loops are stopped and garbage is collected only
    2 should remain.
    """
    pool = actor_pool_context
    actor_ref = await mo.create_actor(
        DummyActor,
        0,
        address=pool.external_address,
        allocate_strategy=mo.allocate_strategy.RandomSubPool(),
    )

    curr_router = Router.get_instance()
    server_address = curr_router.get_internal_address(actor_ref.address)
    dummy_server = DummyServer.get_instance(server_address)

    async def inc():
        await asyncio.gather(*(actor_ref.add.tell(1) for _ in range(10)))

    clock = asyncio.get_running_loop().time

    loops = []
    threads = []
    futures = []
    try:
        for _ in range(10):
            loop = asyncio.new_event_loop()
            t = threading.Thread(target=_run_forever, args=(loop,))
            t.start()
            loops.append(loop)
            threads.append(t)
            fut = asyncio.run_coroutine_threadsafe(inc(), loop=loop)
            futures.append(fut)

        for fut in futures:
            fut.result(timeout=_WAIT_TIMEOUT)

        # ``tell`` does not wait for the actor to process the message, so
        # poll until all 10 * 10 increments have been applied.
        deadline = clock() + _WAIT_TIMEOUT
        while await actor_ref.add(0) != 100:
            assert clock() < deadline, "actor did not receive all increments"
            await asyncio.sleep(0)

        assert len(dummy_server._channels) == 12
        assert len(dummy_server._tasks) == 12
    finally:
        # Always stop the helper loops: the threads are not daemons, so
        # leaving them in run_forever() after a failed assertion would keep
        # the test process alive forever.
        for loop in loops:
            loop.call_soon_threadsafe(loop.stop)

        for t in threads:
            t.join()
        threads.clear()

    curr_router = Router.get_instance()
    server_address = curr_router.get_internal_address(actor_ref.address)
    dummy_server = DummyServer.get_instance(server_address)

    deadline = clock() + _WAIT_TIMEOUT
    while True:
        gc.collect()
        # Two channels left:
        #   1. from the main pool to the actor
        #   2. from current main thread to the actor.
        if len(dummy_server._channels) == 2 and len(dummy_server._tasks) == 2:
            break
        assert clock() < deadline, (
            "channels were not cleaned up: "
            f"{len(dummy_server._channels)} channels, "
            f"{len(dummy_server._tasks)} tasks left"
        )
        # Yield instead of spinning so the main event loop keeps running
        # while we wait for the collector to release the channels.
        await asyncio.sleep(0.05)