├── .github └── workflows │ ├── build_wheel.yml │ └── build_wheel │ └── Dockerfile ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── .nojekyll ├── LICENSE.md ├── README.md ├── _sidebar.md ├── index.html ├── modelconnector │ ├── framworks.md │ ├── introduction.md │ ├── ld_preload.md │ └── python_apis.md └── torchconnector │ ├── configuration.md │ ├── examples.md │ ├── installation.md │ └── introduction.md ├── oss-model-connector ├── ossmodelconnector │ ├── __init__.py │ ├── _oss_connector │ │ ├── __init__.py │ │ └── oss_model_connector.pyi │ └── oss_model_connector.py ├── pyproject.toml └── setup.py └── oss-torch-connector ├── osstorchconnector ├── __init__.py ├── _oss_bucket_iterable.py ├── _oss_client.py ├── _oss_connector │ ├── __init__.py │ └── oss_connector.pyi ├── _oss_tar_iterable.py ├── oss_checkpoint.py ├── oss_iterable_dataset.py └── oss_map_dataset.py ├── pyproject.toml ├── setup.py └── tools └── generate_tar_archive.py /.github/workflows/build_wheel.yml: -------------------------------------------------------------------------------- 1 | name: Build Wheel 2 | 3 | on: 4 | push: 5 | tags: 6 | - "osstorchconnector/v*" 7 | - "ossmodelconnector/v*" 8 | 9 | jobs: 10 | build: 11 | name: "Build Release" 12 | runs-on: ubuntu-latest 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | python: ["3.8", "3.9", "3.10", "3.11", "3.12"] 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v3 20 | - name: Login ghcr.io 21 | uses: docker/login-action@v2 22 | with: 23 | registry: ghcr.io 24 | username: ${{ github.actor }} 25 | password: ${{ secrets.GITHUB_TOKEN }} 26 | - name: Setup buildx instance 27 | uses: docker/setup-buildx-action@v2 28 | with: 29 | use: true 30 | - name: Build 31 | shell: bash 32 | run: | 33 | REFS=${{ github.ref }} 34 | REFS=${REFS#refs/tags/} 35 | PACKAGE_NAME=${REFS%%/*} 36 | echo "PACKAGE_NAME=${PACKAGE_NAME}" 37 | RELEASE_VERSION="${REFS#*/v}" 38 | echo "RELEASE_VERSION=${RELEASE_VERSION}" 39 | PYTHON_VERSION=${{ 
matrix.python }} 40 | PYTHON_VERSION=${PYTHON_VERSION//./} 41 | echo "PYTHON_VERSION=${PYTHON_VERSION}" 42 | BUILD_IMAGE="quay.io/pypa/manylinux2014_x86_64:2024-03-10-4935fcc" 43 | echo "BUILD_IMAGE=${BUILD_IMAGE}" 44 | RELEASE_IMAGE="ghcr.io/${GITHUB_REPOSITORY,,}/connector_builder:${PACKAGE_NAME}-${RELEASE_VERSION}" 45 | echo "RELEASE_IMAGE=${RELEASE_IMAGE}" 46 | if [[ "${PACKAGE_NAME}" == "osstorchconnector" ]]; then 47 | PACKAGE_DIR="oss-torch-connector" 48 | elif [[ "${PACKAGE_NAME}" == "ossmodelconnector" ]]; then 49 | PACKAGE_DIR="oss-model-connector" 50 | fi 51 | sed -i -e "s/version.*/version = \"${RELEASE_VERSION}\"/g" ${PACKAGE_DIR}/pyproject.toml 52 | docker buildx build --build-arg BUILD_IMAGE=${BUILD_IMAGE} --build-arg RELEASE_IMAGE=${RELEASE_IMAGE} --build-arg PYTHON_VERSION=${PYTHON_VERSION} --build-arg PACKAGE_DIR=${PACKAGE_DIR} -f .github/workflows/build_wheel/Dockerfile -o dist/ . 53 | ls -l dist/ 54 | - name: Upload 55 | uses: actions/upload-artifact@v4 56 | with: 57 | name: dist-${{ strategy.job-index }} 58 | path: dist/oss* 59 | 60 | release: 61 | name: "Tagged Release" 62 | runs-on: ubuntu-latest 63 | needs: [build] 64 | steps: 65 | - name: Download builds and release notes 66 | uses: actions/download-artifact@v4 67 | with: 68 | pattern: dist-* 69 | merge-multiple: true 70 | path: dist 71 | - name: Display downloaded files 72 | shell: bash 73 | run: | 74 | ls -l dist 75 | REFS=${{ github.ref }} 76 | REFS=${REFS#refs/tags/} 77 | echo "RELEASE_TAG=${REFS}" >> $GITHUB_ENV 78 | - name: Create Release 79 | uses: "marvinpinto/action-automatic-releases@latest" 80 | with: 81 | repo_token: "${{ secrets.GITHUB_TOKEN }}" 82 | automatic_release_tag: "${{ env.RELEASE_TAG }}" 83 | prerelease: false 84 | files: dist/oss* 85 | -------------------------------------------------------------------------------- /.github/workflows/build_wheel/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG RELEASE_IMAGE 2 | ARG 
BUILD_IMAGE 3 | FROM ${RELEASE_IMAGE} AS release 4 | 5 | FROM ${BUILD_IMAGE} AS builder 6 | WORKDIR /libconnector 7 | COPY --from=release /libconnector . 8 | ARG PACKAGE_DIR 9 | COPY ${PACKAGE_DIR} . 10 | COPY README.md LICENSE MANIFEST.in . 11 | ARG PYTHON_VERSION 12 | ENV PY_VER=${PYTHON_VERSION} 13 | ENV PY_ABI_TAG=cp${PY_VER}-cp${PY_VER} 14 | RUN export PATH="/opt/python/${PY_ABI_TAG}/bin:$PATH" && \ 15 | python3 -V && \ 16 | libconnector=$(find . -type f -name "oss_*.cpython-${PY_VER}-x86_64-linux-gnu.so") && \ 17 | chmod +x ${libconnector} && \ 18 | echo -e "[build_ext]\nlibrary_path=${libconnector}" > setup.cfg && \ 19 | cat setup.cfg && \ 20 | python3 -u setup.py bdist_wheel && \ 21 | auditwheel repair dist/oss*.whl -w repaired_wheel && \ 22 | find . -type f -name "oss-connector-lib-*" -exec cp {} repaired_wheel/ \; 23 | 24 | FROM scratch 25 | COPY --from=builder /libconnector/repaired_wheel/oss* / 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/ 2 | build/ 3 | dist/ 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2024 aliyun.com 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 9 | Software. 
10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 12 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 14 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE README.md 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OSS Connector for AI/ML 2 | 3 | [ossconnector.github.io](https://ossconnector.github.io/) 4 | 5 | ## Overview 6 | 7 | OSS Connector for AI/ML contains some high-performance Python libraries specifically designed for AI and ML scenarios, tailored to work with [Alibaba Cloud OSS (Object Storage Service)](https://www.alibabacloud.com/en/product/object-storage-service). 8 | 9 | Currently, the OSS connector is composed of two libraries: OSS Model Connector and OSS Torch Connector. 10 | 11 | - [OSS Torch Connector](https://aliyun.github.io/oss-connector-for-ai-ml/#/torchconnector/introduction) is dedicated to AI training scenarios, including loading [datasets](https://pytorch.org/docs/stable/data.html#dataset-types) from OSS and loading/saving checkpoints from/to OSS. 12 | 13 | - [OSS Model Connector](https://aliyun.github.io/oss-connector-for-ai-ml/#/modelconnector/introduction) focuses on AI inference scenarios, loading large model files from OSS into local AI inference frameworks. 
14 | 15 | The core component of the OSS Connector for AI/ML is implemented in C++ using [PhotonLibOS](https://github.com/alibaba/PhotonLibOS) and is provided as dynamic link libraries within wheel packages. This repository only contains the code of Python. 16 | 17 | For details, please refer to [ossconnector.github.io](https://ossconnector.github.io/) or [aliyun.github.io/oss-connector-for-ai-ml](https://aliyun.github.io/oss-connector-for-ai-ml). 18 | 19 | 20 | ## Related 21 | 22 | [OSS Connector for AI/ML 中文文档](https://help.aliyun.com/zh/oss/developer-reference/oss-connector-for-ai-ml) 23 | 24 | ## License 25 | 26 | This project is licensed under the terms of the [MIT License](LICENSE). -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/oss-connector-for-ai-ml/0945da1942b9afee5efef2d733db370472af5afa/docs/.nojekyll -------------------------------------------------------------------------------- /docs/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2024 aliyun.com 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 5 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 6 | permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 9 | Software. 
10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 12 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 13 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 14 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # OSS Connector for AI/ML 2 | 3 | OSS Connector for AI/ML contains some high-performance Python libraries specifically designed for AI and ML scenariosis, tailored to work with [Alibaba Cloud OSS (Object Storage Service)](https://www.alibabacloud.com/en/product/object-storage-service). 4 | 5 | Currently, the OSS connector is composed of two libraries: OSS Model Connector and OSS Torch Connector. 6 | 7 | - [OSS Torch Connector](https://aliyun.github.io/oss-connector-for-ai-ml/#/torchconnector/introduction) is dedicated to AI training scenarios, including loading [datasets](https://pytorch.org/docs/stable/data.html#dataset-types) from OSS and loading/saving checkpoints from/to OSS. 8 | 9 | - [OSS Model Connector](https://aliyun.github.io/oss-connector-for-ai-ml/#/modelconnector/introduction) focuses on AI inference scenarios, loading large model files from OSS into local AI inference frameworks. 10 | 11 | The core component of the OSS Connector for AI/ML is implemented in C++ using [PhotonLibOS](https://github.com/alibaba/PhotonLibOS) and is provided as dynamic link libraries within wheel packages. This repository only contains the code of Python. 12 | 13 | 14 | ## License 15 | 16 | This project is licensed under the terms of the [MIT License](LICENSE.md). 
-------------------------------------------------------------------------------- /docs/_sidebar.md: -------------------------------------------------------------------------------- 1 | - [Home](/) 2 | 3 | - OSS Model Connector 4 | 5 | - [Introduction](/modelconnector/introduction.md) 6 | - [Python APIs](/modelconnector/python_apis.md) 7 | - [Inference Frameworks](/modelconnector/framworks.md) 8 | - [LD_PRELOAD](/modelconnector/ld_preload.md) 9 | 10 | - OSS Torch Connector 11 | 12 | - [Introduction](/torchconnector/introduction.md) 13 | - [Installation](/torchconnector/installation.md) 14 | - [Configuration](/torchconnector/configuration.md) 15 | - [Examples](/torchconnector/examples.md) 16 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | OSS Connector for AI/ML 10 | 14 | 15 | 16 |
17 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /docs/modelconnector/framworks.md: -------------------------------------------------------------------------------- 1 | # Work with AI frameworks 2 | 3 | ## Overview 4 | 5 | Mainstream AI inference frameworks, such as vllm and transformers, load models from a local directory. The number of files in the model directory is not large, comprising several small files and multiple larger model files. For example, the directory below shows the model directory for Qwen2.5-72B, including 37 large safetensors files and several small files. 6 | 7 | ```bash 8 | # ll -lh /root/Qwen2.5-72B 9 | total 136G 10 | -rw-r--r-- 1 root root 664 Sep 25 12:23 config.json 11 | -rw-r--r-- 1 root root 2 Sep 25 12:23 configuration.json 12 | -rw-r--r-- 1 root root 138 Sep 25 12:23 generation_config.json 13 | -rw-r--r-- 1 root root 6.8K Sep 25 12:23 LICENSE 14 | -rw-r--r-- 1 root root 1.6M Sep 25 12:23 merges.txt 15 | -rw-r--r-- 1 root root 3.6G Sep 25 12:28 model-00001-of-00037.safetensors 16 | -rw-r--r-- 1 root root 3.8G Sep 25 12:33 model-00002-of-00037.safetensors 17 | -rw-r--r-- 1 root root 3.6G Sep 25 12:39 model-00003-of-00037.safetensors 18 | -rw-r--r-- 1 root root 3.8G Sep 25 12:44 model-00004-of-00037.safetensors 19 | -rw-r--r-- 1 root root 3.8G Sep 25 12:50 model-00005-of-00037.safetensors 20 | -rw-r--r-- 1 root root 3.8G Sep 25 12:55 model-00006-of-00037.safetensors 21 | -rw-r--r-- 1 root root 3.6G Sep 25 13:00 model-00007-of-00037.safetensors 22 | -rw-r--r-- 1 root root 3.8G Sep 25 13:06 model-00008-of-00037.safetensors 23 | -rw-r--r-- 1 root root 3.8G Sep 25 13:11 model-00009-of-00037.safetensors 24 | -rw-r--r-- 1 root root 3.8G Sep 25 13:17 model-00010-of-00037.safetensors 25 | -rw-r--r-- 1 root root 3.6G Sep 25 13:22 model-00011-of-00037.safetensors 26 | -rw-r--r-- 1 root root 3.8G Sep 25 13:28 model-00012-of-00037.safetensors 27 | -rw-r--r-- 1 root root 3.8G Sep 25 
13:33 model-00013-of-00037.safetensors 28 | -rw-r--r-- 1 root root 3.8G Sep 25 13:39 model-00014-of-00037.safetensors 29 | -rw-r--r-- 1 root root 3.6G Sep 25 13:44 model-00015-of-00037.safetensors 30 | -rw-r--r-- 1 root root 3.8G Sep 25 13:49 model-00016-of-00037.safetensors 31 | -rw-r--r-- 1 root root 3.8G Sep 25 13:55 model-00017-of-00037.safetensors 32 | -rw-r--r-- 1 root root 3.8G Sep 25 14:00 model-00018-of-00037.safetensors 33 | -rw-r--r-- 1 root root 3.6G Sep 25 14:06 model-00019-of-00037.safetensors 34 | -rw-r--r-- 1 root root 3.8G Sep 25 14:11 model-00020-of-00037.safetensors 35 | -rw-r--r-- 1 root root 3.8G Sep 25 14:17 model-00021-of-00037.safetensors 36 | -rw-r--r-- 1 root root 3.8G Sep 25 14:22 model-00022-of-00037.safetensors 37 | -rw-r--r-- 1 root root 3.6G Sep 25 14:27 model-00023-of-00037.safetensors 38 | -rw-r--r-- 1 root root 3.8G Sep 25 14:33 model-00024-of-00037.safetensors 39 | -rw-r--r-- 1 root root 3.8G Sep 25 14:38 model-00025-of-00037.safetensors 40 | -rw-r--r-- 1 root root 3.8G Sep 25 14:44 model-00026-of-00037.safetensors 41 | -rw-r--r-- 1 root root 3.6G Sep 25 14:49 model-00027-of-00037.safetensors 42 | -rw-r--r-- 1 root root 3.8G Sep 25 14:55 model-00028-of-00037.safetensors 43 | -rw-r--r-- 1 root root 3.8G Sep 25 15:00 model-00029-of-00037.safetensors 44 | -rw-r--r-- 1 root root 3.8G Sep 25 15:05 model-00030-of-00037.safetensors 45 | -rw-r--r-- 1 root root 3.6G Sep 25 15:11 model-00031-of-00037.safetensors 46 | -rw-r--r-- 1 root root 3.8G Sep 25 15:16 model-00032-of-00037.safetensors 47 | -rw-r--r-- 1 root root 3.8G Sep 25 15:22 model-00033-of-00037.safetensors 48 | -rw-r--r-- 1 root root 3.8G Sep 25 15:27 model-00034-of-00037.safetensors 49 | -rw-r--r-- 1 root root 3.6G Sep 25 15:32 model-00035-of-00037.safetensors 50 | -rw-r--r-- 1 root root 3.8G Sep 25 15:38 model-00036-of-00037.safetensors 51 | -rw-r--r-- 1 root root 3.3G Sep 25 15:43 model-00037-of-00037.safetensors 52 | -rw-r--r-- 1 root root 78K Sep 25 15:43 
model.safetensors.index.json 53 | -rw-r--r-- 1 root root 3.8K Sep 25 15:43 README.md 54 | -rw-r--r-- 1 root root 7.1K Sep 25 15:43 tokenizer_config.json 55 | -rw-r--r-- 1 root root 6.8M Sep 25 15:43 tokenizer.json 56 | -rw-r--r-- 1 root root 2.7M Sep 25 15:43 vocab.json 57 | ``` 58 | 59 | Another common scenario is like the Stable Diffusion web UI, where a large number of models are stored in one or several folders, and there might be situations where models need to be switched during use. 60 | 61 | The OssModelConnector offers a method to directly pass in an OSS directory to the inference frameworks and read the models directly from OSS. 62 | 63 | Compared to the FUSE-based mounting solution, OssModelConnector has a significant performance advantage. Compared to downloading before loading to framworks, the OssModelConnector allows for simultaneous downloading and loading, achieving faster model deployment speeds. 64 | 65 | ## Usage 66 | 67 | Before starting inference frameworks like vllm and transformers, call `connector.prepare_directory(oss_dir, model_dir)`, and then pass model_dir to the inference framework. 68 | 69 | The `oss_dir` is the directory in OSS where the model files are stored, formatted as a URL, for example, `oss://ai-testset/qwen/qwen2.5-72B/`. 70 | 71 | The `model_dir` is the local model directory. During the process, the connector will download some temporary data into model_dir, which can be deleted afterward. 72 | 73 | After the prepare_directory called, the OssModelConnector begins downloading and prefetching data. Smaller files will be downloaded to the `model_dir` concurrently, while larger model files start prefetching into memory in alphabetical order. To avoid being corrupted by dirty data, the OssModelConnector will clean the contents of the `model_dir` before running. 
74 | 75 | ## Examples 76 | 77 | ### Transformers 78 | ```python 79 | from transformers import AutoModelForCausalLM, AutoTokenizer 80 | from ossmodelconnector import OssModelConnector 81 | 82 | # initialize OssModelConnector 83 | connector = OssModelConnector(...) 84 | 85 | # prepare_directory 86 | oss_path = "oss://ai-testset/qwen/Qwen25-75B" 87 | model_dir = '/root/abc/' 88 | connector.prepare_directory(oss_path, model_dir) 89 | 90 | # pass model_dir to transformer 91 | tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True) 92 | model = AutoModelForCausalLM.from_pretrained( 93 | model_dir, 94 | device_map="cpu", 95 | trust_remote_code=True, 96 | ).eval() 97 | 98 | # close to release resource 99 | connector.close() 100 | 101 | # do inference 102 | ``` 103 | 104 | ### Vllm 105 | 106 | ```python 107 | from transformers import AutoTokenizer 108 | from vllm import LLM, SamplingParams 109 | from ossmodelconnector import OssModelConnector 110 | 111 | # initialize OssModelConnector 112 | connector = OssModelConnector(...) 113 | 114 | # prepare_directory 115 | oss_path = "oss://ai-testset/qwen/Qwen25-75B" 116 | model_dir = '/root/abc/' 117 | connector.prepare_directory(oss_path, model_dir) 118 | 119 | # pass model_dir to vllm 120 | tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True) 121 | sampling_params = SamplingParams(temperature=0.7, top_p=0.8, repetition_penalty=1.05, max_tokens=512) 122 | llm = LLM(model=model_dir, trust_remote_code=True) 123 | 124 | # close to release resource 125 | connector.close() 126 | 127 | # do inference 128 | ``` 129 | 130 | # Stable Diffusion web UI 131 | 132 | Edit launch.py to initalize and configure OssModelConnector. 133 | 134 | ```python 135 | from modules import launch_utils 136 | 137 | import oss2 138 | from oss2.credentials import EnvironmentVariableCredentialsProvider 139 | from ossmodelconnector import OssModelConnector 140 | 141 | ... 142 | 143 | def main(): 144 | ... 
145 | 146 | 147 | if __name__ == "__main__": 148 | connector = OssModelConnector(endpoint='oss-cn-beijing-internal.aliyuncs.com', 149 | cred_provider=EnvironmentVariableCredentialsProvider(), 150 | config_path='/etc/connector.json') 151 | connector.prepare_directory('oss://ai-testset/Stable-diffusion/', '/root/stable-diffusion-webui/models/Stable-diffusion') 152 | 153 | main() 154 | ``` 155 | 156 | Currently, prepare_directory() loads all models into memory, which can put pressure on memory and even cause crashes in scenarios with a large number of models. In the future, prepare_directory() will support lazy loading, downloading models only when switching to or opening them, and it will include a garbage collection feature to release memory for unused models after a specified time. 157 | -------------------------------------------------------------------------------- /docs/modelconnector/introduction.md: -------------------------------------------------------------------------------- 1 | 2 | # OSS Model Connector 3 | 4 | ## Overview 5 | 6 | Storing (large) models on a lower-cost object storage (like Alibaba Cloud OSS) is a cost-effective option. The OSS Model Connector provides high-performance methods for loading (large) model files from OSS in AI inference scenarios. 7 | 8 | Currently, the memory of computing nodes for AI inference is generally large. The common practice is to first load the model from network storage or local disk into the node's memory before applying it for subsequent use. 9 | The primary function of the OSS Model Connector is to fully leverage local memory to accelerate the process of downloading models from OSS. 10 | In our testing environment, the download speed can exceed 15GB/s, approaching 20GB/s. 11 | 12 | The OSS Model Connector mainly offers three usage methods. 13 | 14 | - The first method is using the Python interface, allowing users to open OSS objects and read their contents through the stream API. 
15 | We also provide an interface for listing objects on OSS, as well as an implementation called 'fast list', which can complete the listing of a million objects within several seconds. 16 | 17 | - The second method is utilizing the libraries for loading models in inference frameworks such as transformers or vLLM. This method enables the integration of model file downloading and loading, optimizing the model deployment time. 18 | 19 | - The third method is to use LD_PRELOAD to address scenarios that the second method cannot handle, such as multi-process environments. The advantage of this approach is that it does not require modifying the code; configuration alone is sufficient. 20 | 21 | ## Features 22 | 23 | Compared to other solutions for loading OSS data, the OSS Model Connector is more focused, simpler, and high-performance. 24 | 25 | - Focus 26 | 27 | Unlike [ossfs](https://github.com/aliyun/ossfs), which provides a generic POSIX interface, the OSS Model Connector is more focused on AI inference scenarios. In this context, only data reading is involved, so there is no need to implement complex write operations. Additionally, memory resources are usually more abundant in these scenarios, allowing for the use of large amounts of memory for caching to accelerate the speed of data downloading from OSS. 28 | 29 | - Simpler 30 | 31 | The OSS Model Connector is used as an SDK, implemented entirely in user space, without the need for kernel or FUSE modules, resulting in a simpler I/O path. 32 | 33 | - High-performance 34 | 35 | Thanks to the simpler I/O path and efficient C++ implementation, the OSS Model Connector can achieve better performance. The C++ code is implemented based on the high-performance [PhotonLibOS](https://github.com/alibaba/PhotonLibOS), which includes features such as coroutines and an HTTP client. 
In our testing environment, the model loading speed can exceed 15GB/s, approaching 20GB/s, achieve the maximum bandwidth of the OSS server configuration. 36 | -------------------------------------------------------------------------------- /docs/modelconnector/ld_preload.md: -------------------------------------------------------------------------------- 1 | # Loading Models via LD_PRELOAD 2 | 3 | ## Overview 4 | In multi-process scenarios, the OSSModelConnector configuration initialized via the Python interface may be lost in Python sub-processes, causing OSS data to fail to load. For example, `vllm.entrypoints.openai.api_server`, where the main process is the API server and model inference happens in sub-processes; or in multi-GPU scenarios, where different processes load models onto different GPUs. 5 | 6 | In such cases, you can start the OSSModelConnector using the `LD_PRELOAD` method, passing configuration parameters via environment variables. Compared to initializing with Python, this `LD_PRELOAD` method generally does not require code modifications. 7 | 8 | ## Installation 9 | 10 | Download the installation package `oss-connector-lib` from [Release](https://github.com/aliyun/oss-connector-for-ai-ml/releases) 11 | 12 | For example, download the `oss-connector-lib-1.0.0rc8` and install. 13 | 14 | rpm: 15 | 16 | ```shell 17 | yum install -y https://github.com/aliyun/oss-connector-for-ai-ml/releases/download/ossmodelconnector%2Fv1.0.0rc8/oss-connector-lib-1.0.0rc8.x86_64.rpm 18 | ``` 19 | 20 | deb: 21 | ```shell 22 | wget https://github.com/aliyun/oss-connector-for-ai-ml/releases/download/ossmodelconnector%2Fv1.0.0rc8/oss-connector-lib-1.0.0rc8.x86_64.deb 23 | dpkg -i oss-connector-lib-1.0.0rc8.x86_64.deb 24 | ``` 25 | 26 | **After installation, check `/usr/local/lib/libossc_preload.so`.** 27 | 28 | 29 | ## Usage Method 30 | 31 | ### Configuration File 32 | 33 | The configuration file path is `/etc/oss-connector/config.json`. 
The installation package **already includes** a default configuration file as follows: 34 | 35 | ```json 36 | { 37 | "logLevel": 1, 38 | "logPath": "/var/log/oss-connector/connector.log", 39 | "auditPath": "/var/log/oss-connector/audit.log", 40 | "prefetch": { 41 | "vcpus": 16, 42 | "workers": 16 43 | } 44 | } 45 | ``` 46 | 47 | The main performance-related parameters are: 48 | 49 | - `prefetch.vcpus`: Number of vCPUs (CPU cores) to prefetch, default value is 16. 50 | - `prefetch.workers`: Number of coroutines per prefetched vCPU, default value is 16. 51 | 52 | ### Configure Environment Variables 53 | 54 | | Environment Variable KEY | Environment Variable VALUE Description | 55 | | --- | --- | 56 | | OSS_ACCESS_KEY_ID | OSS access key | 57 | | OSS_ACCESS_KEY_SECRET | OSS access key secret | 58 | | OSS_SESSION_TOKEN | Optional, STS token | 59 | | OSS_ENDPOINT | Endpoint for OSS, e.g., `http://oss-cn-beijing-internal.aliyuncs.com`, default HTTP schema is `http` | 60 | | OSS_PATH | OSS model directory, e.g., `oss://example-bucket/example-model-path/` | 61 | | MODEL_DIR | Local model directory, passed to vLLM or other inference frameworks. To avoid interference from dirty data, it is recommended to clear this directory first. Temporary data will be downloaded during use, and it can be deleted afterward. | 62 | | LD_PRELOAD | `/usr/local/lib/libossc_preload.so` | 63 | | **ENABLE_CONNECTOR** | `1`, **Enable Connector, must be set for the main process** | 64 | 65 | ### Start Python Program 66 | 67 | ```shell 68 | LD_PRELOAD=/usr/local/lib/libossc_preload.so ENABLE_CONNECTOR=1 OSS_ACCESS_KEY_ID=${akid} OSS_ACCESS_KEY_SECRET=${aksecret} OSS_ENDPOINT=${endpoint} OSS_PATH=oss://${bucket}/${path}/ MODEL_DIR=/tmp/model python3 -m vllm.entrypoints.openai.api_server --model /tmp/model --trust-remote-code --tensor-parallel-size 1 --disable-custom-all-reduce 69 | ``` 70 | 71 | ### Note! 72 | 73 | 1. 
`MODEL_DIR` must be consistent with the model dir for AI framework, e.g., vLLM's `--model`. 74 | 75 | 2. `ENABLE_CONNECTOR=1` must be set for the entrypoint process. `LD_PRELOAD` is recommended to be set for the entrypoint process but can also be directly set for the container. 76 | 77 | 3. Currently, when starting the OSSModelConnector via `LD_PRELOAD`, additional memory used for caching will be released with a delay, currently set at 120 seconds. 78 | 79 | 4. If using `nohup` to start, do not configure the environment variables for `nohup`. Instead, encapsulate the environment variables and startup command into a script and execute `nohup` on the script. 80 | 81 | 5. For now, try to use this method in single-machine scenarios. In multi-machine setups, there might be repeated loading or other unknown issues. 82 | -------------------------------------------------------------------------------- /docs/modelconnector/python_apis.md: -------------------------------------------------------------------------------- 1 | # Python API 2 | 3 | ## Overview 4 | 5 | Users can create an OssModelConnector in Python and call its provided methods to access data on OSS. The OssModelConnector provides methods for read-only access to OSS, such as list, open, and read, but does not offer any write methods for now. 6 | 7 | ## Key Features 8 | 9 | - List and FastList 10 | 11 | In addition to offering a normal list implementation, a faster method called "FastList" is also provided to significantly enhance the efficiency of listing a large number of objects. FastList achieves this by concurrently sending list requests and more intelligently handling the segmentation of lists, allowing the listing of millions of objects to be completed within seconds. 12 | 13 | - Data Prefetching 14 | 15 | This optimization is specifically designed for large models. 
After the open API is called, the ModelConnector performs high-concurrency data prefetching according to the order of opening to fully leverage the bandwidth advantages of OSS. It temporarily stores the data in memory, allowing users to quickly load data from memory when reading. 16 | 17 | ## Installation 18 | 19 | ### Requirements 20 | 21 | - OS: Linux x86-64 22 | - glibc: >= 2.17 23 | - Python: 3.8-3.12 24 | - PyTorch: >= 2.0 25 | 26 | ### Install latest version 27 | 28 | Download the latest OSSModelConnector package from [Release](https://github.com/aliyun/oss-connector-for-ai-ml/releases) and use pip to install it. 29 | 30 | For example, download the `ossmodelconnector/v1.0.0rc8` for Python 3.11 and install: 31 | 32 | ```bash 33 | wget https://github.com/aliyun/oss-connector-for-ai-ml/releases/download/ossmodelconnector%2Fv1.0.0rc8/ossmodelconnector-1.0.0rc8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl 34 | 35 | pip install ossmodelconnector-1.0.0rc8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl 36 | ``` 37 | 38 | ## Configuration 39 | 40 | ### Credential 41 | 42 | When initializing the OssModelConnector, it is necessary to specify the authentication information required to access OSS. 43 | 44 | Two methods are supported: Credentials provider and Credentials file. 45 | 46 | #### Credentials Provider 47 | 48 | OssModelConnector supports all authentication configuration methods of the OSS Python SDK. 49 | Please refer to the documentation: 50 | [How to configure access credentials for OSS SDK for Python](https://www.alibabacloud.com/help/en/oss/developer-reference/python-configuration-access-credentials) / 51 | [如何为OSS Python SDK配置访问凭证](https://help.aliyun.com/zh/oss/developer-reference/python-configuration-access-credentials) 52 | 53 | When using it, simply pass the `credentials_provider` to the constructor of the OssModelConnector. 54 | 55 | The following is an example of configuring authentication from environment variables. 
56 | 57 | ```bash 58 | export OSS_ACCESS_KEY_ID= 59 | export OSS_ACCESS_KEY_SECRET= 60 | export OSS_SESSION_TOKEN= 61 | ``` 62 | 63 | ```python 64 | import oss2 65 | from oss2.credentials import EnvironmentVariableCredentialsProvider 66 | from ossmodelconnector import OssModelConnector 67 | 68 | connector = OssModelConnector(endpoint=ENDPOINT, 69 | cred_provider=EnvironmentVariableCredentialsProvider(), 70 | config_path=CONFIG_PATH) 71 | ``` 72 | 73 | The following is an example of user-custom credentials. 74 | 75 | ```python 76 | from oss2 import CredentialsProvider 77 | from oss2.credentials import Credentials 78 | from ossmodelconnector import OssModelConnector 79 | 80 | class CredentialProviderWrapper(CredentialsProvider): 81 | def get_credentials(self): 82 | return Credentials('', '') 83 | 84 | 85 | credentials_provider = CredentialProviderWrapper() 86 | connector = OssModelConnector(endpoint=ENDPOINT, 87 | cred_provider=credentials_provider, 88 | config_path=CONFIG_PATH) 89 | ``` 90 | 91 | 92 | #### Crendentials File 93 | 94 | For now only JSON format credential file is supported. 95 | 96 | ```bash 97 | mkdir -p /root/.alibabacloud/ 98 | cat <<-EOF | tee /root/.alibabacloud/credentials 99 | { 100 | "AccessKeyId": "", 101 | "AccessKeySecret": "", 102 | "SecurityToken": "", 103 | "Expiration": "2024-08-02T15:04:05Z" 104 | } 105 | EOF 106 | ``` 107 | `SecurityToken` and `Expiration` are optional. 108 | The credential file must be updated before expiration to avoid authorization errors. 109 | 110 | ```python 111 | from ossmodelconnector import OssModelConnector 112 | 113 | connector = OssModelConnector(endpoint=ENDPOINT, 114 | cred_path='/root/.alibabacloud/credentials', 115 | config_path='/tmp/config.json') 116 | ``` 117 | 118 | 119 | ### Config File 120 | 121 | The configuration file is responsible for setting parameters such as logging and concurrency. Below is an example. 
122 | 123 | ```bash 124 | mkdir -p /etc/oss-connector/ 125 | cat <<-EOF | tee /etc/oss-connector/config.json 126 | { 127 | "logLevel": 1, 128 | "logPath": "/var/log/oss-connector/connector.log", 129 | "auditPath": "/var/log/oss-connector/audit.log", 130 | "prefetch": { 131 | "vcpus": 24, 132 | "workers": 32 133 | }, 134 | "fastList": { 135 | "vcpus": 2, 136 | "workers": 16 137 | } 138 | } 139 | EOF 140 | ``` 141 | 142 | Pass the path to `config_path` when initializing OssModelConnector. 143 | 144 | ```python 145 | import oss2 146 | from oss2.credentials import EnvironmentVariableCredentialsProvider 147 | from ossmodelconnector import OssModelConnector 148 | 149 | connector = OssModelConnector(endpoint=ENDPOINT, 150 | cred_provider=EnvironmentVariableCredentialsProvider(), 151 | config_path='/etc/oss-connector/config.json') 152 | ``` 153 | 154 | Below is an explanation of each configuration item. 155 | 156 | | Field | Description | 157 | |---------------|-------------------------------------------------------------------------------------------------------| 158 | | logLevel | The log level for log file, 0 - DEBUG, 1 - INFO, 2 - WARN, 3 - ERROR, 1 is the default value. | 159 | | logPath | The path for log file, `/var/log/oss-connector/connector.log` is the default value. | 160 | | auditPath | The path for audit file, `/var/log/oss-connector/audit.log` is the default value. | 161 | | prefetch.vcpus | The vcpu number for prefetching data. 16 is the default value. | 162 | | prefetch.workers | The worker number for prefetching data in each vcpu. 16 is the default value. | 163 | | fastList.vcpus | The vcpu number for doing fast list. 1 is the default value. | 164 | | fastList.workers | The worker number for doing fast list in each vcpu. 16 is the default value.
| 165 | 166 | 167 | ## Main APIs 168 | 169 | - Initialization 170 | 171 | To initialize an OssModelConnector, please refer to [configuration](./configuration.md) 172 | 173 | ```python 174 | connector = OssModelConnector(endpoint=ENDPOINT, 175 | cred_provider=EnvironmentVariableCredentialsProvider(), 176 | config_path='/tmp/config.json') 177 | ``` 178 | 179 | - List objects 180 | 181 | By passing in the `bucket` and `prefix`, users can obtain a list of all objects that meet the criteria, including name and size of objects. 182 | 183 | ```python 184 | objs = connector.list('ai-testset', "geonet/images/DISC/DISC.01/2022.001") 185 | for obj in objs: 186 | print(obj.key) 187 | print(obj.size) 188 | ``` 189 | 190 | Do FastList by passing True in the second parameter, which works more faster for large amount objects. 191 | The order of objects obtained by FastList is not guaranteed. If a specific order is required, users can sort the result based on `key`. 192 | 193 | ```python 194 | objs = connector.list('ai-testset', "geonet/images/DISC/DISC.01/2022.001", True) 195 | ``` 196 | 197 | - Open object 198 | 199 | Open an object through a URI. The URI format is `oss://{bucket}/{name}`. For example, `oss://ai-testset/dir1/obj1` represents an object named `dir1/obj1` in the `ai-testset` bucket. 200 | 201 | The open function accepts two parameters: the first is the URI, and the second is binary, which is of type bool and defaults to True, indicating that the file will be opened in binary mode. If set to False, it will be opened in text mode. 202 | 203 | ```python 204 | # open as binary mode 205 | obj = connector.open('oss://ai-testset/dir1/obj1') 206 | 207 | # open as text mode 208 | obj1 = connector.open('oss://ai-testset/dir1/obj1', False) 209 | ``` 210 | 211 | After calling open, OssModelConnector will start prefetching in the order of the open calls. For scenarios involving loading large model files in shards (e.g. 
model-00001-of-00038.safetensors to model-00038-of-00038.safetensors), we recommend making sequential batch calls to open first, and then reading each one individually." 212 | 213 | - Read object data 214 | 215 | Read, readinto, seek methods are provided and they follow the standard usage of Python streams. 216 | 217 | When a read call is made, if the data has already been prefetched into memory, it is returned directly from memory. Otherwise, a request is sent to OSS to retrieve and return the data. 218 | 219 | ```python 220 | # read whole data 221 | data = obj.read() 222 | 223 | # read a specified amount of data 224 | data = obj.read(4*1024*1024) 225 | 226 | # read into buffer 227 | buf = bytearray(4 * 1024 * 1024) 228 | obj.readinto(buf) 229 | 230 | # seek to a position 231 | obj.seek(0) 232 | ``` 233 | 234 | - Destroy object 235 | 236 | Destroying an object will release its occupied memory resources. Users can rely on Python's GC to handle it automatically, or perform manual destruction in memory-sensitive scenarios. 237 | 238 | 239 | ## Example 240 | 241 | Below is a sample code for loading a model in multiple shards. First, open them to initiate prefetching, and then read them sequentially. 
242 | 243 | ```python 244 | import oss2 245 | from oss2.credentials import EnvironmentVariableCredentialsProvider 246 | from ossmodelconnector import OssModelConnector 247 | 248 | connector = OssModelConnector(endpoint=ENDPOINT, 249 | cred_provider=EnvironmentVariableCredentialsProvider(), 250 | config_path='/tmp/config.json') 251 | 252 | objs = [] 253 | for i in range(1, 39): # 1-38 254 | name = f"oss://ai-testset/qwen/Qwen1.5-72B-Chat/model-{i:05d}-of-00038.safetensors" 255 | obj = connector.open(name) 256 | objs.append(obj) 257 | 258 | # using read 259 | for i in range(0, 38): # 0-37 260 | while True: 261 | data = objs[i].read(4*1024*1024) 262 | if not data: 263 | print("read object done ", i+1) 264 | break 265 | 266 | # or using readinto (recommended) 267 | buf = bytearray(4 * 1024 * 1024) 268 | for i in range(0, 38): # 0-37 269 | objs[i].seek(0) 270 | while True: 271 | n = objs[i].readinto(buf) 272 | if n == 0: 273 | print("readinto object done ", i+1) 274 | break 275 | ``` 276 | -------------------------------------------------------------------------------- /docs/torchconnector/configuration.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | ## Credential 4 | 5 | When initializing the OssTorchConnector components (OssMapDataset, OssIterableDataset, OssCheckpoint ...), it is necessary to specify the authentication information required to access OSS. 6 | 7 | Two methods are supported: Crendentials provider and Crendentials file. 8 | 9 | ### Crendentials Provider 10 | 11 | OssTorchConnector supports all authentication configuration methods of the OSS Python SDK. 
12 | Please refer to the documentation: 13 | [How to configure access credentials for OSS SDK for Python](https://www.alibabacloud.com/help/en/oss/developer-reference/python-configuration-access-credentials) / 14 | [如何为OSS Python SDK配置访问凭证](https://help.aliyun.com/zh/oss/developer-reference/python-configuration-access-credentials) 15 | 16 | When using it, simply pass the `credentials_provider` to the constructor of the OssTorchConnector components. 17 | 18 | The following is an example of configuring authentication from environment variables. 19 | 20 | ```bash 21 | export OSS_ACCESS_KEY_ID= 22 | export OSS_ACCESS_KEY_SECRET= 23 | export OSS_SESSION_TOKEN= 24 | ``` 25 | 26 | ```python 27 | import oss2 28 | from oss2.credentials import EnvironmentVariableCredentialsProvider 29 | from osstorchconnector import OssMapDataset 30 | 31 | map_dataset = OssMapDataset.from_prefix(OSS_URI, endpoint=ENDPOINT, 32 | cred_provider=EnvironmentVariableCredentialsProvider(), 33 | config_path=CONFIG_PATH) 34 | ``` 35 | 36 | The following is an example of user-custom credentials. 37 | 38 | ```python 39 | from oss2 import CredentialsProvider 40 | from oss2.credentials import Credentials 41 | from osstorchconnector import OssMapDataset 42 | 43 | class CredentialProviderWrapper(CredentialsProvider): 44 | def get_credentials(self): 45 | return Credentials('', '') 46 | 47 | 48 | credentials_provider = CredentialProviderWrapper() 49 | map_dataset = OssMapDataset.from_prefix(OSS_URI, endpoint=ENDPOINT, 50 | cred_provider=credentials_provider, 51 | config_path=CONFIG_PATH) 52 | ``` 53 | 54 | 55 | ### Crendentials File 56 | 57 | For now only JSON format credential file is supported. 58 | 59 | ```bash 60 | mkdir -p /root/.alibabacloud/ 61 | cat <<-EOF | tee /root/.alibabacloud/credentials 62 | { 63 | "AccessKeyId": "", 64 | "AccessKeySecret": "", 65 | "SecurityToken": "", 66 | "Expiration": "2024-08-02T15:04:05Z" 67 | } 68 | EOF 69 | ``` 70 | `SecurityToken` and `Expiration` are optional. 
71 | The credential file must be updated before expiration to avoid authorization errors. 72 | 73 | ```python 74 | from osstorchconnector import OssMapDataset 75 | 76 | map_dataset = OssMapDataset(endpoint=ENDPOINT, 77 | cred_path='/root/.alibabacloud/credentials', 78 | config_path=CONFIG_PATH) 79 | ``` 80 | 81 | 82 | ## Config 83 | 84 | The configuration file is responsible for setting parameters such as logging and concurrency. Below is an example. 85 | 86 | ```bash 87 | mkdir -p /etc/oss-connector/ 88 | cat <<-EOF | tee /etc/oss-connector/config.json 89 | { 90 | "logLevel": 1, 91 | "logPath": "/var/log/oss-connector/connector.log", 92 | "auditPath": "/var/log/oss-connector/audit.log", 93 | "datasetConfig": { 94 | "prefetchConcurrency": 24, 95 | "prefetchWorker": 2 96 | }, 97 | "checkpointConfig": { 98 | "prefetchConcurrency": 24, 99 | "prefetchWorker": 4, 100 | "uploadConcurrency": 64 101 | } 102 | } 103 | EOF 104 | ``` 105 | 106 | Pass the path to `config_path` when initializing OssTorchConnector components. 107 | 108 | ```python 109 | import oss2 110 | from oss2.credentials import EnvironmentVariableCredentialsProvider 111 | from osstorchconnector import OssMapDataset 112 | 113 | map_dataset = OssMapDataset.from_prefix(OSS_URI, endpoint=ENDPOINT, 114 | cred_provider=EnvironmentVariableCredentialsProvider(), 115 | config_path='/etc/oss-connector/config.json') 116 | ``` 117 | 118 | | Field | Description | 119 | |---------------|-------------------------------------------------------------------------------------------------------| 120 | | logLevel | The log level for log file, 0 - DEBUG, 1 - INFO, 2 - WARN, 3 - ERROR | 121 | | logPath | The path for log file, `/var/log/oss-connector/connector.log` is the default value. | 122 | | auditPath | The path for audit file, `/var/log/oss-connector/audit.log` is the default value. | 123 | | datasetConfig.prefetchConcurrency | The concurrency for perfetching data from Dataset. 24 is the default value. 
| 124 | | datasetConfig.prefetchWorker | The vcpu number for prefetching data from Dataset. 2 is the default value. | 125 | | datasetConfig.enableFastList | Flag to enable or disable FastList. false is the default value. | 126 | | datasetConfig.listConcurrency | The concurrency for FastList. 16 is the default value. | 127 | | datasetConfig.listWorker | The vcpu number for FastList. 1 is the default value. | 128 | | checkpointConfig.prefetchConcurrency | The concurrency for prefetching checkpoint. 24 is the default value. | 129 | | checkpointConfig.prefetchWorker | The vcpu number for prefetching checkpoint. 4 is the default value. | 130 | | checkpointConfig.uploadConcurrency | The concurrency for uploading checkpoint. 64 is the default value. | -------------------------------------------------------------------------------- /docs/torchconnector/examples.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | ## Dataset 4 | 5 | ### IterableDataset 6 | 7 | ```py 8 | from osstorchconnector import OssIterableDataset 9 | 10 | ENDPOINT = "http://oss-cn-beijing-internal.aliyuncs.com" 11 | CONFIG_PATH = "/etc/oss-connector/config.json" 12 | CRED_PATH = "/root/.alibabacloud/credentials" 13 | OSS_URI = "oss://ossconnectorbucket/EnglistImg/Img/BadImag/Bmp/Sample001/" 14 | 15 | # 1) from_prefix 16 | iterable_dataset = OssIterableDataset.from_prefix(OSS_URI, endpoint=ENDPOINT, cred_path=CRED_PATH, config_path=CONFIG_PATH) 17 | for item in iterable_dataset: 18 | print(item.key) 19 | print(item.size) 20 | content = item.read() 21 | print(len(content)) 22 | item.close() 23 | 24 | 25 | # 2) from_objects 26 | uris = [ 27 | "oss://ossconnectorbucket/EnglistImg/Img/BadImag/Bmp/Sample001/img001-00001.png", 28 | "oss://ossconnectorbucket/EnglistImg/Img/BadImag/Bmp/Sample001/img001-00002.png", 29 | "oss://ossconnectorbucket/EnglistImg/Img/BadImag/Bmp/Sample001/img001-00003.png" 30 | ] 31 | 32 | iterable_dataset =
OssIterableDataset.from_objects(uris, endpoint=ENDPOINT, cred_path=CRED_PATH, config_path=CONFIG_PATH) 33 | for item in iterable_dataset: 34 | print(item.key) 35 | print(item.size) 36 | content = item.read() 37 | print(len(content)) 38 | item.close() 39 | ``` 40 | 41 | ### MapDataset 42 | 43 | ```py 44 | from osstorchconnector import OssMapDataset 45 | 46 | ENDPOINT = "http://oss-cn-beijing-internal.aliyuncs.com" 47 | CONFIG_PATH = "/etc/oss-connector/config.json" 48 | CRED_PATH = "/root/.alibabacloud/credentials" 49 | OSS_URI = "oss://ossconnectorbucket/EnglistImg/Img/BadImag/Bmp/Sample001/" 50 | 51 | # 1) from_prefix 52 | map_dataset = OssMapDataset.from_prefix(OSS_URI, endpoint=ENDPOINT, cred_path=CRED_PATH, config_path=CONFIG_PATH) 53 | # random access 54 | item = map_dataset[0] 55 | print(item.key) 56 | content = item.read() 57 | print(item.size) 58 | print(len(content)) 59 | item.close() 60 | 61 | # or 62 | with map_dataset[5] as item: 63 | print(item.key) 64 | content = item.read() 65 | print(item.size) 66 | print(len(content)) 67 | 68 | # iterable 69 | for item in map_dataset: 70 | print(item.key) 71 | print(item.size) 72 | content = item.read() 73 | print(len(content)) 74 | item.close() 75 | 76 | 77 | # 2) from_objects 78 | uris = [ 79 | "oss://ossconnectorbucket/EnglistImg/Img/BadImag/Bmp/Sample001/img001-00001.png", 80 | "oss://ossconnectorbucket/EnglistImg/Img/BadImag/Bmp/Sample001/img001-00002.png", 81 | "oss://ossconnectorbucket/EnglistImg/Img/BadImag/Bmp/Sample001/img001-00003.png" 82 | ] 83 | 84 | map_dataset = OssMapDataset.from_objects(uris, endpoint=ENDPOINT, cred_path=CRED_PATH, config_path=CONFIG_PATH) 85 | # random access 86 | item = map_dataset[1] 87 | print(item.key) 88 | print(item.size) 89 | content = item.read() 90 | print(len(content)) 91 | item.close() 92 | 93 | # iterable 94 | for item in map_dataset: 95 | print(item.key) 96 | print(item.size) 97 | content = item.read() 98 | print(len(content)) 99 | item.close() 100 | ``` 101 | 102 |
Please note that OssMapDataset performs an OSS list objects operation under the given prefix first (which may take some time). 103 | 104 | ### Manifest file 105 | 106 | Manifest file contains objects name (and label) of OSS objects. 107 | Building datasets with manifest file can reduce the overhead of listing objects in OSS, making it suitable for datasets with a large number of objects and repeated dataset loading. 108 | 109 | A manifest file must be constructed in advance, and a method for parsing it must be provided during use. 110 | Below are examples of manifest files and loading a dataset with manifest file. 111 | 112 | Example manifest file with object name: 113 | ``` 114 | Img/BadImag/Bmp/Sample001/img001-00001.png 115 | Img/BadImag/Bmp/Sample001/img001-00002.png 116 | Img/BadImag/Bmp/Sample001/img001-00003.png 117 | ``` 118 | 119 | Example manifest file with object name and label: 120 | ``` 121 | Img/BadImag/Bmp/Sample001/img001-00001.png label1 122 | Img/BadImag/Bmp/Sample001/img001-00002.png label2 123 | Img/BadImag/Bmp/Sample001/img001-00003.png label3 124 | ``` 125 | 126 | ```py 127 | from osstorchconnector import OssIterableDataset 128 | 129 | ENDPOINT = "http://oss-cn-beijing-internal.aliyuncs.com" 130 | CONFIG_PATH = "/etc/oss-connector/config.json" 131 | CRED_PATH = "/root/.alibabacloud/credentials" 132 | OSS_URI = "oss://ossconnectorbucket/EnglistImg/Img/BadImag/Bmp/Sample001/" 133 | 134 | # manifest_parser 135 | def manifest_parser(reader: io.IOBase) -> Iterable[Tuple[str, str]]: 136 | lines = reader.read().decode("utf-8").strip().split("\n") 137 | for i, line in enumerate(lines): 138 | try: 139 | items = line.strip().split(' ') 140 | if len(items) >= 2: 141 | key = items[0] 142 | label = items[1] 143 | yield (key, label) 144 | elif len(items) == 1: 145 | key = items[0] 146 | yield (key, '') 147 | else: 148 | raise ValueError("format error") 149 | except ValueError as e: 150 | raise e 151 | 152 | # from local manifest_file 153 | iterable_dataset 
= OssIterableDataset.from_manifest_file("manifest_file", manifest_parser, "oss://ossconnectorbucket/EnglistImg/", endpoint=ENDPOINT, cred_path=CRED_PATH, config_path=CONFIG_PATH) 154 | for item in iterable_dataset: 155 | print(item.key) 156 | print(item.size) 157 | print(item.label) 158 | content = item.read() 159 | print(len(content)) 160 | item.close() 161 | 162 | # manifest_file on oss 163 | iterable_dataset = OssIterableDataset.from_manifest_file("oss://ossconnectorbucket/manifest_file/EnglistImg/manifest_file", manifest_parser, "oss://ossconnectorbucket/EnglistImg/", endpoint=ENDPOINT, cred_path=CRED_PATH, config_path=CONFIG_PATH) 164 | ``` 165 | 166 | ### Dataset and transform 167 | 168 | ```py 169 | import sys 170 | import io 171 | import torchvision.transforms as transforms 172 | from PIL import Image 173 | 174 | from osstorchconnector import OssIterableDataset, OssMapDataset 175 | 176 | ENDPOINT = "http://oss-cn-beijing-internal.aliyuncs.com" 177 | CONFIG_PATH = "/etc/oss-connector/config.json" 178 | CRED_PATH = "/root/.alibabacloud/credentials" 179 | OSS_URI = "oss://ossconnectorbucket/EnglistImg/Img/BadImag/Bmp/Sample001/" 180 | 181 | trans = transforms.Compose([ 182 | transforms.Resize(256), 183 | transforms.CenterCrop(224), 184 | transforms.ToTensor(), 185 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 186 | ]) 187 | 188 | def transform(data): 189 | try: 190 | img = Image.open(io.BytesIO(data.read())).convert('RGB') 191 | val = trans(img) 192 | except Exception as e: 193 | raise e 194 | return val, data.label 195 | 196 | iterable_dataset = OssIterableDataset.from_prefix(OSS_URI, endpoint=ENDPOINT, transform=transform, cred_path=CRED_PATH, config_path=CONFIG_PATH) 197 | 198 | for item in iterable_dataset: 199 | print(item[0]) 200 | print(item[1]) 201 | ``` 202 | 203 | ### Pytorch dataloader 204 | ```py 205 | import sys 206 | import io 207 | import torch 208 | import torchvision.transforms as transforms 209 | from PIL 
import Image 210 | from osstorchconnector import OssIterableDataset, OssMapDataset 211 | 212 | ENDPOINT = "http://oss-cn-beijing-internal.aliyuncs.com" 213 | CONFIG_PATH = "/etc/oss-connector/config.json" 214 | CRED_PATH = "/root/.alibabacloud/credentials" 215 | OSS_URI = "oss://ossconnectorbucket/EnglistImg/Img/BadImag/Bmp/Sample001/" 216 | 217 | 218 | trans = transforms.Compose([ 219 | transforms.Resize(256), 220 | transforms.CenterCrop(224), 221 | transforms.ToTensor(), 222 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 223 | ]) 224 | 225 | def transform(data): 226 | try: 227 | img = Image.open(io.BytesIO(data.read())).convert('RGB') 228 | val = trans(img) 229 | except Exception as e: 230 | raise e 231 | return val, data.key, data.label 232 | 233 | # OssIterableDataset 234 | iterable_dataset = OssIterableDataset.from_prefix(OSS_URI, endpoint=ENDPOINT, transform=transform, cred_path=CRED_PATH, config_path=CONFIG_PATH) 235 | loader = torch.utils.data.DataLoader(iterable_dataset, batch_size=256, num_workers=32, prefetch_factor=2) 236 | for i, (datas, keys, labels) in enumerate(loader): 237 | print(datas) 238 | print(keys) 239 | 240 | # OssMapDataset with shuffle 241 | map_dataset = OssMapDataset.from_prefix(OSS_URI, endpoint=ENDPOINT, transform=transform, cred_path=CRED_PATH, config_path=CONFIG_PATH) 242 | loader = torch.utils.data.DataLoader(map_dataset, batch_size=256, num_workers=32, prefetch_factor=2, shuffle=True) 243 | for i, (datas, keys, labels) in enumerate(loader): 244 | print(datas) 245 | print(keys) 246 | ``` 247 | 248 | When using with DataLoader, the main DataLoader worker responsible for listing from OSS or receiving objects from_prefix/from_manifest_file, all workers obtain their assigned objects from the main worker. 249 | This approach avoids issues of redundant listing and data reading (which may exist in other connectors), allowing better performance from multiple workers. 
When testing data download speed (excluding transform and other CPU-bound workload) with a large number of small files (e.g., ImageNet), it can exceed 10GB/s. 250 | 251 | OssIterableDataset includes prefetch optimization by increasing concurrency. When the DataLoader is configured with multiple workers, the iteration order may not be deterministic (local order might be disrupted). 252 | 253 | ## Checkpoint 254 | 255 | ```py 256 | import torch 257 | from osstorchconnector import OssCheckpoint 258 | 259 | ENDPOINT = "http://oss-cn-beijing-internal.aliyuncs.com" 260 | CONFIG_PATH = "/etc/oss-connector/config.json" 261 | CRED_PATH = "/root/.alibabacloud/credentials" 262 | 263 | checkpoint = OssCheckpoint(endpoint=ENDPOINT, cred_path=CRED_PATH, config_path=CONFIG_PATH) 264 | 265 | # read checkpoint 266 | CHECKPOINT_READ_URI = "oss://ossconnectorbucket/checkpoint/epoch.0" 267 | with checkpoint.reader(CHECKPOINT_READ_URI) as reader: 268 | state_dict = torch.load(reader) 269 | 270 | # write checkpoint 271 | CHECKPOINT_WRITE_URI = "oss://ossconnectorbucket/checkpoint/epoch.1" 272 | with checkpoint.writer(CHECKPOINT_WRITE_URI) as writer: 273 | torch.save(state_dict, writer) 274 | ``` 275 | 276 | OssCheckpoint can be used for checkpoints, and also for high-speed uploading and downloading of arbitrary objects. In our testing environment, the download speed can exceed 15GB/s. 
-------------------------------------------------------------------------------- /docs/torchconnector/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Requirements 4 | 5 | - OS: Linux x86-64 6 | - glibc: >= 2.17 7 | - Python: 3.8-3.12 8 | - PyTorch: >= 2.0 9 | 10 | ## Install 11 | 12 | ### Install stable version 13 | 14 | ```bash 15 | pip install osstorchconnector 16 | ``` 17 | 18 | ### Install latest version 19 | 20 | Download the latest osstorchconnector package from [Release](https://github.com/aliyun/oss-connector-for-ai-ml/releases) and use pip to install it. 21 | 22 | For example, download the `osstorchconnector/v1.1.0rc1` for Python 3.11 and install: 23 | 24 | ```bash 25 | wget https://github.com/aliyun/oss-connector-for-ai-ml/releases/download/osstorchconnector%2Fv1.1.0rc1/osstorchconnector-1.1.0rc1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl 26 | 27 | pip install osstorchconnector-1.1.0rc1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl 28 | ``` -------------------------------------------------------------------------------- /docs/torchconnector/introduction.md: -------------------------------------------------------------------------------- 1 | 2 | # OSS Torch Connector 3 | 4 | ## Overview 5 | 6 | OSS Torch Connector provides both [Map-style and Iterable-style datasets](https://pytorch.org/docs/stable/data.html#dataset-types) for loading datasets from OSS. 7 | And also provides a method for loading and saving checkpoints from and to OSS. 8 | 9 | The core part of OSS Connector for AI/ML is implemented in C++ using [PhotonLibOS](https://github.com/alibaba/PhotonLibOS). This repository only contains the code of Python.
10 | 11 | 12 | ## Related 13 | 14 | [OSS Connector for AI/ML 中文文档](https://help.aliyun.com/zh/oss/developer-reference/oss-connector-for-ai-ml) 15 | -------------------------------------------------------------------------------- /oss-model-connector/ossmodelconnector/__init__.py: -------------------------------------------------------------------------------- 1 | from ._oss_connector import ( 2 | DataObject, 3 | DataObjectInfo, 4 | Connector, 5 | new_oss_connector, 6 | ) 7 | from .oss_model_connector import OssModelConnector 8 | 9 | __all__ = ["DataObject", "DataObjectInfo", "Connector", "new_oss_connector", "OssModelConnector"] 10 | -------------------------------------------------------------------------------- /oss-model-connector/ossmodelconnector/_oss_connector/__init__.py: -------------------------------------------------------------------------------- 1 | from .oss_model_connector import ( 2 | DataObject, 3 | DataObjectInfo, 4 | Connector, 5 | new_oss_connector 6 | ) 7 | 8 | __all__ = ["DataObject", "DataObjectInfo", "Connector", "new_oss_connector"] 9 | -------------------------------------------------------------------------------- /oss-model-connector/ossmodelconnector/_oss_connector/oss_model_connector.pyi: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Any 2 | 3 | class DataObject: 4 | key: str 5 | 6 | def __enter__(self) -> DataObject: ... 7 | def __exit__(self, exc_type, exc_val, exc_tb): ... 8 | def tell(self) -> int: ... 9 | def seek(self, offset: int, whence: int) -> int: ... 10 | def read(self, size: int): ... 11 | def readline(self, size: int): ... 12 | def readinto(self, buf) -> int: ... 13 | def mmap(self) -> int: ... 14 | def close(self): ... 15 | def size(self) -> int: ... 16 | 17 | 18 | class DataObjectInfo: 19 | key: str 20 | size: int 21 | 22 | 23 | class Connector: 24 | def open(uri: str, prefetch: bool, userfault: bool, binary: bool) -> DataObject: ... 
25 | def prepare_directory(uri: str, dir: str, libc: bool) -> int: ... 26 | def list(bucket: str, prefix: str, fast: bool) -> List[DataObjectInfo]: ... 27 | 28 | 29 | def new_oss_connector(endpoint: str, cred: Union[str, Any], config_path: str) -> Connector: 30 | ... 31 | -------------------------------------------------------------------------------- /oss-model-connector/ossmodelconnector/oss_model_connector.py: -------------------------------------------------------------------------------- 1 | from ._oss_connector import new_oss_connector, Connector 2 | import ctypes 3 | import torch 4 | import builtins 5 | import pathlib 6 | from typing import Any 7 | 8 | 9 | class UntypedStorageEx: 10 | def __init__(self, file, size): 11 | self.file = file 12 | self.addr = memoryview((ctypes.c_ubyte * size).from_address(self.file.mmap())) 13 | 14 | def untyped(self): 15 | return self 16 | 17 | def __getitem__(self, idx): 18 | return self.addr[idx] 19 | 20 | class OssModelConnector: 21 | """ 22 | A connector class for interfacing with OSS for model loading, 23 | providing high-performance methods to load models/objects/files for AI inference. 24 | """ 25 | 26 | def __init__( 27 | self, 28 | endpoint: str, 29 | cred_path: str = "", 30 | config_path: str = "", 31 | cred_provider: Any = None, 32 | ): 33 | """ 34 | Initializes the connector with endpoint and optional credential information. 35 | 36 | Args: 37 | endpoint(str): The OSS endpoint to connect to. 38 | cred_path(str, optional): Path to the credential file. Defaults to "". 39 | config_path(str, optional): Path to the configuration file. Defaults to "". 40 | cred_provider(Any, optional): Credential provider. Defaults to None. 41 | 42 | Raises: 43 | ValueError: If endpoint or credential is not provided. 
44 | """ 45 | if not endpoint: 46 | raise ValueError("endpoint must be non-empty") 47 | if cred_provider is None and not cred_path: 48 | raise ValueError("Either cred_path or cred_provider must be provided") 49 | 50 | self._endpoint = endpoint 51 | if not cred_path: 52 | self._cred_path = "" 53 | else: 54 | self._cred_path = cred_path 55 | if not config_path: 56 | self._config_path = "" 57 | else: 58 | self._config_path = config_path 59 | self._cred_provider = cred_provider 60 | 61 | self._real_connector = None 62 | self._hook_dir = '' 63 | self._origin_from_file = torch.UntypedStorage.from_file 64 | self._origin_open = builtins.open 65 | 66 | def __del__(self): 67 | self.close() 68 | @property 69 | def _connector(self): 70 | if self._real_connector is None: 71 | if self._cred_provider is not None: 72 | self._real_connector = new_oss_connector(self._endpoint, self._cred_provider, self._config_path) 73 | else: 74 | self._real_connector = new_oss_connector(self._endpoint, self._cred_path, self._config_path) 75 | 76 | return self._real_connector 77 | 78 | def close(self): 79 | """ 80 | Close the connector and release resources. 81 | """ 82 | try: 83 | if self._hook_dir: 84 | self._hook_dir = '' 85 | 86 | if builtins.open == self._connector_open: 87 | builtins.open = self._origin_open 88 | 89 | if torch.UntypedStorage.from_file == self._from_file_helper: 90 | torch.UntypedStorage.from_file = self._origin_from_file 91 | 92 | if self._real_connector is not None: 93 | del self._real_connector 94 | self._real_connector = None 95 | except: 96 | print("exception in close, ignore") 97 | 98 | def open(self, uri, binary = True): 99 | """ 100 | Opens an object from OSS storage. 101 | 102 | Args: 103 | uri(str): The uri (oss://{bucket}/{object_name}) of the object to open. 104 | binary(bool): Flag indicating whether to open in binary mode or not. 105 | 106 | Returns: 107 | Stream-like object of the opened OSS object. 
        """
        # prefetch=True, userfault=True: the native layer starts prefetching the
        # object as soon as it is opened (see class overview).
        return self._connector.open(uri, True, True, binary)

    def _from_file_helper(self, filename, shared, nbytes):
        # Replacement for torch.UntypedStorage.from_file installed by
        # prepare_directory(): paths under the hooked directory are served from
        # OSS via an mmap-backed storage instead of the local filesystem.
        if self._hook_dir and filename.startswith(self._hook_dir):
            # NOTE(review): only (uri, prefetch, userfault) are passed here while
            # open() above passes a fourth `binary` argument — this relies on a
            # default in the native extension; confirm against
            # _oss_connector/oss_model_connector.pyi.
            file = self._connector.open(filename, True, True)
            return UntypedStorageEx(file, nbytes)
        else:
            # Anything outside the hooked directory goes to the original torch API.
            return self._origin_from_file(filename, shared, nbytes)

    def _connector_open(self, file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None):
        # Replacement for builtins.open installed by prepare_directory().
        # Signature mirrors io.open() so existing callers are unaffected.
        if isinstance(file, pathlib.Path):
            file = str(file)
        if self._hook_dir and file.startswith(self._hook_dir):
            # Map the open mode onto the connector's binary/text flag.
            binary = False
            if 'b' in mode:
                binary = True
            try:
                return self.open(file, binary)
            except:
                # Best-effort fallback: if OSS open fails for any reason, defer
                # to the real filesystem rather than breaking the caller.
                return self._origin_open(file, mode, buffering, encoding, errors, newline, closefd, opener)
        else:
            return self._origin_open(file, mode, buffering, encoding, errors, newline, closefd, opener)

    def prepare_directory(self, uri: str, dir: str, libc_hook: bool = False):
        """
        Prepare the directory from OSS storage, which can be used as directory 'dir' in vllm/transformers or other frameworks.

        Args:
            uri(str): The URI (oss://{bucket}/{directory}) of the OSS directory.
            dir(str): The local directory used for vllm/transformers or other frameworks.
            libc_hook (bool): Flag to enable libc hooking.

        Raises:
            RuntimeError: If prepare directory failed.
        """
        # The directory must end with '/' so the startswith() checks in the
        # hook helpers match exactly at the path-component boundary.
        if not dir.endswith('/'):
            dir += '/'
        self._connector.prepare_directory(uri, dir, libc_hook)
        if not libc_hook:
            # Without the libc-level hook, intercept file access at the Python
            # level instead; close() restores both patches.
            builtins.open = self._connector_open
            torch.UntypedStorage.from_file = self._from_file_helper
            self._hook_dir = dir

    def list(self, bucket: str, prefix: str, fast: bool = False):
        """
        Lists objects in a specified OSS bucket with a given prefix.

        Args:
            bucket(str): The OSS bucket name.
158 | prefix(str): The prefix filter for object listing. 159 | fast (bool): If true, enables fast list mode. 160 | 161 | Returns: 162 | List: A list of objects matching the bucket and prefix criteria. 163 | """ 164 | return self._connector.list(bucket, prefix, fast) 165 | -------------------------------------------------------------------------------- /oss-model-connector/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "ossmodelconnector" 7 | version = "1.0.0rc1" 8 | description = "OSS model connector for AI/ML" 9 | requires-python = ">=3.8,<3.13" 10 | readme = "README.md" 11 | dependencies = [ 12 | "torch >= 2.0", 13 | ] 14 | classifiers = [ 15 | "Development Status :: 4 - Beta", 16 | "Intended Audience :: Developers", 17 | "Topic :: Utilities", 18 | "License :: OSI Approved :: MIT License", 19 | "Operating System :: POSIX :: Linux", 20 | 21 | "Programming Language :: Python :: 3", 22 | "Programming Language :: Python :: 3.8", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Programming Language :: Python :: 3.12", 27 | ] 28 | 29 | [tool.setuptools.packages.find] 30 | where = ["."] 31 | include = ["ossmodelconnector", "ossmodelconnector._oss_connector"] 32 | 33 | [tool.setuptools.package-data] 34 | osstorchconnector = ["_oss_connector/*.so"] 35 | -------------------------------------------------------------------------------- /oss-model-connector/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from setuptools.command.build_ext import build_ext 3 | import os 4 | import subprocess 5 | import shutil 6 | 7 | 8 | class BuildExtension(Extension): 9 | def __init__(self, name, source_dir=''): 10 | 
class LibraryBuild(build_ext):
    """Custom build_ext that ships a prebuilt oss_connector shared library.

    Nothing is compiled: the .so supplied via ``--library-path`` is copied
    into the package's ``_oss_connector`` directory inside ``build_lib``.
    """

    user_options = build_ext.user_options + [
        ('library-path=', None, 'oss_connector library path'),
    ]

    def initialize_options(self):
        super().initialize_options()
        self.library_path = None

    def run(self):
        # Validate the user-supplied path up front so failures are explicit.
        if not self.library_path:
            raise RuntimeError("library path is not specified by '--library-path'")
        self.library_path = os.path.abspath(self.library_path)
        if os.path.exists(self.library_path):
            print('library path:', self.library_path)
        else:
            raise RuntimeError("invalid library path: " + self.library_path)
        for ext in self.extensions:
            self.build_extension(ext)

    def run_command(self, command, cwd):
        # Surface stdout/stderr of a failed build-time command before re-raising.
        # NOTE(review): this shadows distutils' Command.run_command(self, command),
        # which takes a command *name* — confirm nothing in setuptools calls it
        # on this class with the base signature.
        try:
            subprocess.run(command, capture_output=True, text=True, check=True, cwd=cwd)
        except subprocess.CalledProcessError as e:
            print(f"Command '{' '.join(command)}' failed with exit code {e.returncode}")
            print(f"Stdout: {e.stdout}")
            print(f"Stderr: {e.stderr}")
            raise RuntimeError("Subprocess execution failed") from e

    def build_extension(self, ext):
        print('name:', ext.name)
        print('source path:', ext.source_dir)
        print('current dir:', os.getcwd())

        # Copy the prebuilt .so into the build tree.
        library_file_name = os.path.basename(self.library_path)
        dest_so_path = os.path.abspath(
            os.path.join(self.build_lib, 'ossmodelconnector', '_oss_connector', library_file_name))
        # Robustness fix: the destination directory may not exist yet (no
        # real compilation populated build_lib), so create it first.
        os.makedirs(os.path.dirname(dest_so_path), exist_ok=True)
        print('copy %s to %s' % (self.library_path, dest_so_path))
        shutil.copy(self.library_path, dest_so_path)
def imagenet_manifest_parser(reader: io.IOBase) -> Iterable[Tuple[str, str]]:
    """Parse an ImageNet-style manifest into (key, label) pairs.

    Each manifest line is expected to be ``<key>\\t<label>``; a line without
    a label yields ``(key, '')``. Extra tab-separated fields beyond the
    first two are ignored.

    Args:
        reader(io.IOBase): binary stream containing the UTF-8 manifest.

    Yields:
        Tuple[str, str]: (object key, label) for each manifest line.
    """
    lines = reader.read().decode("utf-8").strip().split("\n")
    for line in lines:
        items = line.strip().split('\t')
        # str.split always returns at least one element, so the previous
        # raise/except ValueError "format error" branch was unreachable
        # dead code and has been removed.
        if len(items) >= 2:
            yield (items[0], items[1])
        else:
            yield (items[0], '')
manifest_parser: 92 | raise ValueError("manifest_parser must be non-empty") 93 | return cls(client, manifest_file_path=manifest_file_path, manifest_parser=manifest_parser, 94 | oss_base_uri=oss_base_uri, preload=preload) 95 | 96 | def _get_data_object_by_manifest(self) -> Iterator[DataObject]: 97 | if self._manifest_file_path.startswith("oss://"): 98 | ibucket, ikey = parse_oss_uri(self._manifest_file_path) 99 | with self._client.get_object(ibucket, ikey, type=0) as manifest_file: 100 | for key, label in self._manifest_parser(manifest_file): 101 | yield new_data_object(self._oss_base_uri + key, 0, label) 102 | else: 103 | with open(self._manifest_file_path, "rb") as manifest_file: 104 | for key, label in self._manifest_parser(manifest_file): 105 | yield new_data_object(self._oss_base_uri + key, 0, label) 106 | 107 | def __iter__(self) -> Iterator[DataObject]: 108 | # This allows us to iterate multiple times by re-creating the `_list_stream` 109 | if self._object_uris is not None: 110 | log.info("OssBucketIterable get iter by object uris") 111 | self._data_objects = [new_data_object(uri, 0, "") for uri in self._object_uris] 112 | return iter(OssBucketObjectsIterator(self._client, self._data_objects, self._preload)) 113 | elif self._manifest_file_path is not None and self._manifest_parser is not None: 114 | log.info("OssBucketIterable get iter by manifest file: %s", self._manifest_file_path) 115 | self._data_objects = self._get_data_object_by_manifest() 116 | return iter(OssBucketObjectsIterator(self._client, self._data_objects, self._preload)) 117 | elif self._oss_uri is not None: 118 | log.info("OssBucketIterable get iter by oss prefix: %s", self._oss_uri) 119 | return iter(OssBucketPrefixIterator(self._client, self._oss_uri, self._preload)) 120 | else: 121 | log.error("OssBucketIterable get iter failed") 122 | return None 123 | 124 | 125 | class OssBucketObjectsIterator: 126 | def __init__(self, client: OssClient, objects: Iterable[DataObject], preload: bool) -> 
class OssBucketPrefixIterator:
    """Single-pass iterator over the objects under one OSS bucket/prefix."""

    def __init__(self, client: OssClient, oss_uri: str, preload: bool):
        log.info("OssBucketPrefixIterator init")
        bucket, prefix = parse_oss_uri(oss_uri)
        # Pick the listing strategy once, then build the stream.
        lister = client.list_objects_with_preload if preload else client.list_objects
        self._list_stream = iter(lister(bucket, prefix))

    def __iter__(self) -> Iterator[DataObject]:
        log.info("OssBucketPrefixIterator get iter")
        return self._list_stream
19 | """ 20 | 21 | class OssClient: 22 | def __init__(self, endpoint: str, cred_path: str = "", config_path: str = "", uuid: str = "", id: int = 0, total: int = 1, cred_provider: Any = None): 23 | self._endpoint = endpoint 24 | self._cred_path = cred_path 25 | self._config_path = config_path 26 | self._uuid = uuid 27 | self._real_client = None 28 | self._client_pid = None 29 | self._id = id 30 | self._total = total 31 | self._cred_provider = cred_provider 32 | 33 | @property 34 | def _client(self) -> DataSet: 35 | if self._client_pid is None or self._client_pid != os.getpid(): 36 | # does OSS client survive forking ? NO 37 | if self._client_pid != os.getpid() and self._real_client is not None: 38 | log.info("OssClient delete dataset") 39 | # del self._real_client 40 | self._client_pid = os.getpid() 41 | self._real_client = self._client_builder() 42 | return self._real_client 43 | 44 | def _client_builder(self) -> DataSet: 45 | log.info("OssClient new_oss_dataset, id %d, total %d", self._id, self._total) 46 | return new_oss_dataset(self._endpoint, self._cred_path, self._cred_provider, self._config_path, str(self._uuid), self._id, self._total) 47 | 48 | def get_object(self, bucket: str, key: str, size: int = 0, type: int = 0, label: str = "") -> DataObject: 49 | return self._client.open_ro(bucket, key, size, type, label) 50 | 51 | def put_object(self, bucket: str, key: str) -> DataObject: 52 | return self._client.open_wo(bucket, key) 53 | 54 | def list_objects(self, bucket: str, prefix: str = "") -> Iterator[DataObject]: 55 | log.debug("OssClient list_objects") 56 | return self._client.list(bucket, prefix) 57 | 58 | def list_objects_with_preload(self, bucket: str, prefix: str = "") -> Iterator[DataObject]: 59 | log.debug("OssClient list_objects_with_preload") 60 | return self._client.list_with_preload(bucket, prefix) 61 | 62 | def list_objects_from_uris(self, object_uris: Iterable, prefetch: bool = False, include_errors: bool = False) -> Iterator[DataObject]: 63 | 
log.debug("OssClient list_objects_from_uris") 64 | return self._client.list_from_uris(object_uris, prefetch, include_errors) 65 | 66 | def list_objects_from_uris_with_preload(self, object_uris: Iterable) -> Iterator[DataObject]: 67 | log.debug("OssClient list_objects_from_uris_with_preload") 68 | return self._client.list_from_uris_with_preload(object_uris) 69 | 70 | def list_objects_from_tar(self, bucket: str, tar_key: str, index_key: str, chunks: Iterable = [], sizes: Iterable = [], 71 | prefetch: bool = False, include_errors: bool = False) -> Iterator[DataObject]: 72 | log.debug("OssClient list_objects_from_tar") 73 | return self._client.list_from_tar(bucket, tar_key, index_key, chunks, sizes, prefetch, include_errors) 74 | 75 | def gen_tar_archive(self, tar_path: str, index_path: str, source_path: str, index_only: bool = False) -> int: 76 | return self._client.gen_tar_archive(tar_path, index_path, source_path, index_only) 77 | -------------------------------------------------------------------------------- /oss-torch-connector/osstorchconnector/_oss_connector/__init__.py: -------------------------------------------------------------------------------- 1 | from .oss_connector import ( 2 | DataSet, 3 | DataObject, 4 | new_oss_dataset, 5 | new_data_object 6 | ) 7 | 8 | __all__ = ["DataSet", "DataObject", "new_oss_dataset", "new_data_object"] 9 | -------------------------------------------------------------------------------- /oss-torch-connector/osstorchconnector/_oss_connector/oss_connector.pyi: -------------------------------------------------------------------------------- 1 | from typing import Iterable, Iterator, Any 2 | 3 | 4 | class DataObject: 5 | key: str 6 | size: int 7 | label: str 8 | 9 | def __enter__(self) -> DataObject: ... 10 | def __exit__(self, exc_type, exc_val, exc_tb): ... 11 | def tell(self) -> int: ... 12 | def seek(self, offset: int, whence: int) -> int: ... 13 | def read(self, count: int) -> bytes: ... 
class DataSet:
    # Type stub for the native OSS dataset client implemented in the bundled
    # extension module. All list* methods return lazy iterators of DataObject;
    # the *preload*/*prefetch* variants hint background fetching (exact
    # semantics live in the native implementation).
    def list(self, bucket: str, prefix: str) -> Iterator[DataObject]: ...
    def list_with_preload(self, bucket: str, prefix: str) -> Iterator[DataObject]: ...
    def list_from_uris(self, iter: Iterable, prefetch: bool, include_errors: bool) -> Iterator[DataObject]: ...
    def list_from_uris_with_preload(self, iter: Iterable) -> Iterator[DataObject]: ...
    def list_from_tar(self, bucket: str, tar_key: str, index_key: str, chunks: Iterable, sizes: Iterable,
                      prefetch: bool, include_errors: bool) -> Iterator[DataObject]: ...
    # NOTE(review): callers (OssClient.get_object) pass a parameter named
    # `type` in this position, while the stub names it `mmap` — confirm the
    # native parameter's actual meaning.
    def open_ro(self, bucket: str, key: str, size: int, mmap: int, label: str) -> DataObject: ...
    # Write-only stream for uploads.
    def open_wo(self, bucket: str, key: str) -> DataObject: ...
    def gen_tar_archive(self, tar_path: str, index_path: str, source_path: str, index_only: bool) -> int: ...
class OssTarIterable:
    """Iterable over the member objects of an OSS tar archive (with index).

    Re-creates its underlying object iterator on each __iter__ call so it
    can be iterated multiple times.
    """

    def __init__(self, client: OssClient, *,
                 tar_uri: str = None,
                 tar_index_uri: str = None,
                 preload: bool = False,
                 chunks: List[Tuple[int, int]] = None):
        log.info("OssTarIterable init, preload: %s", preload)
        self._client = client
        self._tar_uri = tar_uri
        self._tar_index_uri = tar_index_uri
        self._preload = preload
        # Bug fix: was a shared mutable default argument (`chunks=[]`);
        # None now means "no explicit chunks" with identical behavior.
        self._chunks = chunks if chunks is not None else []
        self._list_stream = None

    @classmethod
    def from_tar(cls, tar_uri: str, tar_index_uri: str, client: OssClient, preload: bool = False,
                 chunks: List[Tuple[int, int]] = None):
        """Build an OssTarIterable from a tar archive URI and its index URI.

        Raises:
            ValueError: if either URI is missing or not an oss:// URI.
        """
        if not tar_uri:
            raise ValueError("tar_uri must be non-empty")
        if not tar_uri.startswith("oss://"):
            raise ValueError("only oss:// uri are supported for tar_uri")
        if not tar_index_uri:
            raise ValueError("tar_index_uri must be non-empty")
        if not tar_index_uri.startswith("oss://"):
            raise ValueError("only oss:// uri are supported for tar_index_uri")
        return cls(client, tar_uri=tar_uri, tar_index_uri=tar_index_uri, preload=preload,
                   chunks=chunks)

    def __iter__(self) -> Iterator[DataObject]:
        # Re-create the stream so the iterable supports multiple passes.
        self._list_stream = OssTarObjectsIterator(self._client, self._tar_uri, self._tar_index_uri, self._preload,
                                                  chunks=self._chunks)
        return iter(self._list_stream)

    def __len__(self):
        # Build the stream lazily if __len__ is called before iteration.
        if self._list_stream is None:
            self._list_stream = OssTarObjectsIterator(self._client, self._tar_uri, self._tar_index_uri, self._preload,
                                                      chunks=self._chunks)
        return len(self._list_stream)


class OssTarObjectsIterator:
    """Lists member objects of an OSS tar archive via its index file."""

    def __init__(self, client: OssClient, tar_uri: str, tar_index_uri: str, preload: bool,
                 chunks: List[Tuple[int, int]] = None):
        log.info("OssTarObjectsIterator init")
        tar_bucket, tar_key = parse_oss_uri(tar_uri)
        index_bucket, index_key = parse_oss_uri(tar_index_uri)
        if tar_bucket != index_bucket:
            raise ValueError("tar_uri and tar_index_uri must be in the same bucket")
        # Bug fix: mutable default removed here too; `if chunks` treats both
        # None and [] as "no chunk restriction", as before.
        # Split (start, size) pairs into the parallel lists the client expects.
        starts = [start for start, _ in chunks] if chunks else []
        sizes = [size for _, size in chunks] if chunks else []
        self._list_stream = client.list_objects_from_tar(tar_bucket, tar_key, index_key, prefetch=preload,
                                                         chunks=starts, sizes=sizes)

    def __iter__(self) -> Iterator[DataObject]:
        log.info("OssTarObjectsIterator get iter")
        return iter(self._list_stream)

    def __len__(self):
        return len(self._list_stream)
86 | """ 87 | if not endpoint: 88 | raise ValueError("endpoint must be non-empty") 89 | if not cred_path and not cred_provider: 90 | raise ValueError("neither cred_path nor cred_provider is specified") 91 | client = OssClient(endpoint, cred_path, config_path, cred_provider=cred_provider) 92 | return client.gen_tar_archive(tar_path, index_path, source_path, index_only) 93 | -------------------------------------------------------------------------------- /oss-torch-connector/osstorchconnector/oss_checkpoint.py: -------------------------------------------------------------------------------- 1 | from ._oss_bucket_iterable import parse_oss_uri 2 | from ._oss_client import OssClient, DataObject 3 | from ctypes import * 4 | from typing import Any 5 | 6 | class OssCheckpoint: 7 | """A checkpoint manager for OSS. 8 | 9 | To read a checkpoint from OSS, users need to create an `DataObject` 10 | by providing oss_uri of the checkpoint stored in OSS. Similarly, to save a 11 | checkpoint to OSS, users need to create an `DataObject` by providing oss_uri. 12 | `DataObject` can be passed to torch.load, and torch.save. 13 | """ 14 | 15 | def __init__( 16 | self, 17 | endpoint: str, 18 | cred_path: str = "", 19 | config_path: str = "", 20 | cred_provider: Any = None, 21 | ): 22 | if not endpoint: 23 | raise ValueError("endpoint must be non-empty") 24 | else: 25 | self._endpoint = endpoint 26 | if not cred_path: 27 | self._cred_path = "" 28 | else: 29 | self._cred_path = cred_path 30 | if not config_path: 31 | self._config_path = "" 32 | else: 33 | self._config_path = config_path 34 | self._cred_provider = cred_provider 35 | self._client = OssClient(self._endpoint, self._cred_path, self._config_path, cred_provider=self._cred_provider) 36 | 37 | def reader(self, oss_uri: str): 38 | """Creates an DataObject from a given oss_uri. 39 | 40 | Args: 41 | oss_uri (str): A valid oss_uri. (i.e. 
oss:///) 42 | 43 | Returns: 44 | DataObject: a read-only binary stream of the OSS object's contents, specified by the oss_uri. 45 | """ 46 | bucket, key = parse_oss_uri(oss_uri) 47 | return self._client.get_object(bucket, key, type=1) 48 | 49 | def writer(self, oss_uri: str) -> DataObject: 50 | """Creates an DataObject from a given oss_uri. 51 | 52 | Args: 53 | oss_uri (str): A valid oss_uri. (i.e. oss:///) 54 | 55 | Returns: 56 | DataObject: a write-only binary stream. The content is saved to OSS using the specified oss_uri. 57 | """ 58 | bucket, key = parse_oss_uri(oss_uri) 59 | return self._client.put_object(bucket, key) 60 | -------------------------------------------------------------------------------- /oss-torch-connector/osstorchconnector/oss_iterable_dataset.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from typing import Iterator, Any, Union, Iterable, Callable, Tuple 3 | import io 4 | import torch.utils.data 5 | import uuid 6 | import logging 7 | import random 8 | 9 | from ._oss_client import OssClient, DataObject 10 | from ._oss_bucket_iterable import OssBucketIterable, identity 11 | from ._oss_tar_iterable import OssTarIterable 12 | 13 | log = logging.getLogger(__name__) 14 | 15 | class OssIterableDataset(torch.utils.data.IterableDataset): 16 | """An IterableStyle dataset created from OSS objects. 17 | 18 | To create an instance of OssIterableDataset, you need to use 19 | `from_prefix`, `from_objects`, `from_manifest_file` or `from_tar` methods. 
20 | """ 21 | 22 | def __init__( 23 | self, 24 | endpoint: str, 25 | cred_path: str, 26 | config_path: str, 27 | get_dataset_objects: Callable[[OssClient], Iterable[DataObject]], 28 | transform: Callable[[DataObject], Any] = identity, 29 | cred_provider: Any = None, 30 | from_tar: bool = False, 31 | shuffle: bool = False, 32 | shuffle_chunk_size: int = 1000, 33 | ): 34 | self._uuid = uuid.uuid4() 35 | self._endpoint = endpoint 36 | log.info("OssIterableDataset init, uuid: %s, endpoint: %s", self._uuid, self._endpoint) 37 | if not endpoint: 38 | raise ValueError("endpoint must be non-empty") 39 | if not cred_path: 40 | self._cred_path = "" 41 | else: 42 | self._cred_path = cred_path 43 | self._cred_provider = cred_provider 44 | if not config_path: 45 | self._config_path = "" 46 | else: 47 | self._config_path = config_path 48 | self._get_dataset_objects = get_dataset_objects 49 | self._transform = transform 50 | self._client = None 51 | self._from_tar = from_tar 52 | self._shuffle = shuffle 53 | self._chunk_size = shuffle_chunk_size 54 | if from_tar and shuffle: 55 | self._bucket_objects = self._get_dataset_objects(self._get_client(0, 1), preload=False) 56 | self._dataset_size = len(self._bucket_objects) 57 | self.shuffle() 58 | else: 59 | self._bucket_objects = None 60 | 61 | @classmethod 62 | def from_objects( 63 | cls, 64 | object_uris: Union[str, Iterable[str]], 65 | endpoint: str, 66 | *, 67 | cred_path: str = "", 68 | cred_provider: Any = None, 69 | config_path: str = "", 70 | transform: Callable[[DataObject], Any] = identity, 71 | ): 72 | """Returns an instance of OssIterableDataset using the OSS URI(s) provided. 73 | 74 | Args: 75 | object_uris(str | Iterable[str]): OSS URI of the object(s) desired. 76 | endpoint(str): Endpoint of the OSS bucket where the objects are stored. 77 | cred_path(str): Credential info of the OSS bucket where the objects are stored. 78 | config_path(str): Configuration file path of the OSS connector. 
79 | transform: Optional callable which is used to transform an DataObject into the desired type. 80 | cred_provider: OSS credential provider. 81 | 82 | Returns: 83 | OssIterableDataset: An IterableStyle dataset created from OSS objects. 84 | """ 85 | log.info(f"Building {cls.__name__} from_objects") 86 | return cls( 87 | endpoint, cred_path, config_path, partial(OssBucketIterable.from_uris, object_uris, preload=True), 88 | transform=transform, cred_provider=cred_provider 89 | ) 90 | 91 | @classmethod 92 | def from_prefix( 93 | cls, 94 | oss_uri: str, 95 | endpoint: str, 96 | *, 97 | cred_path: str = "", 98 | cred_provider: Any = None, 99 | config_path: str = "", 100 | transform: Callable[[DataObject], Any] = identity, 101 | ): 102 | """Returns an instance of OssIterableDataset using the OSS URI provided. 103 | 104 | Args: 105 | oss_uri(str): An OSS URI (prefix) of the object(s) desired. Objects matching the prefix will be included in the returned dataset. 106 | endpoint(str): Endpoint of the OSS bucket where the objects are stored. 107 | cred_path(str): Credential info of the OSS bucket where the objects are stored. 108 | config_path(str): Configuration file path of the OSS connector. 109 | transform: Optional callable which is used to transform an DataObject into the desired type. 110 | cred_provider: OSS credential provider. 111 | 112 | Returns: 113 | OssIterableDataset: An IterableStyle dataset created from OSS objects. 
114 | """ 115 | log.info(f"Building {cls.__name__} from_prefix") 116 | return cls( 117 | endpoint, cred_path, config_path, partial(OssBucketIterable.from_prefix, oss_uri, preload=True), 118 | transform=transform, cred_provider=cred_provider 119 | ) 120 | 121 | @classmethod 122 | def from_manifest_file( 123 | cls, 124 | manifest_file_path: str, 125 | manifest_parser: Callable[[io.IOBase], Iterable[Tuple[str, str]]], 126 | oss_base_uri: str, 127 | endpoint: str, 128 | *, 129 | cred_path: str = "", 130 | cred_provider: Any = None, 131 | config_path: str = "", 132 | transform: Callable[[DataObject], Any] = identity, 133 | ): 134 | """Returns an instance of OssIterableDataset using manifest file provided. 135 | 136 | Args: 137 | manifest_file_path(str): OSS URI or local path of manifest file. 138 | manifest_parser: A callable which takes an io.IOBase object and returns an iterable of (object_uri, label). 139 | oss_base_uri(str): The base URI of the OSS object in manifest file. 140 | endpoint(str): Endpoint of the OSS bucket where the objects are stored. 141 | cred_path(str): Credential info of the OSS bucket where the objects are stored. 142 | config_path(str): Configuration file path of the OSS connector. 143 | transform: Optional callable which is used to transform an DataObject into the desired type. 144 | cred_provider: OSS credential provider. 145 | 146 | Returns: 147 | OssIterableDataset: An IterableStyle dataset created from OSS objects. 
148 | """ 149 | log.info(f"Building {cls.__name__} from_manifest_file") 150 | return cls( 151 | endpoint, cred_path, config_path, partial(OssBucketIterable.from_manifest_file, manifest_file_path, manifest_parser, oss_base_uri, preload=True), 152 | transform=transform, cred_provider=cred_provider 153 | ) 154 | 155 | @classmethod 156 | def from_tar( 157 | cls, 158 | tar_uri: str, 159 | tar_index_uri: str, 160 | endpoint: str, 161 | *, 162 | cred_path: str = "", 163 | cred_provider: Any = None, 164 | config_path: str = "", 165 | transform: Callable[[DataObject], Any] = identity, 166 | shuffle: bool = False, 167 | shuffle_chunk_size: int = 1000, 168 | ): 169 | """Returns an instance of OssIterableDataset using tar file provided. 170 | 171 | Args: 172 | tar_uri(str): OSS URI of tar archive. 173 | tar_index_uri(str): OSS URI of tar index file corresponding to tar archive. 174 | shuffle(bool): Whether to shuffle the dataset. 175 | shuffle_chunk_size(int): Size of chunks to shuffle over. 176 | endpoint(str): Endpoint of the OSS bucket where the objects are stored. 177 | cred_path(str): Credential info of the OSS bucket where the objects are stored. 178 | config_path(str): Configuration file path of the OSS connector. 179 | transform: Optional callable which is used to transform an DataObject into the desired type. 180 | cred_provider: OSS credential provider. 181 | 182 | Returns: 183 | OssIterableDataset: An IterableStyle dataset created from tar file. 
184 | """ 185 | log.info(f"Building {cls.__name__} from_tar") 186 | return cls( 187 | endpoint, cred_path, config_path, partial(OssTarIterable.from_tar, tar_uri, tar_index_uri, preload=True), 188 | transform=transform, cred_provider=cred_provider, from_tar=True, shuffle=shuffle, shuffle_chunk_size=shuffle_chunk_size 189 | ) 190 | 191 | def _get_client(self, id, total): 192 | if self._client is None: 193 | self._client = OssClient(self._endpoint, self._cred_path, self._config_path, self._uuid, id, total, cred_provider=self._cred_provider) 194 | log.info("OssIterableDataset new client") 195 | self._client._id = id 196 | self._client._total = total 197 | return self._client 198 | 199 | def _get_transformed_object(self, object: DataObject) -> Any: 200 | return self._transform(object) 201 | 202 | def __iter__(self) -> Iterator[Any]: 203 | worker_info = torch.utils.data.get_worker_info() 204 | 205 | if worker_info is None: # single-process data loading, return the full iterator 206 | log.info("OssIterableDataset get iter (single-process)") 207 | if self._from_tar and self._shuffle: 208 | if len(self._chunks) >= 1: 209 | chunks = self._chunks 210 | else: 211 | chunks = [] 212 | log.info("OssIterableDataset chunk num: %d", len(chunks)) 213 | worker_iter = self._get_dataset_objects(self._get_client(0, 1), chunks=chunks) 214 | else: 215 | worker_iter = self._get_dataset_objects(self._get_client(0, 1)) 216 | else: # in a worker process, split workload 217 | num_workers = worker_info.num_workers 218 | worker_id = worker_info.id 219 | log.info("OssIterableDataset get iter (multi-process), num_workers: %d, worker id: %d", num_workers, worker_id) 220 | if self._from_tar and self._shuffle: 221 | if len(self._chunks) >= num_workers: 222 | chunks = [chunk for i, chunk in enumerate(self._chunks) if i % num_workers == worker_id] 223 | else: 224 | chunks = [] 225 | log.info("OssIterableDataset chunk num: %d", len(chunks)) 226 | worker_iter = 
self._get_dataset_objects(self._get_client(worker_id, num_workers), chunks=chunks) 227 | else: 228 | worker_iter = self._get_dataset_objects(self._get_client(worker_id, num_workers)) 229 | 230 | return map(self._get_transformed_object, worker_iter) 231 | 232 | def shuffle(self, generator=None): 233 | if generator is None: 234 | seed = int(torch.empty((), dtype=torch.int64).random_().item()) 235 | generator = torch.Generator() 236 | generator.manual_seed(seed) 237 | log.debug("OssIterableDataset shuffle seed: %d", seed) 238 | chunks = [] 239 | index = 0 240 | while index < self._dataset_size: 241 | chunk_size = min(max(1, int(random.gauss(self._chunk_size, 10))), self._dataset_size - index) 242 | chunks.append((index, chunk_size)) 243 | index += chunk_size 244 | random_sampler = torch.utils.data.SubsetRandomSampler(chunks, generator=generator) 245 | self._chunks = list(random_sampler) 246 | log.info("OssIterableDataset shuffle chunk indices, dataset size: %d, chunk num: %d", 247 | self._dataset_size, len(self._chunks)) 248 | -------------------------------------------------------------------------------- /oss-torch-connector/osstorchconnector/oss_map_dataset.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from typing import List, Any, Callable, Iterable, Union, Tuple 3 | import io 4 | import torch.utils.data 5 | import uuid 6 | import logging 7 | import time 8 | import os 9 | import errno 10 | 11 | from ._oss_client import OssClient, DataObject 12 | from ._oss_bucket_iterable import OssBucketIterable, identity, parse_oss_uri 13 | from ._oss_tar_iterable import OssTarIterable 14 | 15 | log = logging.getLogger(__name__) 16 | 17 | class OssMapDataset(torch.utils.data.Dataset): 18 | """A Map-Style dataset created from OSS objects. 19 | 20 | To create an instance of OssMapDataset, you need to use 21 | `from_prefix`, `from_objects`, `from_manifest_file` or `from_tar` methods. 
22 | """ 23 | 24 | def __init__( 25 | self, 26 | endpoint: str, 27 | cred_path: str, 28 | config_path: str, 29 | get_dataset_objects: Callable[[OssClient], Iterable[DataObject]], 30 | transform: Callable[[DataObject], Any] = identity, 31 | tar_uri: str = None, 32 | tar_index_uri: str = None, 33 | cred_provider: Any = None, 34 | ): 35 | self._uuid = uuid.uuid4() 36 | self._endpoint = endpoint 37 | log.info("OssMapDataset init, uuid: %s, endpoint: %s", self._uuid, self._endpoint) 38 | init_time = time.time() 39 | if not endpoint: 40 | raise ValueError("endpoint must be non-empty") 41 | if not cred_path: 42 | self._cred_path = "" 43 | else: 44 | self._cred_path = cred_path 45 | self._cred_provider = cred_provider 46 | if not config_path: 47 | self._config_path = "" 48 | else: 49 | self._config_path = config_path 50 | self._get_dataset_objects = get_dataset_objects 51 | self._transform = transform 52 | self._client = OssClient(self._endpoint, self._cred_path, self._config_path, self._uuid, cred_provider=self._cred_provider) 53 | self._client_pid = os.getpid() 54 | self._from_tar = False 55 | if tar_uri and tar_index_uri: 56 | tar_bucket, tar_key = parse_oss_uri(tar_uri) 57 | index_bucket, index_key = parse_oss_uri(tar_index_uri) 58 | if tar_bucket != index_bucket: 59 | raise ValueError("tar_uri and tar_index_uri must be in the same bucket") 60 | self._from_tar = True 61 | self._tar_bucket = tar_bucket 62 | self._tar_key = tar_key 63 | self._tar_index_key = index_key 64 | self._bucket_objects = self._get_dataset_objects(self._client) 65 | else: 66 | self._bucket_objects = list(self._get_dataset_objects(self._client)) 67 | log.info("OssMapDataset init done, uuid: %s, time cost: %.2f s", self._uuid, time.time() - init_time) 68 | 69 | 70 | @property 71 | def _dataset_bucket_objects(self) -> List[DataObject]: 72 | if self._bucket_objects is None: 73 | self._bucket_objects = list(self._get_dataset_objects(self._get_client())) 74 | log.info("OssMapDataset get bucket objects") 
@classmethod
def from_objects(
    cls,
    object_uris: Union[str, Iterable[str]],
    endpoint: str,
    *,
    cred_path: str = "",
    cred_provider: Any = None,
    config_path: str = "",
    transform: Callable[[DataObject], Any] = identity,
):
    """Build an OssMapDataset over an explicit set of OSS object URIs.

    Args:
        object_uris(str | Iterable[str]): OSS URI(s) of the desired object(s).
        endpoint(str): endpoint of the OSS bucket holding the objects.
        cred_path(str): credential file for the bucket.
        cred_provider: OSS credential provider.
        config_path(str): connector configuration file path.
        transform: optional callable turning a DataObject into the desired type.

    Returns:
        OssMapDataset: a map-style dataset over the given objects.
    """
    log.info(f"Building {cls.__name__} from_objects")
    # Defer the actual listing until the dataset constructor owns a client.
    objects_factory = partial(OssBucketIterable.from_uris, object_uris, preload=False)
    return cls(endpoint, cred_path, config_path, objects_factory,
               transform=transform, cred_provider=cred_provider)

@classmethod
def from_prefix(
    cls,
    oss_uri: str,
    endpoint: str,
    *,
    cred_path: str = "",
    cred_provider: Any = None,
    config_path: str = "",
    transform: Callable[[DataObject], Any] = identity,
):
    """Build an OssMapDataset from every object matching an OSS URI prefix.

    Args:
        oss_uri(str): OSS URI prefix; all matching objects join the dataset.
        endpoint(str): endpoint of the OSS bucket holding the objects.
        cred_path(str): credential file for the bucket.
        cred_provider: OSS credential provider.
        config_path(str): connector configuration file path.
        transform: optional callable turning a DataObject into the desired type.

    Returns:
        OssMapDataset: a map-style dataset over the matching objects.
    """
    log.info(f"Building {cls.__name__} from_prefix")
    objects_factory = partial(OssBucketIterable.from_prefix, oss_uri, preload=False)
    return cls(endpoint, cred_path, config_path, objects_factory,
               transform=transform, cred_provider=cred_provider)

@classmethod
def from_manifest_file(
    cls,
    manifest_file_path: str,
    manifest_parser: Callable[[io.IOBase], Iterable[Tuple[str, str]]],
    oss_base_uri: str,
    endpoint: str,
    *,
    cred_path: str = "",
    cred_provider: Any = None,
    config_path: str = "",
    transform: Callable[[DataObject], Any] = identity,
):
    """Build an OssMapDataset from a manifest file of (object_uri, label) entries.

    Args:
        manifest_file_path(str): OSS URI or local path of the manifest file.
        manifest_parser: callable taking an io.IOBase and yielding
            (object_uri, label) pairs.
        oss_base_uri(str): base URI prepended to object URIs in the manifest.
        endpoint(str): endpoint of the OSS bucket holding the objects.
        cred_path(str): credential file for the bucket.
        cred_provider: OSS credential provider.
        config_path(str): connector configuration file path.
        transform: optional callable turning a DataObject into the desired type.

    Returns:
        OssMapDataset: a map-style dataset over the manifest entries.
    """
    log.info(f"Building {cls.__name__} from_manifest_file")
    objects_factory = partial(OssBucketIterable.from_manifest_file,
                              manifest_file_path, manifest_parser, oss_base_uri,
                              preload=False)
    return cls(endpoint, cred_path, config_path, objects_factory,
               transform=transform, cred_provider=cred_provider)

@classmethod
def from_tar(
    cls,
    tar_uri: str,
    tar_index_uri: str,
    endpoint: str,
    *,
    cred_path: str = "",
    cred_provider: Any = None,
    config_path: str = "",
    transform: Callable[[DataObject], Any] = identity,
):
    """Build an OssMapDataset backed by a tar archive and its index.

    Args:
        tar_uri(str): OSS URI of the tar archive.
        tar_index_uri(str): OSS URI of the index corresponding to the archive.
        endpoint(str): endpoint of the OSS bucket holding the objects.
        cred_path(str): credential file for the bucket.
        cred_provider: OSS credential provider.
        config_path(str): connector configuration file path.
        transform: optional callable turning a DataObject into the desired type.

    Returns:
        OssMapDataset: a map-style dataset over the tar archive members.
    """
    log.info(f"Building {cls.__name__} from_tar")
    objects_factory = partial(OssTarIterable.from_tar, tar_uri, tar_index_uri, preload=False)
    return cls(endpoint, cred_path, config_path, objects_factory,
               transform=transform, cred_provider=cred_provider,
               tar_uri=tar_uri, tar_index_uri=tar_index_uri)
196 | """ 197 | log.info(f"Building {cls.__name__} from_tar") 198 | return cls( 199 | endpoint, cred_path, config_path, partial(OssTarIterable.from_tar, tar_uri, tar_index_uri, preload=False), 200 | transform=transform, cred_provider=cred_provider, tar_uri=tar_uri, tar_index_uri=tar_index_uri 201 | ) 202 | 203 | def _get_client(self): 204 | if self._client is None: 205 | self._client = OssClient(self._endpoint, self._cred_path, self._config_path, self._uuid) 206 | log.info("OssMapDataset new client") 207 | if self._client_pid != os.getpid(): 208 | worker_info = torch.utils.data.get_worker_info() 209 | if worker_info is not None: 210 | # reset client id 211 | self._client._id = worker_info.id 212 | self._client._total = worker_info.num_workers 213 | self._client_pid = os.getpid() 214 | return self._client 215 | 216 | def _get_transformed_object_safe(self, object: DataObject) -> Any: 217 | eno = object.err() 218 | if eno != 0: 219 | errstr = "failed to get next object, errno=%d(%s), msg=%s" % (eno, os.strerror(eno), object.error_msg()) 220 | log.error("OssMapDataset get item %s faild: %s", object.key, errstr) 221 | if eno == errno.ENOENT: 222 | return self._transform(None) 223 | else: 224 | raise RuntimeError(errstr) 225 | return self._transform(object) 226 | 227 | def __getitem__(self, i: int) -> Any: 228 | if not self._from_tar: 229 | object = self._dataset_bucket_objects[i] 230 | log.debug("OssMapDataset get item [%d], key: %s, size: %d, label: %s", i, object.key, object.size, object.label) 231 | bucket, key = parse_oss_uri(object.key) 232 | if object.size <= 0: 233 | new_object = self._get_client().get_object(bucket, key, 0, label=object.label, type=2) # mem 234 | else: 235 | new_object = self._get_client().get_object(bucket, key, object.size, label=object.label, type=0) # basic 236 | else: 237 | new_object = self._get_client().get_object(bucket=self._tar_bucket, key=self._tar_key, size=i, 238 | label=self._tar_index_key, type=3) # tar 239 | return 
self._get_transformed_object_safe(new_object) 240 | 241 | def __getitems__(self, indices: List[int]) -> List[Any]: 242 | log.debug("OssMapDataset get items %s", indices) 243 | if not self._from_tar: 244 | objects = [self._dataset_bucket_objects[i] for i in indices] 245 | iter = self._get_client().list_objects_from_uris(objects, prefetch=True, include_errors=True) 246 | # should return list, default collate needs batch be subscriptable 247 | return [self._get_transformed_object_safe(object) for object in iter] 248 | else: 249 | if self.is_continuous(indices): 250 | log.debug("OssMapDataset get items, start: %d, length: %d", indices[0], len(indices)) 251 | iter = self._get_client().list_objects_from_tar(self._tar_bucket, self._tar_key, self._tar_index_key, 252 | [indices[0]], [len(indices)], prefetch=True, include_errors=True) 253 | return [self._get_transformed_object_safe(object) for object in iter] 254 | else: 255 | iter = self._get_client().list_objects_from_tar(self._tar_bucket, self._tar_key, self._tar_index_key, 256 | indices, [], prefetch=True, include_errors=True) 257 | return [self._get_transformed_object_safe(object) for object in iter] 258 | 259 | def __len__(self): 260 | size = len(self._dataset_bucket_objects) 261 | log.info("OssMapDataset get len (%d)", size) 262 | return size 263 | 264 | def is_continuous(self, indices): 265 | for i in range(1, len(indices)): 266 | if indices[i] - indices[i - 1] != 1: 267 | return False 268 | return True 269 | -------------------------------------------------------------------------------- /oss-torch-connector/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "osstorchconnector" 7 | version = "1.0.0rc1" 8 | description = "OSS connector for AI/ML" 9 | requires-python = ">=3.8,<3.13" 10 | readme = "README.md" 11 | dependencies = [ 12 | "torch >= 
2.0", 13 | ] 14 | classifiers = [ 15 | "Development Status :: 4 - Beta", 16 | "Intended Audience :: Developers", 17 | "Topic :: Utilities", 18 | "License :: OSI Approved :: MIT License", 19 | "Operating System :: POSIX :: Linux", 20 | 21 | "Programming Language :: Python :: 3", 22 | "Programming Language :: Python :: 3.8", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Programming Language :: Python :: 3.12", 27 | ] 28 | 29 | [tool.setuptools.packages.find] 30 | where = ["."] 31 | include = ["osstorchconnector", "osstorchconnector._oss_connector"] 32 | 33 | [tool.setuptools.package-data] 34 | osstorchconnector = ["_oss_connector/*.so"] 35 | -------------------------------------------------------------------------------- /oss-torch-connector/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | from setuptools.command.build_ext import build_ext 3 | import os 4 | import subprocess 5 | import shutil 6 | 7 | 8 | class BuildExtension(Extension): 9 | def __init__(self, name, source_dir=''): 10 | Extension.__init__(self, name, sources=[source_dir]) 11 | self.source_dir = os.path.abspath(source_dir) 12 | 13 | class LibraryBuild(build_ext): 14 | user_options = build_ext.user_options + [ 15 | ('library-path=', None, 'oss_connector library path'), 16 | ] 17 | def initialize_options(self): 18 | super().initialize_options() 19 | self.library_path = None 20 | def run(self): 21 | if not self.library_path: 22 | raise RuntimeError("library path is not specified by '--library-path'") 23 | self.library_path = os.path.abspath(self.library_path) 24 | if os.path.exists(self.library_path): 25 | print('library path:', self.library_path) 26 | else: 27 | raise RuntimeError("invalid library path: " + self.library_path) 28 | for ext in self.extensions: 29 | self.build_extension(ext) 30 | 31 | def 
run_command(self, command, cwd): 32 | try: 33 | subprocess.run(command, capture_output=True, text=True, check=True, cwd=cwd) 34 | except subprocess.CalledProcessError as e: 35 | print(f"Command '{' '.join(command)}' failed with exit code {e.returncode}") 36 | print(f"Stdout: {e.stdout}") 37 | print(f"Stderr: {e.stderr}") 38 | raise RuntimeError("Subprocess execution failed") from e 39 | 40 | def build_extension(self, ext): 41 | print('name:', ext.name) 42 | print('source path:', ext.source_dir) 43 | print('current dir:', os.getcwd()) 44 | 45 | # copy .so 46 | library_file_name = os.path.basename(self.library_path) 47 | dest_so_path = os.path.abspath( 48 | os.path.join(self.build_lib, 'osstorchconnector', '_oss_connector', library_file_name)) 49 | print('copy %s to %s' % (self.library_path, dest_so_path)) 50 | shutil.copy(self.library_path, dest_so_path) 51 | 52 | 53 | setup( 54 | ext_modules=[BuildExtension('oss_connector', '.')], 55 | cmdclass=dict(build_ext=LibraryBuild), 56 | ) 57 | -------------------------------------------------------------------------------- /oss-torch-connector/tools/generate_tar_archive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Generate tar archive and its index 5 | 6 | This script is designed to generate a tar archive and its corresponding index. 7 | It can also generate an index from an existing tar archive. 8 | Both source and target path can be specified by OSS URI or local path. 9 | 10 | Usage: 11 | 1. Generate tar archive and its index from source: 12 | python generate_tar_archive.py --endpoint --cred-path --config-path \ 13 | --tar-path --index-path --source-path 14 | 2. 
Generate tar index from existing tar archive: 15 | python generate_tar_archive.py --endpoint --cred-path --config-path \ 16 | --tar-path --index-path --index-only 17 | """ 18 | 19 | from osstorchconnector import generate_tar_archive 20 | import argparse 21 | 22 | parser = argparse.ArgumentParser(description='Generate tar archive and its index') 23 | parser.add_argument('-ep', '--endpoint', type=str, help='Endpoint of the OSS bucket where the objects are stored.') 24 | parser.add_argument('--cred-path', type=str, help='Credential info of the OSS bucket where the objects are stored.') 25 | parser.add_argument('--config-path', type=str, help='Configuration file path of the OSS connector.') 26 | parser.add_argument('--tar-path', type=str, help='Path to the tar archive. (OSS URI or local path)') 27 | parser.add_argument('--index-path', type=str, help='Path to the tar index. (OSS URI or local path)') 28 | parser.add_argument('--source-path', type=str, help='Path to the source directory. (OSS URI or local path)') 29 | parser.add_argument('--index-only', action='store_true', help='''If True, generate tar index from tar archive specified by 'tar_path', 30 | otherwise (by default) generate tar archive and its index from source directory specified by 'source_path'.''') 31 | 32 | 33 | def main(): 34 | args = parser.parse_args() 35 | generate_tar_archive(args.endpoint, args.cred_path, args.config_path, args.tar_path, args.index_path, args.source_path, args.index_only) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | --------------------------------------------------------------------------------