├── .clang-format ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── README.zh.md ├── bench ├── io_bench.py └── io_bench.sh ├── docs ├── encrypt_model.md ├── imgs │ └── SFCS.png ├── index.md └── pin_mem.md ├── mkdocs.yml ├── pyproject.toml ├── setup.py ├── tests ├── test_assert_exceptions.py ├── test_convert.py ├── test_fetch_cipher.py ├── test_load_op.py ├── test_save_op.py ├── test_sfcs_sdk_op.py └── test_share_tensor_cpu.py └── veturboio ├── __init__.py ├── convert.py ├── io.py ├── loader ├── __init__.py ├── base_loader.py ├── faster_posix_loader.py └── sfcs_client_loader.py ├── ops ├── __init__.py ├── cipher.py ├── consts.py ├── csrc │ ├── cipher.cpp │ ├── include │ │ ├── cfs.h │ │ ├── cfsaio.h │ │ ├── cipher.h │ │ ├── common.h │ │ ├── fastcrypto.h │ │ ├── io_helper.h │ │ ├── logging.h │ │ ├── posix.h │ │ └── sfcs.h │ ├── io_helper.cu │ ├── io_helper_cpu.cpp │ ├── io_helper_cpu_common.cpp │ ├── io_helper_npu.cpp │ ├── lib │ │ └── libfastcrypto_gpu.so.0.3 │ ├── posix.cpp │ ├── pybind.cpp │ └── sfcs.cpp ├── io_utils.py ├── posix_utils.py └── sfcs_utils.py ├── safetensors.py ├── saver ├── __init__.py ├── base_saver.py └── sfcs_client_saver.py ├── types.py ├── utils ├── __init__.py └── load_veturboio_ext.py └── version.py /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Microsoft 4 | AccessModifierOffset: -2 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveMacros: false 7 | AlignConsecutiveAssignments: false 8 | AlignConsecutiveDeclarations: false 9 | AlignEscapedNewlines: Right 10 | AlignOperands: true 11 | AlignTrailingComments: true 12 | AllowAllArgumentsOnNextLine: true 13 | AllowAllConstructorInitializersOnNextLine: true 14 | AllowAllParametersOfDeclarationOnNextLine: true 15 | AllowShortBlocksOnASingleLine: Never 16 | AllowShortCaseLabelsOnASingleLine: false 17 | AllowShortFunctionsOnASingleLine: None 18 | 
AllowShortLambdasOnASingleLine: All 19 | AllowShortIfStatementsOnASingleLine: Never 20 | AllowShortLoopsOnASingleLine: false 21 | AlwaysBreakAfterDefinitionReturnType: None 22 | AlwaysBreakAfterReturnType: None 23 | AlwaysBreakBeforeMultilineStrings: false 24 | AlwaysBreakTemplateDeclarations: MultiLine 25 | BinPackArguments: true 26 | BinPackParameters: true 27 | BraceWrapping: 28 | AfterCaseLabel: false 29 | AfterClass: true 30 | AfterControlStatement: true 31 | AfterEnum: true 32 | AfterFunction: true 33 | AfterNamespace: true 34 | AfterObjCDeclaration: true 35 | AfterStruct: true 36 | AfterUnion: false 37 | AfterExternBlock: true 38 | BeforeCatch: true 39 | BeforeElse: true 40 | IndentBraces: false 41 | SplitEmptyFunction: true 42 | SplitEmptyRecord: true 43 | SplitEmptyNamespace: true 44 | BreakBeforeBinaryOperators: None 45 | BreakBeforeBraces: Custom 46 | BreakBeforeInheritanceComma: false 47 | BreakInheritanceList: BeforeColon 48 | BreakBeforeTernaryOperators: true 49 | BreakConstructorInitializersBeforeComma: false 50 | BreakConstructorInitializers: BeforeColon 51 | BreakAfterJavaFieldAnnotations: false 52 | BreakStringLiterals: true 53 | ColumnLimit: 120 54 | CommentPragmas: '^ IWYU pragma:' 55 | CompactNamespaces: false 56 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 57 | ConstructorInitializerIndentWidth: 4 58 | ContinuationIndentWidth: 4 59 | Cpp11BracedListStyle: true 60 | DeriveLineEnding: true 61 | DerivePointerAlignment: false 62 | DisableFormat: false 63 | ExperimentalAutoDetectBinPacking: false 64 | FixNamespaceComments: true 65 | ForEachMacros: 66 | - foreach 67 | - Q_FOREACH 68 | - BOOST_FOREACH 69 | IncludeBlocks: Preserve 70 | IncludeCategories: 71 | - Regex: '^"(llvm|llvm-c|clang|clang-c)/' 72 | Priority: 2 73 | SortPriority: 0 74 | - Regex: '^(<|"(gtest|gmock|isl|json)/)' 75 | Priority: 3 76 | SortPriority: 0 77 | - Regex: '.*' 78 | Priority: 1 79 | SortPriority: 0 80 | IncludeIsMainRegex: '(Test)?$' 81 | 
IncludeIsMainSourceRegex: '' 82 | IndentCaseLabels: false 83 | IndentGotoLabels: true 84 | IndentPPDirectives: None 85 | IndentWidth: 4 86 | IndentWrappedFunctionNames: false 87 | JavaScriptQuotes: Leave 88 | JavaScriptWrapImports: true 89 | KeepEmptyLinesAtTheStartOfBlocks: true 90 | MacroBlockBegin: '' 91 | MacroBlockEnd: '' 92 | MaxEmptyLinesToKeep: 1 93 | NamespaceIndentation: None 94 | ObjCBinPackProtocolList: Auto 95 | ObjCBlockIndentWidth: 2 96 | ObjCSpaceAfterProperty: false 97 | ObjCSpaceBeforeProtocolList: true 98 | PenaltyBreakAssignment: 2 99 | PenaltyBreakBeforeFirstCallParameter: 19 100 | PenaltyBreakComment: 300 101 | PenaltyBreakFirstLessLess: 120 102 | PenaltyBreakString: 1000 103 | PenaltyBreakTemplateDeclaration: 10 104 | PenaltyExcessCharacter: 1000000 105 | PenaltyReturnTypeOnItsOwnLine: 1000 106 | PointerAlignment: Right 107 | ReflowComments: true 108 | SortIncludes: false 109 | SortUsingDeclarations: true 110 | SpaceAfterCStyleCast: false 111 | SpaceAfterLogicalNot: false 112 | SpaceAfterTemplateKeyword: true 113 | SpaceBeforeAssignmentOperators: true 114 | SpaceBeforeCpp11BracedList: false 115 | SpaceBeforeCtorInitializerColon: true 116 | SpaceBeforeInheritanceColon: true 117 | SpaceBeforeParens: ControlStatements 118 | SpaceBeforeRangeBasedForLoopColon: true 119 | SpaceInEmptyBlock: false 120 | SpaceInEmptyParentheses: false 121 | SpacesBeforeTrailingComments: 1 122 | SpacesInAngles: false 123 | SpacesInConditionalStatement: false 124 | SpacesInContainerLiterals: true 125 | SpacesInCStyleCastParentheses: false 126 | SpacesInParentheses: false 127 | SpacesInSquareBrackets: false 128 | SpaceBeforeSquareBrackets: false 129 | Standard: Latest 130 | StatementMacros: 131 | - Q_UNUSED 132 | - QT_REQUIRE_VERSION 133 | TabWidth: 4 134 | UseCRLF: false 135 | UseTab: Never 136 | ... 
137 | 138 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into 
this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | # local build cache 163 | build 164 | dist 165 | *.pt 166 | veturbo/ops/lib/ 167 | veturbo/lego_pipeline/lib/ 168 | 169 | # cmake 170 | CMakeFiles/ 171 | CMakeCache.txt 172 | CMakeScripts/ 173 | CMakeTmp/ 174 | cmake_install.cmake 175 | Makefile 176 | cmake-build-debug/ 177 | cmake-build-release/ 178 | cmake-build-relwithdebinfo/ 179 | cmake-build-minsize/ 180 | 181 | # library 182 | !veturboio/ops/csrc/lib/ 183 | !veturboio/ops/csrc/lib/*.so 184 | 185 | # vscode 186 | .vscode 187 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. See [conventional commits](https://www.conventionalcommits.org/) for commit guidelines. 4 | 5 | --- 6 | ## [0.1.3] - 2024-04-26 7 | 8 | ### Bug Fixes 9 | 10 | - **(load)** set cuda device in each thread 11 | - **(security)** fix typo in comments 12 | - **(ut)** clean files in ut 13 | - directIO segmentation fault 14 | - modify aksk less real 15 | 16 | ### Features 17 | 18 | - **(security)** compat with cipher header and use cipher in posix 19 | - **(sfcs)** set sfcs sync interval by environ 20 | - **(sfcs)** reduce memcpy 21 | - **(sfcs)** parse sfcs confs from environ in json format 22 | - add clone mode for shared tensor 23 | - get cfs task id from env 24 | 25 | ### Miscellaneous Chores 26 | 27 | - **(security)** clarify cipher readme 28 | - cpp coding style 29 | 30 | ### License 31 | 32 | - add license file and header 33 | 34 | ## [0.1.2] - 2024-01-25 35 | 36 | ### Bug Fixes 37 | 38 | - **(saver)** add return to remove repetitive writing 39 | - **(security)** socket path and ut bug 40 | - MANIFEST does not contain all fastcrypto lib files 41 | 42 | ### Documentation 
43 | 44 | - update readme 45 | 46 | ### Features 47 | 48 | - **(security)** fetch key and iv 49 | - **(security)** get and refresh sfcs aksk from datapipe 50 | - **(security)** get namenode ip from datapipe and fix write xml bug 51 | - **(sfcs)** decide load use sfcs sdk from environ 52 | 53 | ## [0.1.1] - 2023-11-17 54 | 55 | ### Bug Fixes 56 | 57 | - **(sfcs)** keep in consistent with reading when open for writing 58 | - **(ut)** delete potential residual test file before testing 59 | - fix ci release and update readme for pip install 60 | 61 | ### Documentation 62 | 63 | - use index-url as default install method 64 | 65 | ### Features 66 | 67 | - **(ci)** add import format tool in ci 68 | - **(saver)** introduce saver class to aggregate save operations 69 | - **(security)** add cipher in sfcs sdk 70 | - **(sfcs)** load and save pt 71 | - load pt file in parallel from sfcs 72 | 73 | ### Miscellaneous Chores 74 | 75 | - bump version to v0.1.0 76 | - bump version to v0.1.1 77 | 78 | ### Performance 79 | 80 | - make the read usage with good alignment. 81 | 82 | 83 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include veturboio/ops/csrc/lib/*.so 2 | include veturboio/ops/csrc/lib/*.so.* 3 | include veturboio/ops/csrc/include/*.h 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # veTurboIO 2 | 3 | 4 | [En](./README.md) | [中文](./README.zh.md) 5 | 6 | 7 | A Python library for high-performance reading and writing of PyTorch model files 8 | developed by Volcano Engine. This library mainly implements based on the safetensors 9 | file format to achieve efficient storage and reading of tensor data. 10 | 11 | ## Install 12 | 13 | It can be installed directly through the following way: 14 | ```bash 15 | cd veturboio 16 | python setup.py get_libcfs 17 | python setup.py install 18 | ``` 19 | 20 | Tips: This instruction will preferentially download the whl file that matches the 21 | current Python and PyTorch versions. 
If no matching whl file is found, it will 22 | automatically download the source code for compilation and installation. 23 | 24 | 25 | If the installation fails, you can also try to install by downloading the source code, 26 | and then compile and install it manually. 27 | 28 | ```bash 29 | # CUDA ops, default 30 | python setup.py install --cuda_ext 31 | 32 | # NPU ops 33 | python setup.py install --npu_ext 34 | 35 | # CPU only 36 | python setup.py install --cpu_ext 37 | ``` 38 | 39 | 40 | ## Quick Start 41 | 42 | ### Read and write model files 43 | 44 | 45 | ```python 46 | import torch 47 | import veturboio 48 | 49 | tensors = { 50 | "weight1": torch.zeros((1024, 1024)), 51 | "weight2": torch.zeros((1024, 1024)) 52 | } 53 | 54 | veturboio.save_file(tensors, "model.safetensors") 55 | 56 | new_tensors = veturboio.load("model.safetensors") 57 | 58 | # check if the tensors are the same 59 | for k, v in tensors.items(): 60 | assert torch.allclose(v, new_tensors[k]) 61 | ``` 62 | 63 | ### Convert existing PyTorch files 64 | 65 | ```bash 66 | python -m veturboio.convert -i model.pt -o model.safetensors 67 | ``` 68 | 69 | ## Performance test 70 | 71 | Run directly: 72 | ```bash 73 | bash bench/io_bench.sh 74 | ``` 75 | Then, you can get the following results: 76 | ``` 77 | fs_name tensor_size veturboio load_time(s) torch load_time(s) 78 | shm 1073741824 0.08 0.63 79 | shm 2147483648 0.19 1.26 80 | shm 4294967296 0.36 2.32 81 | ``` 82 | 83 | Also, you can run the following command to get more options: 84 | ```bash 85 | python bench/io_bench.py -h 86 | ``` 87 | 88 | ## Advance Features 89 | 90 | ### Using veMLP to accelerate reading and writing 91 | Volcano Engine Machine Learning Platform (veMLP) provides a distributed cache file system 92 | based on the physical disks of the GPU cluster. 93 | 94 |

95 | 96 |

97 | 98 | When a cluster-level task needs to read 99 | a model file, the caching system can efficiently distribute the model file between GPU 100 | machines via RDMA transfer, thus avoiding network transfer bottlenecks. When using this 101 | system, veTurboIO can maximize its performance advantages. 102 | 103 | ### Encrypt and decrypt model files 104 | veTurboIO supports encryption and decryption of model files. You can read the [tutorial](./docs/encrypt_model.md) 105 | to learn how to keep your model files secure. When you use GPU as target device, veTurboIO can decrypt the model file on the fly. 106 | 107 | 108 | ## License 109 | 110 | [Apache License 2.0](./LICENSE) 111 | 112 | -------------------------------------------------------------------------------- /README.zh.md: -------------------------------------------------------------------------------- 1 | # veTurboIO 2 | 3 | 4 | [en](./README.md) | [中文](./README.zh.md) 5 | 6 | 7 | 一个由 Volcano Engine 开发的用于高性能读写 PyTorch 模型文件的 Python 库。该库主要基于 safetensors 文件格式实现,以实现对张量数据的高效存储和读取。 8 | 9 | ## 安装 10 | 11 | 可以直接通过以下方式安装: 12 | ```bash 13 | pip install veturboio -f https://veturbo-cn-beijing.tos-cn-beijing.volces.com/veturboio/index.html --no-build-isolation 14 | ``` 15 | 16 | 提示:此指令会优先下载与当前 Python 和 PyTorch 版本匹配的 whl 文件,如果没有找到匹配的 whl 文件,会自动下载源码进行编译安装。 17 | 18 | 如果安装失败,也可以尝试通过下载源码安装,然后手动编译安装。 19 | ```bash 20 | # CUDA ops, default 21 | python setup.py install --cuda_ext 22 | 23 | # NPU ops 24 | python setup.py install --npu_ext 25 | 26 | # CPU only 27 | python setup.py install --cpu_ext 28 | ``` 29 | 30 | ## 快速开始 31 | 32 | ### 读写模型文件 33 | 34 | 35 | ```python 36 | import torch 37 | import veturboio 38 | 39 | tensors = { 40 | "weight1": torch.zeros((1024, 1024)), 41 | "weight2": torch.zeros((1024, 1024)) 42 | } 43 | 44 | veturboio.save_file(tensors, "model.safetensors") 45 | 46 | new_tensors = veturboio.load("model.safetensors") 47 | 48 | # check if the tensors are the same 49 | for k, v in tensors.items(): 50 | assert 
torch.allclose(v, new_tensors[k]) 51 | ``` 52 | 53 | ## 转换已有 PyTorch 文件 54 | 55 | ```bash 56 | python -m veturboio.convert -i model.pt -o model.safetensors 57 | ``` 58 | 59 | ## 性能测试 60 | 61 | 直接运行: 62 | ```bash 63 | bash bench/io_bench.sh 64 | ``` 65 | 66 | 接下来,你可以获得如下的结果: 67 | ``` 68 | fs_name tensor_size veturboio load_time(s) torch load_time(s) 69 | shm 1073741824 0.08 0.63 70 | shm 2147483648 0.19 1.26 71 | shm 4294967296 0.36 2.32 72 | ``` 73 | 74 | ## 进阶功能 75 | 76 | ### 使用 veMLP 加速读写 77 | Volcano Engine Machine Learning Platform (veMLP) 提供了基于 GPU 集群的物理磁盘的分布式缓存文件系统。 78 | 79 |

80 | 81 |

82 | 83 | 当集群级任务需要读取模型文件时,缓存系统可以通过 RDMA 传输高效地在 GPU 机器之间分发模型文件,从而避免网络传输瓶颈。使用此系统时,veTurboIO 可以最大化其性能优势。 84 | 85 | 86 | ### 加密和解密模型文件 87 | 88 | veTurboIO 支持模型文件的加密和解密。您可以阅读[教程]([tutorial](./docs/encrypt_model.md))以了解如何保护您的模型文件。当您使用 GPU 作为目标设备时,veTurboIO 可以实时解密模型文件。 89 | 90 | ## 许可证 91 | 92 | [Apache License 2.0](./LICENSE) 93 | -------------------------------------------------------------------------------- /bench/io_bench.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import argparse 18 | import os 19 | import time 20 | from functools import lru_cache 21 | 22 | import numpy as np 23 | import torch 24 | 25 | import veturboio 26 | 27 | 28 | def human_read_to_byte(size): 29 | factors = { 30 | 'B': 1, 31 | 'KB': 1024, 32 | 'MB': 1048576, 33 | 'GB': 1073741824, 34 | 'TB': 1099511627776, 35 | 'PB': 1125899906842624, 36 | 'EB': 1152921504606846976, 37 | 'ZB': 1180591620717411303424, 38 | 'YB': 1208925819614629174706176, 39 | } 40 | if size[-2:] in factors: 41 | return factors[size[-2:]] * int(size[:-2]) 42 | elif size[-1:] in factors: 43 | return int(size[:-1]) 44 | else: 45 | return int(size) 46 | 47 | 48 | def parse_args(): 49 | parser = argparse.ArgumentParser( 50 | description='benchmark veturboio, notice to clear page cache manually when benchmarking for existing file' 51 | ) 52 | parser.add_argument( 53 | '--begin', 54 | default='1048576', 55 | dest='begin', 56 | help='specify the minimum file size to benchmark in bytes or in format like xxKB/MB/GB', 57 | ) 58 | parser.add_argument( 59 | '--end', 60 | default='1048576', 61 | dest='end', 62 | help='specify the maximum file size to benchmark in bytes or in format like xxKB/MB/GB', 63 | ) 64 | parser.add_argument('--base_dir', dest='base_dir', help='specify the the base dir of files to be benchmarked') 65 | parser.add_argument('--fs_name', default='local_fs', help='file system name that would be displayed in the result') 66 | parser.add_argument('--gen_data', default=False, action=argparse.BooleanOptionalAction, dest='gen_data') 67 | parser.add_argument( 68 | '--map_location', default='cpu', dest='map_location', help='map location of tensor to be loaded' 69 | ) 70 | parser.add_argument('--use_pinmem', default=False, action=argparse.BooleanOptionalAction, dest='use_pinmem') 71 | parser.add_argument( 72 | '--load_mode', default='veturboio', dest='load_mode', help='load modes specified, seperated by comma' 73 | ) 74 | 75 | args = parser.parse_args() 76 | return args 
77 | 78 | 79 | def print_header(load_modes): 80 | mode_list = list(map(lambda mode: f"{mode}{' load_time(s)' + ' ':<25}", load_modes)) 81 | print(f"{'fs_name' + ' ':<10} {'tensor_size' + ' ':<15}", ' '.join(mode_list)) 82 | 83 | 84 | def print_load_time(fs_name, tensor_size, load_times): 85 | load_times = list(map(lambda load_time: f"{load_time}{' ':<30}", load_times)) 86 | print(f"{fs_name:<10} {str(tensor_size):<15}", ' '.join(load_times)) 87 | 88 | 89 | def sfcs_env(): 90 | os.environ['SFCS_FSNAME'] = 'byted-cpu-sfcs' 91 | os.environ['SFCS_REGION'] = 'cn-beijing' 92 | os.environ['SFCS_ACCESS_KEY'] = os.environ['CI_SFCS_AK'] 93 | os.environ['SFCS_SECRET_KEY'] = os.environ['CI_SFCS_SK'] 94 | os.environ['SFCS_AUTHENTICATION_SERVICE_NAME'] = 'cfs' 95 | os.environ['SFCS_NS_ID'] = '18014398509481988' 96 | os.environ['SFCS_UFS_PATH'] = 'tos://yinzq-bucket/' 97 | os.environ['SFCS_MULTI_NIC_WHITELIST'] = 'eth0' 98 | os.environ['SFCS_NETWORK_SEGMENT'] = '172.31.128.0/17' 99 | os.environ['SFCS_NAMENODE_ENDPOINT_ADDRESS'] = '100.67.19.231' 100 | os.environ['SFCS_LOG_SEVERITY'] = 'ERROR' 101 | 102 | 103 | def main(): 104 | args = parse_args() 105 | if args.base_dir.startswith('sfcs://'): 106 | sfcs_env() 107 | load_modes = args.load_mode.split(',') 108 | # warmup GPU otherwise the first case would be slow 109 | device = torch.device(args.map_location) 110 | if device.type == "cuda": 111 | file_path = os.path.join(args.base_dir if args.base_dir else "", 'warmup.safetensors') 112 | tensors = {"weight": torch.randn(10)} 113 | veturboio.save_file(tensors, file_path) 114 | veturboio.load(file_path, map_location=args.map_location, use_pinmem=args.use_pinmem) 115 | print_header(load_modes) 116 | tensor_size = human_read_to_byte(args.begin) 117 | end_size = human_read_to_byte(args.end) 118 | while tensor_size <= end_size: 119 | if args.gen_data: 120 | numel = tensor_size // np.dtype(float).itemsize * 2 121 | tensors = {"weight": torch.randn(numel)} 122 | load_times = [] 123 | for 
mode in load_modes: 124 | if mode == 'veturboio': 125 | file_path = os.path.join(args.base_dir if args.base_dir else "", f'{tensor_size}.safetensors') 126 | if args.gen_data: 127 | veturboio.save_file(tensors, file_path) 128 | 129 | start = time.time() 130 | loaded_tensor = veturboio.load(file_path, map_location=args.map_location, use_pinmem=args.use_pinmem) 131 | if mode == 'torch': 132 | file_path = os.path.join(args.base_dir if args.base_dir else "", f'{tensor_size}.pt') 133 | if args.gen_data: 134 | veturboio.save_pt(tensors, file_path) 135 | 136 | start = time.time() 137 | 138 | loaded_tensor = veturboio.load(file_path, map_location=args.map_location) 139 | end = time.time() 140 | load_times.append("%.2f" % (end - start)) 141 | 142 | if device.type == "cuda": 143 | del loaded_tensor 144 | torch.cuda.empty_cache() 145 | 146 | print_load_time(args.fs_name, tensor_size, load_times) 147 | tensor_size = tensor_size * 2 148 | 149 | 150 | if __name__ == '__main__': 151 | main() 152 | -------------------------------------------------------------------------------- /bench/io_bench.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | ### 16 | 17 | # shm 18 | mkdir -p /dev/shm/test_files 19 | python bench/io_bench.py --load_mode=veturboio,torch --base_dir=/dev/shm/test_files --begin=1GB --end=4GB --gen_data --fs_name=shm 20 | 21 | # sfcs 22 | python bench/io_bench.py --load_mode=veturboio,torch --base_dir=sfcs:// --begin=1GB --end=4GB --gen_data --fs_name=sfcs 23 | -------------------------------------------------------------------------------- /docs/encrypt_model.md: -------------------------------------------------------------------------------- 1 | # 加解密模型文件 2 | 3 | 该库底层通过两种接口读写:SFCS SDK 和 POSIX。如果文件路径前缀为 `sfcs://` 就视为使用 SFCS SDK,所需的鉴权信息可以从火山引擎可信服务的 `unix domain socket` 获取或者设置以下三个环境变量: 4 | 5 | | 环境变量名 | 含义 | 6 | | ------------------------------ | --------------------------------- | 7 | | SFCS_ACCESS_KEY | SFCS 文件系统的 AK | 8 | | SFCS_SECRET_KEY | SFCS 文件系统的 SK | 9 | | SFCS_NAMENODE_ENDPOINT_ADDRESS | SFCS 文件系统 NameNode 地址 | 10 | 11 | 12 | 加解密读写模型文件需要 data key 和 iv,有 3 种获取方式,读取优先级按照下列顺序: 13 | - [1] 加密的 data key 和 iv 存放在密文模型文件的 header 中,使用火山引擎 KMS 解密得到明文的 data key。 14 | - [1.1] 访问 KMS 所需的 AK/SK/ST 从火山引擎可信服务的 unix domain socket 获取,需要额外挂载。 15 | - [1.2] 访问 KMS 所需的 AK/SK/ST 从环境变量获取。 16 | - [2] 访问火山引擎可信服务的 unix domain socket 直接获取 data key 和 iv,需要额外挂载。 17 | - [3] 通过环境变量直接设置 data key 和 iv。 18 | 19 | 不同方式需要设置的环境变量如下: 20 | 21 | | 环境变量名 | 含义 | 22 | | ------------------------------ | --------------------------------- | 23 | | VETURBOIO_KMS_HOST | [1] KMS 服务地址,默认值 open.volcengineapi.com| 24 | | VETURBOIO_KMS_REGION | [1] KMS 服务所在区域,默认值 cn-beijing | 25 | | VETURBOIO_KMS_KEYRING_NAME | [1] KMS 服务解密 data key 的钥匙环名 | 26 | | VETURBOIO_KMS_KEY_NAME | [1] KMS 服务解密 data key 的主密钥名 | 27 | | DATAPIPE_SOCKET_PATH | [1.1][2] 可信服务 uds 的路径 | 28 | | VETURBOIO_KMS_ACCESS_KEY | [1.2] KMS 鉴权的 AK | 29 | | VETURBOIO_KMS_SECRET_KEY | [1.2] KMS 鉴权的 SK | 30 | | VETURBOIO_KMS_SESSION_TOKEN | [1.2] KMS 鉴权的临时令牌,非必需| 31 | | VETURBOIO_KEY | [3] 加解密的 128 位数据密钥的 base64 编码 | 32 | | VETURBOIO_IV | [3] 加解密的 128 位初始向量的 base64 编码 | 33 | 
34 | 35 | 按照上述三种方式设置好后,可以参考下面代码在读写模型文件时启用加解密: 36 | ```python 37 | import torch 38 | import veturboio 39 | 40 | tensors = { 41 | "weight1": torch.zeros((1024, 1024)), 42 | "weight2": torch.zeros((1024, 1024)) 43 | } 44 | 45 | # use cpu to encrypt 46 | veturboio.save_file(tensors, "sfcs://model.safetensors", use_cipher=True) 47 | 48 | # use cpu to decrypt if map_location is cpu 49 | reloaded_tensor1 = veturboio.load("sfcs://model.safetensors", map_location="cpu", use_cipher=True) 50 | 51 | # use gpu to decrypt if map_location is cuda 52 | reloaded_tensor2 = veturboio.load("sfcs://model.safetensors", map_location="cuda:0", use_cipher=True) 53 | 54 | # check if the tensors are the same 55 | for k, v in tensors.items(): 56 | assert torch.allclose(v, reloaded_tensor1[k]) 57 | for k, v in tensors.items(): 58 | assert torch.allclose(v, reloaded_tensor2[k]) 59 | ``` 60 | 61 | -------------------------------------------------------------------------------- /docs/imgs/SFCS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/veTurboIO/0ed05516013da4332433784660660ca4d1904505/docs/imgs/SFCS.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # veTurboIO 2 | 3 | 火山引擎研发的一款用于高性能读写 PyTorch 模型文件的 Python 库。该库实现了主要基于 safetensors 文件格式,实现高效的存储与读取张量数据。 4 | 5 | ## 安装 6 | 7 | ```bash 8 | cd veturboio 9 | python setup.py install 10 | ``` 11 | 12 | ## 快速开始 13 | 14 | ```python 15 | import torch 16 | import veturboio 17 | 18 | tensors = { 19 | "weight1": torch.zeros((1024, 1024)), 20 | "weight2": torch.zeros((1024, 1024)) 21 | } 22 | 23 | veturboio.save_file(tensors, "model.safetensors") 24 | 25 | reloaded_tensor = veturboio.load("model.safetensors", map_location="cpu") 26 | 27 | # check if the tensors are the same 28 | for k, v in tensors.items(): 29 | assert torch.allclose(v, 
reloaded_tensor[k]) 30 | ``` 31 | 32 | ### 使用锁页内存加速连续加载数据到GPU 33 | ```python 34 | import torch 35 | import veturboio 36 | 37 | tensors1 = { 38 | "weight1": torch.zeros((1024, 1024)), 39 | "weight2": torch.zeros((1024, 1024)) 40 | } 41 | 42 | veturboio.save_file(tensors1, "model1.safetensors") 43 | 44 | tensors2 = { 45 | "weight1": torch.zeros((1024, 1024)), 46 | "weight2": torch.zeros((1024, 1024)) 47 | } 48 | 49 | veturboio.save_file(tensors2, "model2.safetensors") 50 | 51 | helper = veturboio.init_io_helper() 52 | reloaded_tensor1 = veturboio.load("model1.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper) 53 | # the map_location may be different 54 | reloaded_tensor2 = veturboio.load("model2.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper) 55 | 56 | # check if the tensors are the same 57 | for k, v in tensors1.items(): 58 | assert torch.allclose(v.cuda(), reloaded_tensor1[k]) 59 | for k, v in tensors2.items(): 60 | assert torch.allclose(v.cuda(), reloaded_tensor2[k]) 61 | ``` 62 | 63 | ### 转换现有的 PyTorch 文件 64 | ```bash 65 | python -m veturboio.convert -i model.pt -o model.safetensors 66 | ``` 67 | 68 | 69 | ## 特性 70 | 71 | - 多线程读取文件; 72 | - zero-copy 读取,不额外花费内存; 73 | - 支持直接加载到 CUDA; 74 | - BFloat16 数值支持; 75 | - 固定 pinmem 用于快速反复读取; 76 | - 兼容 PyTorch 标准格式(无性能提升); 77 | - 兼容 safetensors 格式; 78 | 79 | ## 收益 80 | 81 | 标准的 PyTorch 模型文件会经过 zip 与 pickle 两次操作,这两个操作极大的抑制了读取的速度,同时 unpickle 也会带来潜在的不安全性。我们使用一种自定义的模型格式来存储 tensor 数据,希望可以改善 PyTorch 标准格式所存在的这些问题。目前已经实现的优点有: 82 | 83 | - 多线程读取:当前文件对象主要的存放点为云端存储,单一进程无法达到云存储的带宽上限,必须使用多线程读取才能达到最大的读取速度。PyTorch 标准格式的读取速度受限于 pickle 解析速度,远无法达到云存储的速度上限; 84 | - 云端适配:基于火山引擎的云端存储(vePFS、SFCS)特性,最大化的利用了云端存储的带宽; 85 | - 安全性:不再使用 pickle 对象,避免了 pickle 的安全性问题; 86 | 87 | -------------------------------------------------------------------------------- /docs/pin_mem.md: -------------------------------------------------------------------------------- 1 | ### 使用锁页内存加速连续加载数据到GPU 2 | ```python 3 | import torch 4 | 
import veturboio 5 | 6 | tensors1 = { 7 | "weight1": torch.zeros((1024, 1024)), 8 | "weight2": torch.zeros((1024, 1024)) 9 | } 10 | 11 | veturboio.save_file(tensors1, "model1.safetensors") 12 | 13 | tensors2 = { 14 | "weight1": torch.zeros((1024, 1024)), 15 | "weight2": torch.zeros((1024, 1024)) 16 | } 17 | 18 | veturboio.save_file(tensors2, "model2.safetensors") 19 | 20 | helper = veturboio.init_io_helper() 21 | reloaded_tensor1 = veturboio.load("model1.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper) 22 | # the map_location may be different 23 | reloaded_tensor2 = veturboio.load("model2.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper) 24 | 25 | # check if the tensors are the same 26 | for k, v in tensors1.items(): 27 | assert torch.allclose(v.cuda(), reloaded_tensor1[k]) 28 | for k, v in tensors2.items(): 29 | assert torch.allclose(v.cuda(), reloaded_tensor2[k]) 30 | ``` 31 | 32 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: "veTurboIO" 2 | 3 | theme: 4 | name: "material" 5 | 6 | docs_dir: docs 7 | 8 | nav: 9 | - 首页: index.md 10 | - 最佳实践: 11 | - 动态加载: dynamic_load.md 12 | - SFCS 加载优化: sfcs_support.md 13 | - API: api.md 14 | - 发布日志: release.md 15 | 16 | plugins: 17 | - mkdocstrings: 18 | default_handler: python -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | 2 | [tool.isort] 3 | profile = "black" # black-compatible 4 | line_length = 119 # should match black parameters 5 | py_version = 310 # python 3.10 as a target version 6 | sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"] 7 | default_section = "THIRDPARTY" 8 | 9 | 10 | [tool.black] 11 | line_length = 119 12 | skip_string_normalization = true 13 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | import os 18 | import platform 19 | import sys 20 | 21 | import requests 22 | import setuptools 23 | import torch 24 | from pkg_resources import parse_version 25 | from setuptools import Extension, find_packages, setup 26 | from torch.utils.cpp_extension import BuildExtension, CppExtension, include_paths 27 | 28 | # initialize variables for compilation 29 | IS_LINUX = platform.system() == "Linux" 30 | IS_DARWIN = platform.system() == "Darwin" 31 | IS_WINDOWS = platform.system() == "Windows" 32 | 33 | this_dir = os.path.dirname(os.path.abspath(__file__)) 34 | 35 | 36 | def get_option(): 37 | if os.getenv("NPU_EXTENSION_ENABLED", "0") == "1": 38 | sys.argv.append("--npu_ext") 39 | elif "--cuda_ext" not in sys.argv and "--npu_ext" not in sys.argv and "--cpu_ext" not in sys.argv: 40 | print( 41 | '''No known extension specified, default to use --cuda_ext. 
Currently supported: 42 | --cuda_ext 43 | --npu_ext 44 | --cpu_ext''' 45 | ) 46 | sys.argv.append("--cuda_ext") 47 | 48 | 49 | def get_version(): 50 | import importlib.util 51 | 52 | spec = importlib.util.spec_from_file_location("version", os.path.join("veturboio", "version.py")) 53 | m = importlib.util.module_from_spec(spec) 54 | spec.loader.exec_module(m) 55 | 56 | if "--cpu_ext" in sys.argv: 57 | return m.__version__ + "+cpu" 58 | elif "--npu_ext" in sys.argv: 59 | return m.__version__ + "+npu" 60 | else: 61 | return m.__version__ 62 | 63 | 64 | def make_relative_rpath(path): 65 | if IS_DARWIN: 66 | return '-Wl,-rpath,@loader_path/' + path 67 | elif IS_WINDOWS: 68 | return '' 69 | else: 70 | return '-Wl,-rpath,$ORIGIN/' + path 71 | 72 | 73 | def get_veturboio_extension(): 74 | get_option() 75 | # prevent ninja from using too many resources 76 | try: 77 | import psutil 78 | 79 | num_cpu = len(psutil.Process().cpu_affinity()) 80 | cpu_use = max(4, num_cpu - 1) 81 | except (ModuleNotFoundError, AttributeError): 82 | cpu_use = 4 83 | 84 | os.environ.setdefault("MAX_JOBS", str(cpu_use)) 85 | # os.environ.setdefault("TORCH_CUDA_ARCH_LIST", "8.0;8.6") 86 | os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6" 87 | 88 | define_macros = [] 89 | 90 | # Before PyTorch1.8.0, when compiling CUDA code, `cxx` is a 91 | # required key passed to PyTorch. Even if there is no flag passed 92 | # to cxx, users also need to pass an empty list to PyTorch. 93 | # Since PyTorch1.8.0, it has a default value so users do not need 94 | # to pass an empty list anymore. 
95 | # More details at https://github.com/pytorch/pytorch/pull/45956 96 | extra_compile_args = {'cxx': ['-fvisibility=hidden'], 'nvcc': ['-O3']} 97 | 98 | if parse_version(torch.__version__) <= parse_version('1.12.1'): 99 | extra_compile_args['cxx'].append('-std=c++14') 100 | else: 101 | extra_compile_args['cxx'].append('-std=c++17') 102 | 103 | name = "veturboio_ext" 104 | 105 | sources = [ 106 | "veturboio/ops/csrc/pybind.cpp", 107 | "veturboio/ops/csrc/posix.cpp", 108 | "veturboio/ops/csrc/sfcs.cpp", 109 | "veturboio/ops/csrc/io_helper_cpu_common.cpp", 110 | "veturboio/ops/csrc/cipher.cpp", 111 | ] 112 | 113 | include_dirs = include_paths() 114 | include_dirs.append("veturboio/ops/csrc/include") 115 | 116 | torch_dir = os.path.join(os.path.dirname(torch.__file__), "lib") 117 | library_dirs = [torch_dir] 118 | library_dirs.append("veturboio/ops/csrc/lib") 119 | 120 | libraries = ["cloudfs", ":libfastcrypto_gpu.so.0.3"] 121 | 122 | extra_link_args = [make_relative_rpath("veturboio/ops/csrc/lib")] 123 | 124 | # Refer to: https://github.com/pytorch/pytorch/blob/main/torch/utils/cpp_extension.py#L918 125 | # In torch 2.0, this flag is False, and the *.so lib set this flag as False when building. 126 | # In newer torch, this flag is True, to keep compatibility with *.so lib, we set it False 127 | # to generate g++ flags '-D_GLIBCXX_USE_CXX11_ABI=0' when building veturboio_ext, otherwise 128 | # some 'undefine symbol' error of std::string will be thrown. 
129 | torch._C._GLIBCXX_USE_CXX11_ABI = False 130 | 131 | if "--cuda_ext" in sys.argv: 132 | sys.argv.remove("--cuda_ext") 133 | 134 | extra_compile_args['nvcc'].append('-O3') 135 | 136 | sources.append("veturboio/ops/csrc/io_helper.cu") 137 | 138 | define_macros.append(("USE_CUDA", "1")) 139 | 140 | from torch.utils.cpp_extension import CUDAExtension 141 | 142 | return CUDAExtension( 143 | name=name, 144 | sources=sources, 145 | define_macros=define_macros, 146 | include_dirs=include_dirs, 147 | library_dirs=library_dirs, 148 | libraries=libraries, 149 | extra_compile_args=extra_compile_args, 150 | extra_link_args=extra_link_args, 151 | ) 152 | else: 153 | extra_compile_args['cxx'].append('-O3') 154 | 155 | libraries.append("torch_cpu") 156 | libraries.append("torch_python") 157 | 158 | extra_link_args.append(f"-Wl,--rpath={torch_dir},--enable-new-dtags") 159 | 160 | if "--npu_ext" in sys.argv: 161 | sys.argv.remove("--npu_ext") 162 | 163 | sources.append("veturboio/ops/csrc/io_helper_npu.cpp") 164 | define_macros.append(("USE_NPU", "1")) 165 | 166 | return Extension( 167 | name=name, 168 | sources=sources, 169 | define_macros=define_macros, 170 | include_dirs=include_dirs, 171 | library_dirs=library_dirs, 172 | libraries=libraries, 173 | extra_compile_args=extra_compile_args, 174 | extra_link_args=extra_link_args, 175 | ) 176 | elif "--cpu_ext" in sys.argv: 177 | sys.argv.remove("--cpu_ext") 178 | 179 | sources.append("veturboio/ops/csrc/io_helper_cpu.cpp") 180 | 181 | return Extension( 182 | name=name, 183 | sources=sources, 184 | define_macros=define_macros, 185 | include_dirs=include_dirs, 186 | library_dirs=library_dirs, 187 | libraries=libraries, 188 | extra_compile_args=extra_compile_args, 189 | extra_link_args=extra_link_args, 190 | ) 191 | 192 | 193 | class GetLibCfsCommand(setuptools.Command): 194 | """get libcfs from url""" 195 | 196 | description = 'get libcfs from url' 197 | user_options = [('src=', 's', 'source url of libcloudfs.so'), ('dst=', 'd', 
'dest filepath of libcloudfs.so')] 198 | 199 | def initialize_options(self): 200 | from veturboio.utils.load_veturboio_ext import LIBCFS_DEFAULT_PATH, LIBCFS_DEFAULT_URL 201 | 202 | self.src = LIBCFS_DEFAULT_URL 203 | self.dst = LIBCFS_DEFAULT_PATH 204 | 205 | def finalize_options(self): 206 | pass 207 | 208 | def run(self): 209 | print(f"download libcloudfs.so from {self.src}, save to {self.dst}") 210 | r = requests.get(self.src, timeout=60) 211 | with open(self.dst, 'wb') as f: 212 | f.write(r.content) 213 | 214 | 215 | setup( 216 | name="veturboio", 217 | version=get_version(), 218 | description="Effcient PyTorch IO libraray on Volcanic Engine", 219 | author="AML Team", 220 | ext_modules=[get_veturboio_extension()], 221 | packages=find_packages(exclude=("veturboio.ops.csrc.common.sfcs.lib")), 222 | install_requires=[ 223 | "safetensors", 224 | "numpy", 225 | "netifaces", 226 | "loguru", 227 | "requests-unixsocket", 228 | "requests", 229 | ], 230 | include_package_data=True, 231 | cmdclass={"get_libcfs": GetLibCfsCommand, "build_ext": BuildExtension}, 232 | dependency_links=['https://mirrors.ivolces.com/pypi/'], 233 | ) 234 | -------------------------------------------------------------------------------- /tests/test_assert_exceptions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import os 18 | import tempfile 19 | import unittest 20 | from unittest import TestCase 21 | 22 | import torch 23 | 24 | import veturboio 25 | 26 | 27 | class TestAssertException(TestCase): 28 | @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") 29 | def test_modify_use_pinmem_attr(self): 30 | helper = veturboio.init_io_helper() 31 | with tempfile.TemporaryDirectory() as tmpdirname: 32 | filepath = os.path.join(tmpdirname, "model.safetensors") 33 | veturboio.save_file(self.tensors, filepath) 34 | 35 | with self.assertRaises(Exception) as context: 36 | veturboio.load(filepath, map_location="cuda:0", use_pinmem=False, helper=helper) 37 | veturboio.load(filepath, map_location="cuda:0", use_pinmem=True, helper=helper) 38 | self.assertTrue( 39 | 'use_pinmem attribute of an exising IOHelper should not be changed' in str(context.exception) 40 | ) 41 | 42 | @classmethod 43 | def setUpClass(cls): 44 | cls.tensors = { 45 | "weight1": torch.randn(20, 10), 46 | "weight2": torch.randn(20, 10), 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_convert.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import os 18 | import tempfile 19 | from unittest import TestCase 20 | 21 | import torch 22 | 23 | import veturboio 24 | 25 | 26 | class TestConvertUtil(TestCase): 27 | def test_convert(self): 28 | with tempfile.TemporaryDirectory() as tmpdirname: 29 | filepath = os.path.join(tmpdirname, "model.pt") 30 | torch.save(self.tensors, filepath) 31 | convertpath = os.path.join(tmpdirname, "model.safetensors") 32 | 33 | print(f"python -m veturboio.convert -i {filepath} -o {convertpath}") 34 | os.system(f"python -m veturboio.convert -i {filepath} -o {convertpath}") 35 | 36 | loaded_tensors = veturboio.load(convertpath) 37 | for key in self.tensors.keys(): 38 | self.assertTrue(torch.allclose(self.tensors[key], loaded_tensors[key])) 39 | 40 | @classmethod 41 | def setUpClass(cls): 42 | cls.tensors = { 43 | "weight1": torch.randn(20, 10), 44 | "weight2": torch.randn(20, 10), 45 | } 46 | -------------------------------------------------------------------------------- /tests/test_fetch_cipher.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import base64 18 | import http.server 19 | import json 20 | import os 21 | import socketserver 22 | import tempfile 23 | import threading 24 | from datetime import datetime, timedelta 25 | from time import sleep 26 | from unittest import TestCase 27 | 28 | import numpy as np 29 | 30 | from veturboio.ops.cipher import CipherInfo, DataPipeClient 31 | from veturboio.ops.sfcs_utils import ( 32 | SFCS_OPT_ENV_LIST, 33 | SFCS_REQ_ENV_LIST, 34 | credentials_helper, 35 | generate_sfcs_conf_xml, 36 | init_sfcs_conf, 37 | ) 38 | 39 | 40 | class UnixSocketHttpServer(socketserver.UnixStreamServer): 41 | def get_request(self): 42 | request, client_address = super().get_request() 43 | return (request, ["local", 0]) 44 | 45 | 46 | class DatapipeHandler(http.server.SimpleHTTPRequestHandler): 47 | def do_POST(self): 48 | action = self.headers.get('X-Datapipe-Task-Type') 49 | if action == 'top': 50 | # mock kms response 51 | self.send_response(200) 52 | self.send_header('Content-Type', 'application/json') 53 | self.end_headers() 54 | res = {'Result': {'Plaintext': base64.b64encode(b'abcdefgh87654321').decode('ascii')}} 55 | self.wfile.write(bytes(json.dumps(res), encoding='ascii')) 56 | return 57 | self.send_response(400) 58 | self.end_headers() 59 | return 60 | 61 | def do_GET(self): 62 | action = self.headers.get('X-Datapipe-Task-Type') 63 | if action == 'ping': 64 | self.send_response(200) 65 | self.send_header('Content-Type', 'application/json') 66 | self.end_headers() 67 | self.wfile.write(bytes(json.dumps({'message': 'pong'}), encoding='ascii')) 68 | return 69 | if action == 'encrypt-key': 70 | self.send_response(200) 71 | self.send_header('Content-Type', 'application/json') 72 | self.end_headers() 73 | self.wfile.write( 74 | bytes( 75 | json.dumps({'Key': 'YWJjZGVmZ2gxMjM0NTY3OA==', 'IV': 'MTIzNDU2Nzg4NzY1NDMyMQ=='}), encoding='ascii' 76 | ) 77 | ) 78 | return 79 | if action == 'sfcs-sts': 80 | self.send_response(200) 81 | self.send_header('Content-Type', 
'application/json') 82 | self.end_headers() 83 | date_now = datetime.now() 84 | date_exp = date_now + timedelta(seconds=4) 85 | res = { 86 | 'Cred': { 87 | 'CurrentTime': date_now.isoformat(), 88 | 'ExpiredTime': date_exp.isoformat(), 89 | 'AccessKeyId': 'A' * 12, 90 | 'SecretAccessKey': 'S' * 12, 91 | 'SessionToken': 'ST' * 12, # fake SessionToken real one is longer 92 | }, 93 | 'SfcsNameNodeAddress': '100.67.19.231', 94 | } 95 | self.wfile.write(bytes(json.dumps(res), encoding='ascii')) 96 | return 97 | if action == 'kms-sts': 98 | self.send_response(200) 99 | self.send_header('Content-Type', 'application/json') 100 | self.end_headers() 101 | res = { 102 | 'Cred': { 103 | 'AccessKeyId': os.environ['CI_VENDOR_AK'], 104 | 'SecretAccessKey': os.environ['CI_VENDOR_AK'], 105 | 'SessionToken': '', 106 | }, 107 | } 108 | self.wfile.write(bytes(json.dumps(res), encoding='ascii')) 109 | return 110 | self.send_response(400) 111 | self.end_headers() 112 | return 113 | 114 | 115 | class TestCipherInfo(TestCase): 116 | @classmethod 117 | def setUpClass(cls): 118 | cls.sock_dir = tempfile.TemporaryDirectory() 119 | cls.server_address = os.path.join(cls.sock_dir.name, 'datapipe.sock') 120 | cls.server = UnixSocketHttpServer(cls.server_address, DatapipeHandler, bind_and_activate=True) 121 | 122 | def run(): 123 | cls.server.serve_forever() 124 | 125 | cls.thread = threading.Thread(target=run) 126 | cls.thread.start() 127 | cls.target_key = np.frombuffer(b'abcdefgh12345678', dtype=np.byte) 128 | cls.target_key_2 = np.frombuffer(b'abcdefgh87654321', dtype=np.byte) 129 | cls.target_iv = np.frombuffer(b'1234567887654321', dtype=np.byte) 130 | 131 | def test_fetch_from_file_header(self): 132 | os.environ.pop('VETURBOIO_KEY', None) 133 | os.environ.pop('VETURBOIO_IV', None) 134 | DataPipeClient.DATAPIPE_SOCKET_PATH = '/path/not/exist' 135 | 136 | header_dict = { 137 | 'mode': 'CTR-128', 138 | 'iv': 'MTIzNDU2Nzg4NzY1NDMyMQ==', 139 | 'meta_data_key': 
'bl2htKYLQ2+CjyyJ84Q3twAA9ZpCbFxwznRb0NkR9zGGRp1RK5Mb9u8NNOiahY+0yVrxNw3IVQ9Wgn6PDscw77Cb3eImjVn14hNBJRlwtSyQ7tRZLOsZBEHv5cWwDQ==', 140 | } 141 | header_bytes = bytearray(256 * 1024) 142 | header_str = 'Byte3ncryptM0del' + json.dumps(header_dict) 143 | header_bytes[: len(header_str)] = header_str.encode('utf-8') 144 | 145 | # case1: get kms cred from env 146 | ENV_KMS_HOST = 'VETURBOIO_KMS_HOST' 147 | ENV_KMS_REGION = 'VETURBOIO_KMS_REGION' 148 | ENV_KMS_AK = 'VETURBOIO_KMS_ACCESS_KEY' 149 | ENV_KMS_SK = 'VETURBOIO_KMS_SECRET_KEY' 150 | ENV_KMS_KEYRING = 'VETURBOIO_KMS_KEYRING_NAME' 151 | ENV_KMS_KEY = 'VETURBOIO_KMS_KEY_NAME' 152 | os.environ[ENV_KMS_HOST] = 'open.volcengineapi.com' 153 | os.environ[ENV_KMS_REGION] = 'cn-beijing' 154 | os.environ[ENV_KMS_AK] = os.environ['CI_VENDOR_AK'] 155 | os.environ[ENV_KMS_SK] = os.environ['CI_VENDOR_SK'] 156 | os.environ[ENV_KMS_KEYRING] = 'datapipe_keyring' 157 | os.environ[ENV_KMS_KEY] = 'datapipe_key_ml_maas' 158 | info = CipherInfo(True, header_bytes) 159 | self.assertTrue(info.use_cipher) 160 | self.assertTrue(info.use_header) 161 | self.assertTrue(np.array_equal(info.key, self.target_key)) 162 | self.assertTrue(np.array_equal(info.iv, self.target_iv)) 163 | 164 | # case2: get kms cred from datapipe and access kms with datapipe proxy 165 | os.environ.pop(ENV_KMS_HOST, None) 166 | os.environ.pop(ENV_KMS_REGION, None) 167 | os.environ.pop(ENV_KMS_AK, None) 168 | os.environ.pop(ENV_KMS_SK, None) 169 | DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address 170 | info = CipherInfo(True, header_bytes) 171 | self.assertTrue(info.use_cipher) 172 | self.assertTrue(info.use_header) 173 | self.assertTrue(np.array_equal(info.key, self.target_key_2)) 174 | self.assertTrue(np.array_equal(info.iv, self.target_iv)) 175 | 176 | def test_fetch_from_datapipe(self): 177 | DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address 178 | DataPipeClient.ENCRYPT_HEADER['X-Encrypt-Caller-Pod'] = 'test-pod-name' 179 | info = CipherInfo(True, 
class TestCredentials(TestCase):
    """Integration tests for SFCS credential fetching/refreshing.

    A fake datapipe service is served over a Unix-domain socket
    (``UnixSocketHttpServer`` + ``DatapipeHandler``, both defined earlier in
    this file) so ``DataPipeClient`` and ``init_sfcs_conf`` can be exercised
    without the real control plane.
    """

    @classmethod
    def setUpClass(cls):
        # Spin up the fake datapipe HTTP server on a temp Unix socket and
        # serve it from a background thread for the whole test class.
        cls.sock_dir = tempfile.TemporaryDirectory()
        cls.server_address = os.path.join(cls.sock_dir.name, 'datapipe.sock')
        cls.server = UnixSocketHttpServer(cls.server_address, DatapipeHandler, bind_and_activate=True)

        def run():
            cls.server.serve_forever()

        cls.thread = threading.Thread(target=run)
        cls.thread.start()

    def test_sfcs_sts(self):
        """DataPipeClient returns the STS credential served by the fake datapipe."""
        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
        client = DataPipeClient()
        cred = client.get_sfcs_ak_sk_st()
        self.assertIsNotNone(cred)
        self.assertEqual(cred['SfcsNameNodeAddress'], '100.67.19.231')
        cred = cred['Cred']
        self.assertEqual(cred['AccessKeyId'], 'A' * 12)
        self.assertEqual(cred['SecretAccessKey'], 'S' * 12)
        self.assertEqual(cred['SessionToken'], 'ST' * 12)

    def test_sfcs_conf(self):
        """init_sfcs_conf writes the XML conf, either from env vars or via datapipe refresh."""
        for e in SFCS_REQ_ENV_LIST:
            os.environ[e] = 'test-value'
        # case 1: env SFCS_ACCESS_KEY and SFCS_SECRET_KEY and SFCS_NAMENODE_ENDPOINT_ADDRESS exists
        os.environ['SFCS_ACCESS_KEY'] = 'A' * 12
        os.environ['SFCS_SECRET_KEY'] = 'S' * 12
        os.environ['SFCS_NAMENODE_ENDPOINT_ADDRESS'] = '100.67.19.231'
        sfcs_conf = os.path.join(os.getcwd(), 'base_model2.xml')
        if os.path.exists(sfcs_conf):
            os.remove(sfcs_conf)
        init_sfcs_conf('/base_model2/tensor.pt')
        self.assertEqual(os.environ['LIBCLOUDFS_CONF'], sfcs_conf)
        # no background refresh threads should be started when creds come from env
        self.assertEqual(len(credentials_helper.threads), 0)
        self.assertEqual(len(credentials_helper.running), 0)
        self.assertTrue(os.path.exists(sfcs_conf))
        os.remove(sfcs_conf)

        # case 2: use datapipe socket to get and refresh ak, sk, st and namenode_ip
        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
        os.environ.pop('SFCS_ACCESS_KEY', None)
        os.environ.pop('SFCS_SECRET_KEY', None)
        os.environ.pop('SFCS_NAMENODE_ENDPOINT_ADDRESS', None)
        sfcs_conf3 = os.path.join(os.getcwd(), 'base_model3.xml')
        sfcs_conf4 = os.path.join(os.getcwd(), 'base_model4.xml')
        if os.path.exists(sfcs_conf3):
            os.remove(sfcs_conf3)
        if os.path.exists(sfcs_conf4):
            os.remove(sfcs_conf4)
        init_sfcs_conf('/base_model3/tensor.pt')
        init_sfcs_conf('/base_model4/tensor.pt')
        self.assertTrue('base_model3' in credentials_helper.threads)
        self.assertTrue('base_model4' in credentials_helper.threads)
        self.assertTrue(credentials_helper.running['base_model3'])
        self.assertTrue(credentials_helper.running['base_model4'])
        self.assertTrue(os.path.exists(sfcs_conf3))
        self.assertTrue(os.path.exists(sfcs_conf4))
        # the refresher threads should re-create the conf files after deletion;
        # NOTE(review): this polls for up to ~15s and assumes the refresh period
        # is shorter than 3s — confirm against credentials_helper's interval.
        for i in range(5):
            os.remove(sfcs_conf3)
            os.remove(sfcs_conf4)
            sleep(3)
            self.assertTrue(os.path.exists(sfcs_conf3))
            self.assertTrue(os.path.exists(sfcs_conf4))
        print(credentials_helper.threads)
        os.remove(sfcs_conf3)
        os.remove(sfcs_conf4)

    def test_sfcs_conf_json(self):
        """Same as case 2 above, but SFCS_* env vars hold per-model JSON maps."""
        for e in SFCS_REQ_ENV_LIST:
            os.environ[e] = 'test-value'
        os.environ['SFCS_FSNAME'] = json.dumps({'base_model1': 'test-value1', 'base_model2': 'test-value2'})
        os.environ['SFCS_NS_ID'] = json.dumps({'base_model1': 'test-value1', 'base_model2': 'test-value2'})
        os.environ['SFCS_UFS_PATH'] = json.dumps({'base_model1': 'test-value1', 'base_model2': 'test-value2'})
        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
        os.environ.pop('SFCS_ACCESS_KEY', None)
        os.environ.pop('SFCS_SECRET_KEY', None)
        os.environ.pop('SFCS_NAMENODE_ENDPOINT_ADDRESS', None)
        sfcs_conf1 = os.path.join(os.getcwd(), 'base_model1.xml')
        sfcs_conf2 = os.path.join(os.getcwd(), 'base_model2.xml')
        if os.path.exists(sfcs_conf1):
            os.remove(sfcs_conf1)
        if os.path.exists(sfcs_conf2):
            os.remove(sfcs_conf2)
        init_sfcs_conf('/base_model1/tensor.pt')
        init_sfcs_conf('/base_model2/tensor.pt')
        self.assertTrue('base_model1' in credentials_helper.threads)
        self.assertTrue('base_model2' in credentials_helper.threads)
        self.assertTrue(credentials_helper.running['base_model1'])
        self.assertTrue(credentials_helper.running['base_model2'])
        self.assertTrue(os.path.exists(sfcs_conf1))
        self.assertTrue(os.path.exists(sfcs_conf2))
        # conf files must survive several refresh cycles
        for i in range(5):
            sleep(3)
            self.assertTrue(os.path.exists(sfcs_conf1))
            self.assertTrue(os.path.exists(sfcs_conf2))
        print(credentials_helper.threads)
        os.remove(sfcs_conf1)
        os.remove(sfcs_conf2)

    @classmethod
    def tearDownClass(cls):
        # Stop refresher threads first, then clear env and shut the fake server down.
        credentials_helper.stop()
        os.environ.pop('LIBCLOUDFS_CONF', None)
        for e in SFCS_REQ_ENV_LIST:
            os.environ.pop(e, None)
        for e in SFCS_OPT_ENV_LIST:
            os.environ.pop(e, None)
        cls.server.shutdown()
        cls.server.server_close()
        cls.thread.join()
        cls.sock_dir.cleanup()
class TestLoad(TestCase):
    """End-to-end load tests for veturboio.load over safetensors/pt files,
    plain and encrypted (with and without cipher header), on CPU and CUDA.
    """

    @classmethod
    def setUpClass(cls):
        # KMS credentials come from the CI environment (CI_VENDOR_AK/SK);
        # they are required for the cipher-enabled save/load paths below.
        ENV_KMS_HOST = 'VETURBOIO_KMS_HOST'
        ENV_KMS_REGION = 'VETURBOIO_KMS_REGION'
        ENV_KMS_AK = 'VETURBOIO_KMS_ACCESS_KEY'
        ENV_KMS_SK = 'VETURBOIO_KMS_SECRET_KEY'
        ENV_KMS_KEYRING = 'VETURBOIO_KMS_KEYRING_NAME'
        ENV_KMS_KEY = 'VETURBOIO_KMS_KEY_NAME'
        os.environ[ENV_KMS_HOST] = 'open.volcengineapi.com'
        os.environ[ENV_KMS_REGION] = 'cn-beijing'
        os.environ[ENV_KMS_AK] = os.environ['CI_VENDOR_AK']
        os.environ[ENV_KMS_SK] = os.environ['CI_VENDOR_SK']
        os.environ[ENV_KMS_KEYRING] = 'datapipe_keyring'
        os.environ[ENV_KMS_KEY] = 'datapipe_key_ml_maas'

        cls.tempdir = tempfile.TemporaryDirectory()

        cls.tensors_0 = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
        }

        cls.tensors_1 = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
            "weight3": torch.BoolTensor(2000, 10),
        }

        # plain safetensors fixtures (normal and fast-mode writers)
        cls.filepath_0 = os.path.join(cls.tempdir.name, "model_0.safetensors")
        cls.filepath_1 = os.path.join(cls.tempdir.name, "model_1.safetensors")
        veturboio.save_file(cls.tensors_0, cls.filepath_0)
        veturboio.save_file(cls.tensors_1, cls.filepath_1, enable_fast_mode=True)

        # plain pytorch checkpoint fixture
        cls.pt_filepath = os.path.join(cls.tempdir.name, "model.pt")
        torch.save(cls.tensors_0, cls.pt_filepath)

        # cipher
        os.environ["VETURBOIO_KEY"] = base64.b64encode(b"abcdefgh12345678").decode("ascii")
        os.environ["VETURBOIO_IV"] = base64.b64encode(b"1234567887654321").decode("ascii")

        cls.filepath_0_enc = os.path.join(cls.tempdir.name, "model_0_enc.safetensors")
        cls.filepath_1_enc = os.path.join(cls.tempdir.name, "model_1_enc.safetensors")
        veturboio.save_file(cls.tensors_0, cls.filepath_0_enc, use_cipher=True)
        veturboio.save_file(cls.tensors_1, cls.filepath_1_enc, use_cipher=True, enable_fast_mode=True)

        cls.pt_filepath_enc = os.path.join(cls.tempdir.name, "model_enc.pt")
        veturboio.save_pt(cls.tensors_0, cls.pt_filepath_enc, use_cipher=True)

        # cipher with header
        os.environ["VETURBOIO_CIPHER_HEADER"] = "1"
        cls.filepath_0_enc_h = os.path.join(cls.tempdir.name, "model_0_enc_h.safetensors")
        veturboio.save_file(cls.tensors_0, cls.filepath_0_enc_h, use_cipher=True)

        cls.pt_filepath_enc_h = os.path.join(cls.tempdir.name, "model_enc_h.pt")
        veturboio.save_pt(cls.tensors_0, cls.pt_filepath_enc_h, use_cipher=True)
        del os.environ["VETURBOIO_CIPHER_HEADER"]

        # CUDA copies of the reference tensors, only when a GPU is present
        if torch.cuda.is_available():
            cls.cuda_tensors_0 = deepcopy(cls.tensors_0)
            cls.cuda_tensors_1 = deepcopy(cls.tensors_1)

            for key in cls.cuda_tensors_0.keys():
                cls.cuda_tensors_0[key] = cls.cuda_tensors_0[key].cuda()
            for key in cls.cuda_tensors_1.keys():
                cls.cuda_tensors_1[key] = cls.cuda_tensors_1[key].cuda()

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    def _run_pipeline(self, tensors, filepath, map_location, use_cipher, enable_fast_mode=True, state_dict=None):
        """Load `filepath` via veturboio and assert every tensor matches `tensors`.

        Returns the loaded state dict so callers can run extra assertions.
        """
        loaded_tensors = veturboio.load(
            filepath,
            map_location=map_location,
            use_cipher=use_cipher,
            enable_fast_mode=enable_fast_mode,
            state_dict=state_dict,
        )
        for key in tensors.keys():
            self.assertTrue(torch.allclose(tensors[key], loaded_tensors[key]))
        return loaded_tensors

    def test_pipeline_cpu(self):
        """CPU load: plain/encrypted x fast/slow mode x fresh/pre-allocated state dict."""
        self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False)
        self._run_pipeline(self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True)
        self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False, enable_fast_mode=False)
        self._run_pipeline(self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True, enable_fast_mode=False)
        pre_allocated_tensors = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
        }
        self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False, state_dict=pre_allocated_tensors)
        self._run_pipeline(
            self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True, state_dict=pre_allocated_tensors
        )
        self._run_pipeline(
            self.tensors_0,
            self.filepath_0,
            "cpu",
            use_cipher=False,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )
        self._run_pipeline(
            self.tensors_0,
            self.filepath_0_enc,
            "cpu",
            use_cipher=True,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_pipeline_cuda(self):
        """Same matrix as test_pipeline_cpu but loading onto cuda:0."""
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False)
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True)
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False, enable_fast_mode=False)
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True, enable_fast_mode=False)
        pre_allocated_tensors = {
            "weight1": torch.randn(2000, 10).cuda(),
            "weight2": torch.IntTensor(2000, 10).cuda(),
        }
        self._run_pipeline(
            self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False, state_dict=pre_allocated_tensors
        )
        self._run_pipeline(
            self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True, state_dict=pre_allocated_tensors
        )
        self._run_pipeline(
            self.cuda_tensors_0,
            self.filepath_0,
            "cuda:0",
            use_cipher=False,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )
        self._run_pipeline(
            self.cuda_tensors_0,
            self.filepath_0_enc,
            "cuda:0",
            use_cipher=True,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )

    def test_read_multi_state_dict_cpu(self):
        """Loading different files returns state dicts with the right key counts."""
        load_tensor_0 = self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False)
        load_tensor_1 = self._run_pipeline(self.tensors_1, self.filepath_1, "cpu", use_cipher=False)

        self.assertEqual(len(load_tensor_0), 2)
        self.assertEqual(len(load_tensor_1), 3)

        load_tensor_0_enc = self._run_pipeline(self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True)
        load_tensor_1_enc = self._run_pipeline(self.tensors_1, self.filepath_1_enc, "cpu", use_cipher=True)

        self.assertEqual(len(load_tensor_0_enc), 2)
        self.assertEqual(len(load_tensor_1_enc), 3)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_read_multi_state_dict_cuda(self):
        """CUDA variant of test_read_multi_state_dict_cpu."""
        load_tensor_0 = self._run_pipeline(self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False)
        load_tensor_1 = self._run_pipeline(self.cuda_tensors_1, self.filepath_1, "cuda:0", use_cipher=False)

        self.assertEqual(len(load_tensor_0), 2)
        self.assertEqual(len(load_tensor_1), 3)

        load_tensor_0_enc = self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True)
        load_tensor_1_enc = self._run_pipeline(self.cuda_tensors_1, self.filepath_1_enc, "cuda:0", use_cipher=True)

        self.assertEqual(len(load_tensor_0_enc), 2)
        self.assertEqual(len(load_tensor_1_enc), 3)

    def test_load_pt_cpu(self):
        """veturboio.load also reads torch-saved .pt files, plain and encrypted."""
        loaded_tensors = veturboio.load(self.pt_filepath, map_location="cpu", use_cipher=False)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))

        loaded_tensors_enc = veturboio.load(self.pt_filepath_enc, map_location="cpu", use_cipher=True)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors_enc[key]))

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_load_pt_cuda(self):
        """CUDA variant of test_load_pt_cpu."""
        loaded_tensors = veturboio.load(self.pt_filepath, map_location="cuda:0", use_cipher=False)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.cuda_tensors_0[key], loaded_tensors[key]))

        loaded_tensors_enc = veturboio.load(self.pt_filepath_enc, map_location="cuda:0", use_cipher=True)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.cuda_tensors_0[key], loaded_tensors_enc[key]))

    def test_load_cipher_header_cpu(self):
        """Files written with VETURBOIO_CIPHER_HEADER=1 load back correctly on CPU."""
        self._run_pipeline(self.tensors_0, self.filepath_0_enc_h, "cpu", use_cipher=True)
        self._run_pipeline(self.tensors_0, self.pt_filepath_enc_h, "cpu", use_cipher=True)
        self._run_pipeline(self.tensors_0, self.filepath_0_enc_h, "cpu", use_cipher=True, enable_fast_mode=False)
        self._run_pipeline(self.tensors_0, self.pt_filepath_enc_h, "cpu", use_cipher=True, enable_fast_mode=False)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_load_cipher_header_cuda(self):
        """CUDA variant of test_load_cipher_header_cpu."""
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc_h, "cuda:0", use_cipher=True)
        self._run_pipeline(self.cuda_tensors_0, self.pt_filepath_enc_h, "cuda:0", use_cipher=True)
        self._run_pipeline(
            self.cuda_tensors_0, self.filepath_0_enc_h, "cuda:0", use_cipher=True, enable_fast_mode=False
        )
        self._run_pipeline(
            self.cuda_tensors_0, self.pt_filepath_enc_h, "cuda:0", use_cipher=True, enable_fast_mode=False
        )

    def test_load_directIO_fall_back(self):
        """Direct I/O on a tmpfs file (/dev/shm) must fall back gracefully."""
        with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpFile:
            veturboio.save_file(self.tensors_0, tmpFile.name)
            tmpFile.flush()
            loaded_tensors = veturboio.load(tmpFile.name, map_location="cpu", use_direct_io=True)
            for key in self.tensors_0.keys():
                self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))

    def test_load_to_shmem(self):
        """load_to_shmem copies (and decrypts) a file into shared memory; the
        shm copy is then loadable as a plain safetensors file."""
        shmem = veturboio.load_to_shmem(self.filepath_0, use_cipher=False)
        loaded_tensors = veturboio.load(
            os.path.join("/dev/shm/", shmem.name), map_location="cpu", enable_fast_mode=False, use_cipher=False
        )
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))
        shmem.close()
        shmem.unlink()

        # encrypted source: the shm copy is decrypted, so use_cipher=False on reload
        shmem = veturboio.load_to_shmem(self.filepath_0_enc, use_cipher=True)
        loaded_tensors = veturboio.load(
            os.path.join("/dev/shm/", shmem.name), map_location="cpu", enable_fast_mode=False, use_cipher=False
        )
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))
        shmem.close()
        shmem.unlink()
class TestSave(TestCase):
    """Tests for veturboio's save entry points (save_file/save_model/save_pt),
    cross-checked against the upstream safetensors reader and torch.load.
    """

    @classmethod
    def setUpClass(cls):
        cls.tensors_0 = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
            "weight3": torch.BoolTensor(2000, 10),
        }

        # minimal module whose state_dict we round-trip in test_save_model
        class MockModel(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()

                self.linear1 = torch.nn.Linear(100, 50)
                self.linear2 = torch.nn.Linear(100, 50)

        cls.model = MockModel()

        cls.tempdir = tempfile.TemporaryDirectory()
        cls.filepath_0 = os.path.join(cls.tempdir.name, "model_0.safetensors")
        cls.filepath_1 = os.path.join(cls.tempdir.name, "model_0.pt")
        cls.filepath_2 = os.path.join(cls.tempdir.name, "model_0_fast.safetensors")
        cls.filepath_3 = os.path.join(cls.tempdir.name, "model_1.safetensors")

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    def test_save_file(self):
        """Files written by save_file are readable by the reference safetensors reader."""
        veturboio.save_file(self.tensors_0, self.filepath_0)
        with safe_open(self.filepath_0, framework="pt", device="cpu") as f:
            assert len(f.keys()) == 3
            for key in f.keys():
                self.assertTrue(torch.allclose(self.tensors_0[key], f.get_tensor(key)))

        # enable fast mode
        veturboio.save_file(self.tensors_0, self.filepath_2, enable_fast_mode=True)
        with safe_open(self.filepath_2, framework="pt", device="cpu") as f:
            assert len(f.keys()) == 3
            for key in f.keys():
                self.assertTrue(torch.allclose(self.tensors_0[key], f.get_tensor(key)))

    def test_save_file_for_clone_share_tensors(self):
        """force_clone_shared_tensor writes every alias of a shared tensor
        without mutating the caller's dict."""
        share_dict = {"key1": self.tensors_0["weight1"], "key2": self.tensors_0["weight1"]}
        veturboio.save_file(share_dict, self.filepath_0, force_save_shared_tensor=True, force_clone_shared_tensor=True)
        assert len(share_dict) == 2  # assert save_file won't change user's state_dict.
        with safe_open(self.filepath_0, framework="pt", device="cpu") as f:
            for key in f.keys():
                assert key in share_dict
                self.assertTrue(torch.allclose(share_dict[key], f.get_tensor(key)))

    def test_save_model(self):
        """save_model + load (with cipher) round-trips a module's state_dict."""
        veturboio.save_model(self.model, self.filepath_3, use_cipher=True)
        loaded_tensors = veturboio.load(self.filepath_3, map_location="cpu", use_cipher=True)
        state_dict = self.model.state_dict()
        for key in state_dict.keys():
            self.assertTrue(torch.allclose(state_dict[key], loaded_tensors[key]))

    def test_save_pt(self):
        """Files written by save_pt are readable by plain torch.load."""
        veturboio.save_pt(self.tensors_0, self.filepath_1)
        loaded_tensors = torch.load(self.filepath_1)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))
class TestSharedTensorLoad(TestCase):
    """Round-trip tests for models whose state dict contains shared tensors
    (here, ``linear3`` aliases ``linear2``)."""

    @classmethod
    def setUpClass(cls):
        class MockModel(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()

                self.linear1 = torch.nn.Linear(10, 20)
                self.linear2 = torch.nn.Linear(20, 10)
                # deliberate aliasing: linear3 shares parameters with linear2
                self.linear3 = self.linear2

        cls.model = MockModel()

    def test_pipeline(self):
        """save_model -> load round-trips every entry of the state dict."""
        with tempfile.TemporaryDirectory() as workdir:
            model_path = os.path.join(workdir, "model.safetensors")
            veturboio.save_model(self.model, model_path)
            reloaded = veturboio.load(model_path, map_location="cpu")

            for name, expected in self.model.state_dict().items():
                self.assertTrue(torch.allclose(expected, reloaded[name]))

    def test_save_file(self):
        """save_file with force_save_shared_tensor keeps aliased entries intact."""
        with tempfile.TemporaryDirectory() as workdir:
            model_path = os.path.join(workdir, "model.safetensors")
            veturboio.save_file(self.model.state_dict(), model_path, force_save_shared_tensor=True)
            reloaded = veturboio.load(model_path, map_location="cpu")

            for name, expected in self.model.state_dict().items():
                self.assertTrue(torch.allclose(expected, reloaded[name]))
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | from veturboio.io import load, load_to_shmem, save_file, save_model, save_pt 18 | from veturboio.ops.io_utils import init_io_helper 19 | 20 | __all__ = ["load", "load_to_shmem", "save_file", "save_model", "init_io_helper", "save_pt"] 21 | -------------------------------------------------------------------------------- /veturboio/convert.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
def to_valid_state_dict(state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
    """Return a copy of ``state_dict`` that can be safely saved as safetensors.

    - Entries whose value is not a ``torch.Tensor`` are dropped (with a warning).
    - For each group of shared tensors (as reported by
      ``safetensors.torch._find_shared_tensors``), all names are mapped to a
      single tensor; if that tensor is not "complete" it is cloned first so
      the serialized data is self-contained.

    Args:
        state_dict: mapping of parameter name -> tensor (non-tensor values allowed).

    Returns:
        A new dict with the same keys (minus non-tensor entries).

    Raises:
        Exception: if tensors in a shared group differ in data pointer or shape.
    """
    invalid_key = [k for k, v in state_dict.items() if not isinstance(v, torch.Tensor)]
    if len(invalid_key) > 0:
        # BUGFIX: the module-level `logger` is only created under the
        # `if __name__ == "__main__"` guard, so referencing it here raised
        # NameError when this module was imported. Use a locally obtained
        # module logger instead.
        logging.getLogger(__name__).warning(f"invalid keys to removed: {invalid_key}")
        state_dict = {k: v for k, v in state_dict.items() if k not in invalid_key}

    result = {}
    shared_tensor_groups = _find_shared_tensors(state_dict)
    for group in shared_tensor_groups:
        # check if all share tensors have the same data ptr, same shape, and same size
        shared_tensors = [state_dict[k] for k in group]
        data_ptrs = [t.data_ptr() for t in shared_tensors]
        shapes = [t.shape for t in shared_tensors]
        if len(set(data_ptrs)) != 1 or len(set(shapes)) != 1:
            raise Exception(f"shared tensors {group} are not equal")
        # make sure these tensors are complete and identical
        converted_tensor = shared_tensors[0]
        if not _is_complete(converted_tensor):
            converted_tensor = converted_tensor.clone()
        for t in group:
            result[t] = converted_tensor
    # pass through all remaining (non-shared) tensors unchanged
    for k, v in state_dict.items():
        if k not in result:
            result[k] = v
    return result
input_state_dict.items() if isinstance(v, torch.Tensor)} 70 | output_state_dict = {k: v for k, v in output_state_dict.items() if isinstance(v, torch.Tensor)} 71 | 72 | input_key_set = set(input_state_dict.keys()) 73 | output_key_set = set(output_state_dict.keys()) 74 | 75 | if input_key_set != output_key_set: 76 | not_in_output_key_set = input_key_set - output_key_set 77 | not_in_input_key_set = output_key_set - input_key_set 78 | raise Exception( 79 | f"key set not equal, not in output key set: {not_in_output_key_set}, not in input key set: {not_in_input_key_set}" 80 | ) 81 | 82 | not_equal_tensor = [] 83 | for key in input_state_dict: 84 | if not torch.allclose(input_state_dict[key], output_state_dict[key]): 85 | not_equal_tensor.append(key) 86 | if len(not_equal_tensor) > 0: 87 | raise Exception(f"result is not valid, not equal tensors: {not_equal_tensor}") 88 | 89 | logger.info(f"all {len(input_key_set)} keys in state dict are equal") 90 | 91 | 92 | def _get_available_cpu() -> int: 93 | avail_cpu = os.cpu_count() 94 | if os.path.isfile('/sys/fs/cgroup/cpu/cpu.cfs_quota_us'): 95 | cpu_quota = int(open('/sys/fs/cgroup/cpu/cpu.cfs_quota_us').read().rstrip()) 96 | if cpu_quota != -1 and os.path.isfile('/sys/fs/cgroup/cpu/cpu.cfs_period_us'): 97 | cpu_period = int(open('/sys/fs/cgroup/cpu/cpu.cfs_period_us').read().rstrip()) 98 | avail_cpu = int(cpu_quota / cpu_period) 99 | logger.info(f"get veturboio thread {avail_cpu} from cgroup info") 100 | return avail_cpu 101 | 102 | 103 | class Pt2SafeTensorConverter: 104 | def __init__( 105 | self, 106 | input_path: str, 107 | output_path: str, 108 | dry_run: bool, 109 | enable_to_valid_state_dict: bool, 110 | overwrite: bool, 111 | use_direct_io: bool, 112 | ): 113 | self.input_path = input_path 114 | self.output_path = output_path 115 | self.dry_run = dry_run 116 | self.enable_to_valid_state_dict = enable_to_valid_state_dict 117 | self.use_direct_io = use_direct_io 118 | if self.input_path.startswith("sfcs://"): 119 | try: 
    def convert(self):
        """Load the input .pt checkpoint and write it back out as .safetensors.

        Honors ``self.dry_run`` (log only, no I/O) and, when
        ``self.enable_to_valid_state_dict`` is set, sanitizes the state dict
        via ``to_valid_state_dict`` before saving. Timing for both phases is
        logged.
        """
        # NOTE(review): `logger` is the module-level logger created under the
        # __main__ guard below — this method assumes it runs as a script;
        # importing this class elsewhere and calling convert() would raise
        # NameError. Confirm intended usage.
        logger.info(f"converting {self.input_path} to {self.output_path}")
        available_cpus = _get_available_cpu()
        ext_name = self.output_path.split(".")[-1]
        state_dict = {}
        if ext_name != "safetensors":
            raise ValueError("output file should be safetensors file")
        logger.info(f"start loading the pt file, the pt file has size of {self.input_file_size // 1000 // 1000}MB")
        start_time = datetime.now()
        if self.dry_run:
            logger.info("dry run finished for veturboio.load_pt_file")
        else:
            # fast-mode load with one thread per available CPU
            state_dict = veturboio.load(
                self.input_path, num_thread=available_cpus, use_direct_io=self.use_direct_io, enable_fast_mode=True
            )
        end_time = datetime.now()
        logger.info(f"finish loading the pt file with duration {end_time - start_time}")
        logger.info("start saving the safetensors file")
        start_time = datetime.now()
        if self.dry_run:
            logger.info("dry run finished for veturboio.save_safetensors_file")
        else:
            if self.enable_to_valid_state_dict:
                state_dict = to_valid_state_dict(state_dict)
            veturboio.save_file(state_dict, self.output_path, force_save_shared_tensor=True)
        end_time = datetime.now()
        logger.info(f"finish saving the safetensors file with duration {end_time - start_time}")

        # release the (potentially huge) state dict before returning
        del state_dict
        gc.collect()
        logger.info(f"gc finished")
parser.add_argument( 212 | "--output", 213 | "-o", 214 | type=str, 215 | required=False, 216 | help="indicate the path of .safeTensor file, both " 217 | "posix path and sfcs prefix are supported." 218 | "will be placed into the same dir of the .pt " 219 | "file if left empty", 220 | ) 221 | parser.add_argument("--dry-run", "-d", action="store_true", help="just dry run, not really convert") 222 | parser.add_argument("--overwrite", action="store_true", help="overwrite the output file if it exists") 223 | parser.add_argument( 224 | "--enable-to-valid-state-dict", 225 | action="store_true", 226 | help="execute to_valid_state_dict function before save to .safetensors", 227 | ) 228 | parser.add_argument("--validate-result", action="store_true", help="validate result", default=False) 229 | parser.add_argument("--use-direct-io", action="store_true", help="use direct io to load file", default=False) 230 | args = parser.parse_args() 231 | 232 | instance = Pt2SafeTensorConverter( 233 | args.input, args.output, args.dry_run, args.enable_to_valid_state_dict, args.overwrite, args.use_direct_io 234 | ) 235 | try: 236 | instance.convert() 237 | if args.validate_result: 238 | instance.validate() 239 | except Exception as e: 240 | logger.error(f"convert failed.") 241 | traceback.print_exc() 242 | exit(1) 243 | -------------------------------------------------------------------------------- /veturboio/io.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
def is_sfcs_path(file: "FILE_PATH"):
    """Classify a path and normalize it for the chosen I/O backend.

    Args:
        file: raw path; may carry an ``sfcs://`` scheme prefix.

    Returns:
        Tuple ``(use_sfcs_sdk, path)``. For ``sfcs://...`` the scheme is
        stripped down to a leading ``/`` and the SFCS SDK is selected;
        ``/dev/shm/...`` always uses plain POSIX I/O; otherwise the
        ``VETURBOIO_USE_SFCS_SDK`` environment variable decides.
    """
    # The len() guards preserve the original behavior for the bare prefixes
    # "sfcs://" and "/dev/shm/" themselves (they fall through to the env check
    # / default branch).
    if file.startswith("sfcs://") and len(file) > 7:
        # Drop "sfcs:/" so the remainder keeps its leading "/".
        return True, file[6:]
    if file.startswith("/dev/shm/") and len(file) > 9:
        # Shared-memory files are read directly via POSIX.
        return False, file
    if os.environ.get("VETURBOIO_USE_SFCS_SDK", "0") == "1":
        return True, file
    return False, file
def load(
    file: FILE_PATH,
    map_location: Optional[str] = "cpu",
    enable_fast_mode: Optional[bool] = True,
    num_thread: Optional[int] = 32,
    helper: Optional[IOHelper] = None,
    use_pinmem: Optional[bool] = False,
    use_direct_io: Optional[bool] = False,
    use_cipher: Optional[bool] = None,
    state_dict: Dict[str, torch.Tensor] = None,
) -> Dict:
    """Load state dict object from checkpoint file. The file can be both safetensors file and pytorch file.
    If the file is safetensors file, it will be loaded by veturboio and the loading speed will be accelerated.

    Args:
        file (FILE_PATH): file path
        map_location (str, optional): map location. Defaults to "cpu".
        enable_fast_mode (bool, optional): enable fast mode. Defaults to True.
        num_thread (int, optional): number of threads. Defaults to 32.
        helper (IOHelper, optional): use IOHelper. Defaults to None.
        use_pinmem (bool, optional): use pin memory. Defaults to False.
        use_direct_io (bool, optional): open file in direct io mode. Defaults to False.
        use_cipher (bool, optional): decrypt file. Defaults to None. Note: cipher is
            disabled by force when use_cipher set to False. Otherwise, when use_cipher
            set to True or environ VETURBOIO_USE_CIPHER set to '1', cipher is enabled.
        state_dict (Dict): pre allocated state dict. Defaults to None.

    Returns:
        state_dict (Dict): state dict

    Examples:
        ```
        import veturboio
        state_dict = veturboio.load("model.safetensors")
        ```
    """

    if IOHelper is None:
        # Native extension unavailable: fall back to the plain POSIX loader.
        enable_fast_mode = False
    elif helper is None:
        helper = IOHelper()

    use_sfcs_sdk, file = is_sfcs_path(file)
    if not enable_fast_mode:
        loader = PosixLoader(file)
    elif use_sfcs_sdk:
        loader = SfcsClientLoader(
            helper=helper,
            file=file,
            num_thread=num_thread,
            use_pinmem=use_pinmem,
            use_direct_io=use_direct_io,
        )
    else:
        loader = FasterPosixLoader(
            file,
            helper,
            num_thread=num_thread,
            use_pinmem=use_pinmem,
            use_direct_io=use_direct_io,
        )

    safetensors_file = SafetensorsFile(file, loader, use_cipher)
    return safetensors_file.load(map_location=map_location, state_dict=state_dict)
def load_to_shmem(
    file: FILE_PATH,
    num_thread: Optional[int] = 32,
    helper: Optional[IOHelper] = None,
    use_direct_io: Optional[bool] = False,
    use_cipher: Optional[bool] = None,
) -> shared_memory.SharedMemory:
    """Read a checkpoint file into a POSIX shared-memory segment.

    Args:
        file (FILE_PATH): file path
        num_thread (int, optional): number of threads. Defaults to 32.
        helper (IOHelper, optional): use IOHelper. Defaults to None.
        use_direct_io (bool, optional): open file in direct io mode. Defaults to False.
        use_cipher (bool, optional): decrypt file. Defaults to None. Note: cipher is
            disabled by force when use_cipher set to False. Otherwise, when use_cipher
            set to True or environ VETURBOIO_USE_CIPHER set to '1', cipher is enabled.

    Returns:
        shmem (shared_memory.SharedMemory): shared memory object.

    Examples:
        ```
        import veturboio
        shmem_file = veturboio.load_to_shmem("sfcs://model.safetensors")
        ```
    """

    helper = IOHelper() if helper is None else helper

    use_sfcs_sdk, file = is_sfcs_path(file)
    if use_sfcs_sdk:
        # SFCS SDK path; direct I/O does not apply here.
        loader = SfcsClientLoader(
            helper=helper,
            file=file,
            num_thread=num_thread,
        )
    else:
        loader = FasterPosixLoader(
            file,
            helper,
            num_thread=num_thread,
            use_direct_io=use_direct_io,
        )

    return SafetensorsFile(file, loader, use_cipher).load_to_shmem()
def save_file(
    state_dict: Dict[str, torch.Tensor],
    file: FILE_PATH,
    force_contiguous: bool = True,
    force_save_shared_tensor: bool = False,
    force_clone_shared_tensor: bool = False,
    metadata: Dict[str, str] = None,
    use_cipher: Optional[bool] = False,
    helper: Optional[IOHelper] = None,
    enable_fast_mode: Optional[bool] = False,
) -> None:
    """Save state dict object to safetensors file.

    Args:
        state_dict (Dict): state dict
        file (FILE_PATH): file path
        force_contiguous (bool, optional): force contiguous. Defaults to True.
        force_save_shared_tensor (bool, optional): force save shared tensor. Defaults to False.
        force_clone_shared_tensor (bool, optional): force to clone shared tensor rather than delete
            when force_save_shared_tensor is enabled. Defaults to False.
        metadata (Dict[str, str], optional): metadata. Defaults to None.
        use_cipher (bool, optional): decrypt file. Defaults to False.
        helper (IOHelper, optional): use IOHelper. Defaults to None.
        enable_fast_mode (bool, optional): enable fast mode. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio

        state_dict = {"weight": torch.randn(10, 10)}
        veturboio.save_file(state_dict, "model.safetensors")
        ```
    """
    if helper is None:
        helper = IOHelper()

    use_sfcs_sdk, file = is_sfcs_path(file)
    if use_sfcs_sdk:
        saver = SfcsClientSaver(file=file, use_cipher=use_cipher, helper=helper)
    else:
        saver = PosixSaver(file=file, use_cipher=use_cipher, helper=helper)

    # TODO: there are some bugs while state_dict is loaded from veturboio
    if not force_save_shared_tensor:
        if force_clone_shared_tensor:
            logger.warning("force_clone_shared_tensor won't take any effect while force_save_shared_tensor is False;")
        # Previously a ValueError raised by the saver was caught and re-raised
        # as a bare ValueError(str(e)), discarding the subclass and chaining;
        # callers catching ValueError still work if it propagates unchanged.
        return saver.save_file(state_dict, metadata=metadata, enable_fast_mode=enable_fast_mode)

    # Deduplicate tensors that share storage; record the kept name in metadata
    # so the mapping survives the round-trip.
    to_removes = _remove_duplicate_names(state_dict)

    for kept_name, to_remove_group in to_removes.items():
        for to_remove in to_remove_group:
            if metadata is None:
                metadata = {}

            if to_remove not in metadata:
                # Do not override user data
                metadata[to_remove] = kept_name
            if force_clone_shared_tensor:
                # Keep the caller's dict intact by materializing a private copy.
                state_dict[to_remove] = state_dict[to_remove].clone()
            else:
                del state_dict[to_remove]
    if force_contiguous:
        state_dict = {k: v.contiguous() for k, v in state_dict.items()}

    return saver.save_file(state_dict, metadata=metadata, enable_fast_mode=enable_fast_mode)
def save_model(model: torch.nn.Module, file: FILE_PATH, use_cipher: Optional[bool] = False) -> None:
    """Save model state dict to safetensors file.

    Args:
        model (torch.nn.Module): model
        file (FILE_PATH): file path
        use_cipher (bool, optional): decrypt file. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio

        model = torch.nn.Linear(10, 10)
        veturboio.save_model(model, "model.safetensors")
        ```
    """

    use_sfcs_sdk, file = is_sfcs_path(file)
    # Pick the saver implementation that matches the path scheme.
    saver_cls = SfcsClientSaver if use_sfcs_sdk else PosixSaver
    return saver_cls(file=file, use_cipher=use_cipher).save_model(model)


def save_pt(state_dict: Dict[str, torch.Tensor], file: FILE_PATH, use_cipher: Optional[bool] = False) -> None:
    """Save state dict object to pytorch file.

    Args:
        state_dict (Dict): state dict
        file (FILE_PATH): file path
        use_cipher (bool, optional): encrypt file. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio

        state_dict = {"weight": torch.randn(10, 10)}
        veturboio.save_pt(state_dict, "model.pt")
        ```
    """
    use_sfcs_sdk, file = is_sfcs_path(file)
    saver_cls = SfcsClientSaver if use_sfcs_sdk else PosixSaver
    return saver_cls(file=file, use_cipher=use_cipher).save_pt(state_dict)
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | from veturboio.loader.base_loader import BaseLoader, PosixLoader 18 | from veturboio.loader.faster_posix_loader import FasterPosixLoader 19 | from veturboio.loader.sfcs_client_loader import SfcsClientLoader 20 | 21 | __all__ = ["BaseLoader", "PosixLoader", "FasterPosixLoader", "SfcsClientLoader"] 22 | -------------------------------------------------------------------------------- /veturboio/loader/base_loader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
# Magic number used to identify safetensors files (read elsewhere in the package).
SAFETENSORS_FILE_MAGIC_NUM = 123
# Alignment granularity for O_DIRECT-style buffers.
BUF_ALIGN_SIZE = 4096


class BaseLoader:
    """Abstract loader: concrete subclasses implement a specific I/O backend."""

    def __init__(self, method: str) -> None:
        # Backend identifier, e.g. "posix" or "client".
        self.method = method

    def load_to_bytes(self, offset: int, count: int, cipher_info: CipherInfo = CipherInfo(False)) -> bytes:
        """Read `count` raw bytes starting at `offset`; subclasses must override."""
        raise NotImplementedError

    def load_safetensors(
        self,
        safetensors_file: Any,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Materialize all tensors of a safetensors file; subclasses must override."""
        raise NotImplementedError

    def init_aligned_tensor(self, device, device_id: int, file_size, base_offset: int) -> torch.Tensor:
        """Allocate a flat uint8 tensor of size (file_size - base_offset).

        On CPU (device_id == -1) the buffer start is chosen so that its address
        is congruent to base_offset modulo BUF_ALIGN_SIZE, which aligned/direct
        reads require.
        """
        if device_id != -1:
            # CUDA path. (A previous version caught RuntimeError here only to
            # re-raise RuntimeError(str(e)), which dropped the original
            # traceback chaining and subclass — let it propagate as-is.)
            total_tensor = torch.empty(file_size - base_offset, dtype=torch.uint8, device=device)
        else:
            # Over-allocate by one alignment unit, then slice so that
            # (buffer_address + align) % BUF_ALIGN_SIZE == base_offset % BUF_ALIGN_SIZE.
            array = np.empty(file_size - base_offset + BUF_ALIGN_SIZE, dtype=np.uint8)
            offset1 = array.ctypes.data % BUF_ALIGN_SIZE
            offset2 = base_offset % BUF_ALIGN_SIZE
            if offset1 > offset2:
                align = BUF_ALIGN_SIZE - offset1 + offset2
            else:
                align = offset2 - offset1

            sub_array = array[align : align + file_size - base_offset].view(dtype=np.uint8)
            # torch shares memory with the numpy slice; `array` stays alive
            # through the tensor's base reference.
            total_tensor = torch.from_numpy(sub_array)
        return total_tensor
class PosixLoader(BaseLoader):
    """Loader that reads checkpoint files with plain POSIX/numpy file I/O."""

    def __init__(self, file: FILE_PATH) -> None:
        super().__init__(method="posix")
        # Path of the checkpoint file on a locally mounted filesystem.
        self.file = file

    def load_to_bytes(self, offset: int, count: int, cipher_info: CipherInfo = CipherInfo(False)) -> bytes:
        """Read `count` bytes at absolute file `offset`, decrypting in place if enabled."""
        arr = np.fromfile(self.file, dtype=np.uint8, offset=offset, count=count)
        if cipher_info.use_cipher:
            # The cipher counter is relative to the end of the optional cipher
            # header, hence the offset correction.
            h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
            decrypt(cipher_info, arr, arr, offset - h_off)
        return arr.tobytes()

    def load_safetensors(
        self,
        safetensors_file: Any,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Map each tensor region of the file and build (or fill) a state dict.

        Tensors are memory-mapped copy-on-write, optionally decrypted in the
        mapped pages, then viewed with their recorded dtype/shape.
        """
        if not state_dict:
            state_dict = {}

        # All data offsets in the metadata are relative to this base.
        base_offset = safetensors_file.tensor_offset
        device = torch.device(map_location)

        cipher_info = safetensors_file._cipher_info
        for tensor_meta in safetensors_file.meta.values():
            # mode="c" (copy-on-write) keeps the on-disk file unmodified even
            # though decrypt() writes into the mapped buffer.
            tensor_bytes = np.memmap(
                safetensors_file.file,
                dtype=np.uint8,
                mode="c",
                offset=base_offset + tensor_meta.data_offsets[0],
                shape=tensor_meta.data_offsets[1] - tensor_meta.data_offsets[0],
            )
            if cipher_info.use_cipher:
                h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
                decrypt(cipher_info, tensor_bytes, tensor_bytes, base_offset + tensor_meta.data_offsets[0] - h_off)
            tensor = torch.frombuffer(tensor_bytes, dtype=tensor_meta.dtype)
            tensor = tensor.view(tensor_meta.shape)
            if device.type == "cuda":
                # Stage through pinned memory for an async host-to-device copy.
                state_dict[tensor_meta.name] = tensor.pin_memory().to(device=device, non_blocking=True)
            else:
                state_dict[tensor_meta.name] = tensor

        return state_dict

    def load_pt(
        self, map_location: str = "cpu", cipher_info: CipherInfo = CipherInfo(False)
    ) -> Dict[str, torch.Tensor]:
        """Load a pickled torch checkpoint, decrypting the whole file first if needed."""
        if cipher_info.use_cipher:
            h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
            arr = np.fromfile(self.file, dtype=np.uint8, offset=h_off, count=-1)
            decrypt(cipher_info, arr, arr, 0)
            return torch.load(io.BytesIO(arr.data), map_location=map_location)

        return torch.load(self.file, map_location=map_location)
class FasterPosixLoader(PosixLoader):
    """POSIX loader accelerated by the native IOHelper extension (multi-threaded
    reads, optional pinned memory and direct I/O)."""

    def __init__(
        self,
        file: FILE_PATH,
        helper: IOHelper,
        num_thread: int = 32,
        use_pinmem: bool = False,
        use_direct_io: bool = False,
    ) -> None:
        super().__init__(file)
        self.helper = helper
        self.num_thread = num_thread
        self.use_pinmem = use_pinmem
        self.use_direct_io = use_direct_io

    def load_safetensors(
        self,
        safetensors_file: SafetensorsFile,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Load tensors via the native helper.

        With a pre-allocated `state_dict`, each tensor is read directly into
        its destination buffer; otherwise one flat aligned tensor is filled and
        then split according to the file metadata.
        """
        file_size = os.path.getsize(safetensors_file.file)
        base_offset = safetensors_file.tensor_offset
        device = torch.device(map_location)
        if device.type == "cuda":
            device_id = device.index if device.index is not None else torch.cuda.current_device()
        else:
            device_id = -1

        if state_dict:
            for tensor_meta in safetensors_file._meta.values():
                tensor = state_dict[tensor_meta.name]
                # The native reader writes into the raw buffer, so the
                # destination must be contiguous, type-matched and big enough.
                if not tensor.is_contiguous():
                    raise RuntimeError("allocated tensor not contiguous")
                if not tensor.dtype == tensor_meta.dtype:
                    raise RuntimeError("allocated tensor dtype not match")

                offset = tensor_meta.data_offsets[0]
                length = tensor_meta.data_offsets[1] - tensor_meta.data_offsets[0]
                tensor_length = torch.numel(tensor) * tensor.element_size()
                if tensor_length < length:
                    raise RuntimeError("allocated tensor size not enough")

                load_file_to_tensor(
                    file_path=safetensors_file.file,
                    total_tensor=tensor,
                    length=length,
                    offset=base_offset + offset,
                    helper=self.helper,
                    device_id=device_id,
                    num_thread=self.num_thread,
                    use_pinmem=self.use_pinmem,
                    use_sfcs_sdk=False,
                    use_direct_io=self.use_direct_io,
                    cipher_info=safetensors_file._cipher_info,
                )
                tensor = tensor.resize_(tensor_meta.shape)
                state_dict[tensor_meta.name] = tensor
            return state_dict
        else:
            total_tensor = self.init_aligned_tensor(device, device_id, file_size, base_offset)
            load_file_to_tensor(
                file_path=safetensors_file.file,
                total_tensor=total_tensor,
                offset=base_offset,
                helper=self.helper,
                device_id=device_id,
                num_thread=self.num_thread,
                use_pinmem=self.use_pinmem,
                use_sfcs_sdk=False,
                use_direct_io=self.use_direct_io,
                cipher_info=safetensors_file._cipher_info,
            )

            return SafetensorsFile.split_tensor_to_state_dict(total_tensor, safetensors_file)

    def load_to_shmem(self, cipher_info: CipherInfo = CipherInfo(False)) -> shared_memory.SharedMemory:
        """Read the whole file into a freshly created shared-memory segment.

        The caller owns the returned segment and must close/unlink it.
        """
        file_size = os.path.getsize(self.file)
        # Random 10-letter segment name; collisions would raise FileExistsError.
        file_name = ''.join(random.sample(string.ascii_lowercase + string.ascii_uppercase, 10))
        shm = shared_memory.SharedMemory(name=file_name, create=True, size=file_size)

        # Skip the cipher header (if any); its size worth of trailing bytes in
        # the segment stays unused.
        h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
        candidate = np.frombuffer(shm.buf, dtype=np.byte)
        posix_read_file(
            self.file,
            candidate,
            length=file_size - h_off,
            offset=h_off,
            num_thread=self.num_thread,
            cipher_info=cipher_info,
        use_direct_io=self.use_direct_io,
        )
        return shm

    # NOTE: a load_pt() override used to live here, but it was byte-for-byte
    # identical to the inherited PosixLoader.load_pt, so it was removed.
class SfcsClientLoader(BaseLoader):
    """Loader that reads checkpoint files through the SFCS client SDK."""

    def __init__(
        self,
        file: FILE_PATH,
        helper: IOHelper,
        num_thread: int = 32,
        use_pinmem: bool = False,
        use_direct_io: bool = False,
    ) -> None:
        super().__init__(method="client")

        self.file = file
        self.helper = helper
        self.num_thread = num_thread
        self.use_pinmem = use_pinmem
        self.use_direct_io = use_direct_io
        # Resolve the SFCS mount point once and map the user path into it.
        self._mount_path = init_sfcs_conf(file)
        self._sfcs_valid_path = path_mapper(self.file, self._mount_path)

    def load_to_bytes(self, offset: int, count: int, cipher_info: CipherInfo = CipherInfo(False)) -> bytes:
        """Read up to `count` bytes at `offset` (clamped to the file size)."""
        file_size = sfcs_get_file_size(self._sfcs_valid_path)
        if offset + count > file_size:
            count = file_size - offset

        # Read into a mutable bytearray. The previous implementation wrapped an
        # immutable `bytes` object with np.frombuffer (a read-only view) and
        # wrote through it anyway, which relies on CPython implementation
        # details; a bytearray gives a legitimately writable buffer.
        buf = bytearray(count)
        candidate = np.frombuffer(buf, dtype=np.byte)
        sfcs_read_file(
            self._sfcs_valid_path,
            candidate,
            length=count,
            offset=offset,
            num_thread=self.num_thread,
            cipher_info=cipher_info,
        )
        return bytes(buf)

    def load_to_shmem(self, cipher_info: CipherInfo = CipherInfo(False)) -> shared_memory.SharedMemory:
        """Read the whole file into a freshly created shared-memory segment."""
        file_size = sfcs_get_file_size(self._sfcs_valid_path)
        # Random 10-letter segment name; collisions would raise FileExistsError.
        file_name = ''.join(random.sample(string.ascii_lowercase + string.ascii_uppercase, 10))
        shm = shared_memory.SharedMemory(name=file_name, create=True, size=file_size)

        # Skip the cipher header (if any); that many trailing bytes in the
        # segment stay unused.
        h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
        candidate = np.frombuffer(shm.buf, dtype=np.byte)
        sfcs_read_file(
            self._sfcs_valid_path,
            candidate,
            length=file_size - h_off,
            offset=h_off,
            num_thread=self.num_thread,
            cipher_info=cipher_info,
        )
        return shm

    def load_safetensors(
        self,
        safetensors_file: SafetensorsFile,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Load tensors via the native helper using the SFCS SDK backend.

        With a pre-allocated `state_dict`, each tensor is read directly into
        its destination buffer; otherwise one flat aligned tensor is filled and
        then split according to the file metadata.
        """
        # TODO should be the same as self.loader
        sfcs_valid_path = path_mapper(safetensors_file.file, self._mount_path)
        file_size = sfcs_get_file_size(sfcs_valid_path)
        base_offset = safetensors_file.tensor_offset
        device = torch.device(map_location)
        if device.type == "cuda":
            device_id = device.index if device.index is not None else torch.cuda.current_device()
        else:
            device_id = -1

        if state_dict:
            for tensor_meta in safetensors_file._meta.values():
                tensor = state_dict[tensor_meta.name]
                # The native reader writes into the raw buffer, so the
                # destination must be contiguous, type-matched and big enough.
                if not tensor.is_contiguous():
                    raise RuntimeError("allocated tensor not contiguous")
                if not tensor.dtype == tensor_meta.dtype:
                    raise RuntimeError("allocated tensor dtype not match")

                offset = tensor_meta.data_offsets[0]
                length = tensor_meta.data_offsets[1] - tensor_meta.data_offsets[0]
                tensor_length = torch.numel(tensor) * tensor.element_size()
                if tensor_length < length:
                    raise RuntimeError("allocated tensor size not enough")

                load_file_to_tensor(
                    file_path=sfcs_valid_path,
                    total_tensor=tensor,
                    length=length,
                    offset=base_offset + offset,
                    helper=self.helper,
                    device_id=device_id,
                    num_thread=self.num_thread,
                    use_pinmem=self.use_pinmem,
                    use_sfcs_sdk=True,
                    use_direct_io=self.use_direct_io,
                    cipher_info=safetensors_file._cipher_info,
                )
                tensor = tensor.resize_(tensor_meta.shape)
                state_dict[tensor_meta.name] = tensor
            return state_dict
        else:
            total_tensor = self.init_aligned_tensor(device, device_id, file_size, base_offset)
            load_file_to_tensor(
                file_path=sfcs_valid_path,
                total_tensor=total_tensor,
                offset=base_offset,
                helper=self.helper,
                device_id=device_id,
                num_thread=self.num_thread,
                use_pinmem=self.use_pinmem,
                use_sfcs_sdk=True,
                use_direct_io=self.use_direct_io,
                cipher_info=safetensors_file._cipher_info,
            )

            return SafetensorsFile.split_tensor_to_state_dict(total_tensor, safetensors_file)

    def load_pt(
        self, map_location: str = "cpu", cipher_info: CipherInfo = CipherInfo(False)
    ) -> Dict[str, torch.Tensor]:
        """Load a pickled torch checkpoint from SFCS, decrypting if needed."""
        file_size = sfcs_get_file_size(self._sfcs_valid_path)
        h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
        file_bytes = self.load_to_bytes(offset=h_off, count=file_size - h_off, cipher_info=cipher_info)
        return torch.load(BytesIO(file_bytes), map_location=map_location)
"/root/.volc/SFCSConfiguration.json" 7 | 8 | RDMA_NIC_ENV = "MLP_RDMA_NIC_NAMES" 9 | DEFAULT_NIC_NAME = "eth0" 10 | RDMA_SEGMENT_ENV = "MLP_RDMA_NETWORK_SEGMENT" 11 | DEFAULT_CREDENTIAL_PATH_ENV = "CREDENTIAL_PATH" 12 | DEFAULT_CREDENTIAL_PATH = "/mlplatform/.credential/" 13 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/cipher.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include 17 | #include 18 | #include "include/cipher.h" 19 | #include 20 | 21 | CipherInfo::CipherInfo(bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr, 22 | size_t header_size) 23 | : use_cipher(use_cipher), header_size(header_size) 24 | { 25 | if (use_cipher) 26 | { 27 | pybind11::buffer_info key_info = key_arr.request(); 28 | size_t key_size = key_info.size; 29 | if (key_size == 16) 30 | { 31 | mode = "CTR-128"; 32 | } 33 | else if (key_size == 32) 34 | { 35 | mode = "CTR-256"; 36 | } 37 | else 38 | { 39 | throw std::runtime_error("Cipher Exception: key length invalid"); 40 | } 41 | key = reinterpret_cast(key_info.ptr); 42 | 43 | pybind11::buffer_info iv_info = iv_arr.request(); 44 | if ((size_t)iv_info.size != AES_BLOCK_SIZE) 45 | { 46 | throw std::runtime_error("Cipher Exception: iv length invalid"); 47 | } 48 | iv = reinterpret_cast(iv_info.ptr); 49 | } 50 | } 51 | 52 | CtrEncWrap::CtrEncWrap(std::string mode, pybind11::array_t key_arr, 53 | pybind11::array_t iv_arr, size_t global_offset) 54 | { 55 | pybind11::buffer_info key_info = key_arr.request(); 56 | pybind11::buffer_info iv_info = iv_arr.request(); 57 | enc_.reset(new CtrEncrypter(mode, (unsigned char *)key_info.ptr, (unsigned char *)iv_info.ptr, global_offset)); 58 | } 59 | 60 | size_t CtrEncWrap::encrypt_update(pybind11::array_t pt, pybind11::array_t ct) 61 | { 62 | pybind11::buffer_info pt_info = pt.request(); 63 | pybind11::buffer_info ct_info = ct.request(); 64 | unsigned char *pt_ptr = (unsigned char *)pt_info.ptr; 65 | unsigned char *ct_ptr = (unsigned char *)ct_info.ptr; 66 | return enc_->encrypt_update(pt_ptr, pt_info.size, ct_ptr); 67 | } 68 | 69 | CtrDecWrap::CtrDecWrap(std::string mode, pybind11::array_t key_arr, 70 | pybind11::array_t iv_arr, size_t global_offset) 71 | { 72 | pybind11::buffer_info key_info = key_arr.request(); 73 | pybind11::buffer_info iv_info = iv_arr.request(); 74 | dec_.reset(new CtrDecrypter(mode, (unsigned char *)key_info.ptr, 
#ifndef _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_
#define _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_

/* NOTE(review): the include target was stripped by text extraction; restored
 * as <stdint.h> per the original trailing comment "for uint64_t, etc." */
#include <stdint.h> /* for uint64_t, etc. */

#ifdef __cplusplus
extern "C"
{
#endif
    /**
     * Some utility decls used in libcfs.
     */
    typedef int32_t tSize; /// size of data for read/write io ops
    typedef int64_t tOffset; /// offset within the file

    /* Opaque handles: definitions live inside the client library. */
    struct CfsFileSystemInternalWrapper;
    typedef struct CfsFileSystemInternalWrapper *cfsFS;

    struct CfsFileInternalWrapper;
    typedef struct CfsFileInternalWrapper *cfsFile;

    typedef enum cfsStatus
    {
        STATUS_OK = 0,
        STATUS_MISSING_BLOCK = -1002,
        STATUS_TIMEOUT = -1003,
        STATUS_INVALID_RANGE = -1004,
        STATUS_CONNECTION_CLOSED = -1005,
        STATUS_WRITE_FAILED = -1006,
        STATUS_IO_BUSY = -1007,
        STATUS_INVALID_PARAMETER = -1098,
        STATUS_UNSUPPORTED_OP = -1099,
        STATUS_UNKNOWN_ERR = -1100,
    } cfsStatus;

    typedef void (*cfsWriteCallback)(cfsStatus status, void *args);

    typedef void (*cfsReadCallback)(cfsStatus status, int32_t readLength, char *buffer, void *args);

    /* Per-request context: exactly one callback fires on completion. */
    typedef struct cfsAsyncContext
    {
        cfsReadCallback readCallback;
        cfsWriteCallback writeCallback;
        char *buffer;
        void *args;
    } cfsAsyncContext;

    /**
     * cfsAsyncPRead - Async positional read of data from an open file.
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param offset Position from which to read.
     * @param length The length of the buffer.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncPRead(cfsFS fs, cfsFile file, tSize length, tOffset offset, cfsAsyncContext *context);

    /**
     * cfsAsyncWrite - Write data to the internal buffer of outputstream,
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param buffer The buffer to copy write bytes into.
     * @param length The length of the buffer.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncWrite(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context);

    /**
     * cfsAsyncFlush - Wait for data is acked by remote dn.
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncFlush(cfsFS fs, cfsFile file, cfsAsyncContext *context);

    /**
     * cfsAsyncWriteAndFlush - Write data to remote datanode and wait for ack.
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param buffer The buffer to copy write bytes into.
     * @param length The length of the buffer.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncWriteAndFlush(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context);

#ifdef __cplusplus
}
#endif

#endif /* _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_ */
84 | */ 85 | cfsStatus cfsAsyncWrite(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context); 86 | 87 | /** 88 | * cfsAsyncFlush - Wait for data is acked by remote dn. 89 | * 90 | * @param fs The configured filesystem handle. 91 | * @param file The file handle. 92 | * @param context The callback context passed by user. 93 | * @return Status of Async method. 94 | */ 95 | cfsStatus cfsAsyncFlush(cfsFS fs, cfsFile file, cfsAsyncContext *context); 96 | 97 | /** 98 | * cfsAsyncWriteAndFlush - Write data to remote datanode and wait for ack. 99 | * 100 | * @param fs The configured filesystem handle. 101 | * @param file The file handle. 102 | * @param buffer The buffer to copy write bytes into. 103 | * @param length The length of the buffer. 104 | * @param context The callback context passed by user. 105 | * @return Status of Async method. 106 | */ 107 | cfsStatus cfsAsyncWriteAndFlush(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context); 108 | 109 | #ifdef __cplusplus 110 | } 111 | #endif 112 | 113 | #endif /* _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_ */ 114 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/cipher.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #ifndef VETURBOIO_CIPHER_H 17 | #define VETURBOIO_CIPHER_H 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "fastcrypto.h" 24 | 25 | class CipherInfo 26 | { 27 | public: 28 | bool use_cipher = false; 29 | std::string mode = "CTR-128"; 30 | size_t header_size = 0; 31 | unsigned char *key = NULL; 32 | unsigned char *iv = NULL; 33 | CipherInfo(bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr, size_t header_size); 34 | CipherInfo() = default; 35 | }; 36 | 37 | class CtrEncWrap 38 | { 39 | private: 40 | std::unique_ptr enc_; 41 | 42 | public: 43 | CtrEncWrap() = default; 44 | CtrEncWrap(std::string mode, pybind11::array_t key_arr, pybind11::array_t iv_arr, 45 | size_t global_offset); 46 | size_t encrypt_update(pybind11::array_t pt, pybind11::array_t ct); 47 | }; 48 | 49 | class CtrDecWrap 50 | { 51 | private: 52 | std::unique_ptr dec_; 53 | 54 | public: 55 | CtrDecWrap() = default; 56 | CtrDecWrap(std::string mode, pybind11::array_t key_arr, pybind11::array_t iv_arr, 57 | size_t global_offset); 58 | size_t decrypt_update(pybind11::array_t ct, pybind11::array_t pt); 59 | }; 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef COMMON_H 17 | #define COMMON_H 18 | 19 | #include 20 | #include 21 | #if defined(USE_CUDA) 22 | #include 23 | #endif 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "cfs.h" 31 | #include "logging.h" 32 | #include "sfcs.h" 33 | 34 | #define THREAD_NICE_ADJ -10 35 | #define BUF_ALIGN_SIZE (size_t)4096 36 | 37 | using namespace std; 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/fastcrypto.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #ifndef VETURBOIO_FASTCRYPTO_H 17 | #define VETURBOIO_FASTCRYPTO_H 18 | 19 | #include 20 | #include 21 | 22 | #define EVP_UPDATE_MAX 0x7ffffff0 23 | #define AES_BLOCK_SIZE 16 24 | #define AES_BUF_MAX_SIZE 32 25 | #define MAX_CTR_KEY_SIZE 32 26 | #define FASTCRYPTO_MAGIC_SIZE 16 27 | 28 | inline void counter_inc_by(unsigned char *counter, size_t n, size_t c) 29 | { 30 | do 31 | { 32 | --n; 33 | c += counter[n]; 34 | counter[n] = static_cast(c); 35 | c >>= 8; 36 | } while (n); 37 | } 38 | 39 | typedef struct evp_cipher_ctx_st EVP_CIPHER_CTX; 40 | typedef struct evp_cipher_st EVP_CIPHER; 41 | typedef struct evp_mac_ctx_st EVP_MAC_CTX; 42 | typedef struct evp_mac_st EVP_MAC; 43 | 44 | class CtrEncrypter 45 | { 46 | private: 47 | EVP_CIPHER_CTX *ctx = NULL; 48 | EVP_CIPHER *cipher = NULL; 49 | 50 | public: 51 | CtrEncrypter() = default; 52 | CtrEncrypter(std::string algo, const unsigned char *key, const unsigned char *iv, size_t global_offset); 53 | ~CtrEncrypter(); 54 | int encrypt_update(unsigned char *pt, size_t pt_size, unsigned char *ct); 55 | }; 56 | 57 | class CtrDecrypter 58 | { 59 | private: 60 | EVP_CIPHER_CTX *ctx = NULL; 61 | EVP_CIPHER *cipher = NULL; 62 | 63 | public: 64 | CtrDecrypter() = default; 65 | CtrDecrypter(std::string algo, const unsigned char *key, const unsigned char *iv, size_t global_offset); 66 | ~CtrDecrypter(); 67 | int decrypt_update(unsigned char *ct, size_t ct_size, unsigned char *pt); 68 | }; 69 | 70 | // Both encrypt and decrypt require length of ct and pt multiple of 16 71 | int ctr_encrypt_gpu(std::string algo, const unsigned char *key, const unsigned char *iv, unsigned char *pt, 72 | size_t pt_size, unsigned char *ct); 73 | 74 | int ctr_decrypt_gpu(std::string algo, const unsigned char *key, const unsigned char *iv, unsigned char *ct, 75 | size_t ct_size, unsigned char *pt); 76 | #endif -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/io_helper.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef IO_HELPER_H 17 | #define IO_HELPER_H 18 | 19 | #include "posix.h" 20 | #include "sfcs.h" 21 | 22 | class IOHelper 23 | { 24 | private: 25 | char *pin_mem = NULL; 26 | bool use_pinmem_ = false; 27 | size_t buffer_size_ = 0; 28 | 29 | public: 30 | ~IOHelper(); 31 | void load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 32 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 33 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 34 | pybind11::array_t iv_arr, int64_t header_size); 35 | void save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 36 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 37 | pybind11::array_t iv_arr, int64_t header_size); 38 | void save_tensor_to_file_cpu(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 39 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 40 | pybind11::array_t iv_arr, int64_t header_size); 41 | void init_buffer(string file_path, int64_t file_size, bool use_pinmem, bool use_sfcs_sdk); 42 | void free_buffer(); 43 | }; 44 | 45 | size_t get_file_size(const char *file_name, bool 
use_sfcs_sdk); 46 | 47 | void read_file(string file_path, char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size, 48 | size_t global_offset, bool use_sfcs_sdk, bool use_direct_io, CipherInfo cipher_info); 49 | 50 | void load_file_to_tensor_cpu(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 51 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 52 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 53 | pybind11::array_t iv_arr, int64_t header_size); 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/logging.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef LOGGER_H 17 | #define LOGGER_H 18 | 19 | #include 20 | using namespace std; 21 | 22 | #define PR std::cout 23 | #define ENDL std::endl 24 | #define FILE_INFO "[" << __FUNCTION__ << " at " << __FILE__ << ":" << __LINE__ << "] " 25 | 26 | #define ARG_COUNT_PRIVATE(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...) N 27 | #define ARG_COUNT(...) 
ARG_COUNT_PRIVATE(0, __VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) 28 | 29 | #define FUN_COUNT_GLUE(M, count) M##count 30 | #define FUN_JOIN_COUNT(M, count) FUN_COUNT_GLUE(M, count) 31 | #define FUN_JOIN_ARGS(x, y) x y 32 | #define CallSomeOne(fn, ...) FUN_JOIN_ARGS(FUN_JOIN_COUNT(fn, ARG_COUNT(__VA_ARGS__)), (__VA_ARGS__)) 33 | 34 | #define param1(a) a 35 | #define param2(a, b) a << ", " #b ":" << b 36 | #define param3(a, b, c) a << ", " #b ":" << b << ", " #c ":" << c 37 | #define param4(a, b, c, d) a << ", " #b ":" << b << ", " #c ":" << c << ", " #d ":" << d 38 | #define param5(a, b, c, d, e) a << ", " #b ":" << b << ", " #c ":" << c << ", " #d ":" << d << ", " #e ":" << e 39 | 40 | #define pr1(...) param1(__VA_ARGS__) 41 | #define pr2(...) param2(__VA_ARGS__) 42 | #define pr3(...) param3(__VA_ARGS__) 43 | #define pr4(...) param4(__VA_ARGS__) 44 | #define pr5(...) param5(__VA_ARGS__) 45 | 46 | #define logDebug(...) PR << "VETURBOIO_CPP_DEBUG " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL 47 | #define logInfo(...) PR << "VETURBOIO_CPP_INFO " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL 48 | #define logWarn(...) PR << "VETURBOIO_CPP_WARN " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL 49 | #define logError(...) PR << "VETURBOIO_CPP_ERROR " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL 50 | #endif // LOGGER_H -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/posix.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef LOAD_UTILS_H 17 | #define LOAD_UTILS_H 18 | 19 | #include "common.h" 20 | #include "cipher.h" 21 | 22 | class POSIXFile 23 | { 24 | public: 25 | std::string file_path; 26 | // cipher related 27 | CipherInfo cipher_info; 28 | 29 | POSIXFile(std::string file_path); 30 | POSIXFile(std::string file_path, CipherInfo cipher_info); 31 | POSIXFile(std::string file_path, bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr, 32 | size_t header_size); 33 | 34 | size_t read_file_to_address_parallel(char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size, 35 | size_t global_offset, bool use_direct_io); 36 | size_t read_file_to_array(pybind11::array_t arr, size_t length, size_t offset, int num_thread, 37 | bool use_direct_io); 38 | size_t write_file_from_addr(char *addr, size_t length, bool append); 39 | 40 | private: 41 | void read_file_to_address_thread(int thread_id, char *addr, int device_id, char *dev_mem, size_t block_size, 42 | size_t total_size, size_t global_offset, bool use_direct_io, 43 | CipherInfo cipher_info); 44 | }; 45 | 46 | #endif -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/sfcs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef SFCS_H 18 | #define SFCS_H 19 | 20 | #include 21 | #include 22 | #include "common.h" 23 | #include "cfs.h" 24 | #include "logging.h" 25 | #include "cipher.h" 26 | 27 | #define SFCS_NAME_NODE "default" 28 | #define SFCS_USER_NAME "demo-user" 29 | 30 | using namespace std; 31 | 32 | class SFCSFs 33 | { 34 | public: 35 | cfsFS fs; 36 | 37 | SFCSFs(); 38 | ~SFCSFs(); 39 | void concat_files(std::string file_name, vector file_paths); 40 | void rename_file(const char *file_path, const char *file_name); 41 | void mkdir(std::string file_path); 42 | int64_t get_block_size(); 43 | size_t read_file_to_addr(std::string file_name, CipherInfo cipher_info, char *addr, size_t length, size_t offset); 44 | size_t write_file_from_addr(std::string file_name, CipherInfo cipher_info, char *addr, size_t length, 45 | size_t offset); 46 | void read_multi_files(pybind11::list file_paths, pybind11::list tensors, pybind11::list lengths, 47 | pybind11::list offsets, int num_thread, bool use_cipher, pybind11::array_t key_arr, 48 | pybind11::array_t iv_arr, size_t header_size); 49 | void write_multi_files(pybind11::list file_paths, pybind11::list tensors, pybind11::list lengths, 50 | pybind11::list offsets, int num_thread, bool use_cipher, pybind11::array_t key_arr, 51 | pybind11::array_t iv_arr, size_t header_size); 52 | void get_file_size(std::string file_name, size_t *size); 53 | void get_multi_file_size(pybind11::list file_paths, pybind11::list sizes, int num_thread); 54 | }; 55 | 56 | class SFCSFile 57 | { 58 | public: 59 | 
cfsFS fs; 60 | bool fs_owner; 61 | SFCSFs *sfcs_fs; 62 | std::string file_path; 63 | // cipher related 64 | CipherInfo cipher_info; 65 | 66 | SFCSFile(std::string file_path); 67 | SFCSFile(std::string path, SFCSFs *sfcs_fs); 68 | SFCSFile(std::string file_path, bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr, 69 | size_t header_size); 70 | SFCSFile(std::string file_path, CipherInfo cipher_info); 71 | SFCSFile(std::string file_path, SFCSFs *sfcs_fs, CipherInfo cipher_info); 72 | ~SFCSFile(); 73 | size_t get_file_size(); 74 | size_t read_file_to_address_parallel(char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size, 75 | size_t global_offset); 76 | size_t read_file_to_addr(char *addr, size_t length, size_t offset); 77 | size_t read_file_to_array(pybind11::array_t arr, size_t length, size_t offset, int num_thread); 78 | size_t write_file_from_array(pybind11::array_t arr, size_t length, bool append); 79 | size_t write_file_from_tensors(pybind11::list tensors, pybind11::list sizes, pybind11::list offsets, 80 | std::string concat_dir, std::string concat_file); 81 | size_t write_file_from_addr(char *addr, size_t length, size_t offset, bool append); 82 | void delete_file(); 83 | 84 | private: 85 | void read_file_to_address_thread(int thread_id, char *addr, int device_id, char *dev_mem, size_t block_size, 86 | size_t total_size, size_t global_offset); 87 | void write_file_from_tensor(torch::Tensor tensor, size_t length, size_t offset, std::string file_name); 88 | }; 89 | 90 | #endif -------------------------------------------------------------------------------- /veturboio/ops/csrc/io_helper.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "include/io_helper.h" 17 | #include "include/cipher.h" 18 | #include "include/fastcrypto.h" 19 | 20 | IOHelper::~IOHelper() 21 | { 22 | free_buffer(); 23 | } 24 | 25 | // init buffer with given positive size or the size of the file in specified 26 | // path 27 | void IOHelper::init_buffer(string file_path, int64_t buffer_size, bool use_pinmem, bool use_sfcs_sdk) 28 | { 29 | if (buffer_size <= 0) 30 | { 31 | buffer_size = get_file_size(file_path.c_str(), use_sfcs_sdk); 32 | } 33 | 34 | if (buffer_size_ > 0) 35 | { 36 | free_buffer(); 37 | } 38 | 39 | buffer_size_ = buffer_size; 40 | if (use_pinmem) 41 | { 42 | use_pinmem_ = true; 43 | cudaMallocHost(&pin_mem, buffer_size, cudaHostAllocMapped); 44 | } 45 | else 46 | { 47 | pin_mem = (char *)mmap(NULL, buffer_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); 48 | madvise(pin_mem, buffer_size, MADV_HUGEPAGE); 49 | } 50 | } 51 | 52 | void IOHelper::free_buffer() 53 | { 54 | if (pin_mem != NULL) 55 | { 56 | if (use_pinmem_) 57 | cudaFreeHost(pin_mem); 58 | else 59 | munmap(pin_mem, buffer_size_); 60 | } 61 | } 62 | 63 | void read_unaligned_part_gpu(std::string file_path, torch::Tensor res_tensor, int64_t *offset, int64_t device_id, 64 | size_t *total_size, bool use_sfcs_sdk, bool use_direct_io, size_t *read_unaligned_size, 65 | CipherInfo cipher_info) 66 | { 67 | // cpu align only read head part, while gpu align read both head and tail part 68 | if (device_id < 0) 69 | { 70 | throw std::runtime_error("read_unaligned_part_gpu only support gpu 
device"); 71 | } 72 | size_t end_offset = *offset + *total_size; 73 | // both head and tail are aligned 74 | if ((*offset & (BUF_ALIGN_SIZE - 1)) == 0 && ((end_offset) & (BUF_ALIGN_SIZE - 1)) == 0) 75 | { 76 | return; 77 | } 78 | char tmp_buf_head[BUF_ALIGN_SIZE] = {}; 79 | char tmp_buf_tail[BUF_ALIGN_SIZE] = {}; 80 | // read head unaligned 81 | cudaSetDevice(device_id); 82 | if ((*offset & (BUF_ALIGN_SIZE - 1)) != 0) 83 | { 84 | size_t read_head_size = min(BUF_ALIGN_SIZE - (*offset & (BUF_ALIGN_SIZE - 1)), *total_size); 85 | read_file(file_path, tmp_buf_head, device_id, (char *)res_tensor.data_ptr(), 1, read_head_size, *offset, 86 | use_sfcs_sdk, use_direct_io, cipher_info); 87 | *read_unaligned_size = read_head_size; 88 | *offset += read_head_size; 89 | *total_size -= read_head_size; 90 | } 91 | // read tail unaligned 92 | if (*total_size > 0 && (end_offset & (BUF_ALIGN_SIZE - 1)) != 0) 93 | { 94 | size_t tail_offset = end_offset - (end_offset & (BUF_ALIGN_SIZE - 1)); 95 | size_t tensor_offset = tail_offset - *offset + *read_unaligned_size; 96 | read_file(file_path, tmp_buf_tail, device_id, (char *)res_tensor.data_ptr() + tensor_offset, 1, 97 | end_offset - tail_offset, tail_offset, use_sfcs_sdk, use_direct_io, cipher_info); 98 | *total_size -= end_offset - tail_offset; 99 | } 100 | cudaDeviceSynchronize(); 101 | } 102 | 103 | void IOHelper::load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 104 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 105 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 106 | pybind11::array_t iv_arr, int64_t header_size) 107 | { 108 | size_t file_size = get_file_size(file_path.c_str(), use_sfcs_sdk); 109 | size_t read_unaligned_size = 0; 110 | size_t total_size = length > 0 ? 
length : file_size - offset; 111 | // set cipher 112 | CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size); 113 | if (device_id < 0) 114 | { 115 | read_file(file_path, (char *)res_tensor.data_ptr() + read_unaligned_size, device_id, NULL, num_thread, 116 | total_size, offset, use_sfcs_sdk, use_direct_io, cipher_info); 117 | } 118 | else 119 | { 120 | // read unaligned part first, since GPU can only decrypt data in integral multiple of 16 Bytes 121 | read_unaligned_part_gpu(file_path, res_tensor, &offset, device_id, &total_size, use_sfcs_sdk, use_direct_io, 122 | &read_unaligned_size, cipher_info); 123 | 124 | // change use_pinmem attribute may introduce ambiguity 125 | if (buffer_size_ > 0 && use_pinmem != use_pinmem_) 126 | { 127 | throw std::runtime_error("use_pinmem attribute of an exising IOHelper should not be changed"); 128 | } 129 | 130 | // TODO: HPA might be slow 131 | // only use pin_mem as buffer for copying data to device memory 132 | // the lifecycle of the pin_mem is the same as helper 133 | if (pin_mem == NULL || total_size > buffer_size_) 134 | { 135 | init_buffer(file_path, total_size, use_pinmem, use_sfcs_sdk); 136 | } 137 | cudaSetDevice(device_id); 138 | read_file(file_path, pin_mem, device_id, (char *)res_tensor.data_ptr() + read_unaligned_size, num_thread, 139 | total_size, offset, use_sfcs_sdk, use_direct_io, CipherInfo()); 140 | cudaDeviceSynchronize(); 141 | // decrypt with gpu 142 | if (cipher_info.use_cipher && total_size > 0) 143 | { 144 | if (offset % AES_BLOCK_SIZE != 0 || total_size % AES_BLOCK_SIZE != 0) 145 | { 146 | throw std::runtime_error("cannot decrypt because gpu read is not aligned"); 147 | } 148 | unsigned char iv[AES_BLOCK_SIZE]; 149 | for (size_t i = 0; i < AES_BLOCK_SIZE; i++) 150 | { 151 | iv[i] = cipher_info.iv[i]; 152 | } 153 | counter_inc_by(iv, AES_BLOCK_SIZE, (offset - cipher_info.header_size) / AES_BLOCK_SIZE); 154 | unsigned char *iv_gpu = NULL; 155 | cudaMalloc((void **)&iv_gpu, AES_BLOCK_SIZE); 156 | 
if (iv_gpu == NULL) 157 | { 158 | throw std::runtime_error("iv_gpu cannot be allocated"); 159 | } 160 | cudaMemcpy(iv_gpu, iv, AES_BLOCK_SIZE, cudaMemcpyHostToDevice); 161 | unsigned char *ct = reinterpret_cast(res_tensor.data_ptr()) + read_unaligned_size; 162 | int cipher_ret = ctr_decrypt_gpu(cipher_info.mode, cipher_info.key, iv_gpu, ct, total_size, ct); 163 | if (!cipher_ret) 164 | { 165 | throw std::runtime_error("Cipher Exception: gpu decrypt fail"); 166 | } 167 | cudaDeviceSynchronize(); 168 | cudaFree(iv_gpu); 169 | } 170 | } 171 | } 172 | 173 | void IOHelper::save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 174 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 175 | pybind11::array_t iv_arr, int64_t header_size) 176 | { 177 | char *buf; 178 | 179 | CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size); 180 | if (tensor.device().is_cuda() || use_cipher) 181 | { 182 | // change use_pinmem attribute may introduce ambiguity 183 | if (buffer_size_ > 0 && use_pinmem != use_pinmem_) 184 | { 185 | throw std::runtime_error("use_pinmem attribute of an exising IOHelper should not be changed"); 186 | } 187 | 188 | if (pin_mem == NULL || length > buffer_size_) 189 | { 190 | init_buffer(file_path, length, use_pinmem, use_sfcs_sdk); 191 | } 192 | 193 | buf = pin_mem; 194 | if (tensor.device().is_cuda()) 195 | { 196 | cudaSetDevice(tensor.device().index()); 197 | cudaMemcpyAsync(buf, (char *)tensor.data_ptr(), length, cudaMemcpyDeviceToHost); 198 | cudaDeviceSynchronize(); 199 | } 200 | else 201 | { 202 | memcpy(buf, (char *)tensor.data_ptr(), length); 203 | } 204 | } 205 | else 206 | { 207 | buf = (char *)tensor.data_ptr(); 208 | } 209 | 210 | if (use_sfcs_sdk) 211 | { 212 | SFCSFile sfcs_file(file_path, cipher_info); 213 | sfcs_file.write_file_from_addr(buf, length, 0, true); 214 | } 215 | else 216 | { 217 | POSIXFile posix_file(file_path, cipher_info); 218 | posix_file.write_file_from_addr(buf, 
length, true); 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/io_helper_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include "include/io_helper.h" 2 | #include "include/cipher.h" 3 | 4 | IOHelper::~IOHelper() 5 | { 6 | } 7 | 8 | // init buffer with given positive size or the size of the file in specified 9 | // path 10 | void IOHelper::init_buffer(string file_path, int64_t buffer_size, bool use_pinmem, bool use_sfcs_sdk) 11 | { 12 | } 13 | 14 | void IOHelper::free_buffer() 15 | { 16 | } 17 | 18 | void IOHelper::load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 19 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 20 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 21 | pybind11::array_t iv_arr, int64_t header_size) 22 | { 23 | load_file_to_tensor_cpu(file_path, res_tensor, length, offset, device_id, num_thread, use_pinmem, use_sfcs_sdk, 24 | use_direct_io, use_cipher, key_arr, iv_arr, header_size); 25 | } 26 | 27 | void IOHelper::save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 28 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 29 | pybind11::array_t iv_arr, int64_t header_size) 30 | { 31 | save_tensor_to_file_cpu(tensor, file_path, length, use_pinmem, use_sfcs_sdk, use_cipher, key_arr, iv_arr, 32 | header_size); 33 | } 34 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/io_helper_cpu_common.cpp: -------------------------------------------------------------------------------- 1 | #include "include/io_helper.h" 2 | #include "include/cipher.h" 3 | 4 | size_t get_file_size(const char *file_name, bool use_sfcs_sdk) 5 | { 6 | if (use_sfcs_sdk) 7 | { 8 | SFCSFile sfcs_file(file_name); 9 | return sfcs_file.get_file_size(); 10 | } 11 | else 
12 | { 13 | struct stat st; 14 | stat(file_name, &st); 15 | return st.st_size; 16 | } 17 | } 18 | 19 | void read_file(string file_path, char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size, 20 | size_t global_offset, bool use_sfcs_sdk, bool use_direct_io, CipherInfo cipher_info) 21 | { 22 | if (total_size == 0) 23 | { 24 | return; 25 | } 26 | 27 | if (use_sfcs_sdk) 28 | { 29 | SFCSFile sfcs_file(file_path, cipher_info); 30 | sfcs_file.read_file_to_address_parallel(addr, device_id, dev_mem, num_thread, total_size, global_offset); 31 | } 32 | else 33 | { 34 | POSIXFile posix_file(file_path, cipher_info); 35 | posix_file.read_file_to_address_parallel(addr, device_id, dev_mem, num_thread, total_size, global_offset, 36 | use_direct_io); 37 | } 38 | } 39 | 40 | void load_file_to_tensor_cpu(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 41 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 42 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 43 | pybind11::array_t iv_arr, int64_t header_size) 44 | { 45 | size_t file_size = get_file_size(file_path.c_str(), use_sfcs_sdk); 46 | size_t read_unaligned_size = 0; 47 | size_t total_size = length > 0 ? 
length : file_size - offset; 48 | // set cipher 49 | CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size); 50 | if (device_id < 0) 51 | { 52 | read_file(file_path, (char *)res_tensor.data_ptr() + read_unaligned_size, device_id, NULL, num_thread, 53 | total_size, offset, use_sfcs_sdk, use_direct_io, cipher_info); 54 | } 55 | } 56 | 57 | void IOHelper::save_tensor_to_file_cpu(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 58 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 59 | pybind11::array_t iv_arr, int64_t header_size) 60 | { 61 | char *buf; 62 | 63 | CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size); 64 | if (use_cipher) 65 | { 66 | // change use_pinmem attribute may introduce ambiguity 67 | if (buffer_size_ > 0 && use_pinmem != use_pinmem_) 68 | { 69 | throw std::runtime_error("use_pinmem attribute of an exising IOHelper should not be changed"); 70 | } 71 | 72 | if (pin_mem == NULL || length > buffer_size_) 73 | { 74 | init_buffer(file_path, length, use_pinmem, use_sfcs_sdk); 75 | } 76 | 77 | buf = pin_mem; 78 | memcpy(buf, (char *)tensor.data_ptr(), length); 79 | } 80 | else 81 | { 82 | buf = (char *)tensor.data_ptr(); 83 | } 84 | 85 | if (use_sfcs_sdk) 86 | { 87 | SFCSFile sfcs_file(file_path, cipher_info); 88 | sfcs_file.write_file_from_addr(buf, length, 0, true); 89 | } 90 | else 91 | { 92 | POSIXFile posix_file(file_path, cipher_info); 93 | posix_file.write_file_from_addr(buf, length, true); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/io_helper_npu.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "include/io_helper.h" 17 | #include "include/cipher.h" 18 | 19 | IOHelper::~IOHelper() 20 | { 21 | } 22 | 23 | // init buffer with given positive size or the size of the file in specified 24 | // path 25 | void IOHelper::init_buffer(string file_path, int64_t buffer_size, bool use_pinmem, bool use_sfcs_sdk) 26 | { 27 | } 28 | 29 | void IOHelper::free_buffer() 30 | { 31 | } 32 | 33 | void IOHelper::load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 34 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 35 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 36 | pybind11::array_t iv_arr, int64_t header_size) 37 | { 38 | load_file_to_tensor_cpu(file_path, res_tensor, length, offset, device_id, num_thread, use_pinmem, use_sfcs_sdk, 39 | use_direct_io, use_cipher, key_arr, iv_arr, header_size); 40 | } 41 | 42 | void IOHelper::save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 43 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 44 | pybind11::array_t iv_arr, int64_t header_size) 45 | { 46 | save_tensor_to_file_cpu(tensor, file_path, length, use_pinmem, use_sfcs_sdk, use_cipher, key_arr, iv_arr, 47 | header_size); 48 | } 49 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/lib/libfastcrypto_gpu.so.0.3: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/volcengine/veTurboIO/0ed05516013da4332433784660660ca4d1904505/veturboio/ops/csrc/lib/libfastcrypto_gpu.so.0.3 -------------------------------------------------------------------------------- /veturboio/ops/csrc/posix.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include "include/posix.h" 17 | #include "include/logging.h" 18 | #include "include/cipher.h" 19 | #include "include/fastcrypto.h" 20 | #include 21 | 22 | POSIXFile::POSIXFile(std::string file_path) 23 | { 24 | this->file_path = file_path; 25 | } 26 | 27 | POSIXFile::POSIXFile(std::string file_path, CipherInfo cipher_info) 28 | { 29 | this->file_path = file_path; 30 | this->cipher_info = cipher_info; 31 | } 32 | 33 | POSIXFile::POSIXFile(std::string file_path, bool use_cipher, pybind11::array_t key_arr, 34 | pybind11::array_t iv_arr, size_t header_size) 35 | : POSIXFile(file_path) 36 | { 37 | this->cipher_info = CipherInfo(use_cipher, key_arr, iv_arr, header_size); 38 | } 39 | 40 | void POSIXFile::read_file_to_address_thread(int thread_id, char *addr, int device_id, char *dev_mem, size_t block_size, 41 | size_t total_size, size_t global_offset, bool use_direct_io, 42 | CipherInfo cipher_info) 43 | { 44 | size_t offset = thread_id * block_size; 45 | size_t read_size = block_size; 46 | int fd = -1; 47 | int ret = 0; 48 | size_t size_read = 0; 49 | 50 | if (offset + read_size >= total_size) 51 | { 52 | read_size = (total_size > offset) ? 
total_size - offset : 0; 53 | } 54 | // TODO: use_direct_io if sfcs file detected 55 | if (use_direct_io) 56 | { 57 | if ((fd = open(file_path.c_str(), O_RDONLY | O_DIRECT)) < 0) 58 | { 59 | if (errno == EINVAL) 60 | { 61 | logWarn("open file using directIO failed, fall back to bufferIO", file_path.c_str(), 62 | std::strerror(EINVAL)); 63 | } 64 | else 65 | { 66 | logError("open file using directIO failed", file_path.c_str(), std::strerror(errno)); 67 | throw std::runtime_error("veTurboIO Exception: can't apply open operation"); 68 | } 69 | } 70 | } 71 | 72 | if (fd == -1) 73 | { 74 | if ((fd = open(file_path.c_str(), O_RDONLY)) < 0) 75 | { 76 | logError("open file using bufferIO failed", file_path.c_str(), std::strerror(errno)); 77 | throw std::runtime_error("veTurboIO Exception: can't apply open operation"); 78 | } 79 | } 80 | 81 | FILE *fp = fdopen(fd, "rb"); 82 | if (fp == NULL) 83 | { 84 | logError("can't apply fdopen to file", file_path.c_str(), std::strerror(errno)); 85 | throw std::runtime_error("veTurboIO Exception: can't apply fdopen operation"); 86 | } 87 | 88 | if ((ret = fseek(fp, global_offset + offset, SEEK_SET)) < 0) 89 | { 90 | logError("can't apply fseek to file", file_path.c_str(), std::strerror(errno)); 91 | throw std::runtime_error("veTurboIO Exception: can't apply fseek operation"); 92 | } 93 | 94 | if ((size_read = fread(addr + offset, 1, read_size, fp)) == 0) 95 | { 96 | logWarn("read file with 0 bytes returned", file_path.c_str(), offset, read_size); 97 | } 98 | 99 | if ((ret = fclose(fp)) < 0) 100 | { 101 | logError("can't apply fclose to file", file_path.c_str(), std::strerror(errno)); 102 | throw std::runtime_error("veTurboIO Exception: can't apply fclose operation"); 103 | } 104 | 105 | // Decrypt if use_cipher is true 106 | if (cipher_info.use_cipher) 107 | { 108 | CtrDecrypter dec(cipher_info.mode, cipher_info.key, cipher_info.iv, 109 | global_offset + offset - cipher_info.header_size); 110 | unsigned char *ct = reinterpret_cast(addr 
+ offset); 111 | int cipher_ret = dec.decrypt_update(ct, read_size, ct); 112 | if (!cipher_ret) 113 | { 114 | throw std::runtime_error("Cipher Exception: decrypt fail"); 115 | } 116 | } 117 | 118 | #if defined(USE_CUDA) 119 | if (dev_mem != NULL && device_id >= 0) 120 | { 121 | cudaSetDevice(device_id); 122 | cudaMemcpyAsync(dev_mem + offset, addr + offset, read_size, cudaMemcpyHostToDevice); 123 | } 124 | #elif defined(USE_NPU) 125 | #else 126 | #endif 127 | } 128 | 129 | size_t POSIXFile::read_file_to_address_parallel(char *addr, int device_id, char *dev_mem, int num_thread, 130 | size_t total_size, size_t global_offset, bool use_direct_io) 131 | { 132 | vector threads(num_thread); 133 | 134 | size_t block_size = (size_t)ceil((double)total_size / num_thread); 135 | // align the block_size; 136 | block_size = (block_size + BUF_ALIGN_SIZE - 1) / BUF_ALIGN_SIZE * BUF_ALIGN_SIZE; 137 | // re-caculate the real needed thread num; 138 | num_thread = (total_size + block_size - 1) / block_size; 139 | 140 | for (int thread_id = 0; thread_id < num_thread; thread_id++) 141 | { 142 | threads[thread_id] = std::thread(&POSIXFile::read_file_to_address_thread, this, thread_id, addr, device_id, 143 | dev_mem, block_size, total_size, global_offset, use_direct_io, cipher_info); 144 | } 145 | 146 | for (int thread_id = 0; thread_id < num_thread; thread_id++) 147 | { 148 | threads[thread_id].join(); 149 | } 150 | 151 | return total_size; 152 | } 153 | 154 | size_t POSIXFile::read_file_to_array(pybind11::array_t arr, size_t length, size_t offset, int num_thread, 155 | bool use_direct_io) 156 | { 157 | pybind11::buffer_info buf_info = arr.request(); 158 | char *addr = static_cast(buf_info.ptr); 159 | madvise(addr, length, MADV_HUGEPAGE); 160 | return read_file_to_address_parallel(addr, -1, NULL, num_thread, length, offset, use_direct_io); 161 | } 162 | 163 | size_t POSIXFile::write_file_from_addr(char *addr, size_t length, bool append) 164 | { 165 | int fd; 166 | int flags = O_WRONLY; 
167 | size_t ret; 168 | size_t count; 169 | char *src = addr; 170 | size_t offset = 0; 171 | 172 | if (append) 173 | { 174 | struct stat st; 175 | stat(file_path.c_str(), &st); 176 | offset = st.st_size; 177 | flags |= O_APPEND; 178 | } 179 | 180 | if (cipher_info.use_cipher) 181 | { 182 | size_t h_off = cipher_info.header_size; 183 | CtrEncrypter enc(cipher_info.mode, cipher_info.key, cipher_info.iv, offset - h_off); 184 | unsigned char *pt = reinterpret_cast(addr); 185 | int cipher_ret = enc.encrypt_update(pt, length, pt); 186 | if (!cipher_ret) 187 | { 188 | throw std::runtime_error("Cipher Exception: encrypt fail"); 189 | } 190 | } 191 | 192 | fd = open(file_path.c_str(), flags); 193 | if (fd < 0) 194 | { 195 | logError("open failed", file_path.c_str(), std::strerror(errno)); 196 | throw std::runtime_error("veTurboIO Exception: open failed"); 197 | } 198 | 199 | count = length; 200 | while (count > 0) 201 | { 202 | ret = write(fd, src, count); 203 | if (ret < 0) 204 | { 205 | logError("Failed to write file", file_path.c_str()); 206 | throw std::runtime_error("veTurboIO Exception: write file"); 207 | } 208 | count -= ret; 209 | src += ret; 210 | } 211 | close(fd); 212 | return length; 213 | } 214 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/pybind.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "include/io_helper.h" 17 | #include "include/sfcs.h" 18 | #include "include/cipher.h" 19 | 20 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 21 | { 22 | py::class_(m, "IOHelper") 23 | .def(py::init<>()) 24 | .def("load_file_to_tensor", &IOHelper::load_file_to_tensor) 25 | .def("save_tensor_to_file", &IOHelper::save_tensor_to_file); 26 | 27 | py::class_(m, "POSIXFile") 28 | .def(py::init()) 29 | .def(py::init, pybind11::array_t, size_t>()) 30 | .def("read_file_to_array", &POSIXFile::read_file_to_array); 31 | 32 | py::class_(m, "SFCSFs") 33 | .def(py::init<>()) 34 | .def("mkdir", &SFCSFs::mkdir) 35 | .def("read_multi_files", &SFCSFs::read_multi_files) 36 | .def("write_multi_files", &SFCSFs::write_multi_files) 37 | .def("get_multi_file_size", &SFCSFs::get_multi_file_size); 38 | 39 | py::class_(m, "SFCSFile") 40 | .def(py::init()) 41 | .def(py::init, pybind11::array_t, size_t>()) 42 | .def("get_file_size", &SFCSFile::get_file_size) 43 | .def("read_file_to_array", &SFCSFile::read_file_to_array) 44 | .def("write_file_from_array", &SFCSFile::write_file_from_array) 45 | .def("write_file_from_tensors", &SFCSFile::write_file_from_tensors) 46 | .def("delete_file", &SFCSFile::delete_file); 47 | 48 | py::class_(m, "CtrEncWrap") 49 | .def(py::init, pybind11::array_t, size_t>()) 50 | .def("encrypt_update", &CtrEncWrap::encrypt_update); 51 | 52 | py::class_(m, "CtrDecWrap") 53 | .def(py::init, pybind11::array_t, size_t>()) 54 | .def("decrypt_update", &CtrDecWrap::decrypt_update); 55 | } 56 | -------------------------------------------------------------------------------- /veturboio/ops/io_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | import json 18 | import os 19 | from typing import Dict, Optional 20 | 21 | import numpy as np 22 | import torch 23 | from loguru import logger 24 | from safetensors.torch import save_file as safetensors_save_file 25 | 26 | from veturboio.ops.cipher import CipherInfo, CipherMode, create_cipher_with_header, encrypt 27 | from veturboio.ops.sfcs_utils import sfcs_delete_file, sfcs_write_file, sfcs_write_file_in_parallel 28 | from veturboio.safetensors import parse_state_dict 29 | from veturboio.types import FILE_PATH 30 | 31 | try: 32 | import veturboio_ext 33 | 34 | IOHelper = veturboio_ext.IOHelper 35 | except ImportError: 36 | IOHelper = None 37 | logger.warning("veturboio_ext not found, fallback to pure python implementation") 38 | 39 | 40 | def load_file_to_tensor( 41 | file_path: str, 42 | total_tensor: torch.Tensor, 43 | offset: int, 44 | helper: IOHelper, 45 | length: int = 0, 46 | device_id: Optional[int] = -1, 47 | num_thread: Optional[int] = 32, 48 | use_pinmem: Optional[bool] = False, 49 | use_sfcs_sdk: Optional[bool] = False, 50 | use_direct_io: Optional[bool] = False, 51 | cipher_info: CipherInfo = CipherInfo(False), 52 | ) -> torch.Tensor: 53 | return helper.load_file_to_tensor( 54 | file_path, 55 | total_tensor, 56 | length, 57 | offset, 58 | device_id, 59 | num_thread, 60 | use_pinmem, 61 | use_sfcs_sdk, 62 | use_direct_io, 63 | cipher_info.use_cipher, 64 | 
cipher_info.key, 65 | cipher_info.iv, 66 | CipherInfo.HEADER_SIZE if cipher_info.use_header else 0, 67 | ) 68 | 69 | 70 | def save_tensor_to_file( 71 | tensor: torch.Tensor, 72 | file_path: FILE_PATH, 73 | length: int, 74 | helper: IOHelper, 75 | use_pinmem: Optional[bool] = False, 76 | use_sfcs_sdk: Optional[bool] = False, 77 | cipher_info: CipherInfo = CipherInfo(False), 78 | ): 79 | return helper.save_tensor_to_file( 80 | tensor, 81 | file_path, 82 | length, 83 | use_pinmem, 84 | use_sfcs_sdk, 85 | cipher_info.use_cipher, 86 | cipher_info.key, 87 | cipher_info.iv, 88 | CipherInfo.HEADER_SIZE if cipher_info.use_header else 0, 89 | ) 90 | 91 | 92 | def save_file( 93 | state_dict: Dict[str, torch.Tensor], 94 | filename: FILE_PATH, 95 | helper: IOHelper, 96 | metadata: Optional[Dict[str, str]] = None, 97 | use_sfcs_sdk: bool = False, 98 | cipher_info: CipherInfo = CipherInfo(False), 99 | ): 100 | if helper is None: 101 | if cipher_info.use_cipher: 102 | logger.warning("helper is None, cipher is not supported in pure python implementation") 103 | return safetensors_save_file(state_dict, filename, metadata=metadata) 104 | 105 | meta, tensors, sizes, offsets = parse_state_dict(state_dict) 106 | 107 | if metadata: 108 | meta["__metadata__"] = metadata 109 | 110 | meta_bytes = json.dumps(meta).encode('utf-8') 111 | meta_len = len(meta_bytes) 112 | 113 | # alignment 114 | if not meta_len % 8 == 0: 115 | meta_len_pad = (meta_len + 8) // 8 * 8 116 | meta_bytes += b' ' * (meta_len_pad - meta_len) 117 | meta_len = meta_len_pad 118 | 119 | st_header_bytes = meta_len.to_bytes(8, 'little') + meta_bytes 120 | st_header_len = len(st_header_bytes) 121 | 122 | if use_sfcs_sdk: 123 | sfcs_write_file_in_parallel(filename, tensors, sizes, offsets, st_header_bytes, st_header_len, cipher_info) 124 | else: 125 | with open(filename, "wb") as f: 126 | if cipher_info.use_cipher: 127 | if cipher_info.use_header: 128 | cipher_header_bytes = cipher_info.to_header_bytes() 129 | 
f.write(cipher_header_bytes) 130 | enc_st_header_arr = np.zeros(st_header_len, dtype=np.uint8) 131 | encrypt(cipher_info, np.frombuffer(st_header_bytes, dtype=np.uint8), enc_st_header_arr, 0) 132 | f.write(enc_st_header_arr.tobytes()) 133 | else: 134 | f.write(st_header_bytes) 135 | 136 | for i in range(len(tensors)): 137 | tensor = tensors[i] 138 | size = sizes[i] 139 | save_tensor_to_file( 140 | tensor, 141 | filename, 142 | size, 143 | helper=helper, 144 | use_pinmem=False, 145 | use_sfcs_sdk=use_sfcs_sdk, 146 | cipher_info=cipher_info, 147 | ) 148 | 149 | 150 | def init_io_helper() -> IOHelper: 151 | return IOHelper() 152 | -------------------------------------------------------------------------------- /veturboio/ops/posix_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | from typing import Optional 18 | 19 | import numpy as np 20 | from loguru import logger 21 | 22 | from veturboio.ops.cipher import CipherInfo 23 | 24 | try: 25 | from veturboio.utils.load_veturboio_ext import load_veturboio_ext 26 | 27 | veturboio_ext = load_veturboio_ext() 28 | IOHelper = veturboio_ext.IOHelper 29 | POSIXFile = veturboio_ext.POSIXFile 30 | except ImportError: 31 | POSIXFile = None 32 | logger.warning("veturboio_ext not found, fallback to pure python implementation") 33 | 34 | 35 | def posix_read_file( 36 | file_path: str, 37 | arr: np.ndarray, 38 | length: int, 39 | offset: int, 40 | num_thread: Optional[int] = 1, 41 | cipher_info: CipherInfo = CipherInfo(False), 42 | use_direct_io: bool = False, 43 | ) -> int: 44 | posix_file = POSIXFile( 45 | file_path, 46 | cipher_info.use_cipher, 47 | cipher_info.key, 48 | cipher_info.iv, 49 | CipherInfo.HEADER_SIZE if cipher_info.use_header else 0, 50 | ) 51 | return posix_file.read_file_to_array(arr, length, offset, num_thread, use_direct_io) 52 | -------------------------------------------------------------------------------- /veturboio/safetensors.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import json 18 | import os 19 | import pprint 20 | from multiprocessing import shared_memory 21 | from typing import Callable, Dict, List, Optional 22 | 23 | import numpy as np 24 | import torch 25 | from loguru import logger 26 | 27 | from veturboio.loader import BaseLoader 28 | from veturboio.ops.cipher import CipherInfo 29 | from veturboio.types import FILE_PATH 30 | 31 | # All safetensors file will start with a json string, which is the meta info of the file. 32 | # We use the beginning char to determine whether it is a safetensors file. The beginning 33 | # char is '{' and its ascii code is 123. 34 | SAFETENSORS_FILE_MAGIC_NUM = 123 35 | 36 | _safetensors_dtype_mapper = { 37 | "F64": torch.float64, 38 | "F32": torch.float32, 39 | "F16": torch.float16, 40 | "BF16": torch.bfloat16, 41 | "I64": torch.int64, 42 | "I32": torch.int32, 43 | "I16": torch.int16, 44 | "I8": torch.int8, 45 | "U8": torch.uint8, 46 | "BOOL": torch.bool, 47 | } 48 | 49 | 50 | def only_safetensors_property(func: Callable): 51 | func_name = func.__name__ 52 | warning_msg = "This safetensors file is invalid, will take it as a normal torch file." 
53 | 54 | def wrapper(self, *args, **kwargs): 55 | if not self.is_valid: 56 | logger.patch(lambda r: r.update(function=func_name)).warning(warning_msg) 57 | return None 58 | return func(self, *args, **kwargs) 59 | 60 | return wrapper 61 | 62 | 63 | class TensorMeta: 64 | def __init__(self, name: str, dtype: str, shape: List[int], data_offsets: List[int]) -> None: 65 | self._name = name 66 | self._dtype = _safetensors_dtype_mapper[dtype] 67 | self._shape = shape 68 | self._data_offsets = data_offsets 69 | 70 | @property 71 | def name(self) -> str: 72 | return self._name 73 | 74 | @property 75 | def dtype(self) -> torch.dtype: 76 | return self._dtype 77 | 78 | @property 79 | def shape(self) -> List[int]: 80 | return self._shape 81 | 82 | @property 83 | def data_offsets(self) -> List[int]: 84 | return self._data_offsets 85 | 86 | def __str__(self) -> str: 87 | return str( 88 | { 89 | "name": self._name, 90 | "dtype": self._dtype, 91 | "shape": self._shape, 92 | "data_offsets": self._data_offsets, 93 | } 94 | ) 95 | 96 | def __repr__(self) -> str: 97 | return self.__str__() 98 | 99 | 100 | class SafetensorsFile: 101 | def __init__(self, file: FILE_PATH, loader: BaseLoader, use_cipher: Optional[bool] = None) -> None: 102 | self._file = file 103 | self._loader = loader 104 | 105 | self._is_valid = True 106 | 107 | # cipher related 108 | self._cipher_info = CipherInfo(False) 109 | if use_cipher == True or use_cipher == None and os.getenv("VETURBOIO_USE_CIPHER", "0") == "1": 110 | header_bytes = loader.load_to_bytes(offset=0, count=CipherInfo.HEADER_SIZE) 111 | self._cipher_info = CipherInfo(True, header_bytes, os.path.abspath(self.file)) 112 | 113 | if self._cipher_info.use_header: 114 | h_off = CipherInfo.HEADER_SIZE 115 | else: 116 | h_off = 0 117 | 118 | magic_number = loader.load_to_bytes(offset=8 + h_off, count=1, cipher_info=self._cipher_info)[0] 119 | if magic_number != SAFETENSORS_FILE_MAGIC_NUM: 120 | self._is_valid = False 121 | return 122 | 123 | 
self._meta_size = np.frombuffer( 124 | loader.load_to_bytes(offset=h_off, count=8, cipher_info=self._cipher_info), dtype=np.int64 125 | )[0] 126 | meta_bytes = loader.load_to_bytes(offset=8 + h_off, count=self._meta_size, cipher_info=self._cipher_info) 127 | meta_dict = json.loads(meta_bytes.decode("utf-8")) 128 | 129 | self._shared_tensor = {} 130 | self._ignored_meta = {} 131 | if "__metadata__" in meta_dict: 132 | meta_data = meta_dict.pop("__metadata__") 133 | for key, value in meta_data.items(): 134 | if value not in meta_dict: 135 | self._ignored_meta[key] = value 136 | else: 137 | self._shared_tensor[key] = value 138 | 139 | self._meta = {} 140 | for key in meta_dict: 141 | self._meta[key] = TensorMeta( 142 | name=key, 143 | dtype=meta_dict[key]["dtype"], 144 | shape=meta_dict[key]["shape"], 145 | data_offsets=meta_dict[key]["data_offsets"], 146 | ) 147 | 148 | # record the offset of the tensor data 149 | self._tensor_offset = np.dtype(np.int64).itemsize + self._meta_size + h_off 150 | 151 | @staticmethod 152 | def split_tensor_to_state_dict( 153 | total_tensor: torch.Tensor, safetensor_file: "SafetensorsFile" 154 | ) -> Dict[str, torch.Tensor]: 155 | state_dict = {} 156 | 157 | for tensor_meta in safetensor_file.meta.values(): 158 | tensor = total_tensor[tensor_meta.data_offsets[0] : tensor_meta.data_offsets[1]] 159 | tensor = tensor.view(dtype=tensor_meta.dtype) 160 | tensor = tensor.reshape(tensor_meta.shape) 161 | state_dict[tensor_meta.name] = tensor 162 | 163 | for src_tensor_key, tgt_tensor_key in safetensor_file.shared_tensor.items(): 164 | state_dict[src_tensor_key] = state_dict[tgt_tensor_key] 165 | return state_dict 166 | 167 | @property 168 | def file(self) -> FILE_PATH: 169 | return self._file 170 | 171 | @property 172 | def is_valid(self) -> bool: 173 | return self._is_valid 174 | 175 | @property 176 | @only_safetensors_property 177 | def meta_size(self) -> int: 178 | return self._meta_size 179 | 180 | @property 181 | @only_safetensors_property 
182 | def meta(self) -> Dict[str, TensorMeta]: 183 | return self._meta 184 | 185 | @property 186 | @only_safetensors_property 187 | def tensor_offset(self) -> int: 188 | return self._tensor_offset 189 | 190 | @property 191 | @only_safetensors_property 192 | def shared_tensor(self) -> Dict[str, str]: 193 | return self._shared_tensor 194 | 195 | def __str__(self) -> str: 196 | if not self._is_valid: 197 | return f"{self.file} is not a valid safetensors file." 198 | return pprint.pformat( 199 | { 200 | "file": self._file, 201 | "meta_size": self._meta_size, 202 | "meta": self._meta, 203 | "tensor_offset": self._tensor_offset, 204 | } 205 | ) 206 | 207 | def __repr__(self) -> str: 208 | return self.__str__() 209 | 210 | def load(self, map_location: str = "cpu", state_dict: Dict[str, torch.Tensor] = None) -> Dict[str, torch.Tensor]: 211 | if not self._is_valid: 212 | return self._loader.load_pt(map_location, self._cipher_info) 213 | else: 214 | return self._loader.load_safetensors(self, map_location, state_dict) 215 | 216 | def load_to_shmem(self) -> shared_memory.SharedMemory: 217 | return self._loader.load_to_shmem(self._cipher_info) 218 | 219 | 220 | def parse_state_dict(state_dict: Dict[str, torch.Tensor]): 221 | meta = {} 222 | tensors = [] 223 | sizes = [] 224 | offsets = [] 225 | 226 | data_offset_begin = 0 227 | data_offset_end = 0 228 | _safetensors_dtype_str = {v: k for k, v in _safetensors_dtype_mapper.items()} 229 | bool_state_dict = {} 230 | for key, tensor in state_dict.items(): 231 | if tensor.dtype == torch.bool: 232 | bool_state_dict[key] = tensor 233 | continue 234 | else: 235 | size = 1 236 | for d in range(tensor.dim()): 237 | size *= tensor.shape[d] 238 | 239 | try: 240 | bytes = torch.finfo(tensor.dtype).bits // 8 241 | except: 242 | bytes = torch.iinfo(tensor.dtype).bits // 8 243 | size *= bytes 244 | 245 | data_offset_end = data_offset_begin + size 246 | meta[key] = { 247 | "dtype": _safetensors_dtype_str[tensor.dtype], 248 | "shape": 
tensor.shape, 249 | "data_offsets": [data_offset_begin, data_offset_end], 250 | } 251 | if size > 0: 252 | tensors.append(tensor) 253 | sizes.append(size) 254 | offsets.append(data_offset_begin) 255 | data_offset_begin = data_offset_end 256 | 257 | for key, tensor in bool_state_dict.items(): 258 | size = 1 259 | for d in range(tensor.dim()): 260 | size *= tensor.shape[d] 261 | 262 | data_offset_end = data_offset_begin + size 263 | meta[key] = { 264 | "dtype": _safetensors_dtype_str[tensor.dtype], 265 | "shape": tensor.shape, 266 | "data_offsets": [data_offset_begin, data_offset_end], 267 | } 268 | if size > 0: 269 | tensors.append(tensor) 270 | sizes.append(size) 271 | offsets.append(data_offset_begin) 272 | data_offset_begin = data_offset_end 273 | return meta, tensors, sizes, offsets 274 | -------------------------------------------------------------------------------- /veturboio/saver/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | from veturboio.saver.base_saver import BaseSaver, PosixSaver 18 | from veturboio.saver.sfcs_client_saver import SfcsClientSaver 19 | 20 | __all__ = ["BaseSaver", "PosixSaver", "SfcsClientSaver"] 21 | -------------------------------------------------------------------------------- /veturboio/saver/base_saver.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import os 18 | import tempfile 19 | from typing import Any, Dict 20 | 21 | import numpy as np 22 | import torch 23 | from safetensors.torch import save_file as safetenors_save_file 24 | from safetensors.torch import save_model as safetensors_save_model 25 | 26 | from veturboio.ops.cipher import CipherInfo, CipherMode, create_cipher_with_header, encrypt 27 | from veturboio.ops.io_utils import IOHelper 28 | from veturboio.ops.io_utils import save_file as fast_save_file 29 | from veturboio.types import FILE_PATH 30 | 31 | 32 | class BaseSaver: 33 | def __init__(self, method: str) -> None: 34 | self.method = method 35 | 36 | def save_file(self, state_dict: Dict[str, torch.Tensor], file: FILE_PATH, metadata: Dict[str, str] = None) -> None: 37 | raise NotImplementedError 38 | 39 | def save_model(self, model: torch.nn.Module, file: FILE_PATH) -> None: 40 | raise NotImplementedError 41 | 42 | 43 | class PosixSaver(BaseSaver): 44 | def __init__(self, file: FILE_PATH, helper: IOHelper = None, use_cipher: bool = False) -> None: 45 | super().__init__(method="posix") 46 | self.file = file 47 | use_cipher = use_cipher or os.getenv("VETURBOIO_USE_CIPHER", "0") == "1" 48 | use_header = use_cipher and os.getenv("VETURBOIO_CIPHER_HEADER", "0") == "1" 49 | if use_header: 50 | self.cipher_info = create_cipher_with_header(CipherMode.CTR_128, os.path.abspath(self.file)) 51 | else: 52 | self.cipher_info = CipherInfo(use_cipher, None, os.path.abspath(self.file)) 53 | 54 | self.helper = helper 55 | 56 | def save_file( 57 | self, state_dict: Dict[str, torch.Tensor], metadata: Dict[str, str] = None, enable_fast_mode: bool = False 58 | ) -> None: 59 | if enable_fast_mode: 60 | fast_save_file( 61 | state_dict, 62 | self.file, 63 | helper=self.helper, 64 | metadata=metadata, 65 | cipher_info=self.cipher_info, 66 | ) 67 | else: 68 | if self.cipher_info.use_cipher: 69 | with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile: 70 | tmp_file_path = tmpfile.name 71 | 
safetenors_save_file(state_dict, tmp_file_path, metadata=metadata) 72 | tmp_file_size = os.path.getsize(tmp_file_path) 73 | tmp_file_bytes = np.memmap(tmp_file_path, dtype=np.uint8, mode='r', shape=tmp_file_size) 74 | h_off = CipherInfo.HEADER_SIZE if self.cipher_info.use_header else 0 75 | file_bytes = np.memmap(self.file, dtype=np.uint8, mode='w+', shape=tmp_file_size + h_off) 76 | encrypt(self.cipher_info, tmp_file_bytes, file_bytes[h_off:], 0) 77 | if h_off: 78 | file_bytes[:h_off] = np.frombuffer(self.cipher_info.to_header_bytes(), dtype=np.uint8) 79 | file_bytes.flush() 80 | else: 81 | safetenors_save_file(state_dict, self.file, metadata=metadata) 82 | 83 | def save_model(self, model: torch.nn.Module) -> None: 84 | if self.cipher_info.use_cipher: 85 | with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile: 86 | tmp_file_path = tmpfile.name 87 | safetensors_save_model(model, tmp_file_path) 88 | tmp_file_size = os.path.getsize(tmp_file_path) 89 | tmp_file_bytes = np.memmap(tmp_file_path, dtype=np.uint8, mode='r', shape=tmp_file_size) 90 | h_off = CipherInfo.HEADER_SIZE if self.cipher_info.use_header else 0 91 | file_bytes = np.memmap(self.file, dtype=np.uint8, mode='w+', shape=tmp_file_size + h_off) 92 | encrypt(self.cipher_info, tmp_file_bytes, file_bytes[h_off:], 0) 93 | if h_off: 94 | file_bytes[:h_off] = np.frombuffer(self.cipher_info.to_header_bytes(), dtype=np.uint8) 95 | file_bytes.flush() 96 | else: 97 | safetensors_save_model(model, self.file) 98 | 99 | def save_pt(self, state_dict: Dict[str, torch.Tensor]) -> None: 100 | if self.cipher_info.use_cipher: 101 | with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile: 102 | tmp_file_path = tmpfile.name 103 | torch.save(state_dict, tmp_file_path) 104 | tmp_file_size = os.path.getsize(tmp_file_path) 105 | tmp_file_bytes = np.memmap(tmp_file_path, dtype=np.uint8, mode='r', shape=tmp_file_size) 106 | h_off = CipherInfo.HEADER_SIZE if self.cipher_info.use_header else 0 107 | file_bytes = 
np.memmap(self.file, dtype=np.uint8, mode='w+', shape=tmp_file_size + h_off) 108 | encrypt(self.cipher_info, tmp_file_bytes, file_bytes[h_off:], 0) 109 | if h_off: 110 | file_bytes[:h_off] = np.frombuffer(self.cipher_info.to_header_bytes(), dtype=np.uint8) 111 | file_bytes.flush() 112 | else: 113 | torch.save(state_dict, self.file) 114 | -------------------------------------------------------------------------------- /veturboio/saver/sfcs_client_saver.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
class SfcsClientSaver(BaseSaver):
    """Saver that writes checkpoints to SFCS through the client SDK.

    Ciphering can be forced via the ``VETURBOIO_USE_CIPHER`` env var, and a
    cipher header is prepended when ``VETURBOIO_CIPHER_HEADER`` is also set.
    """

    def __init__(
        self,
        file: FILE_PATH,
        helper: IOHelper = None,
        use_cipher: bool = False,
    ) -> None:
        """Create a client-mode saver for *file*.

        Args:
            file: destination path; mapped onto the SFCS mount point.
            helper: optional IOHelper used by the fast save path.
            use_cipher: enable payload encryption (may also be forced on
                by the ``VETURBOIO_USE_CIPHER`` environment variable).
        """
        super().__init__(method="client")

        self.file = file
        self.helper = helper

        # Map the user-visible path onto the configured SFCS mount point.
        mount_path = init_sfcs_conf(file)
        self.sfcs_valid_path = path_mapper(self.file, mount_path)

        # Environment variables can turn ciphering on even when the caller
        # did not request it explicitly.
        use_cipher = use_cipher or os.getenv("VETURBOIO_USE_CIPHER", "0") == "1"
        use_header = use_cipher and os.getenv("VETURBOIO_CIPHER_HEADER", "0") == "1"
        if use_header:
            self.cipher_info = create_cipher_with_header(CipherMode.CTR_128, os.path.abspath(self.file))
        else:
            self.cipher_info = CipherInfo(use_cipher, None, os.path.abspath(self.file))

    def _upload_local_file(self, file_path: FILE_PATH) -> None:
        # Shared tail of the temp-file save paths: load the serialized file
        # (prepending the cipher header when enabled) and hand the bytes to
        # sfcs_write_file. Extracted because this logic was duplicated
        # verbatim in save_file/save_model/save_pt.
        file_size = os.path.getsize(file_path)
        if self.cipher_info.use_header:
            h_off = CipherInfo.HEADER_SIZE
            file_bytes = np.empty(file_size + h_off, dtype=np.byte)
            file_bytes[:h_off] = np.frombuffer(self.cipher_info.to_header_bytes(), dtype=np.byte)
            file_bytes[h_off:] = np.fromfile(file_path, dtype=np.byte, count=file_size)
        else:
            # NOTE(review): 'r+' (writable) is kept on purpose — sfcs_write_file
            # receives cipher_info and may encrypt the buffer in place; confirm
            # before downgrading this to read-only mode 'r'.
            file_bytes = np.memmap(file_path, dtype=np.byte, mode='r+', shape=file_size)
        sfcs_write_file(self.sfcs_valid_path, file_bytes, len(file_bytes), self.cipher_info)

    def save_file(
        self, state_dict: Dict[str, torch.Tensor], metadata: Dict[str, str] = None, enable_fast_mode: bool = False
    ) -> None:
        """Save *state_dict* in safetensors format to SFCS.

        Args:
            state_dict: tensors to serialize.
            metadata: optional safetensors metadata dictionary.
            enable_fast_mode: when True, write directly through the SFCS SDK
                instead of staging in a tmpfs-backed temp file.
        """
        if enable_fast_mode:
            fast_save_file(
                state_dict,
                self.sfcs_valid_path,
                helper=self.helper,
                metadata=metadata,
                cipher_info=self.cipher_info,
                use_sfcs_sdk=True,
            )
        else:
            # /dev/shm keeps the intermediate file in memory, and the context
            # manager removes it even if serialization fails.
            with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
                file_path = tmpfile.name
                safetenors_save_file(state_dict, file_path, metadata=metadata)
                self._upload_local_file(file_path)

    def save_model(self, model: torch.nn.Module) -> None:
        """Serialize *model* with safetensors and upload it to SFCS."""
        with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
            file_path = tmpfile.name
            safetensors_save_model(model, file_path)
            self._upload_local_file(file_path)

    def save_pt(self, state_dict: Dict[str, torch.Tensor]) -> None:
        """Serialize *state_dict* with ``torch.save`` and upload it to SFCS."""
        with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
            file_path = tmpfile.name
            torch.save(state_dict, file_path)
            self._upload_local_file(file_path)
LIBCFS_DEFAULT_URL = "https://veturbo-cn-beijing.tos-cn-beijing.volces.com/veturboio/libcfs/libcloudfs.so"
LIBCFS_DEFAULT_PATH = "/usr/lib/libcloudfs.so"


def load_libcfs():
    """Ensure libcloudfs.so exists locally, downloading it when missing.

    The target path comes from ``LIBCFS_PATH`` (default
    ``/usr/lib/libcloudfs.so``) and the download URL from ``LIBCFS_URL``.

    Raises:
        requests.HTTPError: if the download responds with an error status.
    """
    libcfs_path = os.getenv("LIBCFS_PATH", LIBCFS_DEFAULT_PATH)
    if not os.path.isfile(libcfs_path):
        # libcfs_path not exist, download from url
        import requests

        libcfs_url = os.getenv("LIBCFS_URL", LIBCFS_DEFAULT_URL)
        logger.info(f"download libcloudfs.so from {libcfs_url}, save to {libcfs_path}")
        r = requests.get(libcfs_url, timeout=60)
        # Fail loudly on HTTP errors instead of silently writing an error
        # page to the shared-library path.
        r.raise_for_status()
        # Write atomically: a partially written file must never be left at
        # libcfs_path, where the isfile() check above would treat it as a
        # valid library on the next run.
        tmp_path = libcfs_path + ".tmp"
        with open(tmp_path, 'wb') as f:
            f.write(r.content)
        os.replace(tmp_path, libcfs_path)


def load_veturboio_ext():
    """Import and return the compiled veturboio extension module,
    fetching libcloudfs.so first if it is not already present."""
    load_libcfs()
    import veturboio_ext

    return veturboio_ext
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | __version__ = "0.1.3rc4" 18 | --------------------------------------------------------------------------------