├── .clang-format
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── README.zh.md
├── bench
├── io_bench.py
└── io_bench.sh
├── docs
├── encrypt_model.md
├── imgs
│ └── SFCS.png
├── index.md
└── pin_mem.md
├── mkdocs.yml
├── pyproject.toml
├── setup.py
├── tests
├── test_assert_exceptions.py
├── test_convert.py
├── test_fetch_cipher.py
├── test_load_op.py
├── test_save_op.py
├── test_sfcs_sdk_op.py
└── test_share_tensor_cpu.py
└── veturboio
├── __init__.py
├── convert.py
├── io.py
├── loader
├── __init__.py
├── base_loader.py
├── faster_posix_loader.py
└── sfcs_client_loader.py
├── ops
├── __init__.py
├── cipher.py
├── consts.py
├── csrc
│ ├── cipher.cpp
│ ├── include
│ │ ├── cfs.h
│ │ ├── cfsaio.h
│ │ ├── cipher.h
│ │ ├── common.h
│ │ ├── fastcrypto.h
│ │ ├── io_helper.h
│ │ ├── logging.h
│ │ ├── posix.h
│ │ └── sfcs.h
│ ├── io_helper.cu
│ ├── io_helper_cpu.cpp
│ ├── io_helper_cpu_common.cpp
│ ├── io_helper_npu.cpp
│ ├── lib
│ │ └── libfastcrypto_gpu.so.0.3
│ ├── posix.cpp
│ ├── pybind.cpp
│ └── sfcs.cpp
├── io_utils.py
├── posix_utils.py
└── sfcs_utils.py
├── safetensors.py
├── saver
├── __init__.py
├── base_saver.py
└── sfcs_client_saver.py
├── types.py
├── utils
├── __init__.py
└── load_veturboio_ext.py
└── version.py
/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | Language: Cpp
3 | # BasedOnStyle: Microsoft
4 | AccessModifierOffset: -2
5 | AlignAfterOpenBracket: Align
6 | AlignConsecutiveMacros: false
7 | AlignConsecutiveAssignments: false
8 | AlignConsecutiveDeclarations: false
9 | AlignEscapedNewlines: Right
10 | AlignOperands: true
11 | AlignTrailingComments: true
12 | AllowAllArgumentsOnNextLine: true
13 | AllowAllConstructorInitializersOnNextLine: true
14 | AllowAllParametersOfDeclarationOnNextLine: true
15 | AllowShortBlocksOnASingleLine: Never
16 | AllowShortCaseLabelsOnASingleLine: false
17 | AllowShortFunctionsOnASingleLine: None
18 | AllowShortLambdasOnASingleLine: All
19 | AllowShortIfStatementsOnASingleLine: Never
20 | AllowShortLoopsOnASingleLine: false
21 | AlwaysBreakAfterDefinitionReturnType: None
22 | AlwaysBreakAfterReturnType: None
23 | AlwaysBreakBeforeMultilineStrings: false
24 | AlwaysBreakTemplateDeclarations: MultiLine
25 | BinPackArguments: true
26 | BinPackParameters: true
27 | BraceWrapping:
28 | AfterCaseLabel: false
29 | AfterClass: true
30 | AfterControlStatement: true
31 | AfterEnum: true
32 | AfterFunction: true
33 | AfterNamespace: true
34 | AfterObjCDeclaration: true
35 | AfterStruct: true
36 | AfterUnion: false
37 | AfterExternBlock: true
38 | BeforeCatch: true
39 | BeforeElse: true
40 | IndentBraces: false
41 | SplitEmptyFunction: true
42 | SplitEmptyRecord: true
43 | SplitEmptyNamespace: true
44 | BreakBeforeBinaryOperators: None
45 | BreakBeforeBraces: Custom
46 | BreakBeforeInheritanceComma: false
47 | BreakInheritanceList: BeforeColon
48 | BreakBeforeTernaryOperators: true
49 | BreakConstructorInitializersBeforeComma: false
50 | BreakConstructorInitializers: BeforeColon
51 | BreakAfterJavaFieldAnnotations: false
52 | BreakStringLiterals: true
53 | ColumnLimit: 120
54 | CommentPragmas: '^ IWYU pragma:'
55 | CompactNamespaces: false
56 | ConstructorInitializerAllOnOneLineOrOnePerLine: false
57 | ConstructorInitializerIndentWidth: 4
58 | ContinuationIndentWidth: 4
59 | Cpp11BracedListStyle: true
60 | DeriveLineEnding: true
61 | DerivePointerAlignment: false
62 | DisableFormat: false
63 | ExperimentalAutoDetectBinPacking: false
64 | FixNamespaceComments: true
65 | ForEachMacros:
66 | - foreach
67 | - Q_FOREACH
68 | - BOOST_FOREACH
69 | IncludeBlocks: Preserve
70 | IncludeCategories:
71 | - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
72 | Priority: 2
73 | SortPriority: 0
74 | - Regex: '^(<|"(gtest|gmock|isl|json)/)'
75 | Priority: 3
76 | SortPriority: 0
77 | - Regex: '.*'
78 | Priority: 1
79 | SortPriority: 0
80 | IncludeIsMainRegex: '(Test)?$'
81 | IncludeIsMainSourceRegex: ''
82 | IndentCaseLabels: false
83 | IndentGotoLabels: true
84 | IndentPPDirectives: None
85 | IndentWidth: 4
86 | IndentWrappedFunctionNames: false
87 | JavaScriptQuotes: Leave
88 | JavaScriptWrapImports: true
89 | KeepEmptyLinesAtTheStartOfBlocks: true
90 | MacroBlockBegin: ''
91 | MacroBlockEnd: ''
92 | MaxEmptyLinesToKeep: 1
93 | NamespaceIndentation: None
94 | ObjCBinPackProtocolList: Auto
95 | ObjCBlockIndentWidth: 2
96 | ObjCSpaceAfterProperty: false
97 | ObjCSpaceBeforeProtocolList: true
98 | PenaltyBreakAssignment: 2
99 | PenaltyBreakBeforeFirstCallParameter: 19
100 | PenaltyBreakComment: 300
101 | PenaltyBreakFirstLessLess: 120
102 | PenaltyBreakString: 1000
103 | PenaltyBreakTemplateDeclaration: 10
104 | PenaltyExcessCharacter: 1000000
105 | PenaltyReturnTypeOnItsOwnLine: 1000
106 | PointerAlignment: Right
107 | ReflowComments: true
108 | SortIncludes: false
109 | SortUsingDeclarations: true
110 | SpaceAfterCStyleCast: false
111 | SpaceAfterLogicalNot: false
112 | SpaceAfterTemplateKeyword: true
113 | SpaceBeforeAssignmentOperators: true
114 | SpaceBeforeCpp11BracedList: false
115 | SpaceBeforeCtorInitializerColon: true
116 | SpaceBeforeInheritanceColon: true
117 | SpaceBeforeParens: ControlStatements
118 | SpaceBeforeRangeBasedForLoopColon: true
119 | SpaceInEmptyBlock: false
120 | SpaceInEmptyParentheses: false
121 | SpacesBeforeTrailingComments: 1
122 | SpacesInAngles: false
123 | SpacesInConditionalStatement: false
124 | SpacesInContainerLiterals: true
125 | SpacesInCStyleCastParentheses: false
126 | SpacesInParentheses: false
127 | SpacesInSquareBrackets: false
128 | SpaceBeforeSquareBrackets: false
129 | Standard: Latest
130 | StatementMacros:
131 | - Q_UNUSED
132 | - QT_REQUIRE_VERSION
133 | TabWidth: 4
134 | UseCRLF: false
135 | UseTab: Never
136 | ...
137 |
138 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 | # local build cache
163 | build
164 | dist
165 | *.pt
166 | veturbo/ops/lib/
167 | veturbo/lego_pipeline/lib/
168 |
169 | # cmake
170 | CMakeFiles/
171 | CMakeCache.txt
172 | CMakeScripts/
173 | CMakeTmp/
174 | cmake_install.cmake
175 | Makefile
176 | cmake-build-debug/
177 | cmake-build-release/
178 | cmake-build-relwithdebinfo/
179 | cmake-build-minsize/
180 |
181 | # library
182 | !veturboio/ops/csrc/lib/
183 | !veturboio/ops/csrc/lib/*.so
184 |
185 | # vscode
186 | .vscode
187 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file. See [conventional commits](https://www.conventionalcommits.org/) for commit guidelines.
4 |
5 | ---
6 | ## [0.1.3] - 2024-04-26
7 |
8 | ### Bug Fixes
9 |
10 | - **(load)** set cuda device in each thread
11 | - **(security)** fix typo in comments
12 | - **(ut)** clean files in ut
13 | - directIO segmentation fault
14 | - modify aksk less real
15 |
16 | ### Features
17 |
18 | - **(security)** compat with cipher header and use cipher in posix
19 | - **(sfcs)** set sfcs sync interval by environ
20 | - **(sfcs)** reduce memcpy
21 | - **(sfcs)** parse sfcs confs from environ in json format
22 | - add clone mode for shared tensor
23 | - get cfs task id from env
24 |
25 | ### Miscellaneous Chores
26 |
27 | - **(security)** clarify cipher readme
28 | - cpp coding style
29 |
30 | ### License
31 |
32 | - add license file and header
33 |
34 | ## [0.1.2] - 2024-01-25
35 |
36 | ### Bug Fixes
37 |
38 | - **(saver)** add return to remove repetitive writing
39 | - **(security)** socket path and ut bug
40 | - MANIFEST does not contain all fastcrypto lib files
41 |
42 | ### Documentation
43 |
44 | - update readme
45 |
46 | ### Features
47 |
48 | - **(security)** fetch key and iv
49 | - **(security)** get and refresh sfcs aksk from datapipe
50 | - **(security)** get namenode ip from datapipe and fix write xml bug
51 | - **(sfcs)** decide load use sfcs sdk from environ
52 |
53 | ## [0.1.1] - 2023-11-17
54 |
55 | ### Bug Fixes
56 |
57 | - **(sfcs)** keep in consistent with reading when open for writing
58 | - **(ut)** delete potential residual test file before testing
59 | - fix ci release and update readme for pip install
60 |
61 | ### Documentation
62 |
63 | - use index-url as default install method
64 |
65 | ### Features
66 |
67 | - **(ci)** add import format tool in ci
68 | - **(saver)** introduce saver class to aggregate save operations
69 | - **(security)** add cipher in sfcs sdk
70 | - **(sfcs)** load and save pt
71 | - load pt file in parallel from sfcs
72 |
73 | ### Miscellaneous Chores
74 |
75 | - bump version to v0.1.0
76 | - bump version to v0.1.1
77 |
78 | ### Performance
79 |
80 | - make the read usage with good alignment.
81 |
82 |
83 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include veturboio/ops/csrc/lib/*.so
2 | include veturboio/ops/csrc/lib/*.so.*
3 | include veturboio/ops/csrc/include/*.h
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # veTurboIO
2 |
3 |
4 | [En](./README.md) | [中文](./README.zh.md)
5 |
6 |
7 | A Python library for high-performance reading and writing of PyTorch model files
8 | developed by Volcano Engine. This library mainly implements based on the safetensors
9 | file format to achieve efficient storage and reading of tensor data.
10 |
11 | ## Install
12 |
13 | It can be installed directly through the following way:
14 | ```bash
15 | cd veturboio
16 | python setup.py get_libcfs
17 | python setup.py install
18 | ```
19 |
20 | Tips: This instruction will preferentially download the whl file that matches the
21 | current Python and PyTorch versions. If no matching whl file is found, it will
22 | automatically download the source code for compilation and installation.
23 |
24 |
25 | If the installation fails, you can also try to install by downloading the source code,
26 | and then compile and install it manually.
27 |
28 | ```bash
29 | # CUDA ops, default
30 | python setup.py install --cuda_ext
31 |
32 | # NPU ops
33 | python setup.py install --npu_ext
34 |
35 | # CPU only
36 | python setup.py install --cpu_ext
37 | ```
38 |
39 |
40 | ## Quick Start
41 |
42 | ### Read and write model files
43 |
44 |
45 | ```python
46 | import torch
47 | import veturboio
48 |
49 | tensors = {
50 | "weight1": torch.zeros((1024, 1024)),
51 | "weight2": torch.zeros((1024, 1024))
52 | }
53 |
54 | veturboio.save_file(tensors, "model.safetensors")
55 |
56 | new_tensors = veturboio.load("model.safetensors")
57 |
58 | # check if the tensors are the same
59 | for k, v in tensors.items():
60 | assert torch.allclose(v, new_tensors[k])
61 | ```
62 |
63 | ### Convert existing PyTorch files
64 |
65 | ```bash
66 | python -m veturboio.convert -i model.pt -o model.safetensors
67 | ```
68 |
69 | ## Performance test
70 |
71 | Run directly:
72 | ```bash
73 | bash bench/io_bench.sh
74 | ```
75 | Then, you can get the following results:
76 | ```
77 | fs_name tensor_size veturboio load_time(s) torch load_time(s)
78 | shm 1073741824 0.08 0.63
79 | shm 2147483648 0.19 1.26
80 | shm 4294967296 0.36 2.32
81 | ```
82 |
83 | Also, you can run the following command to get more options:
84 | ```bash
85 | python bench/io_bench.py -h
86 | ```
87 |
 88 | ## Advanced Features
89 |
90 | ### Using veMLP to accelerate reading and writing
91 | Volcano Engine Machine Learning Platform (veMLP) provides a distributed cache file system
92 | based on the physical disks of the GPU cluster.
93 |
94 |
95 |
96 |
97 |
98 | When a cluster-level task needs to read
99 | a model file, the caching system can efficiently distribute the model file between GPU
100 | machines via RDMA transfer, thus avoiding network transfer bottlenecks. When using this
101 | system, veTurboIO can maximize its performance advantages.
102 |
103 | ### Encrypt and decrypt model files
104 | veTurboIO supports encryption and decryption of model files. You can read the [tutorial](./docs/encrypt_model.md)
105 | to learn how to keep your model files secure. When you use GPU as target device, veTurboIO can decrypt the model file on the fly.
106 |
107 |
108 | ## License
109 |
110 | [Apache License 2.0](./LICENSE)
111 |
112 |
--------------------------------------------------------------------------------
/README.zh.md:
--------------------------------------------------------------------------------
1 | # veTurboIO
2 |
3 |
 4 | [En](./README.md) | [中文](./README.zh.md)
5 |
6 |
7 | 一个由 Volcano Engine 开发的用于高性能读写 PyTorch 模型文件的 Python 库。该库主要基于 safetensors 文件格式实现,以实现对张量数据的高效存储和读取。
8 |
9 | ## 安装
10 |
11 | 可以直接通过以下方式安装:
12 | ```bash
13 | pip install veturboio -f https://veturbo-cn-beijing.tos-cn-beijing.volces.com/veturboio/index.html --no-build-isolation
14 | ```
15 |
16 | 提示:此指令会优先下载与当前 Python 和 PyTorch 版本匹配的 whl 文件,如果没有找到匹配的 whl 文件,会自动下载源码进行编译安装。
17 |
18 | 如果安装失败,也可以尝试通过下载源码安装,然后手动编译安装。
19 | ```bash
20 | # CUDA ops, default
21 | python setup.py install --cuda_ext
22 |
23 | # NPU ops
24 | python setup.py install --npu_ext
25 |
26 | # CPU only
27 | python setup.py install --cpu_ext
28 | ```
29 |
30 | ## 快速开始
31 |
32 | ### 读写模型文件
33 |
34 |
35 | ```python
36 | import torch
37 | import veturboio
38 |
39 | tensors = {
40 | "weight1": torch.zeros((1024, 1024)),
41 | "weight2": torch.zeros((1024, 1024))
42 | }
43 |
44 | veturboio.save_file(tensors, "model.safetensors")
45 |
46 | new_tensors = veturboio.load("model.safetensors")
47 |
48 | # check if the tensors are the same
49 | for k, v in tensors.items():
50 | assert torch.allclose(v, new_tensors[k])
51 | ```
52 |
53 | ## 转换已有 PyTorch 文件
54 |
55 | ```bash
56 | python -m veturboio.convert -i model.pt -o model.safetensors
57 | ```
58 |
59 | ## 性能测试
60 |
61 | 直接运行:
62 | ```bash
63 | bash bench/io_bench.sh
64 | ```
65 |
66 | 接下来,你可以获得如下的结果:
67 | ```
68 | fs_name tensor_size veturboio load_time(s) torch load_time(s)
69 | shm 1073741824 0.08 0.63
70 | shm 2147483648 0.19 1.26
71 | shm 4294967296 0.36 2.32
72 | ```
73 |
74 | ## 进阶功能
75 |
76 | ### 使用 veMLP 加速读写
77 | Volcano Engine Machine Learning Platform (veMLP) 提供了基于 GPU 集群的物理磁盘的分布式缓存文件系统。
78 |
79 |
80 |
81 |
82 |
83 | 当集群级任务需要读取模型文件时,缓存系统可以通过 RDMA 传输高效地在 GPU 机器之间分发模型文件,从而避免网络传输瓶颈。使用此系统时,veTurboIO 可以最大化其性能优势。
84 |
85 |
86 | ### 加密和解密模型文件
87 |
 88 | veTurboIO 支持模型文件的加密和解密。您可以阅读[教程](./docs/encrypt_model.md)以了解如何保护您的模型文件。当您使用 GPU 作为目标设备时,veTurboIO 可以实时解密模型文件。
89 |
90 | ## 许可证
91 |
92 | [Apache License 2.0](./LICENSE)
93 |
--------------------------------------------------------------------------------
/bench/io_bench.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import argparse
18 | import os
19 | import time
20 | from functools import lru_cache
21 |
22 | import numpy as np
23 | import torch
24 |
25 | import veturboio
26 |
27 |
def human_read_to_byte(size):
    """Convert a human-readable size string (e.g. '4GB', '10B', '1024') to bytes.

    Accepts a two-letter unit suffix (KB/MB/GB/TB/PB/EB/ZB/YB), a bare 'B'
    suffix, or a plain integer string with no suffix.
    """
    units = {
        'B': 1,
        'KB': 1024,
        'MB': 1048576,
        'GB': 1073741824,
        'TB': 1099511627776,
        'PB': 1125899906842624,
        'EB': 1152921504606846976,
        'ZB': 1180591620717411303424,
        'YB': 1208925819614629174706176,
    }
    suffix2 = size[-2:]
    suffix1 = size[-1:]
    if suffix2 in units:
        return units[suffix2] * int(size[:-2])
    if suffix1 in units:
        # only 'B' is a single-char unit and its factor is 1
        return int(size[:-1])
    return int(size)
46 |
47 |
def parse_args():
    """Parse command-line options for the I/O benchmark.

    Returns:
        argparse.Namespace with fields: begin, end, base_dir, fs_name,
        gen_data, map_location, use_pinmem, load_mode.
    """
    parser = argparse.ArgumentParser(
        description='benchmark veturboio, notice to clear page cache manually when benchmarking for existing file'
    )
    parser.add_argument(
        '--begin',
        default='1048576',
        dest='begin',
        help='specify the minimum file size to benchmark in bytes or in format like xxKB/MB/GB',
    )
    parser.add_argument(
        '--end',
        default='1048576',
        dest='end',
        help='specify the maximum file size to benchmark in bytes or in format like xxKB/MB/GB',
    )
    # fixed typo in help text: "the the base dir" -> "the base dir"
    parser.add_argument('--base_dir', dest='base_dir', help='specify the base dir of files to be benchmarked')
    parser.add_argument('--fs_name', default='local_fs', help='file system name that would be displayed in the result')
    parser.add_argument('--gen_data', default=False, action=argparse.BooleanOptionalAction, dest='gen_data')
    parser.add_argument(
        '--map_location', default='cpu', dest='map_location', help='map location of tensor to be loaded'
    )
    parser.add_argument('--use_pinmem', default=False, action=argparse.BooleanOptionalAction, dest='use_pinmem')
    # fixed typo in help text: "seperated" -> "separated"
    parser.add_argument(
        '--load_mode', default='veturboio', dest='load_mode', help='load modes specified, separated by comma'
    )

    args = parser.parse_args()
    return args
77 |
78 |
def print_header(load_modes):
    """Print the result-table header: one padded load_time column per mode."""
    columns = [f"{mode}{' load_time(s)' + ' ':<25}" for mode in load_modes]
    header_prefix = f"{'fs_name' + ' ':<10} {'tensor_size' + ' ':<15}"
    print(header_prefix, ' '.join(columns))
82 |
83 |
def print_load_time(fs_name, tensor_size, load_times):
    """Print one result row: fs name, tensor size, then per-mode load times
    (column widths match print_header)."""
    padded = [f"{elapsed}{' ':<30}" for elapsed in load_times]
    row_prefix = f"{fs_name:<10} {str(tensor_size):<15}"
    print(row_prefix, ' '.join(padded))
87 |
88 |
def sfcs_env():
    """Export the SFCS connection settings for the CI benchmark cluster.

    Credentials come from the CI secrets CI_SFCS_AK / CI_SFCS_SK; a
    KeyError here means they are missing from the environment.
    """
    os.environ['SFCS_FSNAME'] = 'byted-cpu-sfcs'
    os.environ['SFCS_REGION'] = 'cn-beijing'
    os.environ['SFCS_ACCESS_KEY'] = os.environ['CI_SFCS_AK']
    os.environ['SFCS_SECRET_KEY'] = os.environ['CI_SFCS_SK']
    # The remaining settings are fixed cluster parameters.
    static_conf = {
        'SFCS_AUTHENTICATION_SERVICE_NAME': 'cfs',
        'SFCS_NS_ID': '18014398509481988',
        'SFCS_UFS_PATH': 'tos://yinzq-bucket/',
        'SFCS_MULTI_NIC_WHITELIST': 'eth0',
        'SFCS_NETWORK_SEGMENT': '172.31.128.0/17',
        'SFCS_NAMENODE_ENDPOINT_ADDRESS': '100.67.19.231',
        'SFCS_LOG_SEVERITY': 'ERROR',
    }
    os.environ.update(static_conf)
101 |
102 |
def main():
    """Run the load benchmark.

    Flow: parse CLI args, configure SFCS env vars when base_dir uses the
    sfcs:// scheme, warm up CUDA if targeted, then for every power-of-two
    file size between --begin and --end, time each requested load mode and
    print one row per size.
    """
    args = parse_args()
    if args.base_dir.startswith('sfcs://'):
        sfcs_env()
    load_modes = args.load_mode.split(',')
    # warmup GPU otherwise the first case would be slow
    device = torch.device(args.map_location)
    if device.type == "cuda":
        file_path = os.path.join(args.base_dir if args.base_dir else "", 'warmup.safetensors')
        tensors = {"weight": torch.randn(10)}
        veturboio.save_file(tensors, file_path)
        veturboio.load(file_path, map_location=args.map_location, use_pinmem=args.use_pinmem)
    print_header(load_modes)
    tensor_size = human_read_to_byte(args.begin)
    end_size = human_read_to_byte(args.end)
    while tensor_size <= end_size:
        if args.gen_data:
            # numel makes the saved payload roughly tensor_size bytes:
            # torch.randn yields float32 (4 bytes) while np.dtype(float)
            # is 8 bytes, so // 8 * 2 == tensor_size / 4 elements.
            numel = tensor_size // np.dtype(float).itemsize * 2
            tensors = {"weight": torch.randn(numel)}
        load_times = []
        for mode in load_modes:
            if mode == 'veturboio':
                file_path = os.path.join(args.base_dir if args.base_dir else "", f'{tensor_size}.safetensors')
                if args.gen_data:
                    veturboio.save_file(tensors, file_path)

                start = time.time()
                loaded_tensor = veturboio.load(file_path, map_location=args.map_location, use_pinmem=args.use_pinmem)
            if mode == 'torch':
                file_path = os.path.join(args.base_dir if args.base_dir else "", f'{tensor_size}.pt')
                if args.gen_data:
                    veturboio.save_pt(tensors, file_path)

                start = time.time()

                loaded_tensor = veturboio.load(file_path, map_location=args.map_location)
            # NOTE(review): if an unrecognized mode is passed, `start` and
            # `loaded_tensor` are stale or unbound here -- only the
            # 'veturboio' and 'torch' modes are supported.
            end = time.time()
            load_times.append("%.2f" % (end - start))

            if device.type == "cuda":
                # Free GPU memory so the next (larger) case starts clean.
                del loaded_tensor
                torch.cuda.empty_cache()

        print_load_time(args.fs_name, tensor_size, load_times)
        tensor_size = tensor_size * 2
148 |
149 |
# Script entry point: run the benchmark when executed directly.
if __name__ == '__main__':
    main()
152 |
--------------------------------------------------------------------------------
/bench/io_bench.sh:
--------------------------------------------------------------------------------
1 | ###
2 | # Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | ###
16 |
17 | # shm
18 | mkdir -p /dev/shm/test_files
19 | python bench/io_bench.py --load_mode=veturboio,torch --base_dir=/dev/shm/test_files --begin=1GB --end=4GB --gen_data --fs_name=shm
20 |
21 | # sfcs
22 | python bench/io_bench.py --load_mode=veturboio,torch --base_dir=sfcs:// --begin=1GB --end=4GB --gen_data --fs_name=sfcs
23 |
--------------------------------------------------------------------------------
/docs/encrypt_model.md:
--------------------------------------------------------------------------------
1 | # 加解密模型文件
2 |
3 | 该库底层通过两种接口读写:SFCS SDK 和 POSIX。如果文件路径前缀为 `sfcs://` 就视为使用 SFCS SDK,所需的鉴权信息可以从火山引擎可信服务的 `unix domain socket` 获取或者设置以下三个环境变量:
4 |
5 | | 环境变量名 | 含义 |
6 | | ------------------------------ | --------------------------------- |
7 | | SFCS_ACCESS_KEY | SFCS 文件系统的 AK |
8 | | SFCS_SECRET_KEY | SFCS 文件系统的 SK |
9 | | SFCS_NAMENODE_ENDPOINT_ADDRESS | SFCS 文件系统 NameNode 地址 |
10 |
11 |
12 | 加解密读写模型文件需要 data key 和 iv,有 3 种获取方式,读取优先级按照下列顺序:
13 | - [1] 加密的 data key 和 iv 存放在密文模型文件的 header 中,使用火山引擎 KMS 解密得到明文的 data key。
14 | - [1.1] 访问 KMS 所需的 AK/SK/ST 从火山引擎可信服务的 unix domain socket 获取,需要额外挂载。
15 | - [1.2] 访问 KMS 所需的 AK/SK/ST 从环境变量获取。
16 | - [2] 访问火山引擎可信服务的 unix domain socket 直接获取 data key 和 iv,需要额外挂载。
17 | - [3] 通过环境变量直接设置 data key 和 iv。
18 |
19 | 不同方式需要设置的环境变量如下:
20 |
21 | | 环境变量名 | 含义 |
22 | | ------------------------------ | --------------------------------- |
23 | | VETURBOIO_KMS_HOST | [1] KMS 服务地址,默认值 open.volcengineapi.com|
24 | | VETURBOIO_KMS_REGION | [1] KMS 服务所在区域,默认值 cn-beijing |
25 | | VETURBOIO_KMS_KEYRING_NAME | [1] KMS 服务解密 data key 的钥匙环名 |
26 | | VETURBOIO_KMS_KEY_NAME | [1] KMS 服务解密 data key 的主密钥名 |
27 | | DATAPIPE_SOCKET_PATH | [1.1][2] 可信服务 uds 的路径 |
28 | | VETURBOIO_KMS_ACCESS_KEY | [1.2] KMS 鉴权的 AK |
29 | | VETURBOIO_KMS_SECRET_KEY | [1.2] KMS 鉴权的 SK |
30 | | VETURBOIO_KMS_SESSION_TOKEN | [1.2] KMS 鉴权的临时令牌,非必需|
31 | | VETURBOIO_KEY | [3] 加解密的 128 位数据密钥的 base64 编码 |
32 | | VETURBOIO_IV | [3] 加解密的 128 位初始向量的 base64 编码 |
33 |
34 |
35 | 按照上述三种方式设置好后,可以参考下面代码在读写模型文件时启用加解密:
36 | ```python
37 | import torch
38 | import veturboio
39 |
40 | tensors = {
41 | "weight1": torch.zeros((1024, 1024)),
42 | "weight2": torch.zeros((1024, 1024))
43 | }
44 |
45 | # use cpu to encrypt
46 | veturboio.save_file(tensors, "sfcs://model.safetensors", use_cipher=True)
47 |
48 | # use cpu to decrypt if map_location is cpu
49 | reloaded_tensor1 = veturboio.load("sfcs://model.safetensors", map_location="cpu", use_cipher=True)
50 |
51 | # use gpu to decrypt if map_location is cuda
52 | reloaded_tensor2 = veturboio.load("sfcs://model.safetensors", map_location="cuda:0", use_cipher=True)
53 |
54 | # check if the tensors are the same
55 | for k, v in tensors.items():
56 | assert torch.allclose(v, reloaded_tensor1[k])
57 | for k, v in tensors.items():
58 | assert torch.allclose(v, reloaded_tensor2[k])
59 | ```
60 |
61 |
--------------------------------------------------------------------------------
/docs/imgs/SFCS.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/volcengine/veTurboIO/0ed05516013da4332433784660660ca4d1904505/docs/imgs/SFCS.png
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # veTurboIO
2 |
3 | 火山引擎研发的一款用于高性能读写 PyTorch 模型文件的 Python 库。该库主要基于 safetensors 文件格式,实现高效的张量数据存储与读取。
4 |
5 | ## 安装
6 |
7 | ```bash
8 | cd veturboio
9 | python setup.py install
10 | ```
11 |
12 | ## 快速开始
13 |
14 | ```python
15 | import torch
16 | import veturboio
17 |
18 | tensors = {
19 | "weight1": torch.zeros((1024, 1024)),
20 | "weight2": torch.zeros((1024, 1024))
21 | }
22 |
23 | veturboio.save_file(tensors, "model.safetensors")
24 |
25 | reloaded_tensor = veturboio.load("model.safetensors", map_location="cpu")
26 |
27 | # check if the tensors are the same
28 | for k, v in tensors.items():
29 | assert torch.allclose(v, reloaded_tensor[k])
30 | ```
31 |
32 | ### 使用锁页内存加速连续加载数据到GPU
33 | ```python
34 | import torch
35 | import veturboio
36 |
37 | tensors1 = {
38 | "weight1": torch.zeros((1024, 1024)),
39 | "weight2": torch.zeros((1024, 1024))
40 | }
41 |
42 | veturboio.save_file(tensors1, "model1.safetensors")
43 |
44 | tensors2 = {
45 | "weight1": torch.zeros((1024, 1024)),
46 | "weight2": torch.zeros((1024, 1024))
47 | }
48 |
49 | veturboio.save_file(tensors2, "model2.safetensors")
50 |
51 | helper = veturboio.init_io_helper()
52 | reloaded_tensor1 = veturboio.load("model1.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper)
53 | # the map_location may be different
54 | reloaded_tensor2 = veturboio.load("model2.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper)
55 |
56 | # check if the tensors are the same
57 | for k, v in tensors1.items():
58 | assert torch.allclose(v.cuda(), reloaded_tensor1[k])
59 | for k, v in tensors2.items():
60 | assert torch.allclose(v.cuda(), reloaded_tensor2[k])
61 | ```
62 |
63 | ### 转换现有的 PyTorch 文件
64 | ```bash
65 | python -m veturboio.convert -i model.pt -o model.safetensors
66 | ```
67 |
68 |
69 | ## 特性
70 |
71 | - 多线程读取文件;
72 | - zero-copy 读取,不额外花费内存;
73 | - 支持直接加载到 CUDA;
74 | - BFloat16 数值支持;
75 | - 固定 pinmem 用于快速反复读取;
76 | - 兼容 PyTorch 标准格式(无性能提升);
77 | - 兼容 safetensors 格式;
78 |
79 | ## 收益
80 |
81 | 标准的 PyTorch 模型文件会经过 zip 与 pickle 两次操作,这两个操作极大的抑制了读取的速度,同时 unpickle 也会带来潜在的不安全性。我们使用一种自定义的模型格式来存储 tensor 数据,希望可以改善 PyTorch 标准格式所存在的这些问题。目前已经实现的优点有:
82 |
83 | - 多线程读取:当前文件对象主要的存放点为云端存储,单一进程无法达到云存储的带宽上限,必须使用多线程读取才能达到最大的读取速度。PyTorch 标准格式的读取速度受限于 pickle 解析速度,远无法达到云存储的速度上限;
84 | - 云端适配:基于火山引擎的云端存储(vePFS、SFCS)特性,最大化的利用了云端存储的带宽;
85 | - 安全性:不再使用 pickle 对象,避免了 pickle 的安全性问题;
86 |
87 |
--------------------------------------------------------------------------------
/docs/pin_mem.md:
--------------------------------------------------------------------------------
1 | ### 使用锁页内存加速连续加载数据到GPU
2 | ```python
3 | import torch
4 | import veturboio
5 |
6 | tensors1 = {
7 | "weight1": torch.zeros((1024, 1024)),
8 | "weight2": torch.zeros((1024, 1024))
9 | }
10 |
11 | veturboio.save_file(tensors1, "model1.safetensors")
12 |
13 | tensors2 = {
14 | "weight1": torch.zeros((1024, 1024)),
15 | "weight2": torch.zeros((1024, 1024))
16 | }
17 |
18 | veturboio.save_file(tensors2, "model2.safetensors")
19 |
20 | helper = veturboio.init_io_helper()
21 | reloaded_tensor1 = veturboio.load("model1.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper)
22 | # the map_location may be different
23 | reloaded_tensor2 = veturboio.load("model2.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper)
24 |
25 | # check if the tensors are the same
26 | for k, v in tensors1.items():
27 | assert torch.allclose(v.cuda(), reloaded_tensor1[k])
28 | for k, v in tensors2.items():
29 | assert torch.allclose(v.cuda(), reloaded_tensor2[k])
30 | ```
31 |
32 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: "veTurboIO"
2 |
3 | theme:
4 | name: "material"
5 |
6 | docs_dir: docs
7 |
8 | nav:
9 | - 首页: index.md
10 | - 最佳实践:
11 | - 动态加载: dynamic_load.md
12 | - SFCS 加载优化: sfcs_support.md
13 | - API: api.md
14 | - 发布日志: release.md
15 |
16 | plugins:
17 | - mkdocstrings:
18 | default_handler: python
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 |
2 | [tool.isort]
3 | profile = "black" # black-compatible
4 | line_length = 119 # should match black parameters
5 | py_version = 310 # python 3.10 as a target version
6 | sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
7 | default_section = "THIRDPARTY"
8 |
9 |
10 | [tool.black]
11 | line_length = 119
12 | skip_string_normalization = true
13 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import os
18 | import platform
19 | import sys
20 |
21 | import requests
22 | import setuptools
23 | import torch
24 | from pkg_resources import parse_version
25 | from setuptools import Extension, find_packages, setup
26 | from torch.utils.cpp_extension import BuildExtension, CppExtension, include_paths
27 |
28 | # initialize variables for compilation
29 | IS_LINUX = platform.system() == "Linux"
30 | IS_DARWIN = platform.system() == "Darwin"
31 | IS_WINDOWS = platform.system() == "Windows"
32 |
33 | this_dir = os.path.dirname(os.path.abspath(__file__))
34 |
35 |
def get_option():
    """Ensure exactly one extension flag is selected, defaulting to --cuda_ext.

    NPU_EXTENSION_ENABLED=1 forces --npu_ext; otherwise, when none of the
    known flags is already present on sys.argv, --cuda_ext is appended.
    """
    if os.getenv("NPU_EXTENSION_ENABLED", "0") == "1":
        sys.argv.append("--npu_ext")
        return
    if any(flag in sys.argv for flag in ("--cuda_ext", "--npu_ext", "--cpu_ext")):
        return
    print(
        '''No known extension specified, default to use --cuda_ext. Currently supported:
        --cuda_ext
        --npu_ext
        --cpu_ext'''
    )
    sys.argv.append("--cuda_ext")
47 |
48 |
def get_version():
    """Return the package version, adding a +cpu/+npu local suffix for
    non-CUDA builds (selected via the pseudo sys.argv flags)."""
    import importlib.util

    # Load veturboio/version.py directly so setup.py works without the
    # package being importable yet.
    spec = importlib.util.spec_from_file_location("version", os.path.join("veturboio", "version.py"))
    version_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(version_module)

    suffix = ""
    if "--cpu_ext" in sys.argv:
        suffix = "+cpu"
    elif "--npu_ext" in sys.argv:
        suffix = "+npu"
    return version_module.__version__ + suffix
62 |
63 |
def make_relative_rpath(path):
    """Return the linker flag embedding an rpath relative to the loaded
    binary (no-op on Windows, @loader_path on macOS, $ORIGIN elsewhere)."""
    if IS_WINDOWS:
        return ''
    origin = '@loader_path/' if IS_DARWIN else '$ORIGIN/'
    return '-Wl,-rpath,' + origin + path
71 |
72 |
def get_veturboio_extension():
    """Build the veturboio_ext extension object for the selected backend.

    The backend is chosen through a pseudo CLI flag (--cuda_ext, --npu_ext,
    --cpu_ext; see get_option), which is consumed from sys.argv here.
    Returns a torch CUDAExtension for CUDA builds, otherwise a plain
    setuptools Extension with the backend-specific source appended.
    """
    get_option()
    # prevent ninja from using too many resources
    try:
        import psutil

        num_cpu = len(psutil.Process().cpu_affinity())
        # NOTE(review): max(4, num_cpu - 1) enforces a *floor* of 4 jobs;
        # if the intent was to cap resource usage, min() may have been
        # meant -- confirm.
        cpu_use = max(4, num_cpu - 1)
    except (ModuleNotFoundError, AttributeError):
        # psutil missing or cpu_affinity unsupported on this platform.
        cpu_use = 4

    os.environ.setdefault("MAX_JOBS", str(cpu_use))
    # os.environ.setdefault("TORCH_CUDA_ARCH_LIST", "8.0;8.6")
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"

    define_macros = []

    # Before PyTorch1.8.0, when compiling CUDA code, `cxx` is a
    # required key passed to PyTorch. Even if there is no flag passed
    # to cxx, users also need to pass an empty list to PyTorch.
    # Since PyTorch1.8.0, it has a default value so users do not need
    # to pass an empty list anymore.
    # More details at https://github.com/pytorch/pytorch/pull/45956
    extra_compile_args = {'cxx': ['-fvisibility=hidden'], 'nvcc': ['-O3']}

    # Pick the C++ standard supported by the installed torch version.
    if parse_version(torch.__version__) <= parse_version('1.12.1'):
        extra_compile_args['cxx'].append('-std=c++14')
    else:
        extra_compile_args['cxx'].append('-std=c++17')

    name = "veturboio_ext"

    # Sources shared by all backends.
    sources = [
        "veturboio/ops/csrc/pybind.cpp",
        "veturboio/ops/csrc/posix.cpp",
        "veturboio/ops/csrc/sfcs.cpp",
        "veturboio/ops/csrc/io_helper_cpu_common.cpp",
        "veturboio/ops/csrc/cipher.cpp",
    ]

    include_dirs = include_paths()
    include_dirs.append("veturboio/ops/csrc/include")

    torch_dir = os.path.join(os.path.dirname(torch.__file__), "lib")
    library_dirs = [torch_dir]
    library_dirs.append("veturboio/ops/csrc/lib")

    # ":libfastcrypto_gpu.so.0.3" links the versioned .so by exact name.
    libraries = ["cloudfs", ":libfastcrypto_gpu.so.0.3"]

    extra_link_args = [make_relative_rpath("veturboio/ops/csrc/lib")]

    # Refer to: https://github.com/pytorch/pytorch/blob/main/torch/utils/cpp_extension.py#L918
    # In torch 2.0, this flag is False, and the *.so lib set this flag as False when building.
    # In newer torch, this flag is True, to keep compatibility with *.so lib, we set it False
    # to generate g++ flags '-D_GLIBCXX_USE_CXX11_ABI=0' when building veturboio_ext, otherwise
    # some 'undefine symbol' error of std::string will be thrown.
    torch._C._GLIBCXX_USE_CXX11_ABI = False

    if "--cuda_ext" in sys.argv:
        # Consume the flag so setuptools never sees it.
        sys.argv.remove("--cuda_ext")

        extra_compile_args['nvcc'].append('-O3')

        sources.append("veturboio/ops/csrc/io_helper.cu")

        define_macros.append(("USE_CUDA", "1"))

        from torch.utils.cpp_extension import CUDAExtension

        return CUDAExtension(
            name=name,
            sources=sources,
            define_macros=define_macros,
            include_dirs=include_dirs,
            library_dirs=library_dirs,
            libraries=libraries,
            extra_compile_args=extra_compile_args,
            extra_link_args=extra_link_args,
        )
    else:
        # Non-CUDA builds link torch's CPU libraries explicitly.
        extra_compile_args['cxx'].append('-O3')

        libraries.append("torch_cpu")
        libraries.append("torch_python")

        extra_link_args.append(f"-Wl,--rpath={torch_dir},--enable-new-dtags")

        if "--npu_ext" in sys.argv:
            sys.argv.remove("--npu_ext")

            sources.append("veturboio/ops/csrc/io_helper_npu.cpp")
            define_macros.append(("USE_NPU", "1"))

            return Extension(
                name=name,
                sources=sources,
                define_macros=define_macros,
                include_dirs=include_dirs,
                library_dirs=library_dirs,
                libraries=libraries,
                extra_compile_args=extra_compile_args,
                extra_link_args=extra_link_args,
            )
        elif "--cpu_ext" in sys.argv:
            sys.argv.remove("--cpu_ext")

            sources.append("veturboio/ops/csrc/io_helper_cpu.cpp")

            return Extension(
                name=name,
                sources=sources,
                define_macros=define_macros,
                include_dirs=include_dirs,
                library_dirs=library_dirs,
                libraries=libraries,
                extra_compile_args=extra_compile_args,
                extra_link_args=extra_link_args,
            )
191 |
192 |
class GetLibCfsCommand(setuptools.Command):
    """setup.py command that downloads libcloudfs.so from a URL."""

    description = 'get libcfs from url'
    user_options = [('src=', 's', 'source url of libcloudfs.so'), ('dst=', 'd', 'dest filepath of libcloudfs.so')]

    def initialize_options(self):
        # Defaults live in the package so they stay in one place.
        from veturboio.utils.load_veturboio_ext import LIBCFS_DEFAULT_PATH, LIBCFS_DEFAULT_URL

        self.src = LIBCFS_DEFAULT_URL
        self.dst = LIBCFS_DEFAULT_PATH

    def finalize_options(self):
        # No cross-option validation required.
        pass

    def run(self):
        # Fetch the shared library and write it to the destination path.
        print(f"download libcloudfs.so from {self.src}, save to {self.dst}")
        response = requests.get(self.src, timeout=60)
        with open(self.dst, 'wb') as fout:
            fout.write(response.content)
213 |
214 |
# Package metadata and build wiring for veturboio.
setup(
    name="veturboio",
    version=get_version(),
    # Fixed typos: "Effcient"/"libraray"/"Volcanic" (company name is
    # Volcano Engine, cf. the license headers in this repo).
    description="Efficient PyTorch IO library on Volcano Engine",
    author="AML Team",
    ext_modules=[get_veturboio_extension()],
    # NOTE: exclude must be a tuple of patterns -- the previous bare
    # string ("veturboio.ops.csrc.common.sfcs.lib") was iterated
    # character by character by find_packages, so the intended package
    # was never actually excluded.
    packages=find_packages(exclude=("veturboio.ops.csrc.common.sfcs.lib",)),
    install_requires=[
        "safetensors",
        "numpy",
        "netifaces",
        "loguru",
        "requests-unixsocket",
        "requests",
    ],
    include_package_data=True,
    cmdclass={"get_libcfs": GetLibCfsCommand, "build_ext": BuildExtension},
    dependency_links=['https://mirrors.ivolces.com/pypi/'],
)
234 |
--------------------------------------------------------------------------------
/tests/test_assert_exceptions.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import os
18 | import tempfile
19 | import unittest
20 | from unittest import TestCase
21 |
22 | import torch
23 |
24 | import veturboio
25 |
26 |
class TestAssertException(TestCase):
    """Verify that misusing a shared IOHelper raises the expected error."""

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_modify_use_pinmem_attr(self):
        # Reusing one IOHelper with a different use_pinmem value must fail:
        # the first load fixes the attribute, the second load flips it.
        helper = veturboio.init_io_helper()
        with tempfile.TemporaryDirectory() as tmpdirname:
            filepath = os.path.join(tmpdirname, "model.safetensors")
            veturboio.save_file(self.tensors, filepath)

            with self.assertRaises(Exception) as context:
                veturboio.load(filepath, map_location="cuda:0", use_pinmem=False, helper=helper)
                veturboio.load(filepath, map_location="cuda:0", use_pinmem=True, helper=helper)
            # The expected message text (including the 'exising' typo) must
            # match what the library raises.
            self.assertTrue(
                'use_pinmem attribute of an exising IOHelper should not be changed' in str(context.exception)
            )

    @classmethod
    def setUpClass(cls):
        # Shared fixture tensors saved and re-loaded by the test above.
        cls.tensors = {
            "weight1": torch.randn(20, 10),
            "weight2": torch.randn(20, 10),
        }
48 |
--------------------------------------------------------------------------------
/tests/test_convert.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import os
18 | import tempfile
19 | from unittest import TestCase
20 |
21 | import torch
22 |
23 | import veturboio
24 |
25 |
class TestConvertUtil(TestCase):
    """End-to-end check of the veturboio.convert CLI (.pt -> .safetensors)."""

    @classmethod
    def setUpClass(cls):
        # Fixture tensors round-tripped through the converter.
        cls.tensors = {
            "weight1": torch.randn(20, 10),
            "weight2": torch.randn(20, 10),
        }

    def test_convert(self):
        with tempfile.TemporaryDirectory() as workdir:
            src_path = os.path.join(workdir, "model.pt")
            dst_path = os.path.join(workdir, "model.safetensors")
            torch.save(self.tensors, src_path)

            # Run the converter exactly as a user would, via the module CLI.
            cmd = f"python -m veturboio.convert -i {src_path} -o {dst_path}"
            print(cmd)
            os.system(cmd)

            reloaded = veturboio.load(dst_path)
            for name, tensor in self.tensors.items():
                self.assertTrue(torch.allclose(tensor, reloaded[name]))
46 |
--------------------------------------------------------------------------------
/tests/test_fetch_cipher.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import base64
18 | import http.server
19 | import json
20 | import os
21 | import socketserver
22 | import tempfile
23 | import threading
24 | from datetime import datetime, timedelta
25 | from time import sleep
26 | from unittest import TestCase
27 |
28 | import numpy as np
29 |
30 | from veturboio.ops.cipher import CipherInfo, DataPipeClient
31 | from veturboio.ops.sfcs_utils import (
32 | SFCS_OPT_ENV_LIST,
33 | SFCS_REQ_ENV_LIST,
34 | credentials_helper,
35 | generate_sfcs_conf_xml,
36 | init_sfcs_conf,
37 | )
38 |
39 |
class UnixSocketHttpServer(socketserver.UnixStreamServer):
    """UnixStreamServer with a fabricated client address.

    http.server handlers expect a (host, port) style client address, which
    a unix domain socket does not provide, so a placeholder is returned.
    """

    def get_request(self):
        sock, _addr = super().get_request()
        return (sock, ["local", 0])
44 |
45 |
class DatapipeHandler(http.server.SimpleHTTPRequestHandler):
    """Mock of the datapipe sidecar service used by the cipher tests.

    Each request selects a behavior via the X-Datapipe-Task-Type header:
      POST 'top'        -> mock KMS decrypt result (fixed plaintext key)
      GET  'ping'       -> liveness probe
      GET  'encrypt-key'-> base64 data key + iv
      GET  'sfcs-sts'   -> short-lived fake SFCS credentials
      GET  'kms-sts'    -> KMS credentials taken from the CI env
    Anything else gets a 400.
    """

    def do_POST(self):
        action = self.headers.get('X-Datapipe-Task-Type')
        if action == 'top':
            # mock kms response
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            res = {'Result': {'Plaintext': base64.b64encode(b'abcdefgh87654321').decode('ascii')}}
            self.wfile.write(bytes(json.dumps(res), encoding='ascii'))
            return
        self.send_response(400)
        self.end_headers()
        return

    def do_GET(self):
        action = self.headers.get('X-Datapipe-Task-Type')
        if action == 'ping':
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps({'message': 'pong'}), encoding='ascii'))
            return
        if action == 'encrypt-key':
            # base64 of b'abcdefgh12345678' / b'1234567887654321'.
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(
                bytes(
                    json.dumps({'Key': 'YWJjZGVmZ2gxMjM0NTY3OA==', 'IV': 'MTIzNDU2Nzg4NzY1NDMyMQ=='}), encoding='ascii'
                )
            )
            return
        if action == 'sfcs-sts':
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            # Credentials expire quickly so refresh logic can be exercised.
            date_now = datetime.now()
            date_exp = date_now + timedelta(seconds=4)
            res = {
                'Cred': {
                    'CurrentTime': date_now.isoformat(),
                    'ExpiredTime': date_exp.isoformat(),
                    'AccessKeyId': 'A' * 12,
                    'SecretAccessKey': 'S' * 12,
                    'SessionToken': 'ST' * 12,  # fake SessionToken real one is longer
                },
                'SfcsNameNodeAddress': '100.67.19.231',
            }
            self.wfile.write(bytes(json.dumps(res), encoding='ascii'))
            return
        if action == 'kms-sts':
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            res = {
                'Cred': {
                    'AccessKeyId': os.environ['CI_VENDOR_AK'],
                    # BUG FIX: the secret key previously echoed CI_VENDOR_AK
                    # (copy-paste error); the SK secret is CI_VENDOR_SK, as
                    # used elsewhere in these tests.
                    'SecretAccessKey': os.environ['CI_VENDOR_SK'],
                    'SessionToken': '',
                },
            }
            self.wfile.write(bytes(json.dumps(res), encoding='ascii'))
            return
        self.send_response(400)
        self.end_headers()
        return
113 |
114 |
class TestCipherInfo(TestCase):
    """Exercise CipherInfo's key/iv acquisition paths against the mock
    datapipe service served over a unix domain socket."""

    @classmethod
    def setUpClass(cls):
        # Start the mock datapipe service on a throwaway unix socket.
        cls.sock_dir = tempfile.TemporaryDirectory()
        cls.server_address = os.path.join(cls.sock_dir.name, 'datapipe.sock')
        cls.server = UnixSocketHttpServer(cls.server_address, DatapipeHandler, bind_and_activate=True)

        def run():
            cls.server.serve_forever()

        cls.thread = threading.Thread(target=run)
        cls.thread.start()
        # Plaintext key/iv values the mock endpoints encode (see
        # DatapipeHandler).
        cls.target_key = np.frombuffer(b'abcdefgh12345678', dtype=np.byte)
        cls.target_key_2 = np.frombuffer(b'abcdefgh87654321', dtype=np.byte)
        cls.target_iv = np.frombuffer(b'1234567887654321', dtype=np.byte)

    def test_fetch_from_file_header(self):
        # Disable the env-var and datapipe fallbacks so key material must
        # come from the encrypted file header.
        os.environ.pop('VETURBOIO_KEY', None)
        os.environ.pop('VETURBOIO_IV', None)
        DataPipeClient.DATAPIPE_SOCKET_PATH = '/path/not/exist'

        # Encrypted header: magic string + JSON with the KMS-wrapped key.
        header_dict = {
            'mode': 'CTR-128',
            'iv': 'MTIzNDU2Nzg4NzY1NDMyMQ==',
            'meta_data_key': 'bl2htKYLQ2+CjyyJ84Q3twAA9ZpCbFxwznRb0NkR9zGGRp1RK5Mb9u8NNOiahY+0yVrxNw3IVQ9Wgn6PDscw77Cb3eImjVn14hNBJRlwtSyQ7tRZLOsZBEHv5cWwDQ==',
        }
        header_bytes = bytearray(256 * 1024)
        header_str = 'Byte3ncryptM0del' + json.dumps(header_dict)
        header_bytes[: len(header_str)] = header_str.encode('utf-8')

        # case1: get kms cred from env
        ENV_KMS_HOST = 'VETURBOIO_KMS_HOST'
        ENV_KMS_REGION = 'VETURBOIO_KMS_REGION'
        ENV_KMS_AK = 'VETURBOIO_KMS_ACCESS_KEY'
        ENV_KMS_SK = 'VETURBOIO_KMS_SECRET_KEY'
        ENV_KMS_KEYRING = 'VETURBOIO_KMS_KEYRING_NAME'
        ENV_KMS_KEY = 'VETURBOIO_KMS_KEY_NAME'
        os.environ[ENV_KMS_HOST] = 'open.volcengineapi.com'
        os.environ[ENV_KMS_REGION] = 'cn-beijing'
        os.environ[ENV_KMS_AK] = os.environ['CI_VENDOR_AK']
        os.environ[ENV_KMS_SK] = os.environ['CI_VENDOR_SK']
        os.environ[ENV_KMS_KEYRING] = 'datapipe_keyring'
        os.environ[ENV_KMS_KEY] = 'datapipe_key_ml_maas'
        info = CipherInfo(True, header_bytes)
        self.assertTrue(info.use_cipher)
        self.assertTrue(info.use_header)
        self.assertTrue(np.array_equal(info.key, self.target_key))
        self.assertTrue(np.array_equal(info.iv, self.target_iv))

        # case2: get kms cred from datapipe and access kms with datapipe proxy
        os.environ.pop(ENV_KMS_HOST, None)
        os.environ.pop(ENV_KMS_REGION, None)
        os.environ.pop(ENV_KMS_AK, None)
        os.environ.pop(ENV_KMS_SK, None)
        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
        info = CipherInfo(True, header_bytes)
        self.assertTrue(info.use_cipher)
        self.assertTrue(info.use_header)
        self.assertTrue(np.array_equal(info.key, self.target_key_2))
        self.assertTrue(np.array_equal(info.iv, self.target_iv))

    def test_fetch_from_datapipe(self):
        # Key and iv fetched directly from the mock datapipe service.
        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
        DataPipeClient.ENCRYPT_HEADER['X-Encrypt-Caller-Pod'] = 'test-pod-name'
        info = CipherInfo(True, None, '/maas_model/test_path')
        self.assertTrue(info.use_cipher)
        self.assertTrue(np.array_equal(info.key, self.target_key))
        self.assertTrue(np.array_equal(info.iv, self.target_iv))

    def test_fetch_from_env(self):
        # Key and iv provided directly via base64-encoded env vars.
        DataPipeClient.DATAPIPE_SOCKET_PATH = '/path/not/exist'
        os.environ['VETURBOIO_KEY'] = base64.b64encode(b'abcdefgh12345678').decode('ascii')
        os.environ['VETURBOIO_IV'] = base64.b64encode(b'1234567887654321').decode('ascii')
        info = CipherInfo(True)
        self.assertTrue(info.use_cipher)
        self.assertTrue(np.array_equal(info.key, self.target_key))
        self.assertTrue(np.array_equal(info.iv, self.target_iv))

    def test_raise_error(self):
        # Key/iv shorter than 128 bits must be rejected.
        DataPipeClient.DATAPIPE_SOCKET_PATH = '/path/not/exist'
        os.environ['VETURBOIO_KEY'] = base64.b64encode(b'abcdefgh12').decode('ascii')
        os.environ['VETURBOIO_IV'] = base64.b64encode(b'1234567887').decode('ascii')
        with self.assertRaises(RuntimeError):
            info = CipherInfo(True)

    @classmethod
    def tearDownClass(cls):
        # Clean up env vars and shut down the mock server.
        os.environ.pop('VETURBOIO_KEY', None)
        os.environ.pop('VETURBOIO_IV', None)
        cls.server.shutdown()
        cls.server.server_close()
        cls.thread.join()
        cls.sock_dir.cleanup()
208 |
209 |
class TestCredentials(TestCase):
    """Tests for SFCS credential fetching and libcloudfs conf generation.

    A fake datapipe service (DatapipeHandler served over a unix-domain
    socket) stands in for the real credential endpoint.
    """

    @classmethod
    def setUpClass(cls):
        # Serve the fake datapipe on a unix socket inside a temp dir; the
        # serving thread is joined in tearDownClass.
        cls.sock_dir = tempfile.TemporaryDirectory()
        cls.server_address = os.path.join(cls.sock_dir.name, 'datapipe.sock')
        cls.server = UnixSocketHttpServer(cls.server_address, DatapipeHandler, bind_and_activate=True)

        def run():
            cls.server.serve_forever()

        cls.thread = threading.Thread(target=run)
        cls.thread.start()

    def test_sfcs_sts(self):
        # The client should return exactly the STS payload the fake
        # DatapipeHandler serves.
        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
        client = DataPipeClient()
        cred = client.get_sfcs_ak_sk_st()
        self.assertIsNotNone(cred)
        self.assertEqual(cred['SfcsNameNodeAddress'], '100.67.19.231')
        cred = cred['Cred']
        self.assertEqual(cred['AccessKeyId'], 'A' * 12)
        self.assertEqual(cred['SecretAccessKey'], 'S' * 12)
        self.assertEqual(cred['SessionToken'], 'ST' * 12)

    def test_sfcs_conf(self):
        for e in SFCS_REQ_ENV_LIST:
            os.environ[e] = 'test-value'
        # case 1: env SFCS_ACCESS_KEY and SFCS_SECRET_KEY and SFCS_NAMENODE_ENDPOINT_ADDRESS exists
        os.environ['SFCS_ACCESS_KEY'] = 'A' * 12
        os.environ['SFCS_SECRET_KEY'] = 'S' * 12
        os.environ['SFCS_NAMENODE_ENDPOINT_ADDRESS'] = '100.67.19.231'
        sfcs_conf = os.path.join(os.getcwd(), 'base_model2.xml')
        if os.path.exists(sfcs_conf):
            os.remove(sfcs_conf)
        init_sfcs_conf('/base_model2/tensor.pt')
        # Static env credentials: the conf file is written once and no
        # refresh thread is registered in credentials_helper.
        self.assertEqual(os.environ['LIBCLOUDFS_CONF'], sfcs_conf)
        self.assertEqual(len(credentials_helper.threads), 0)
        self.assertEqual(len(credentials_helper.running), 0)
        self.assertTrue(os.path.exists(sfcs_conf))
        os.remove(sfcs_conf)

        # case 2: use datapipe socket to get and refresh ak, sk, st and namenode_ip
        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
        os.environ.pop('SFCS_ACCESS_KEY', None)
        os.environ.pop('SFCS_SECRET_KEY', None)
        os.environ.pop('SFCS_NAMENODE_ENDPOINT_ADDRESS', None)
        sfcs_conf3 = os.path.join(os.getcwd(), 'base_model3.xml')
        sfcs_conf4 = os.path.join(os.getcwd(), 'base_model4.xml')
        if os.path.exists(sfcs_conf3):
            os.remove(sfcs_conf3)
        if os.path.exists(sfcs_conf4):
            os.remove(sfcs_conf4)
        init_sfcs_conf('/base_model3/tensor.pt')
        init_sfcs_conf('/base_model4/tensor.pt')
        # Datapipe-backed credentials: one refresh thread per model key.
        self.assertTrue('base_model3' in credentials_helper.threads)
        self.assertTrue('base_model4' in credentials_helper.threads)
        self.assertTrue(credentials_helper.running['base_model3'])
        self.assertTrue(credentials_helper.running['base_model4'])
        self.assertTrue(os.path.exists(sfcs_conf3))
        self.assertTrue(os.path.exists(sfcs_conf4))
        for i in range(5):
            # Delete the conf files and wait: the refresh threads are
            # expected to recreate them within each 3s window.
            os.remove(sfcs_conf3)
            os.remove(sfcs_conf4)
            sleep(3)
            self.assertTrue(os.path.exists(sfcs_conf3))
            self.assertTrue(os.path.exists(sfcs_conf4))
            print(credentials_helper.threads)
        os.remove(sfcs_conf3)
        os.remove(sfcs_conf4)

    def test_sfcs_conf_json(self):
        # SFCS_FSNAME / SFCS_NS_ID / SFCS_UFS_PATH appear to accept a JSON
        # mapping of model name -> value, giving each model its own conf file.
        for e in SFCS_REQ_ENV_LIST:
            os.environ[e] = 'test-value'
        os.environ['SFCS_FSNAME'] = json.dumps({'base_model1': 'test-value1', 'base_model2': 'test-value2'})
        os.environ['SFCS_NS_ID'] = json.dumps({'base_model1': 'test-value1', 'base_model2': 'test-value2'})
        os.environ['SFCS_UFS_PATH'] = json.dumps({'base_model1': 'test-value1', 'base_model2': 'test-value2'})
        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
        os.environ.pop('SFCS_ACCESS_KEY', None)
        os.environ.pop('SFCS_SECRET_KEY', None)
        os.environ.pop('SFCS_NAMENODE_ENDPOINT_ADDRESS', None)
        sfcs_conf1 = os.path.join(os.getcwd(), 'base_model1.xml')
        sfcs_conf2 = os.path.join(os.getcwd(), 'base_model2.xml')
        if os.path.exists(sfcs_conf1):
            os.remove(sfcs_conf1)
        if os.path.exists(sfcs_conf2):
            os.remove(sfcs_conf2)
        init_sfcs_conf('/base_model1/tensor.pt')
        init_sfcs_conf('/base_model2/tensor.pt')
        self.assertTrue('base_model1' in credentials_helper.threads)
        self.assertTrue('base_model2' in credentials_helper.threads)
        self.assertTrue(credentials_helper.running['base_model1'])
        self.assertTrue(credentials_helper.running['base_model2'])
        self.assertTrue(os.path.exists(sfcs_conf1))
        self.assertTrue(os.path.exists(sfcs_conf2))
        for i in range(5):
            # The refresh threads should keep the conf files present.
            sleep(3)
            self.assertTrue(os.path.exists(sfcs_conf1))
            self.assertTrue(os.path.exists(sfcs_conf2))
            print(credentials_helper.threads)
        os.remove(sfcs_conf1)
        os.remove(sfcs_conf2)

    @classmethod
    def tearDownClass(cls):
        # Stop refresh threads, clear all SFCS-related env, then shut down
        # the fake datapipe server and join its thread.
        credentials_helper.stop()
        os.environ.pop('LIBCLOUDFS_CONF', None)
        for e in SFCS_REQ_ENV_LIST:
            os.environ.pop(e, None)
        for e in SFCS_OPT_ENV_LIST:
            os.environ.pop(e, None)
        cls.server.shutdown()
        cls.server.server_close()
        cls.thread.join()
        cls.sock_dir.cleanup()
324 |
--------------------------------------------------------------------------------
/tests/test_load_op.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import base64
18 | import os
19 | import tempfile
20 | import unittest
21 | from copy import deepcopy
22 | from unittest import TestCase
23 |
24 | import torch
25 |
26 | import veturboio
27 |
28 |
class TestLoad(TestCase):
    """End-to-end tests for veturboio.load across file formats (safetensors
    and .pt), cipher modes (plain, encrypted, encrypted-with-header), fast
    mode, pre-allocated state dicts, direct I/O and shared memory."""

    @classmethod
    def setUpClass(cls):
        # KMS settings let the cipher paths fetch a data key; AK/SK come
        # from CI-provided secrets (CI_VENDOR_AK / CI_VENDOR_SK).
        ENV_KMS_HOST = 'VETURBOIO_KMS_HOST'
        ENV_KMS_REGION = 'VETURBOIO_KMS_REGION'
        ENV_KMS_AK = 'VETURBOIO_KMS_ACCESS_KEY'
        ENV_KMS_SK = 'VETURBOIO_KMS_SECRET_KEY'
        ENV_KMS_KEYRING = 'VETURBOIO_KMS_KEYRING_NAME'
        ENV_KMS_KEY = 'VETURBOIO_KMS_KEY_NAME'
        os.environ[ENV_KMS_HOST] = 'open.volcengineapi.com'
        os.environ[ENV_KMS_REGION] = 'cn-beijing'
        os.environ[ENV_KMS_AK] = os.environ['CI_VENDOR_AK']
        os.environ[ENV_KMS_SK] = os.environ['CI_VENDOR_SK']
        os.environ[ENV_KMS_KEYRING] = 'datapipe_keyring'
        os.environ[ENV_KMS_KEY] = 'datapipe_key_ml_maas'

        cls.tempdir = tempfile.TemporaryDirectory()

        # Reference tensors: tensors_1 has one extra key to distinguish the
        # two files in the multi-state-dict tests.
        cls.tensors_0 = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
        }

        cls.tensors_1 = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
            "weight3": torch.BoolTensor(2000, 10),
        }

        # Plain safetensors files, one saved normally and one with fast mode.
        cls.filepath_0 = os.path.join(cls.tempdir.name, "model_0.safetensors")
        cls.filepath_1 = os.path.join(cls.tempdir.name, "model_1.safetensors")
        veturboio.save_file(cls.tensors_0, cls.filepath_0)
        veturboio.save_file(cls.tensors_1, cls.filepath_1, enable_fast_mode=True)

        cls.pt_filepath = os.path.join(cls.tempdir.name, "model.pt")
        torch.save(cls.tensors_0, cls.pt_filepath)

        # cipher
        # Fixed 16-byte key/iv provided via env (base64) for the encrypted
        # variants below.
        os.environ["VETURBOIO_KEY"] = base64.b64encode(b"abcdefgh12345678").decode("ascii")
        os.environ["VETURBOIO_IV"] = base64.b64encode(b"1234567887654321").decode("ascii")

        cls.filepath_0_enc = os.path.join(cls.tempdir.name, "model_0_enc.safetensors")
        cls.filepath_1_enc = os.path.join(cls.tempdir.name, "model_1_enc.safetensors")
        veturboio.save_file(cls.tensors_0, cls.filepath_0_enc, use_cipher=True)
        veturboio.save_file(cls.tensors_1, cls.filepath_1_enc, use_cipher=True, enable_fast_mode=True)

        cls.pt_filepath_enc = os.path.join(cls.tempdir.name, "model_enc.pt")
        veturboio.save_pt(cls.tensors_0, cls.pt_filepath_enc, use_cipher=True)

        # cipher with header
        # NOTE(review): VETURBOIO_CIPHER_HEADER=1 presumably makes save embed
        # a cipher header in the file; it is removed right after so later
        # saves go back to header-less mode.
        os.environ["VETURBOIO_CIPHER_HEADER"] = "1"
        cls.filepath_0_enc_h = os.path.join(cls.tempdir.name, "model_0_enc_h.safetensors")
        veturboio.save_file(cls.tensors_0, cls.filepath_0_enc_h, use_cipher=True)

        cls.pt_filepath_enc_h = os.path.join(cls.tempdir.name, "model_enc_h.pt")
        veturboio.save_pt(cls.tensors_0, cls.pt_filepath_enc_h, use_cipher=True)
        del os.environ["VETURBOIO_CIPHER_HEADER"]

        if torch.cuda.is_available():
            # CUDA copies of the reference tensors for the GPU tests.
            cls.cuda_tensors_0 = deepcopy(cls.tensors_0)
            cls.cuda_tensors_1 = deepcopy(cls.tensors_1)

            for key in cls.cuda_tensors_0.keys():
                cls.cuda_tensors_0[key] = cls.cuda_tensors_0[key].cuda()
            for key in cls.cuda_tensors_1.keys():
                cls.cuda_tensors_1[key] = cls.cuda_tensors_1[key].cuda()

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    def _run_pipeline(self, tensors, filepath, map_location, use_cipher, enable_fast_mode=True, state_dict=None):
        # Load `filepath` with the given options, compare every tensor with
        # the expected `tensors`, and return the loaded dict for extra checks.
        loaded_tensors = veturboio.load(
            filepath,
            map_location=map_location,
            use_cipher=use_cipher,
            enable_fast_mode=enable_fast_mode,
            state_dict=state_dict,
        )
        for key in tensors.keys():
            self.assertTrue(torch.allclose(tensors[key], loaded_tensors[key]))
        return loaded_tensors

    def test_pipeline_cpu(self):
        # Cover plain/encrypted x fast/slow mode, then the same matrix with a
        # caller-provided pre-allocated state_dict.
        self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False)
        self._run_pipeline(self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True)
        self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False, enable_fast_mode=False)
        self._run_pipeline(self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True, enable_fast_mode=False)
        pre_allocated_tensors = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
        }
        self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False, state_dict=pre_allocated_tensors)
        self._run_pipeline(
            self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True, state_dict=pre_allocated_tensors
        )
        self._run_pipeline(
            self.tensors_0,
            self.filepath_0,
            "cpu",
            use_cipher=False,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )
        self._run_pipeline(
            self.tensors_0,
            self.filepath_0_enc,
            "cpu",
            use_cipher=True,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_pipeline_cuda(self):
        # Same matrix as test_pipeline_cpu, loading onto cuda:0.
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False)
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True)
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False, enable_fast_mode=False)
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True, enable_fast_mode=False)
        pre_allocated_tensors = {
            "weight1": torch.randn(2000, 10).cuda(),
            "weight2": torch.IntTensor(2000, 10).cuda(),
        }
        self._run_pipeline(
            self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False, state_dict=pre_allocated_tensors
        )
        self._run_pipeline(
            self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True, state_dict=pre_allocated_tensors
        )
        self._run_pipeline(
            self.cuda_tensors_0,
            self.filepath_0,
            "cuda:0",
            use_cipher=False,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )
        self._run_pipeline(
            self.cuda_tensors_0,
            self.filepath_0_enc,
            "cuda:0",
            use_cipher=True,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )

    def test_read_multi_state_dict_cpu(self):
        # Two different files loaded back to back must not leak keys into
        # each other (2 vs 3 entries).
        load_tensor_0 = self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False)
        load_tensor_1 = self._run_pipeline(self.tensors_1, self.filepath_1, "cpu", use_cipher=False)

        self.assertEqual(len(load_tensor_0), 2)
        self.assertEqual(len(load_tensor_1), 3)

        load_tensor_0_enc = self._run_pipeline(self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True)
        load_tensor_1_enc = self._run_pipeline(self.tensors_1, self.filepath_1_enc, "cpu", use_cipher=True)

        self.assertEqual(len(load_tensor_0_enc), 2)
        self.assertEqual(len(load_tensor_1_enc), 3)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_read_multi_state_dict_cuda(self):
        load_tensor_0 = self._run_pipeline(self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False)
        load_tensor_1 = self._run_pipeline(self.cuda_tensors_1, self.filepath_1, "cuda:0", use_cipher=False)

        self.assertEqual(len(load_tensor_0), 2)
        self.assertEqual(len(load_tensor_1), 3)

        load_tensor_0_enc = self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True)
        load_tensor_1_enc = self._run_pipeline(self.cuda_tensors_1, self.filepath_1_enc, "cuda:0", use_cipher=True)

        self.assertEqual(len(load_tensor_0_enc), 2)
        self.assertEqual(len(load_tensor_1_enc), 3)

    def test_load_pt_cpu(self):
        # veturboio.load must also handle torch-saved .pt checkpoints.
        loaded_tensors = veturboio.load(self.pt_filepath, map_location="cpu", use_cipher=False)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))

        loaded_tensors_enc = veturboio.load(self.pt_filepath_enc, map_location="cpu", use_cipher=True)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors_enc[key]))

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_load_pt_cuda(self):
        loaded_tensors = veturboio.load(self.pt_filepath, map_location="cuda:0", use_cipher=False)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.cuda_tensors_0[key], loaded_tensors[key]))

        loaded_tensors_enc = veturboio.load(self.pt_filepath_enc, map_location="cuda:0", use_cipher=True)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.cuda_tensors_0[key], loaded_tensors_enc[key]))

    def test_load_cipher_header_cpu(self):
        # Files written while VETURBOIO_CIPHER_HEADER=1 was set.
        self._run_pipeline(self.tensors_0, self.filepath_0_enc_h, "cpu", use_cipher=True)
        self._run_pipeline(self.tensors_0, self.pt_filepath_enc_h, "cpu", use_cipher=True)
        self._run_pipeline(self.tensors_0, self.filepath_0_enc_h, "cpu", use_cipher=True, enable_fast_mode=False)
        self._run_pipeline(self.tensors_0, self.pt_filepath_enc_h, "cpu", use_cipher=True, enable_fast_mode=False)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_load_cipher_header_cuda(self):
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc_h, "cuda:0", use_cipher=True)
        self._run_pipeline(self.cuda_tensors_0, self.pt_filepath_enc_h, "cuda:0", use_cipher=True)
        self._run_pipeline(
            self.cuda_tensors_0, self.filepath_0_enc_h, "cuda:0", use_cipher=True, enable_fast_mode=False
        )
        self._run_pipeline(
            self.cuda_tensors_0, self.pt_filepath_enc_h, "cuda:0", use_cipher=True, enable_fast_mode=False
        )

    def test_load_directIO_fall_back(self):
        # /dev/shm (tmpfs) rejects O_DIRECT, so this exercises the fallback
        # path of use_direct_io=True.
        with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpFile:
            veturboio.save_file(self.tensors_0, tmpFile.name)
            tmpFile.flush()
            loaded_tensors = veturboio.load(tmpFile.name, map_location="cpu", use_direct_io=True)
            for key in self.tensors_0.keys():
                self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))

    def test_load_to_shmem(self):
        # Copy a file into shared memory, then load it back from /dev/shm as
        # a plain (already-decrypted) file.
        shmem = veturboio.load_to_shmem(self.filepath_0, use_cipher=False)
        loaded_tensors = veturboio.load(
            os.path.join("/dev/shm/", shmem.name), map_location="cpu", enable_fast_mode=False, use_cipher=False
        )
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))
        shmem.close()
        shmem.unlink()

        shmem = veturboio.load_to_shmem(self.filepath_0_enc, use_cipher=True)
        loaded_tensors = veturboio.load(
            os.path.join("/dev/shm/", shmem.name), map_location="cpu", enable_fast_mode=False, use_cipher=False
        )
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))
        shmem.close()
        shmem.unlink()
264 |
--------------------------------------------------------------------------------
/tests/test_save_op.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import os
18 | import tempfile
19 | import unittest
20 | from copy import deepcopy
21 | from unittest import TestCase
22 |
23 | import torch
24 | from safetensors import safe_open
25 |
26 | import veturboio
27 |
28 |
class TestSave(TestCase):
    """Round-trip tests for veturboio's save APIs (safetensors and .pt)."""

    @classmethod
    def setUpClass(cls):
        # Tensors covering float, int and bool dtypes.
        cls.tensors_0 = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
            "weight3": torch.BoolTensor(2000, 10),
        }

        class MockModel(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()

                self.linear1 = torch.nn.Linear(100, 50)
                self.linear2 = torch.nn.Linear(100, 50)

        cls.model = MockModel()

        cls.tempdir = tempfile.TemporaryDirectory()
        cls.filepath_0 = os.path.join(cls.tempdir.name, "model_0.safetensors")
        cls.filepath_1 = os.path.join(cls.tempdir.name, "model_0.pt")
        cls.filepath_2 = os.path.join(cls.tempdir.name, "model_0_fast.safetensors")
        cls.filepath_3 = os.path.join(cls.tempdir.name, "model_1.safetensors")

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    def _assert_file_matches(self, filepath, tensors, expected_len=None):
        """Open `filepath` with safetensors and check every stored tensor
        against `tensors`; optionally check the number of stored keys."""
        with safe_open(filepath, framework="pt", device="cpu") as f:
            if expected_len is not None:
                # unittest assertions (unlike bare `assert`) survive
                # `python -O` and give readable failure messages.
                self.assertEqual(len(f.keys()), expected_len)
            for key in f.keys():
                self.assertIn(key, tensors)
                self.assertTrue(torch.allclose(tensors[key], f.get_tensor(key)))

    def test_save_file(self):
        veturboio.save_file(self.tensors_0, self.filepath_0)
        self._assert_file_matches(self.filepath_0, self.tensors_0, expected_len=3)

        # enable fast mode
        veturboio.save_file(self.tensors_0, self.filepath_2, enable_fast_mode=True)
        self._assert_file_matches(self.filepath_2, self.tensors_0, expected_len=3)

    def test_save_file_for_clone_share_tensors(self):
        # Two keys aliasing the same tensor; force_clone_shared_tensor makes
        # save_file persist both copies.
        share_dict = {"key1": self.tensors_0["weight1"], "key2": self.tensors_0["weight1"]}
        veturboio.save_file(share_dict, self.filepath_0, force_save_shared_tensor=True, force_clone_shared_tensor=True)
        # save_file must not mutate the caller's state_dict.
        self.assertEqual(len(share_dict), 2)
        self._assert_file_matches(self.filepath_0, share_dict)

    def test_save_model(self):
        # Encrypted save of an nn.Module, then decrypt on load.
        veturboio.save_model(self.model, self.filepath_3, use_cipher=True)
        loaded_tensors = veturboio.load(self.filepath_3, map_location="cpu", use_cipher=True)
        state_dict = self.model.state_dict()
        for key in state_dict.keys():
            self.assertTrue(torch.allclose(state_dict[key], loaded_tensors[key]))

    def test_save_pt(self):
        # save_pt output must stay loadable by plain torch.load.
        veturboio.save_pt(self.tensors_0, self.filepath_1)
        loaded_tensors = torch.load(self.filepath_1)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))
92 |
--------------------------------------------------------------------------------
/tests/test_share_tensor_cpu.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import os
18 | import tempfile
19 | from unittest import TestCase
20 |
21 | import torch
22 |
23 | import veturboio
24 |
25 |
class TestSharedTensorLoad(TestCase):
    """Round-trip tests for a model whose state dict contains shared tensors."""

    @classmethod
    def setUpClass(cls):
        class MockModel(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()

                self.linear1 = torch.nn.Linear(10, 20)
                self.linear2 = torch.nn.Linear(20, 10)
                # linear3 aliases linear2, so linear2.* and linear3.* share storage.
                self.linear3 = self.linear2

        cls.model = MockModel()

    def _assert_roundtrip(self, save_fn):
        # Save via `save_fn(filepath)` into a temp dir, reload, and compare
        # with the model's state dict (shared by both tests below).
        with tempfile.TemporaryDirectory() as tmpdirname:
            filepath = os.path.join(tmpdirname, "model.safetensors")
            save_fn(filepath)
            loaded_tensors = veturboio.load(filepath, map_location="cpu")

            state_dict = self.model.state_dict()
            for key in state_dict.keys():
                self.assertTrue(torch.allclose(state_dict[key], loaded_tensors[key]))

    def test_pipeline(self):
        self._assert_roundtrip(lambda filepath: veturboio.save_model(self.model, filepath))

    def test_save_file(self):
        self._assert_roundtrip(
            lambda filepath: veturboio.save_file(self.model.state_dict(), filepath, force_save_shared_tensor=True)
        )
58 |
--------------------------------------------------------------------------------
/veturboio/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | from veturboio.io import load, load_to_shmem, save_file, save_model, save_pt
18 | from veturboio.ops.io_utils import init_io_helper
19 |
# Public API re-exported at package top level (`from veturboio import *`).
__all__ = ["load", "load_to_shmem", "save_file", "save_model", "init_io_helper", "save_pt"]
21 |
--------------------------------------------------------------------------------
/veturboio/convert.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import argparse
18 | import gc
19 | import logging
20 | import os
21 | import sys
22 | import traceback
23 | from datetime import datetime
24 |
25 | import torch
26 | from safetensors.torch import _find_shared_tensors, _is_complete
27 |
28 | import veturboio
29 |
30 |
def to_valid_state_dict(state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
    """Normalize a state dict so it can be safely saved as safetensors.

    Drops entries whose value is not a tensor, validates that every group of
    shared tensors truly aliases the same storage (same data pointer and
    shape), and substitutes a clone for incomplete shared tensors so each key
    maps to complete storage.

    Args:
        state_dict: mapping of key -> tensor (non-tensor values are removed).

    Returns:
        A new dict with shared-tensor groups resolved to one (possibly
        cloned) tensor object.

    Raises:
        Exception: if a shared-tensor group disagrees on data pointer or shape.
    """
    # Use a module logger: the global `logger` is only bound under the
    # `__main__` guard, so calling this function from an import used to raise
    # NameError whenever invalid keys were present.
    log = logging.getLogger(__name__)
    invalid_key = [k for k, v in state_dict.items() if not isinstance(v, torch.Tensor)]
    if invalid_key:
        log.warning(f"invalid keys to be removed: {invalid_key}")
        state_dict = {k: v for k, v in state_dict.items() if k not in invalid_key}

    result = {}
    shared_tensor_groups = _find_shared_tensors(state_dict)
    for group in shared_tensor_groups:
        # check if all share tensors have the same data ptr, same shape, and same size
        shared_tensors = [state_dict[k] for k in group]
        data_ptrs = [t.data_ptr() for t in shared_tensors]
        shapes = [t.shape for t in shared_tensors]
        if len(set(data_ptrs)) != 1 or len(set(shapes)) != 1:
            raise Exception(f"shared tensors {group} are not equal")
        # make sure these tensors are complete and identical
        converted_tensor = shared_tensors[0]
        if not _is_complete(converted_tensor):
            converted_tensor = converted_tensor.clone()
        for t in group:
            result[t] = converted_tensor
    for k, v in state_dict.items():
        if k not in result:
            result[k] = v
    return result
56 |
57 |
def add_handlers(logger: logging.Logger):
    """Attach a stdout stream handler using the project's log format."""
    fmt = "[%(levelname)s %(asctime)s] %(filename)s: %(lineno)d %(message)s"
    stream_handler = logging.StreamHandler(stream=sys.stdout)
    stream_handler.setFormatter(logging.Formatter(fmt=fmt))
    logger.addHandler(stream_handler)
66 |
67 |
def validate_result(input_state_dict: dict[str, torch.Tensor], output_state_dict: dict[str, torch.Tensor]):
    """Check that two state dicts hold the same keys and allclose tensors.

    Non-tensor entries are ignored on both sides before comparison.

    Raises:
        Exception: if the key sets differ or any pair of tensors is not
            allclose.
    """
    input_state_dict = {k: v for k, v in input_state_dict.items() if isinstance(v, torch.Tensor)}
    output_state_dict = {k: v for k, v in output_state_dict.items() if isinstance(v, torch.Tensor)}

    input_key_set = set(input_state_dict.keys())
    output_key_set = set(output_state_dict.keys())

    if input_key_set != output_key_set:
        not_in_output_key_set = input_key_set - output_key_set
        not_in_input_key_set = output_key_set - input_key_set
        raise Exception(
            f"key set not equal, not in output key set: {not_in_output_key_set}, not in input key set: {not_in_input_key_set}"
        )

    not_equal_tensor = [key for key in input_state_dict if not torch.allclose(input_state_dict[key], output_state_dict[key])]
    if not_equal_tensor:
        raise Exception(f"result is not valid, not equal tensors: {not_equal_tensor}")

    # Module logger: the global `logger` only exists when this file runs as a
    # script, so the previous bare `logger.info(...)` raised NameError on the
    # success path whenever the module was imported.
    logging.getLogger(__name__).info(f"all {len(input_key_set)} keys in state dict are equal")
90 |
91 |
92 | def _get_available_cpu() -> int:
93 | avail_cpu = os.cpu_count()
94 | if os.path.isfile('/sys/fs/cgroup/cpu/cpu.cfs_quota_us'):
95 | cpu_quota = int(open('/sys/fs/cgroup/cpu/cpu.cfs_quota_us').read().rstrip())
96 | if cpu_quota != -1 and os.path.isfile('/sys/fs/cgroup/cpu/cpu.cfs_period_us'):
97 | cpu_period = int(open('/sys/fs/cgroup/cpu/cpu.cfs_period_us').read().rstrip())
98 | avail_cpu = int(cpu_quota / cpu_period)
99 | logger.info(f"get veturboio thread {avail_cpu} from cgroup info")
100 | return avail_cpu
101 |
102 |
103 | class Pt2SafeTensorConverter:
104 | def __init__(
105 | self,
106 | input_path: str,
107 | output_path: str,
108 | dry_run: bool,
109 | enable_to_valid_state_dict: bool,
110 | overwrite: bool,
111 | use_direct_io: bool,
112 | ):
113 | self.input_path = input_path
114 | self.output_path = output_path
115 | self.dry_run = dry_run
116 | self.enable_to_valid_state_dict = enable_to_valid_state_dict
117 | self.use_direct_io = use_direct_io
118 | if self.input_path.startswith("sfcs://"):
119 | try:
120 | self.input_file_size = veturboio.ops.sfcs_utils.sfcs_get_file_size(self.input_path)
121 | except BaseException as Exp:
122 | raise FileNotFoundError("can't get size of sfcs file", Exp)
123 | else:
124 | if not os.path.exists(self.input_path):
125 | raise Exception(f"file not exist: {self.input_path}")
126 | # convert to abs path
127 | if not os.path.isabs(self.input_path):
128 | self.input_path = os.path.abspath(self.input_path)
129 | self.input_file_size = os.path.getsize(self.input_path)
130 | if not self.input_path.endswith(".pt"):
131 | raise Exception("input file must end with .pt")
132 |
133 | if self.output_path is None:
134 | self.output_path = self.input_path.replace(".pt", ".safetensors")
135 | elif not self.output_path.startswith("sfcs://") and not os.path.isabs(self.output_path):
136 | self.output_path = os.path.abspath(self.output_path)
137 | if not self.output_path.endswith(".safetensors"):
138 | raise Exception("output file must end with .safetensors")
139 |
140 | if overwrite:
141 | if self.output_path.startswith("sfcs://"):
142 | raise Exception("overwrite flag cannot be set when using sfcs")
143 | if os.path.exists(self.output_path):
144 | logger.info(f"overwrite output file {self.output_path}")
145 | if not dry_run:
146 | os.remove(self.output_path)
147 | elif not self.output_path.startswith("sfcs://") and os.path.exists(self.output_path):
148 | raise Exception(f"output file {self.output_path} already exists")
149 |
150 | def convert(self):
151 | logger.info(f"converting {self.input_path} to {self.output_path}")
152 | available_cpus = _get_available_cpu()
153 | ext_name = self.output_path.split(".")[-1]
154 | state_dict = {}
155 | if ext_name != "safetensors":
156 | raise ValueError("output file should be safetensors file")
157 | logger.info(f"start loading the pt file, the pt file has size of {self.input_file_size // 1000 // 1000}MB")
158 | start_time = datetime.now()
159 | if self.dry_run:
160 | logger.info("dry run finished for veturboio.load_pt_file")
161 | else:
162 | state_dict = veturboio.load(
163 | self.input_path, num_thread=available_cpus, use_direct_io=self.use_direct_io, enable_fast_mode=True
164 | )
165 | end_time = datetime.now()
166 | logger.info(f"finish loading the pt file with duration {end_time - start_time}")
167 | logger.info("start saving the safetensors file")
168 | start_time = datetime.now()
169 | if self.dry_run:
170 | logger.info("dry run finished for veturboio.save_safetensors_file")
171 | else:
172 | if self.enable_to_valid_state_dict:
173 | state_dict = to_valid_state_dict(state_dict)
174 | veturboio.save_file(state_dict, self.output_path, force_save_shared_tensor=True)
175 | end_time = datetime.now()
176 | logger.info(f"finish saving the safetensors file with duration {end_time - start_time}")
177 |
178 | del state_dict
179 | gc.collect()
180 | logger.info(f"gc finished")
181 |
def validate(self):
    """Reload both checkpoints and verify that their tensors match."""
    num_thread = _get_available_cpu()
    logger.info(f"validating if {self.input_path} in equal to {self.output_path}")

    source_state = veturboio.load(
        self.input_path, num_thread=num_thread, use_direct_io=self.use_direct_io, enable_fast_mode=True
    )
    logger.info(f"{self.input_path} loaded")

    converted_state = veturboio.load(
        self.output_path, num_thread=num_thread, use_direct_io=self.use_direct_io, enable_fast_mode=True
    )
    logger.info(f"{self.output_path} loaded")

    validate_result(source_state, converted_state)
196 |
197 |
198 | if __name__ == "__main__":
199 | logger = logging.getLogger(__name__)
200 | logger.setLevel(logging.INFO)
201 | add_handlers(logger)
202 |
203 | parser = argparse.ArgumentParser(description="converter used to convert .pt model to .safeTensor")
204 | parser.add_argument(
205 | "--input",
206 | "-i",
207 | type=str,
208 | required=True,
209 | help="indicate the path of .pt file, both posix path" "and sfcs prefix are supported",
210 | )
211 | parser.add_argument(
212 | "--output",
213 | "-o",
214 | type=str,
215 | required=False,
216 | help="indicate the path of .safeTensor file, both "
217 | "posix path and sfcs prefix are supported."
218 | "will be placed into the same dir of the .pt "
219 | "file if left empty",
220 | )
221 | parser.add_argument("--dry-run", "-d", action="store_true", help="just dry run, not really convert")
222 | parser.add_argument("--overwrite", action="store_true", help="overwrite the output file if it exists")
223 | parser.add_argument(
224 | "--enable-to-valid-state-dict",
225 | action="store_true",
226 | help="execute to_valid_state_dict function before save to .safetensors",
227 | )
228 | parser.add_argument("--validate-result", action="store_true", help="validate result", default=False)
229 | parser.add_argument("--use-direct-io", action="store_true", help="use direct io to load file", default=False)
230 | args = parser.parse_args()
231 |
232 | instance = Pt2SafeTensorConverter(
233 | args.input, args.output, args.dry_run, args.enable_to_valid_state_dict, args.overwrite, args.use_direct_io
234 | )
235 | try:
236 | instance.convert()
237 | if args.validate_result:
238 | instance.validate()
239 | except Exception as e:
240 | logger.error(f"convert failed.")
241 | traceback.print_exc()
242 | exit(1)
243 |
--------------------------------------------------------------------------------
/veturboio/io.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import os
18 | from multiprocessing import shared_memory
19 | from typing import Dict, Optional
20 |
21 | import torch
22 | from loguru import logger
23 | from safetensors.torch import _remove_duplicate_names
24 |
25 | from veturboio.loader import FasterPosixLoader, PosixLoader, SfcsClientLoader
26 | from veturboio.ops.io_utils import IOHelper
27 | from veturboio.safetensors import SafetensorsFile
28 | from veturboio.saver import PosixSaver, SfcsClientSaver
29 | from veturboio.types import FILE_PATH
30 |
31 |
def is_sfcs_path(file: FILE_PATH):
    """Decide whether *file* should be accessed through the SFCS SDK.

    Returns a ``(use_sfcs_sdk, path)`` tuple. For "sfcs://" URIs the scheme
    is stripped but one leading slash is kept so the remainder is an absolute
    SFCS path. Paths under /dev/shm/ are always treated as local, and setting
    VETURBOIO_USE_SFCS_SDK=1 forces SDK usage for any other path.
    """
    sfcs_scheme = "sfcs://"
    shm_prefix = "/dev/shm/"
    if len(file) > len(sfcs_scheme) and file[: len(sfcs_scheme)] == sfcs_scheme:
        # Keep one "/" from the scheme so the result starts with a slash.
        return True, file[len(sfcs_scheme) - 1 :]
    if len(file) > len(shm_prefix) and file[: len(shm_prefix)] == shm_prefix:
        return False, file
    forced = os.environ.get("VETURBOIO_USE_SFCS_SDK", "0") == "1"
    return forced, file
41 |
42 |
def load(
    file: FILE_PATH,
    map_location: Optional[str] = "cpu",
    enable_fast_mode: Optional[bool] = True,
    num_thread: Optional[int] = 32,
    helper: Optional[IOHelper] = None,
    use_pinmem: Optional[bool] = False,
    use_direct_io: Optional[bool] = False,
    use_cipher: Optional[bool] = None,
    state_dict: Dict[str, torch.Tensor] = None,
) -> Dict:
    """Load a state dict from a checkpoint file (safetensors or pytorch).

    Safetensors files go through the accelerated veturboio path; the concrete
    loader is chosen from the file location and the fast-mode flag.

    Args:
        file (FILE_PATH): file path
        map_location (str, optional): map location. Defaults to "cpu".
        enable_fast_mode (bool, optional): enable fast mode. Defaults to True.
        num_thread (int, optional): number of threads. Defaults to 32.
        helper (IOHelper, optional): use IOHelper. Defaults to None.
        use_pinmem (bool, optional): use pin memory. Defaults to False.
        use_direct_io (bool, optional): open file in direct io mode. Defaults to False.
        use_cipher (bool, optional): decrypt file. Defaults to None. Cipher is
            force-disabled when False; enabled when True or when the
            VETURBOIO_USE_CIPHER environ is '1'.
        state_dict (Dict): pre-allocated state dict. Defaults to None.

    Returns:
        state_dict (Dict): state dict

    Examples:
        ```
        import veturboio
        state_dict = veturboio.load("model.safetensors")
        ```
    """

    # Fast mode requires the native extension; fall back silently otherwise.
    if IOHelper is None:
        enable_fast_mode = False
    elif helper is None:
        helper = IOHelper()

    use_sfcs_sdk, file = is_sfcs_path(file)
    if enable_fast_mode == False:  # deliberately matches only the literal False
        loader = PosixLoader(file)
    else:
        loader_cls = SfcsClientLoader if use_sfcs_sdk else FasterPosixLoader
        loader = loader_cls(
            file=file,
            helper=helper,
            num_thread=num_thread,
            use_pinmem=use_pinmem,
            use_direct_io=use_direct_io,
        )

    return SafetensorsFile(file, loader, use_cipher).load(map_location=map_location, state_dict=state_dict)
107 |
108 |
def load_to_shmem(
    file: FILE_PATH,
    num_thread: Optional[int] = 32,
    helper: Optional[IOHelper] = None,
    use_direct_io: Optional[bool] = False,
    use_cipher: Optional[bool] = None,
) -> shared_memory.SharedMemory:
    """Load a checkpoint file into a shared-memory segment.

    Args:
        file (FILE_PATH): file path
        num_thread (int, optional): number of threads. Defaults to 32.
        helper (IOHelper, optional): use IOHelper. Defaults to None.
        use_direct_io (bool, optional): open file in direct io mode (posix
            path only). Defaults to False.
        use_cipher (bool, optional): decrypt file. Defaults to None. Cipher is
            force-disabled when False; enabled when True or when the
            VETURBOIO_USE_CIPHER environ is '1'.

    Returns:
        shmem (shared_memory.SharedMemory): shared memory object.

    Examples:
        ```
        import veturboio
        shmem_file = veturboio.load_to_shmem("sfcs://model.safetensors")
        ```
    """

    helper = IOHelper() if helper is None else helper

    use_sfcs_sdk, file = is_sfcs_path(file)
    if use_sfcs_sdk:
        # The SFCS client path takes no direct-io option here.
        loader = SfcsClientLoader(helper=helper, file=file, num_thread=num_thread)
    else:
        loader = FasterPosixLoader(file, helper, num_thread=num_thread, use_direct_io=use_direct_io)

    return SafetensorsFile(file, loader, use_cipher).load_to_shmem()
156 |
157 |
def save_file(
    state_dict: Dict[str, torch.Tensor],
    file: FILE_PATH,
    force_contiguous: bool = True,
    force_save_shared_tensor: bool = False,
    force_clone_shared_tensor: bool = False,
    metadata: Dict[str, str] = None,
    use_cipher: Optional[bool] = False,
    helper: Optional[IOHelper] = None,
    enable_fast_mode: Optional[bool] = False,
) -> None:
    """Save state dict object to safetensors file.

    Args:
        state_dict (Dict): state dict. NOTE: when force_save_shared_tensor is
            True and force_clone_shared_tensor is False, duplicate (shared)
            entries are deleted from the caller's dict in place.
        file (FILE_PATH): file path
        force_contiguous (bool, optional): force contiguous. Defaults to True.
        force_save_shared_tensor (bool, optional): force save shared tensor. Defaults to False.
        force_clone_shared_tensor (bool, optional): force to clone shared tensor rather than delete
            when force_save_shared_tensor is enabled. Defaults to False.
        metadata (Dict[str, str], optional): metadata. Defaults to None.
        use_cipher (bool, optional): encrypt file. Defaults to False.
        helper (IOHelper, optional): use IOHelper. Defaults to None.
        enable_fast_mode (bool, optional): enable fast mode. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio

        state_dict = {"weight": torch.randn(10, 10)}
        veturboio.save_file(state_dict, "model.safetensors")
        ```
    """
    if helper is None:
        helper = IOHelper()

    use_sfcs_sdk, file = is_sfcs_path(file)
    saver_cls = SfcsClientSaver if use_sfcs_sdk else PosixSaver
    saver = saver_cls(file=file, use_cipher=use_cipher, helper=helper)

    # TODO: there are some bugs while state_dict is loaded from veturboio
    if not force_save_shared_tensor:
        if force_clone_shared_tensor:
            logger.warning("force_clone_shared_tensor won't take any effect while force_save_shared_tensor is False;")
        # Let ValueError propagate unchanged: the previous catch-and-rewrap
        # (`raise ValueError(str(e))`) added nothing and truncated context.
        saver.save_file(state_dict, metadata=metadata, enable_fast_mode=enable_fast_mode)
        return

    # Deduplicate tensors that share storage; record the alias -> kept-name
    # mapping in metadata so loaders can restore the aliases later.
    to_removes = _remove_duplicate_names(state_dict)
    for kept_name, to_remove_group in to_removes.items():
        for to_remove in to_remove_group:
            # Lazily create metadata so it stays None when nothing is removed.
            if metadata is None:
                metadata = {}

            if to_remove not in metadata:
                # Do not override user data
                metadata[to_remove] = kept_name
            if force_clone_shared_tensor:
                state_dict[to_remove] = state_dict[to_remove].clone()
            else:
                del state_dict[to_remove]
    if force_contiguous:
        state_dict = {k: v.contiguous() for k, v in state_dict.items()}

    return saver.save_file(state_dict, metadata=metadata, enable_fast_mode=enable_fast_mode)
231 |
232 |
def save_model(model: torch.nn.Module, file: FILE_PATH, use_cipher: Optional[bool] = False) -> None:
    """Persist a model's weights as a safetensors file.

    Args:
        model (torch.nn.Module): model
        file (FILE_PATH): file path
        use_cipher (bool, optional): encrypt file. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio

        model = torch.nn.Linear(10, 10)
        veturboio.save_model(model, "model.safetensors")
        ```
    """

    use_sfcs_sdk, file = is_sfcs_path(file)
    saver_cls = SfcsClientSaver if use_sfcs_sdk else PosixSaver
    return saver_cls(file=file, use_cipher=use_cipher).save_model(model)
258 |
259 |
def save_pt(state_dict: Dict[str, torch.Tensor], file: FILE_PATH, use_cipher: Optional[bool] = False) -> None:
    """Persist a state dict in pytorch (.pt) format.

    Args:
        state_dict (Dict): state dict
        file (FILE_PATH): file path
        use_cipher (bool, optional): encrypt file. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio

        state_dict = {"weight": torch.randn(10, 10)}
        veturboio.save_pt(state_dict, "model.pt")
        ```
    """
    use_sfcs_sdk, file = is_sfcs_path(file)
    saver_cls = SfcsClientSaver if use_sfcs_sdk else PosixSaver
    return saver_cls(file=file, use_cipher=use_cipher).save_pt(state_dict)
284 |
--------------------------------------------------------------------------------
/veturboio/loader/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
# Re-export the loader implementations so callers can import them directly
# from veturboio.loader.
from veturboio.loader.base_loader import BaseLoader, PosixLoader
from veturboio.loader.faster_posix_loader import FasterPosixLoader
from veturboio.loader.sfcs_client_loader import SfcsClientLoader

# Public API of this subpackage.
__all__ = ["BaseLoader", "PosixLoader", "FasterPosixLoader", "SfcsClientLoader"]
22 |
--------------------------------------------------------------------------------
/veturboio/loader/base_loader.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import io
18 | from typing import Any, Dict
19 |
20 | import numpy as np
21 | import torch
22 | from numpy import ndarray
23 |
24 | from veturboio.ops.cipher import CipherInfo, decrypt
25 |
26 | # from veturboio.safetensors import SafetensorsFile
27 | from veturboio.types import FILE_PATH
28 |
# NOTE(review): magic value presumably used to tag safetensors files —
# confirm against veturboio/safetensors.py; not referenced in this module.
SAFETENSORS_FILE_MAGIC_NUM = 123
# Alignment unit (bytes) used by BaseLoader.init_aligned_tensor when placing
# CPU buffers; equals the typical page size — presumably for O_DIRECT reads.
BUF_ALIGN_SIZE = 4096
31 |
32 |
class BaseLoader:
    """Common interface for checkpoint loaders.

    Subclasses (posix, SFCS client, ...) implement the actual reads; this base
    class only stores the backend name and provides aligned CPU/GPU buffer
    allocation shared by the fast-mode loaders.
    """

    def __init__(self, method: str) -> None:
        # Short backend identifier, e.g. "posix" or "client".
        self.method = method

    def load_to_bytes(self, offset: int, count: int, cipher_info: CipherInfo = CipherInfo(False)) -> bytes:
        """Read *count* raw bytes starting at *offset*, decrypting if enabled."""
        raise NotImplementedError

    def load_safetensors(
        self,
        safetensors_file: Any,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Materialize all tensors of *safetensors_file* on *map_location*."""
        raise NotImplementedError

    def init_aligned_tensor(self, device, device_id: int, file_size, base_offset: int) -> torch.Tensor:
        """Allocate a flat uint8 tensor of (file_size - base_offset) bytes.

        On an accelerator (device_id != -1) a plain device tensor is returned;
        no manual alignment is needed there. On CPU the buffer start is placed
        so that its address is congruent to base_offset modulo BUF_ALIGN_SIZE.
        The pointless catch-and-rewrap of RuntimeError that was here before
        has been removed: it re-raised the identical message while cutting
        the exception context.
        """
        if device_id != -1:
            return torch.empty(file_size - base_offset, dtype=torch.uint8, device=device)

        # Over-allocate by one alignment unit, then select the sub-range whose
        # address offset matches base_offset modulo BUF_ALIGN_SIZE. This is a
        # single-expression form of the original two-branch computation.
        array = np.empty(file_size - base_offset + BUF_ALIGN_SIZE, dtype=np.uint8)
        addr_mis = array.ctypes.data % BUF_ALIGN_SIZE
        want_mis = base_offset % BUF_ALIGN_SIZE
        align = (want_mis - addr_mis) % BUF_ALIGN_SIZE

        sub_array = array[align : align + file_size - base_offset]
        # The numpy slice keeps `array` alive via its base reference.
        return torch.from_numpy(sub_array)
68 |
69 |
class PosixLoader(BaseLoader):
    """Loader that reads checkpoints through plain POSIX file APIs (numpy
    fromfile/memmap), without the native IOHelper extension."""

    def __init__(self, file: FILE_PATH) -> None:
        super().__init__(method="posix")
        self.file = file

    def load_to_bytes(self, offset: int, count: int, cipher_info: CipherInfo = CipherInfo(False)) -> bytes:
        """Read *count* bytes at *offset*, decrypting in place when enabled."""
        buf = np.fromfile(self.file, dtype=np.uint8, offset=offset, count=count)
        if cipher_info.use_cipher:
            # The cipher counter is relative to the payload, which begins
            # after the optional cipher header.
            header = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
            decrypt(cipher_info, buf, buf, offset - header)
        return buf.tobytes()

    def load_safetensors(
        self,
        safetensors_file: Any,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Memory-map each tensor region of the file and build the state dict."""
        if not state_dict:
            state_dict = {}

        base_offset = safetensors_file.tensor_offset
        target = torch.device(map_location)
        cipher_info = safetensors_file._cipher_info

        for meta in safetensors_file.meta.values():
            begin, end = meta.data_offsets
            # Copy-on-write mapping: in-place decryption must not touch the file.
            raw = np.memmap(
                safetensors_file.file,
                dtype=np.uint8,
                mode="c",
                offset=base_offset + begin,
                shape=end - begin,
            )
            if cipher_info.use_cipher:
                header = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
                decrypt(cipher_info, raw, raw, base_offset + begin - header)
            tensor = torch.frombuffer(raw, dtype=meta.dtype).view(meta.shape)
            if target.type == "cuda":
                state_dict[meta.name] = tensor.pin_memory().to(device=target, non_blocking=True)
            else:
                state_dict[meta.name] = tensor

        return state_dict

    def load_pt(
        self, map_location: str = "cpu", cipher_info: CipherInfo = CipherInfo(False)
    ) -> Dict[str, torch.Tensor]:
        """Load a pytorch (.pt) checkpoint, decrypting the payload first when enabled."""
        if not cipher_info.use_cipher:
            return torch.load(self.file, map_location=map_location)

        header = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
        buf = np.fromfile(self.file, dtype=np.uint8, offset=header, count=-1)
        decrypt(cipher_info, buf, buf, 0)
        return torch.load(io.BytesIO(buf.data), map_location=map_location)
125 |
--------------------------------------------------------------------------------
/veturboio/loader/faster_posix_loader.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import io
18 | import os
19 | import random
20 | import string
21 | from multiprocessing import shared_memory
22 | from typing import Dict
23 |
24 | import numpy as np
25 | import torch
26 |
27 | from veturboio.ops.cipher import CipherInfo, decrypt
28 | from veturboio.ops.io_utils import IOHelper, load_file_to_tensor
29 | from veturboio.ops.posix_utils import posix_read_file
30 | from veturboio.safetensors import SafetensorsFile
31 | from veturboio.types import FILE_PATH
32 |
33 | from .base_loader import PosixLoader
34 |
35 |
class FasterPosixLoader(PosixLoader):
    """PosixLoader variant that reads through the native IOHelper extension,
    with multi-threaded reads and optional pinned memory / direct I/O."""

    def __init__(
        self,
        file: FILE_PATH,
        helper: IOHelper,
        num_thread: int = 32,
        use_pinmem: bool = False,
        use_direct_io: bool = False,
    ) -> None:
        """Store the I/O options; actual reads happen in the load_* methods."""
        super().__init__(file)
        self.helper = helper
        self.num_thread = num_thread
        self.use_pinmem = use_pinmem
        self.use_direct_io = use_direct_io

    def load_safetensors(
        self,
        safetensors_file: SafetensorsFile,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Read tensor data via the native helper into the state dict.

        With a pre-allocated *state_dict*, each tensor is filled in place
        after validating contiguity, dtype and capacity. Otherwise the whole
        tensor region after the header is read into one flat buffer and split
        into per-tensor views afterwards.
        """
        file_size = os.path.getsize(safetensors_file.file)
        base_offset = safetensors_file.tensor_offset
        device = torch.device(map_location)
        if device.type == "cuda":
            device_id = device.index if device.index is not None else torch.cuda.current_device()
        else:
            # -1 signals the CPU path to the native helper.
            device_id = -1

        if state_dict:
            for tensor_meta in safetensors_file._meta.values():
                tensor = state_dict[tensor_meta.name]
                if not tensor.is_contiguous():
                    raise RuntimeError("allocated tensor not contiguous")
                if not tensor.dtype == tensor_meta.dtype:
                    raise RuntimeError("allocated tensor dtype not match")

                offset = tensor_meta.data_offsets[0]
                length = tensor_meta.data_offsets[1] - tensor_meta.data_offsets[0]
                tensor_length = torch.numel(tensor) * tensor.element_size()
                if tensor_length < length:
                    raise RuntimeError("allocated tensor size not enough")

                load_file_to_tensor(
                    file_path=safetensors_file.file,
                    total_tensor=tensor,
                    length=length,
                    offset=base_offset + offset,
                    helper=self.helper,
                    device_id=device_id,
                    num_thread=self.num_thread,
                    use_pinmem=self.use_pinmem,
                    use_sfcs_sdk=False,
                    use_direct_io=self.use_direct_io,
                    cipher_info=safetensors_file._cipher_info,
                )
                # resize_ reshapes the (possibly larger) buffer to the stored shape.
                tensor = tensor.resize_(tensor_meta.shape)
                state_dict[tensor_meta.name] = tensor
            return state_dict
        else:
            # One bulk read of everything after the header, then split per tensor.
            total_tensor = self.init_aligned_tensor(device, device_id, file_size, base_offset)
            load_file_to_tensor(
                file_path=safetensors_file.file,
                total_tensor=total_tensor,
                offset=base_offset,
                helper=self.helper,
                device_id=device_id,
                num_thread=self.num_thread,
                use_pinmem=self.use_pinmem,
                use_sfcs_sdk=False,
                use_direct_io=self.use_direct_io,
                cipher_info=safetensors_file._cipher_info,
            )

            return SafetensorsFile.split_tensor_to_state_dict(total_tensor, safetensors_file)

    def load_to_shmem(self, cipher_info: CipherInfo = CipherInfo(False)) -> shared_memory.SharedMemory:
        """Read the whole file into a freshly created shared-memory segment.

        The caller owns the returned handle. NOTE(review): the random segment
        name is not collision-checked; creation raises FileExistsError on a
        clash — confirm whether callers tolerate that.
        """
        file_size = os.path.getsize(self.file)
        file_name = ''.join(random.sample(string.ascii_lowercase + string.ascii_uppercase, 10))
        shm = shared_memory.SharedMemory(name=file_name, create=True, size=file_size)

        # Skip the cipher header (if any) so only payload lands in shmem.
        h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
        candidate = np.frombuffer(shm.buf, dtype=np.byte)
        posix_read_file(
            self.file,
            candidate,
            length=file_size - h_off,
            offset=h_off,
            num_thread=self.num_thread,
            cipher_info=cipher_info,
            use_direct_io=self.use_direct_io,
        )
        return shm

    def load_pt(
        self, map_location: str = "cpu", cipher_info: CipherInfo = CipherInfo(False)
    ) -> Dict[str, torch.Tensor]:
        """Load a .pt checkpoint; identical to PosixLoader.load_pt (redundant override)."""
        if cipher_info.use_cipher:
            h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
            arr = np.fromfile(self.file, dtype=np.uint8, offset=h_off, count=-1)
            decrypt(cipher_info, arr, arr, 0)
            return torch.load(io.BytesIO(arr.data), map_location=map_location)

        return torch.load(self.file, map_location=map_location)
140 |
--------------------------------------------------------------------------------
/veturboio/loader/sfcs_client_loader.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import os
18 | import random
19 | import string
20 | from io import BytesIO
21 | from multiprocessing import shared_memory
22 | from typing import Dict
23 |
24 | import numpy as np
25 | import torch
26 | from numpy import ndarray
27 |
28 | from veturboio.loader.base_loader import BaseLoader
29 | from veturboio.ops.cipher import CipherInfo
30 | from veturboio.ops.io_utils import IOHelper, load_file_to_tensor
31 | from veturboio.ops.sfcs_utils import (
32 | init_sfcs_conf,
33 | path_mapper,
34 | sfcs_default_config,
35 | sfcs_get_file_size,
36 | sfcs_read_file,
37 | )
38 | from veturboio.safetensors import SafetensorsFile
39 | from veturboio.types import FILE_PATH
40 |
41 |
class SfcsClientLoader(BaseLoader):
    """Loader that reads checkpoints through the SFCS client SDK."""

    def __init__(
        self,
        file: FILE_PATH,
        helper: IOHelper,
        num_thread: int = 32,
        use_pinmem: bool = False,
        use_direct_io: bool = False,
    ) -> None:
        """Store the I/O options and resolve the SFCS-side path for *file*."""
        super().__init__(method="client")

        self.file = file
        self.helper = helper
        self.num_thread = num_thread
        self.use_pinmem = use_pinmem
        self.use_direct_io = use_direct_io
        # init_sfcs_conf loads/derives the SFCS configuration and returns the
        # mount path used to translate local paths to SFCS paths.
        self._mount_path = init_sfcs_conf(file)
        self._sfcs_valid_path = path_mapper(self.file, self._mount_path)

    def load_to_bytes(self, offset: int, count: int, cipher_info: CipherInfo = CipherInfo(False)) -> bytes:
        """Read up to *count* bytes at *offset*; the count is clamped to EOF."""
        file_size = sfcs_get_file_size(self._sfcs_valid_path)
        if offset + count > file_size:
            count = file_size - offset

        file_bytes = bytes(count)
        # NOTE(review): writes into a `bytes` object through a numpy view,
        # which mutates a nominally immutable object — works with the SDK's
        # buffer fill but is fragile; consider bytearray.
        candidate = np.frombuffer(file_bytes, dtype=np.byte)
        sfcs_read_file(
            self._sfcs_valid_path,
            candidate,
            length=count,
            offset=offset,
            num_thread=self.num_thread,
            cipher_info=cipher_info,
        )
        return file_bytes

    def load_to_shmem(self, cipher_info: CipherInfo = CipherInfo(False)) -> shared_memory.SharedMemory:
        """Read the whole SFCS file into a new shared-memory segment.

        NOTE(review): the random segment name is not collision-checked;
        creation raises FileExistsError on a clash.
        """
        file_size = sfcs_get_file_size(self._sfcs_valid_path)
        file_name = ''.join(random.sample(string.ascii_lowercase + string.ascii_uppercase, 10))
        shm = shared_memory.SharedMemory(name=file_name, create=True, size=file_size)

        # Skip the cipher header (if any) so only payload lands in shmem.
        h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
        candidate = np.frombuffer(shm.buf, dtype=np.byte)
        sfcs_read_file(
            self._sfcs_valid_path,
            candidate,
            length=file_size - h_off,
            offset=h_off,
            num_thread=self.num_thread,
            cipher_info=cipher_info,
        )
        return shm

    def load_safetensors(
        self,
        safetensors_file: SafetensorsFile,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Read tensor data from SFCS into the state dict (mirrors
        FasterPosixLoader.load_safetensors, with use_sfcs_sdk=True)."""
        # TODO should be the same as self.loader
        sfcs_valid_path = path_mapper(safetensors_file.file, self._mount_path)
        file_size = sfcs_get_file_size(sfcs_valid_path)
        base_offset = safetensors_file.tensor_offset
        device = torch.device(map_location)
        if device.type == "cuda":
            device_id = device.index if device.index is not None else torch.cuda.current_device()
        else:
            # -1 signals the CPU path to the native helper.
            device_id = -1

        if state_dict:
            # Fill caller-provided tensors in place after validation.
            for tensor_meta in safetensors_file._meta.values():
                tensor = state_dict[tensor_meta.name]
                if not tensor.is_contiguous():
                    raise RuntimeError("allocated tensor not contiguous")
                if not tensor.dtype == tensor_meta.dtype:
                    raise RuntimeError("allocated tensor dtype not match")

                offset = tensor_meta.data_offsets[0]
                length = tensor_meta.data_offsets[1] - tensor_meta.data_offsets[0]
                tensor_length = torch.numel(tensor) * tensor.element_size()
                if tensor_length < length:
                    raise RuntimeError("allocated tensor size not enough")

                load_file_to_tensor(
                    file_path=sfcs_valid_path,
                    total_tensor=tensor,
                    length=length,
                    offset=base_offset + offset,
                    helper=self.helper,
                    device_id=device_id,
                    num_thread=self.num_thread,
                    use_pinmem=self.use_pinmem,
                    use_sfcs_sdk=True,
                    use_direct_io=self.use_direct_io,
                    cipher_info=safetensors_file._cipher_info,
                )
                # resize_ reshapes the (possibly larger) buffer to the stored shape.
                tensor = tensor.resize_(tensor_meta.shape)
                state_dict[tensor_meta.name] = tensor
            return state_dict
        else:
            # One bulk read of everything after the header, then split per tensor.
            total_tensor = self.init_aligned_tensor(device, device_id, file_size, base_offset)
            load_file_to_tensor(
                file_path=sfcs_valid_path,
                total_tensor=total_tensor,
                offset=base_offset,
                helper=self.helper,
                device_id=device_id,
                num_thread=self.num_thread,
                use_pinmem=self.use_pinmem,
                use_sfcs_sdk=True,
                use_direct_io=self.use_direct_io,
                cipher_info=safetensors_file._cipher_info,
            )

            return SafetensorsFile.split_tensor_to_state_dict(total_tensor, safetensors_file)

    def load_pt(
        self, map_location: str = "cpu", cipher_info: CipherInfo = CipherInfo(False)
    ) -> Dict[str, torch.Tensor]:
        """Load a .pt checkpoint from SFCS by buffering it fully in memory."""
        file_size = sfcs_get_file_size(self._sfcs_valid_path)
        h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
        file_bytes = self.load_to_bytes(offset=h_off, count=file_size - h_off, cipher_info=cipher_info)
        return torch.load(BytesIO(file_bytes), map_location=map_location)
165 |
--------------------------------------------------------------------------------
/veturboio/ops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/volcengine/veTurboIO/0ed05516013da4332433784660660ca4d1904505/veturboio/ops/__init__.py
--------------------------------------------------------------------------------
/veturboio/ops/consts.py:
--------------------------------------------------------------------------------
1 | MLP_SECRET_KEY_FILENAME = "MLP_SECRET_KEY"
2 | MLP_ACCESS_KEY_FILENAME = "MLP_ACCESS_KEY"
3 |
4 | SFCS_DEFAULT_CONFIG_PATH_ENV = "SFCS_METAINFO_PATH"
5 |
6 | SFCS_DEFAULT_METAINFO_PATH = "/root/.volc/SFCSConfiguration.json"
7 |
8 | RDMA_NIC_ENV = "MLP_RDMA_NIC_NAMES"
9 | DEFAULT_NIC_NAME = "eth0"
10 | RDMA_SEGMENT_ENV = "MLP_RDMA_NETWORK_SEGMENT"
11 | DEFAULT_CREDENTIAL_PATH_ENV = "CREDENTIAL_PATH"
12 | DEFAULT_CREDENTIAL_PATH = "/mlplatform/.credential/"
13 |
--------------------------------------------------------------------------------
/veturboio/ops/csrc/cipher.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | #include
17 | #include
18 | #include "include/cipher.h"
19 | #include
20 |
// Build cipher parameters from Python-side key/iv numpy buffers.
// NOTE(review): the template arguments of pybind11::array_t and
// reinterpret_cast appear to have been stripped from this copy of the file
// (likely "<char>" / "<unsigned char *>") -- restore them from
// include/cipher.h before compiling.
CipherInfo::CipherInfo(bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr,
                       size_t header_size)
    : use_cipher(use_cipher), header_size(header_size)
{
    if (use_cipher)
    {
        // AES-CTR mode is selected purely by key length: 16 bytes -> CTR-128,
        // 32 bytes -> CTR-256; anything else is rejected.
        pybind11::buffer_info key_info = key_arr.request();
        size_t key_size = key_info.size;
        if (key_size == 16)
        {
            mode = "CTR-128";
        }
        else if (key_size == 32)
        {
            mode = "CTR-256";
        }
        else
        {
            throw std::runtime_error("Cipher Exception: key length invalid");
        }
        // Pointers reference the Python-owned buffers; no copy is made, so the
        // Python arrays must outlive this object.
        key = reinterpret_cast(key_info.ptr);

        pybind11::buffer_info iv_info = iv_arr.request();
        if ((size_t)iv_info.size != AES_BLOCK_SIZE)
        {
            throw std::runtime_error("Cipher Exception: iv length invalid");
        }
        iv = reinterpret_cast(iv_info.ptr);
    }
}
51 |
// Python-facing wrapper that owns a CtrEncrypter seeded with key/iv buffers
// and a byte offset into the global CTR keystream.
// NOTE(review): template arguments on pybind11::array_t appear stripped in
// this copy -- restore from the header before compiling.
CtrEncWrap::CtrEncWrap(std::string mode, pybind11::array_t key_arr,
                       pybind11::array_t iv_arr, size_t global_offset)
{
    pybind11::buffer_info key_info = key_arr.request();
    pybind11::buffer_info iv_info = iv_arr.request();
    enc_.reset(new CtrEncrypter(mode, (unsigned char *)key_info.ptr, (unsigned char *)iv_info.ptr, global_offset));
}
59 |
60 | size_t CtrEncWrap::encrypt_update(pybind11::array_t pt, pybind11::array_t ct)
61 | {
62 | pybind11::buffer_info pt_info = pt.request();
63 | pybind11::buffer_info ct_info = ct.request();
64 | unsigned char *pt_ptr = (unsigned char *)pt_info.ptr;
65 | unsigned char *ct_ptr = (unsigned char *)ct_info.ptr;
66 | return enc_->encrypt_update(pt_ptr, pt_info.size, ct_ptr);
67 | }
68 |
69 | CtrDecWrap::CtrDecWrap(std::string mode, pybind11::array_t key_arr,
70 | pybind11::array_t iv_arr, size_t global_offset)
71 | {
72 | pybind11::buffer_info key_info = key_arr.request();
73 | pybind11::buffer_info iv_info = iv_arr.request();
74 | dec_.reset(new CtrDecrypter(mode, (unsigned char *)key_info.ptr, (unsigned char *)iv_info.ptr, global_offset));
75 | }
76 |
77 | size_t CtrDecWrap::decrypt_update(pybind11::array_t ct, pybind11::array_t pt)
78 | {
79 | pybind11::buffer_info pt_info = pt.request();
80 | pybind11::buffer_info ct_info = ct.request();
81 | unsigned char *pt_ptr = (unsigned char *)pt_info.ptr;
82 | unsigned char *ct_ptr = (unsigned char *)ct_info.ptr;
83 | return dec_->decrypt_update(ct_ptr, ct_info.size, pt_ptr);
84 | }
85 |
--------------------------------------------------------------------------------
/veturboio/ops/csrc/include/cfsaio.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
#ifndef _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_
#define _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_

#include <stdint.h> /* for uint64_t, etc. (restored stripped include target) */

#ifdef __cplusplus
extern "C"
{
#endif
    /**
     * Some utility decls used in libcfs.
     */
    typedef int32_t tSize;   /// size of data for read/write io ops
    typedef int64_t tOffset; /// offset within the file

    struct CfsFileSystemInternalWrapper;
    typedef struct CfsFileSystemInternalWrapper *cfsFS;

    struct CfsFileInternalWrapper;
    typedef struct CfsFileInternalWrapper *cfsFile;

    typedef enum cfsStatus
    {
        STATUS_OK = 0,
        STATUS_MISSING_BLOCK = -1002,
        STATUS_TIMEOUT = -1003,
        STATUS_INVALID_RANGE = -1004,
        STATUS_CONNECTION_CLOSED = -1005,
        STATUS_WRITE_FAILED = -1006,
        STATUS_IO_BUSY = -1007,
        STATUS_INVALID_PARAMETER = -1098,
        STATUS_UNSUPPORTED_OP = -1099,
        STATUS_UNKNOWN_ERR = -1100,
    } cfsStatus;

    typedef void (*cfsWriteCallback)(cfsStatus status, void *args);

    typedef void (*cfsReadCallback)(cfsStatus status, int32_t readLength, char *buffer, void *args);

    typedef struct cfsAsyncContext
    {
        cfsReadCallback readCallback;
        cfsWriteCallback writeCallback;
        char *buffer;
        void *args;
    } cfsAsyncContext;

    /**
     * cfsAsyncPRead - Async positional read of data from an open file.
     *
     * @param fs       The configured filesystem handle.
     * @param file     The file handle.
     * @param offset   Position from which to read.
     * @param length   The length of the buffer.
     * @param context  The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncPRead(cfsFS fs, cfsFile file, tSize length, tOffset offset, cfsAsyncContext *context);

    /**
     * cfsAsyncWrite - Write data to the internal buffer of outputstream.
     *
     * @param fs       The configured filesystem handle.
     * @param file     The file handle.
     * @param buffer   The buffer to copy write bytes into.
     * @param length   The length of the buffer.
     * @param context  The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncWrite(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context);

    /**
     * cfsAsyncFlush - Wait for data is acked by remote dn.
     *
     * @param fs       The configured filesystem handle.
     * @param file     The file handle.
     * @param context  The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncFlush(cfsFS fs, cfsFile file, cfsAsyncContext *context);

    /**
     * cfsAsyncWriteAndFlush - Write data to remote datanode and wait for ack.
     *
     * @param fs       The configured filesystem handle.
     * @param file     The file handle.
     * @param buffer   The buffer to copy write bytes into.
     * @param length   The length of the buffer.
     * @param context  The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncWriteAndFlush(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context);

#ifdef __cplusplus
}
#endif

#endif /* _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_ */
114 |
--------------------------------------------------------------------------------
/veturboio/ops/csrc/include/cipher.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | #ifndef VETURBOIO_CIPHER_H
17 | #define VETURBOIO_CIPHER_H
18 |
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include "fastcrypto.h"
24 |
25 | class CipherInfo
26 | {
27 | public:
28 | bool use_cipher = false;
29 | std::string mode = "CTR-128";
30 | size_t header_size = 0;
31 | unsigned char *key = NULL;
32 | unsigned char *iv = NULL;
33 | CipherInfo(bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr, size_t header_size);
34 | CipherInfo() = default;
35 | };
36 |
37 | class CtrEncWrap
38 | {
39 | private:
40 | std::unique_ptr enc_;
41 |
42 | public:
43 | CtrEncWrap() = default;
44 | CtrEncWrap(std::string mode, pybind11::array_t key_arr, pybind11::array_t iv_arr,
45 | size_t global_offset);
46 | size_t encrypt_update(pybind11::array_t pt, pybind11::array_t ct);
47 | };
48 |
49 | class CtrDecWrap
50 | {
51 | private:
52 | std::unique_ptr dec_;
53 |
54 | public:
55 | CtrDecWrap() = default;
56 | CtrDecWrap(std::string mode, pybind11::array_t key_arr, pybind11::array_t iv_arr,
57 | size_t global_offset);
58 | size_t decrypt_update(pybind11::array_t ct, pybind11::array_t pt);
59 | };
60 |
61 | #endif
62 |
--------------------------------------------------------------------------------
/veturboio/ops/csrc/include/common.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | #ifndef COMMON_H
17 | #define COMMON_H
18 |
19 | #include
20 | #include
21 | #if defined(USE_CUDA)
22 | #include
23 | #endif
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include
29 | #include
30 | #include "cfs.h"
31 | #include "logging.h"
32 | #include "sfcs.h"
33 |
34 | #define THREAD_NICE_ADJ -10
35 | #define BUF_ALIGN_SIZE (size_t)4096
36 |
37 | using namespace std;
38 |
39 | #endif
40 |
--------------------------------------------------------------------------------
/veturboio/ops/csrc/include/fastcrypto.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | #ifndef VETURBOIO_FASTCRYPTO_H
17 | #define VETURBOIO_FASTCRYPTO_H
18 |
19 | #include
20 | #include
21 |
22 | #define EVP_UPDATE_MAX 0x7ffffff0
23 | #define AES_BLOCK_SIZE 16
24 | #define AES_BUF_MAX_SIZE 32
25 | #define MAX_CTR_KEY_SIZE 32
26 | #define FASTCRYPTO_MAGIC_SIZE 16
27 |
// Add c to the n-byte big-endian counter in counter[0..n-1], propagating the
// carry from the last (least-significant) byte toward the front; the value
// wraps modulo 256^n. Used to advance an AES-CTR IV by a number of blocks.
// Precondition: n > 0 (the do/while dereferences counter[n-1] first).
// NOTE: restored the stripped `static_cast<unsigned char>` type argument.
inline void counter_inc_by(unsigned char *counter, size_t n, size_t c)
{
    do
    {
        --n;
        c += counter[n];
        counter[n] = static_cast<unsigned char>(c);
        c >>= 8;
    } while (n);
}
38 |
39 | typedef struct evp_cipher_ctx_st EVP_CIPHER_CTX;
40 | typedef struct evp_cipher_st EVP_CIPHER;
41 | typedef struct evp_mac_ctx_st EVP_MAC_CTX;
42 | typedef struct evp_mac_st EVP_MAC;
43 |
// Streaming AES-CTR encrypter built on EVP-style cipher contexts (the
// evp_* forward declarations above match OpenSSL's API).
// Construct with the algo name ("CTR-128"/"CTR-256", see CipherInfo::mode),
// raw key/iv, and a byte offset so encryption can start mid-stream;
// encrypt_update may then be called repeatedly on consecutive chunks.
class CtrEncrypter
{
  private:
    EVP_CIPHER_CTX *ctx = NULL; // cipher context, released in the destructor
    EVP_CIPHER *cipher = NULL;  // selected cipher implementation

  public:
    CtrEncrypter() = default;
    CtrEncrypter(std::string algo, const unsigned char *key, const unsigned char *iv, size_t global_offset);
    ~CtrEncrypter();
    // Encrypt pt_size bytes from pt into ct; returns the bytes written.
    int encrypt_update(unsigned char *pt, size_t pt_size, unsigned char *ct);
};
56 |
// Streaming AES-CTR decrypter, the mirror image of CtrEncrypter above:
// same algo names, same mid-stream positioning via global_offset, and
// decrypt_update may be called repeatedly on consecutive chunks.
class CtrDecrypter
{
  private:
    EVP_CIPHER_CTX *ctx = NULL; // cipher context, released in the destructor
    EVP_CIPHER *cipher = NULL;  // selected cipher implementation

  public:
    CtrDecrypter() = default;
    CtrDecrypter(std::string algo, const unsigned char *key, const unsigned char *iv, size_t global_offset);
    ~CtrDecrypter();
    // Decrypt ct_size bytes from ct into pt; returns the bytes written.
    int decrypt_update(unsigned char *ct, size_t ct_size, unsigned char *pt);
};
69 |
70 | // Both encrypt and decrypt require length of ct and pt multiple of 16
71 | int ctr_encrypt_gpu(std::string algo, const unsigned char *key, const unsigned char *iv, unsigned char *pt,
72 | size_t pt_size, unsigned char *ct);
73 |
74 | int ctr_decrypt_gpu(std::string algo, const unsigned char *key, const unsigned char *iv, unsigned char *ct,
75 | size_t ct_size, unsigned char *pt);
76 | #endif
--------------------------------------------------------------------------------
/veturboio/ops/csrc/include/io_helper.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | #ifndef IO_HELPER_H
17 | #define IO_HELPER_H
18 |
19 | #include "posix.h"
20 | #include "sfcs.h"
21 |
22 | class IOHelper
23 | {
24 | private:
25 | char *pin_mem = NULL;
26 | bool use_pinmem_ = false;
27 | size_t buffer_size_ = 0;
28 |
29 | public:
30 | ~IOHelper();
31 | void load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset,
32 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk,
33 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr,
34 | pybind11::array_t iv_arr, int64_t header_size);
35 | void save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem,
36 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr,
37 | pybind11::array_t iv_arr, int64_t header_size);
38 | void save_tensor_to_file_cpu(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem,
39 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr,
40 | pybind11::array_t iv_arr, int64_t header_size);
41 | void init_buffer(string file_path, int64_t file_size, bool use_pinmem, bool use_sfcs_sdk);
42 | void free_buffer();
43 | };
44 |
45 | size_t get_file_size(const char *file_name, bool use_sfcs_sdk);
46 |
47 | void read_file(string file_path, char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size,
48 | size_t global_offset, bool use_sfcs_sdk, bool use_direct_io, CipherInfo cipher_info);
49 |
50 | void load_file_to_tensor_cpu(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset,
51 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk,
52 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr,
53 | pybind11::array_t iv_arr, int64_t header_size);
54 |
55 | #endif
56 |
--------------------------------------------------------------------------------
/veturboio/ops/csrc/include/logging.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
#ifndef LOGGER_H
#define LOGGER_H

#include <iostream> // restored stripped include target
using namespace std;

// Output stream and terminator used by all log macros below.
#define PR std::cout
#define ENDL std::endl
#define FILE_INFO "[" << __FUNCTION__ << " at " << __FILE__ << ":" << __LINE__ << "] "

// ARG_COUNT(...) expands to the number of variadic arguments (1..9).
#define ARG_COUNT_PRIVATE(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...) N
#define ARG_COUNT(...) ARG_COUNT_PRIVATE(0, __VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)

// Token-pasting helpers so CallSomeOne(pr, a, b) expands to pr2(a, b) —
// i.e. the paramN variant matching the argument count is selected.
#define FUN_COUNT_GLUE(M, count) M##count
#define FUN_JOIN_COUNT(M, count) FUN_COUNT_GLUE(M, count)
#define FUN_JOIN_ARGS(x, y) x y
#define CallSomeOne(fn, ...) FUN_JOIN_ARGS(FUN_JOIN_COUNT(fn, ARG_COUNT(__VA_ARGS__)), (__VA_ARGS__))

// paramN stream the first argument verbatim and each further argument as a
// stringified "name:value" pair.
#define param1(a) a
#define param2(a, b) a << ", " #b ":" << b
#define param3(a, b, c) a << ", " #b ":" << b << ", " #c ":" << c
#define param4(a, b, c, d) a << ", " #b ":" << b << ", " #c ":" << c << ", " #d ":" << d
#define param5(a, b, c, d, e) a << ", " #b ":" << b << ", " #c ":" << c << ", " #d ":" << d << ", " #e ":" << e

#define pr1(...) param1(__VA_ARGS__)
#define pr2(...) param2(__VA_ARGS__)
#define pr3(...) param3(__VA_ARGS__)
#define pr4(...) param4(__VA_ARGS__)
#define pr5(...) param5(__VA_ARGS__)

// Leveled log entry points; all print to stdout with a level tag and the
// function/file/line prefix from FILE_INFO.
#define logDebug(...) PR << "VETURBOIO_CPP_DEBUG " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#define logInfo(...) PR << "VETURBOIO_CPP_INFO " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#define logWarn(...) PR << "VETURBOIO_CPP_WARN " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#define logError(...) PR << "VETURBOIO_CPP_ERROR " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL
#endif // LOGGER_H
--------------------------------------------------------------------------------
/veturboio/ops/csrc/include/posix.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | #ifndef LOAD_UTILS_H
17 | #define LOAD_UTILS_H
18 |
19 | #include "common.h"
20 | #include "cipher.h"
21 |
22 | class POSIXFile
23 | {
24 | public:
25 | std::string file_path;
26 | // cipher related
27 | CipherInfo cipher_info;
28 |
29 | POSIXFile(std::string file_path);
30 | POSIXFile(std::string file_path, CipherInfo cipher_info);
31 | POSIXFile(std::string file_path, bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr,
32 | size_t header_size);
33 |
34 | size_t read_file_to_address_parallel(char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size,
35 | size_t global_offset, bool use_direct_io);
36 | size_t read_file_to_array(pybind11::array_t arr, size_t length, size_t offset, int num_thread,
37 | bool use_direct_io);
38 | size_t write_file_from_addr(char *addr, size_t length, bool append);
39 |
40 | private:
41 | void read_file_to_address_thread(int thread_id, char *addr, int device_id, char *dev_mem, size_t block_size,
42 | size_t total_size, size_t global_offset, bool use_direct_io,
43 | CipherInfo cipher_info);
44 | };
45 |
46 | #endif
--------------------------------------------------------------------------------
/veturboio/ops/csrc/include/sfcs.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | #ifndef SFCS_H
18 | #define SFCS_H
19 |
20 | #include
21 | #include
22 | #include "common.h"
23 | #include "cfs.h"
24 | #include "logging.h"
25 | #include "cipher.h"
26 |
27 | #define SFCS_NAME_NODE "default"
28 | #define SFCS_USER_NAME "demo-user"
29 |
30 | using namespace std;
31 |
32 | class SFCSFs
33 | {
34 | public:
35 | cfsFS fs;
36 |
37 | SFCSFs();
38 | ~SFCSFs();
39 | void concat_files(std::string file_name, vector file_paths);
40 | void rename_file(const char *file_path, const char *file_name);
41 | void mkdir(std::string file_path);
42 | int64_t get_block_size();
43 | size_t read_file_to_addr(std::string file_name, CipherInfo cipher_info, char *addr, size_t length, size_t offset);
44 | size_t write_file_from_addr(std::string file_name, CipherInfo cipher_info, char *addr, size_t length,
45 | size_t offset);
46 | void read_multi_files(pybind11::list file_paths, pybind11::list tensors, pybind11::list lengths,
47 | pybind11::list offsets, int num_thread, bool use_cipher, pybind11::array_t key_arr,
48 | pybind11::array_t iv_arr, size_t header_size);
49 | void write_multi_files(pybind11::list file_paths, pybind11::list tensors, pybind11::list lengths,
50 | pybind11::list offsets, int num_thread, bool use_cipher, pybind11::array_t key_arr,
51 | pybind11::array_t iv_arr, size_t header_size);
52 | void get_file_size(std::string file_name, size_t *size);
53 | void get_multi_file_size(pybind11::list file_paths, pybind11::list sizes, int num_thread);
54 | };
55 |
56 | class SFCSFile
57 | {
58 | public:
59 | cfsFS fs;
60 | bool fs_owner;
61 | SFCSFs *sfcs_fs;
62 | std::string file_path;
63 | // cipher related
64 | CipherInfo cipher_info;
65 |
66 | SFCSFile(std::string file_path);
67 | SFCSFile(std::string path, SFCSFs *sfcs_fs);
68 | SFCSFile(std::string file_path, bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr,
69 | size_t header_size);
70 | SFCSFile(std::string file_path, CipherInfo cipher_info);
71 | SFCSFile(std::string file_path, SFCSFs *sfcs_fs, CipherInfo cipher_info);
72 | ~SFCSFile();
73 | size_t get_file_size();
74 | size_t read_file_to_address_parallel(char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size,
75 | size_t global_offset);
76 | size_t read_file_to_addr(char *addr, size_t length, size_t offset);
77 | size_t read_file_to_array(pybind11::array_t arr, size_t length, size_t offset, int num_thread);
78 | size_t write_file_from_array(pybind11::array_t arr, size_t length, bool append);
79 | size_t write_file_from_tensors(pybind11::list tensors, pybind11::list sizes, pybind11::list offsets,
80 | std::string concat_dir, std::string concat_file);
81 | size_t write_file_from_addr(char *addr, size_t length, size_t offset, bool append);
82 | void delete_file();
83 |
84 | private:
85 | void read_file_to_address_thread(int thread_id, char *addr, int device_id, char *dev_mem, size_t block_size,
86 | size_t total_size, size_t global_offset);
87 | void write_file_from_tensor(torch::Tensor tensor, size_t length, size_t offset, std::string file_name);
88 | };
89 |
90 | #endif
--------------------------------------------------------------------------------
/veturboio/ops/csrc/io_helper.cu:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | #include "include/io_helper.h"
17 | #include "include/cipher.h"
18 | #include "include/fastcrypto.h"
19 |
// Release the staging buffer (pinned host memory or mmap) on destruction.
IOHelper::~IOHelper()
{
    free_buffer();
}
24 |
// init buffer with given positive size or the size of the file in specified
// path. Allocates pinned host memory (cudaMallocHost) when use_pinmem is
// set, otherwise an anonymous private mmap with transparent-huge-page
// advice. Frees any previously held buffer first.
// Throws std::runtime_error when the allocation fails (the original code
// left pin_mem NULL/MAP_FAILED unchecked and crashed later).
void IOHelper::init_buffer(string file_path, int64_t buffer_size, bool use_pinmem, bool use_sfcs_sdk)
{
    if (buffer_size <= 0)
    {
        buffer_size = get_file_size(file_path.c_str(), use_sfcs_sdk);
    }

    if (buffer_size_ > 0)
    {
        free_buffer();
    }

    buffer_size_ = buffer_size;
    if (use_pinmem)
    {
        use_pinmem_ = true;
        cudaError_t err = cudaMallocHost(&pin_mem, buffer_size, cudaHostAllocMapped);
        if (err != cudaSuccess || pin_mem == NULL)
        {
            pin_mem = NULL;
            buffer_size_ = 0;
            throw std::runtime_error("IOHelper: cudaMallocHost failed");
        }
    }
    else
    {
        // fd must be -1 for portable MAP_ANONYMOUS usage (original passed 0)
        pin_mem = (char *)mmap(NULL, buffer_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (pin_mem == MAP_FAILED)
        {
            pin_mem = NULL;
            buffer_size_ = 0;
            throw std::runtime_error("IOHelper: mmap failed");
        }
        // best-effort: huge pages are an optimization only, ignore failure
        madvise(pin_mem, buffer_size, MADV_HUGEPAGE);
    }
}
51 |
// Release the staging buffer via the allocator that created it.
// Resets pin_mem/buffer_size_ afterwards so a repeated call (or a later
// init_buffer) cannot double-free or reuse a stale pointer — the original
// left pin_mem dangling after freeing.
void IOHelper::free_buffer()
{
    if (pin_mem != NULL)
    {
        if (use_pinmem_)
            cudaFreeHost(pin_mem);
        else
            munmap(pin_mem, buffer_size_);
        pin_mem = NULL;
        buffer_size_ = 0;
    }
}
62 |
// Read the unaligned head and tail of the requested [*offset, *offset +
// *total_size) byte range separately, so the remaining middle section starts
// and ends on BUF_ALIGN_SIZE boundaries. On return, *offset and *total_size
// describe only that aligned middle section, and *read_unaligned_size holds
// the number of head bytes already delivered into res_tensor. The partial
// spans go through single-threaded read_file calls, which also decrypt them.
void read_unaligned_part_gpu(std::string file_path, torch::Tensor res_tensor, int64_t *offset, int64_t device_id,
                             size_t *total_size, bool use_sfcs_sdk, bool use_direct_io, size_t *read_unaligned_size,
                             CipherInfo cipher_info)
{
    // cpu align only read head part, while gpu align read both head and tail part
    if (device_id < 0)
    {
        throw std::runtime_error("read_unaligned_part_gpu only support gpu device");
    }
    size_t end_offset = *offset + *total_size;
    // both head and tail are aligned
    if ((*offset & (BUF_ALIGN_SIZE - 1)) == 0 && ((end_offset) & (BUF_ALIGN_SIZE - 1)) == 0)
    {
        return;
    }
    // host-side scratch buffers for the partial blocks
    char tmp_buf_head[BUF_ALIGN_SIZE] = {};
    char tmp_buf_tail[BUF_ALIGN_SIZE] = {};
    // read head unaligned
    cudaSetDevice(device_id);
    if ((*offset & (BUF_ALIGN_SIZE - 1)) != 0)
    {
        // bytes from *offset up to the next alignment boundary (or the whole request)
        size_t read_head_size = min(BUF_ALIGN_SIZE - (*offset & (BUF_ALIGN_SIZE - 1)), *total_size);
        read_file(file_path, tmp_buf_head, device_id, (char *)res_tensor.data_ptr(), 1, read_head_size, *offset,
                  use_sfcs_sdk, use_direct_io, cipher_info);
        *read_unaligned_size = read_head_size;
        *offset += read_head_size;
        *total_size -= read_head_size;
    }
    // read tail unaligned
    if (*total_size > 0 && (end_offset & (BUF_ALIGN_SIZE - 1)) != 0)
    {
        // last alignment boundary before end_offset
        size_t tail_offset = end_offset - (end_offset & (BUF_ALIGN_SIZE - 1));
        // destination offset within the tensor for the tail bytes
        size_t tensor_offset = tail_offset - *offset + *read_unaligned_size;
        read_file(file_path, tmp_buf_tail, device_id, (char *)res_tensor.data_ptr() + tensor_offset, 1,
                  end_offset - tail_offset, tail_offset, use_sfcs_sdk, use_direct_io, cipher_info);
        *total_size -= end_offset - tail_offset;
    }
    cudaDeviceSynchronize();
}
102 |
// Load `length` bytes (or the remainder of the file when length == 0)
// starting at `offset` from file_path into res_tensor. CPU targets
// (device_id < 0) read directly with per-chunk decryption; CUDA targets
// stage through pin_mem, then decrypt in place on the device, after the
// unaligned head/tail spans have been read (and decrypted) separately.
// Throws std::runtime_error on alignment, allocation, or decrypt failure.
// NOTE: restored stripped `pybind11::array_t<char>` and
// `reinterpret_cast<unsigned char *>` type arguments; logic is unchanged.
void IOHelper::load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset,
                                   int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk,
                                   bool use_direct_io, bool use_cipher, pybind11::array_t<char> key_arr,
                                   pybind11::array_t<char> iv_arr, int64_t header_size)
{
    size_t file_size = get_file_size(file_path.c_str(), use_sfcs_sdk);
    size_t read_unaligned_size = 0;
    size_t total_size = length > 0 ? length : file_size - offset;
    // set cipher
    CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size);
    if (device_id < 0)
    {
        read_file(file_path, (char *)res_tensor.data_ptr() + read_unaligned_size, device_id, NULL, num_thread,
                  total_size, offset, use_sfcs_sdk, use_direct_io, cipher_info);
    }
    else
    {
        // read unaligned part first, since GPU can only decrypt data in integral multiple of 16 Bytes
        read_unaligned_part_gpu(file_path, res_tensor, &offset, device_id, &total_size, use_sfcs_sdk, use_direct_io,
                                &read_unaligned_size, cipher_info);

        // change use_pinmem attribute may introduce ambiguity
        if (buffer_size_ > 0 && use_pinmem != use_pinmem_)
        {
            throw std::runtime_error("use_pinmem attribute of an exising IOHelper should not be changed");
        }

        // TODO: HPA might be slow
        // only use pin_mem as buffer for copying data to device memory
        // the lifecycle of the pin_mem is the same as helper
        if (pin_mem == NULL || total_size > buffer_size_)
        {
            init_buffer(file_path, total_size, use_pinmem, use_sfcs_sdk);
        }
        cudaSetDevice(device_id);
        // bulk read of the aligned middle: staged via pin_mem, decrypted below on GPU
        read_file(file_path, pin_mem, device_id, (char *)res_tensor.data_ptr() + read_unaligned_size, num_thread,
                  total_size, offset, use_sfcs_sdk, use_direct_io, CipherInfo());
        cudaDeviceSynchronize();
        // decrypt with gpu
        if (cipher_info.use_cipher && total_size > 0)
        {
            if (offset % AES_BLOCK_SIZE != 0 || total_size % AES_BLOCK_SIZE != 0)
            {
                throw std::runtime_error("cannot decrypt because gpu read is not aligned");
            }
            // advance a private copy of the IV to the counter block matching `offset`
            unsigned char iv[AES_BLOCK_SIZE];
            for (size_t i = 0; i < AES_BLOCK_SIZE; i++)
            {
                iv[i] = cipher_info.iv[i];
            }
            counter_inc_by(iv, AES_BLOCK_SIZE, (offset - cipher_info.header_size) / AES_BLOCK_SIZE);
            unsigned char *iv_gpu = NULL;
            cudaMalloc((void **)&iv_gpu, AES_BLOCK_SIZE);
            if (iv_gpu == NULL)
            {
                throw std::runtime_error("iv_gpu cannot be allocated");
            }
            cudaMemcpy(iv_gpu, iv, AES_BLOCK_SIZE, cudaMemcpyHostToDevice);
            // in-place decrypt on the device
            unsigned char *ct = reinterpret_cast<unsigned char *>(res_tensor.data_ptr()) + read_unaligned_size;
            int cipher_ret = ctr_decrypt_gpu(cipher_info.mode, cipher_info.key, iv_gpu, ct, total_size, ct);
            if (!cipher_ret)
            {
                throw std::runtime_error("Cipher Exception: gpu decrypt fail");
            }
            cudaDeviceSynchronize();
            cudaFree(iv_gpu);
        }
    }
}
172 |
// Persist `length` bytes of `tensor` to file_path (appending), optionally
// encrypting via cipher_info inside the backend writers. CUDA tensors and
// any encrypted write are staged through the reusable pin_mem host buffer;
// plain CPU writes go straight from the tensor's storage.
// NOTE: restored the stripped `pybind11::array_t<char>` template arguments.
void IOHelper::save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem,
                                   bool use_sfcs_sdk, bool use_cipher, pybind11::array_t<char> key_arr,
                                   pybind11::array_t<char> iv_arr, int64_t header_size)
{
    char *buf;

    CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size);
    if (tensor.device().is_cuda() || use_cipher)
    {
        // change use_pinmem attribute may introduce ambiguity
        if (buffer_size_ > 0 && use_pinmem != use_pinmem_)
        {
            throw std::runtime_error("use_pinmem attribute of an exising IOHelper should not be changed");
        }

        if (pin_mem == NULL || length > buffer_size_)
        {
            init_buffer(file_path, length, use_pinmem, use_sfcs_sdk);
        }

        buf = pin_mem;
        if (tensor.device().is_cuda())
        {
            cudaSetDevice(tensor.device().index());
            cudaMemcpyAsync(buf, (char *)tensor.data_ptr(), length, cudaMemcpyDeviceToHost);
            cudaDeviceSynchronize();
        }
        else
        {
            memcpy(buf, (char *)tensor.data_ptr(), length);
        }
    }
    else
    {
        buf = (char *)tensor.data_ptr();
    }

    if (use_sfcs_sdk)
    {
        SFCSFile sfcs_file(file_path, cipher_info);
        sfcs_file.write_file_from_addr(buf, length, 0, true);
    }
    else
    {
        POSIXFile posix_file(file_path, cipher_info);
        posix_file.write_file_from_addr(buf, length, true);
    }
}
221 |
--------------------------------------------------------------------------------
/veturboio/ops/csrc/io_helper_cpu.cpp:
--------------------------------------------------------------------------------
1 | #include "include/io_helper.h"
2 | #include "include/cipher.h"
3 |
// CPU build: IOHelper owns no staging buffers, so nothing to release.
IOHelper::~IOHelper()
{
}
7 |
// init buffer with given positive size or the size of the file in specified
// path
// CPU build: no staging buffer is needed, so this is a no-op and all
// parameters are ignored.
void IOHelper::init_buffer(string file_path, int64_t buffer_size, bool use_pinmem, bool use_sfcs_sdk)
{
}
13 |
// CPU build: init_buffer allocates nothing, so there is nothing to free.
void IOHelper::free_buffer()
{
}
17 |
// CPU-build entry point: delegates directly to the shared CPU implementation
// in io_helper_cpu_common.cpp (no accelerator staging path exists here).
void IOHelper::load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset,
                                   int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk,
                                   bool use_direct_io, bool use_cipher, pybind11::array_t key_arr,
                                   pybind11::array_t iv_arr, int64_t header_size)
{
    load_file_to_tensor_cpu(file_path, res_tensor, length, offset, device_id, num_thread, use_pinmem, use_sfcs_sdk,
                            use_direct_io, use_cipher, key_arr, iv_arr, header_size);
}
26 |
// CPU-build entry point: forwards to the shared CPU save path
// (io_helper_cpu_common.cpp), which handles optional ciphering and the
// SFCS/POSIX backend choice.
void IOHelper::save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem,
                                   bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr,
                                   pybind11::array_t iv_arr, int64_t header_size)
{
    save_tensor_to_file_cpu(tensor, file_path, length, use_pinmem, use_sfcs_sdk, use_cipher, key_arr, iv_arr,
                            header_size);
}
34 |
--------------------------------------------------------------------------------
/veturboio/ops/csrc/io_helper_cpu_common.cpp:
--------------------------------------------------------------------------------
1 | #include "include/io_helper.h"
2 | #include "include/cipher.h"
3 |
4 | size_t get_file_size(const char *file_name, bool use_sfcs_sdk)
5 | {
6 | if (use_sfcs_sdk)
7 | {
8 | SFCSFile sfcs_file(file_name);
9 | return sfcs_file.get_file_size();
10 | }
11 | else
12 | {
13 | struct stat st;
14 | stat(file_name, &st);
15 | return st.st_size;
16 | }
17 | }
18 |
19 | void read_file(string file_path, char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size,
20 | size_t global_offset, bool use_sfcs_sdk, bool use_direct_io, CipherInfo cipher_info)
21 | {
22 | if (total_size == 0)
23 | {
24 | return;
25 | }
26 |
27 | if (use_sfcs_sdk)
28 | {
29 | SFCSFile sfcs_file(file_path, cipher_info);
30 | sfcs_file.read_file_to_address_parallel(addr, device_id, dev_mem, num_thread, total_size, global_offset);
31 | }
32 | else
33 | {
34 | POSIXFile posix_file(file_path, cipher_info);
35 | posix_file.read_file_to_address_parallel(addr, device_id, dev_mem, num_thread, total_size, global_offset,
36 | use_direct_io);
37 | }
38 | }
39 |
40 | void load_file_to_tensor_cpu(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset,
41 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk,
42 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr,
43 | pybind11::array_t iv_arr, int64_t header_size)
44 | {
45 | size_t file_size = get_file_size(file_path.c_str(), use_sfcs_sdk);
46 | size_t read_unaligned_size = 0;
47 | size_t total_size = length > 0 ? length : file_size - offset;
48 | // set cipher
49 | CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size);
50 | if (device_id < 0)
51 | {
52 | read_file(file_path, (char *)res_tensor.data_ptr() + read_unaligned_size, device_id, NULL, num_thread,
53 | total_size, offset, use_sfcs_sdk, use_direct_io, cipher_info);
54 | }
55 | }
56 |
// CPU save path: when ciphering is enabled the tensor bytes are first copied
// into the helper's staging buffer (the backends encrypt the buffer in place,
// so staging protects the caller's tensor data); otherwise the tensor storage
// is handed to the backend directly. The bytes are appended to `file_path`.
void IOHelper::save_tensor_to_file_cpu(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem,
                                       bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr,
                                       pybind11::array_t iv_arr, int64_t header_size)
{
    char *buf;

    CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size);
    if (use_cipher)
    {
        // change use_pinmem attribute may introduce ambiguity
        if (buffer_size_ > 0 && use_pinmem != use_pinmem_)
        {
            throw std::runtime_error("use_pinmem attribute of an exising IOHelper should not be changed");
        }

        // Lazily (re)allocate the staging buffer, growing it when this tensor
        // is larger than anything staged before.
        // NOTE(review): in this build init_buffer is a no-op, so pin_mem is
        // presumably provided elsewhere — confirm the cipher path is reachable.
        if (pin_mem == NULL || length > buffer_size_)
        {
            init_buffer(file_path, length, use_pinmem, use_sfcs_sdk);
        }

        buf = pin_mem;
        memcpy(buf, (char *)tensor.data_ptr(), length);
    }
    else
    {
        buf = (char *)tensor.data_ptr();
    }

    // append=true: tensors are written one after another behind the header.
    if (use_sfcs_sdk)
    {
        SFCSFile sfcs_file(file_path, cipher_info);
        sfcs_file.write_file_from_addr(buf, length, 0, true);
    }
    else
    {
        POSIXFile posix_file(file_path, cipher_info);
        posix_file.write_file_from_addr(buf, length, true);
    }
}
96 |
--------------------------------------------------------------------------------
/veturboio/ops/csrc/io_helper_npu.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | #include "include/io_helper.h"
17 | #include "include/cipher.h"
18 |
// Default destructor: the NPU build keeps no buffer state to release
// (init_buffer below is a no-op).
IOHelper::~IOHelper()
{
}
22 |
23 | // init buffer with given positive size or the size of the file in specified
24 | // path
void IOHelper::init_buffer(string file_path, int64_t buffer_size, bool use_pinmem, bool use_sfcs_sdk)
{
    // Intentionally a no-op in the NPU build: no host staging buffer is
    // allocated; the CPU read/write paths below operate in place.
}
28 |
void IOHelper::free_buffer()
{
    // No-op: init_buffer never allocates in the NPU build.
}
32 |
// NPU-build entry point: delegates to the shared CPU implementation in
// io_helper_cpu_common.cpp (no NPU-specific staging path is implemented).
void IOHelper::load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset,
                                   int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk,
                                   bool use_direct_io, bool use_cipher, pybind11::array_t key_arr,
                                   pybind11::array_t iv_arr, int64_t header_size)
{
    load_file_to_tensor_cpu(file_path, res_tensor, length, offset, device_id, num_thread, use_pinmem, use_sfcs_sdk,
                            use_direct_io, use_cipher, key_arr, iv_arr, header_size);
}
41 |
// NPU-build entry point: forwards to the shared CPU save path
// (io_helper_cpu_common.cpp).
void IOHelper::save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem,
                                   bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr,
                                   pybind11::array_t iv_arr, int64_t header_size)
{
    save_tensor_to_file_cpu(tensor, file_path, length, use_pinmem, use_sfcs_sdk, use_cipher, key_arr, iv_arr,
                            header_size);
}
49 |
--------------------------------------------------------------------------------
/veturboio/ops/csrc/lib/libfastcrypto_gpu.so.0.3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/volcengine/veTurboIO/0ed05516013da4332433784660660ca4d1904505/veturboio/ops/csrc/lib/libfastcrypto_gpu.so.0.3
--------------------------------------------------------------------------------
/veturboio/ops/csrc/posix.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | #include "include/posix.h"
17 | #include "include/logging.h"
18 | #include "include/cipher.h"
19 | #include "include/fastcrypto.h"
20 | #include
21 |
22 | POSIXFile::POSIXFile(std::string file_path)
23 | {
24 | this->file_path = file_path;
25 | }
26 |
27 | POSIXFile::POSIXFile(std::string file_path, CipherInfo cipher_info)
28 | {
29 | this->file_path = file_path;
30 | this->cipher_info = cipher_info;
31 | }
32 |
// Convenience constructor used from Python bindings: builds the CipherInfo
// from the raw key/iv arrays and header size, delegating path setup to the
// basic constructor.
POSIXFile::POSIXFile(std::string file_path, bool use_cipher, pybind11::array_t key_arr,
                     pybind11::array_t iv_arr, size_t header_size)
    : POSIXFile(file_path)
{
    this->cipher_info = CipherInfo(use_cipher, key_arr, iv_arr, header_size);
}
39 |
// Worker body for read_file_to_address_parallel: thread `thread_id` reads its
// `block_size`-sized slice of the file (clamped at `total_size`) into
// `addr + offset`, optionally decrypts it in place, and on CUDA builds stages
// it into device memory.
void POSIXFile::read_file_to_address_thread(int thread_id, char *addr, int device_id, char *dev_mem, size_t block_size,
                                            size_t total_size, size_t global_offset, bool use_direct_io,
                                            CipherInfo cipher_info)
{
    size_t offset = thread_id * block_size;
    size_t read_size = block_size;
    int fd = -1;
    int ret = 0;
    size_t size_read = 0;

    // The last block may be short, or empty when offset overshoots the file.
    if (offset + read_size >= total_size)
    {
        read_size = (total_size > offset) ? total_size - offset : 0;
    }
    // TODO: use_direct_io if sfcs file detected
    if (use_direct_io)
    {
        if ((fd = open(file_path.c_str(), O_RDONLY | O_DIRECT)) < 0)
        {
            // EINVAL means the filesystem rejects O_DIRECT; fall back to
            // buffered I/O below (fd stays -1). Any other errno is fatal.
            if (errno == EINVAL)
            {
                logWarn("open file using directIO failed, fall back to bufferIO", file_path.c_str(),
                        std::strerror(EINVAL));
            }
            else
            {
                logError("open file using directIO failed", file_path.c_str(), std::strerror(errno));
                throw std::runtime_error("veTurboIO Exception: can't apply open operation");
            }
        }
    }

    if (fd == -1)
    {
        if ((fd = open(file_path.c_str(), O_RDONLY)) < 0)
        {
            logError("open file using bufferIO failed", file_path.c_str(), std::strerror(errno));
            throw std::runtime_error("veTurboIO Exception: can't apply open operation");
        }
    }

    // fdopen takes ownership of fd; the fclose below releases both.
    FILE *fp = fdopen(fd, "rb");
    if (fp == NULL)
    {
        logError("can't apply fdopen to file", file_path.c_str(), std::strerror(errno));
        throw std::runtime_error("veTurboIO Exception: can't apply fdopen operation");
    }

    if ((ret = fseek(fp, global_offset + offset, SEEK_SET)) < 0)
    {
        logError("can't apply fseek to file", file_path.c_str(), std::strerror(errno));
        throw std::runtime_error("veTurboIO Exception: can't apply fseek operation");
    }

    // NOTE(review): only a zero-byte read is reported; a short-but-nonzero
    // read passes silently — confirm callers tolerate this.
    if ((size_read = fread(addr + offset, 1, read_size, fp)) == 0)
    {
        logWarn("read file with 0 bytes returned", file_path.c_str(), offset, read_size);
    }

    if ((ret = fclose(fp)) < 0)
    {
        logError("can't apply fclose to file", file_path.c_str(), std::strerror(errno));
        throw std::runtime_error("veTurboIO Exception: can't apply fclose operation");
    }

    // Decrypt if use_cipher is true
    if (cipher_info.use_cipher)
    {
        // The CTR counter is seeded with the logical payload offset: the file
        // offset minus the plaintext cipher header.
        CtrDecrypter dec(cipher_info.mode, cipher_info.key, cipher_info.iv,
                         global_offset + offset - cipher_info.header_size);
        unsigned char *ct = reinterpret_cast(addr + offset);
        int cipher_ret = dec.decrypt_update(ct, read_size, ct);
        if (!cipher_ret)
        {
            throw std::runtime_error("Cipher Exception: decrypt fail");
        }
    }

#if defined(USE_CUDA)
    // Async host-to-device copy of the freshly read slice.
    // NOTE(review): no stream synchronization here — presumably the caller
    // synchronizes before using dev_mem; confirm.
    if (dev_mem != NULL && device_id >= 0)
    {
        cudaSetDevice(device_id);
        cudaMemcpyAsync(dev_mem + offset, addr + offset, read_size, cudaMemcpyHostToDevice);
    }
#elif defined(USE_NPU)
#else
#endif
}
128 |
129 | size_t POSIXFile::read_file_to_address_parallel(char *addr, int device_id, char *dev_mem, int num_thread,
130 | size_t total_size, size_t global_offset, bool use_direct_io)
131 | {
132 | vector threads(num_thread);
133 |
134 | size_t block_size = (size_t)ceil((double)total_size / num_thread);
135 | // align the block_size;
136 | block_size = (block_size + BUF_ALIGN_SIZE - 1) / BUF_ALIGN_SIZE * BUF_ALIGN_SIZE;
137 | // re-caculate the real needed thread num;
138 | num_thread = (total_size + block_size - 1) / block_size;
139 |
140 | for (int thread_id = 0; thread_id < num_thread; thread_id++)
141 | {
142 | threads[thread_id] = std::thread(&POSIXFile::read_file_to_address_thread, this, thread_id, addr, device_id,
143 | dev_mem, block_size, total_size, global_offset, use_direct_io, cipher_info);
144 | }
145 |
146 | for (int thread_id = 0; thread_id < num_thread; thread_id++)
147 | {
148 | threads[thread_id].join();
149 | }
150 |
151 | return total_size;
152 | }
153 |
// Read `length` bytes at `offset` from the file into the numpy array's
// backing memory. madvise(MADV_HUGEPAGE) hints the kernel to back the
// destination with transparent huge pages before the parallel read; its
// return value is deliberately ignored (best-effort hint).
size_t POSIXFile::read_file_to_array(pybind11::array_t arr, size_t length, size_t offset, int num_thread,
                                     bool use_direct_io)
{
    pybind11::buffer_info buf_info = arr.request();
    char *addr = static_cast(buf_info.ptr);
    madvise(addr, length, MADV_HUGEPAGE);
    // device_id = -1 / dev_mem = NULL: host-only read.
    return read_file_to_address_parallel(addr, -1, NULL, num_thread, length, offset, use_direct_io);
}
162 |
163 | size_t POSIXFile::write_file_from_addr(char *addr, size_t length, bool append)
164 | {
165 | int fd;
166 | int flags = O_WRONLY;
167 | size_t ret;
168 | size_t count;
169 | char *src = addr;
170 | size_t offset = 0;
171 |
172 | if (append)
173 | {
174 | struct stat st;
175 | stat(file_path.c_str(), &st);
176 | offset = st.st_size;
177 | flags |= O_APPEND;
178 | }
179 |
180 | if (cipher_info.use_cipher)
181 | {
182 | size_t h_off = cipher_info.header_size;
183 | CtrEncrypter enc(cipher_info.mode, cipher_info.key, cipher_info.iv, offset - h_off);
184 | unsigned char *pt = reinterpret_cast(addr);
185 | int cipher_ret = enc.encrypt_update(pt, length, pt);
186 | if (!cipher_ret)
187 | {
188 | throw std::runtime_error("Cipher Exception: encrypt fail");
189 | }
190 | }
191 |
192 | fd = open(file_path.c_str(), flags);
193 | if (fd < 0)
194 | {
195 | logError("open failed", file_path.c_str(), std::strerror(errno));
196 | throw std::runtime_error("veTurboIO Exception: open failed");
197 | }
198 |
199 | count = length;
200 | while (count > 0)
201 | {
202 | ret = write(fd, src, count);
203 | if (ret < 0)
204 | {
205 | logError("Failed to write file", file_path.c_str());
206 | throw std::runtime_error("veTurboIO Exception: write file");
207 | }
208 | count -= ret;
209 | src += ret;
210 | }
211 | close(fd);
212 | return length;
213 | }
214 |
--------------------------------------------------------------------------------
/veturboio/ops/csrc/pybind.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | #include "include/io_helper.h"
17 | #include "include/sfcs.h"
18 | #include "include/cipher.h"
19 |
// Python bindings for the veturboio_ext extension: exposes the tensor I/O
// helper, the POSIX and SFCS file wrappers, and the CTR cipher wrappers.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
    // Tensor load/save entry points (implemented in io_helper*.cpp).
    py::class_(m, "IOHelper")
        .def(py::init<>())
        .def("load_file_to_tensor", &IOHelper::load_file_to_tensor)
        .def("save_tensor_to_file", &IOHelper::save_tensor_to_file);

    // Plain POSIX reader (consumed by veturboio/ops/posix_utils.py).
    py::class_(m, "POSIXFile")
        .def(py::init())
        .def(py::init, pybind11::array_t, size_t>())
        .def("read_file_to_array", &POSIXFile::read_file_to_array);

    // SFCS filesystem-level operations.
    py::class_(m, "SFCSFs")
        .def(py::init<>())
        .def("mkdir", &SFCSFs::mkdir)
        .def("read_multi_files", &SFCSFs::read_multi_files)
        .def("write_multi_files", &SFCSFs::write_multi_files)
        .def("get_multi_file_size", &SFCSFs::get_multi_file_size);

    // Single-file SFCS operations.
    py::class_(m, "SFCSFile")
        .def(py::init())
        .def(py::init, pybind11::array_t, size_t>())
        .def("get_file_size", &SFCSFile::get_file_size)
        .def("read_file_to_array", &SFCSFile::read_file_to_array)
        .def("write_file_from_array", &SFCSFile::write_file_from_array)
        .def("write_file_from_tensors", &SFCSFile::write_file_from_tensors)
        .def("delete_file", &SFCSFile::delete_file);

    // CTR-mode encrypt/decrypt wrappers (see include/fastcrypto.h).
    py::class_(m, "CtrEncWrap")
        .def(py::init, pybind11::array_t, size_t>())
        .def("encrypt_update", &CtrEncWrap::encrypt_update);

    py::class_(m, "CtrDecWrap")
        .def(py::init, pybind11::array_t, size_t>())
        .def("decrypt_update", &CtrDecWrap::decrypt_update);
}
56 |
--------------------------------------------------------------------------------
/veturboio/ops/io_utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import json
18 | import os
19 | from typing import Dict, Optional
20 |
21 | import numpy as np
22 | import torch
23 | from loguru import logger
24 | from safetensors.torch import save_file as safetensors_save_file
25 |
26 | from veturboio.ops.cipher import CipherInfo, CipherMode, create_cipher_with_header, encrypt
27 | from veturboio.ops.sfcs_utils import sfcs_delete_file, sfcs_write_file, sfcs_write_file_in_parallel
28 | from veturboio.safetensors import parse_state_dict
29 | from veturboio.types import FILE_PATH
30 |
31 | try:
32 | import veturboio_ext
33 |
34 | IOHelper = veturboio_ext.IOHelper
35 | except ImportError:
36 | IOHelper = None
37 | logger.warning("veturboio_ext not found, fallback to pure python implementation")
38 |
39 |
def load_file_to_tensor(
    file_path: str,
    total_tensor: torch.Tensor,
    offset: int,
    helper: IOHelper,
    length: int = 0,
    device_id: Optional[int] = -1,
    num_thread: Optional[int] = 32,
    use_pinmem: Optional[bool] = False,
    use_sfcs_sdk: Optional[bool] = False,
    use_direct_io: Optional[bool] = False,
    cipher_info: CipherInfo = CipherInfo(False),
) -> torch.Tensor:
    """Read file bytes into ``total_tensor`` through the C++ IOHelper extension.

    ``length == 0`` asks the native side to read to the end of the file;
    ``device_id == -1`` means a host (CPU) read.
    """
    # The native API takes the cipher fields unpacked, with the header size
    # collapsed to 0 when no plaintext cipher header is present.
    header_size = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
    return helper.load_file_to_tensor(
        file_path,
        total_tensor,
        length,
        offset,
        device_id,
        num_thread,
        use_pinmem,
        use_sfcs_sdk,
        use_direct_io,
        cipher_info.use_cipher,
        cipher_info.key,
        cipher_info.iv,
        header_size,
    )
68 |
69 |
def save_tensor_to_file(
    tensor: torch.Tensor,
    file_path: FILE_PATH,
    length: int,
    helper: IOHelper,
    use_pinmem: Optional[bool] = False,
    use_sfcs_sdk: Optional[bool] = False,
    cipher_info: CipherInfo = CipherInfo(False),
):
    """Append ``length`` bytes of ``tensor`` to ``file_path`` via the C++ IOHelper."""
    # Unpack the cipher config the way the native signature expects it.
    header_size = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
    return helper.save_tensor_to_file(
        tensor,
        file_path,
        length,
        use_pinmem,
        use_sfcs_sdk,
        cipher_info.use_cipher,
        cipher_info.key,
        cipher_info.iv,
        header_size,
    )
90 |
91 |
def save_file(
    state_dict: Dict[str, torch.Tensor],
    filename: FILE_PATH,
    helper: IOHelper,
    metadata: Optional[Dict[str, str]] = None,
    use_sfcs_sdk: bool = False,
    cipher_info: CipherInfo = CipherInfo(False),
):
    """Serialize ``state_dict`` to ``filename`` in safetensors layout.

    Falls back to the pure-python safetensors writer when the C++ helper is
    unavailable (encryption is not supported on that path).
    """
    if helper is None:
        if cipher_info.use_cipher:
            logger.warning("helper is None, cipher is not supported in pure python implementation")
        return safetensors_save_file(state_dict, filename, metadata=metadata)

    meta, tensors, sizes, offsets = parse_state_dict(state_dict)

    if metadata:
        meta["__metadata__"] = metadata

    meta_bytes = json.dumps(meta).encode('utf-8')
    meta_len = len(meta_bytes)

    # Pad the JSON header with spaces up to an 8-byte boundary.
    if meta_len % 8 != 0:
        padded_len = (meta_len + 8) // 8 * 8
        meta_bytes += b' ' * (padded_len - meta_len)
        meta_len = padded_len

    # safetensors header: 8-byte little-endian length followed by the JSON.
    st_header_bytes = meta_len.to_bytes(8, 'little') + meta_bytes
    st_header_len = len(st_header_bytes)

    if use_sfcs_sdk:
        # The SFCS path writes header and tensors in one parallel operation.
        sfcs_write_file_in_parallel(filename, tensors, sizes, offsets, st_header_bytes, st_header_len, cipher_info)
        return

    with open(filename, "wb") as f:
        if not cipher_info.use_cipher:
            f.write(st_header_bytes)
        else:
            if cipher_info.use_header:
                f.write(cipher_info.to_header_bytes())
            # Encrypt the safetensors header before it hits disk.
            enc_st_header_arr = np.zeros(st_header_len, dtype=np.uint8)
            encrypt(cipher_info, np.frombuffer(st_header_bytes, dtype=np.uint8), enc_st_header_arr, 0)
            f.write(enc_st_header_arr.tobytes())

    # Tensors are appended one by one behind the header by the native helper.
    for tensor, size in zip(tensors, sizes):
        save_tensor_to_file(
            tensor,
            filename,
            size,
            helper=helper,
            use_pinmem=False,
            use_sfcs_sdk=use_sfcs_sdk,
            cipher_info=cipher_info,
        )
148 |
149 |
def init_io_helper() -> IOHelper:
    # Factory for the C++ extension's IOHelper. NOTE(review): when the
    # veturboio_ext import above failed, IOHelper is None and this call raises
    # TypeError ("'NoneType' object is not callable").
    return IOHelper()
152 |
--------------------------------------------------------------------------------
/veturboio/ops/posix_utils.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | from typing import Optional
18 |
19 | import numpy as np
20 | from loguru import logger
21 |
22 | from veturboio.ops.cipher import CipherInfo
23 |
24 | try:
25 | from veturboio.utils.load_veturboio_ext import load_veturboio_ext
26 |
27 | veturboio_ext = load_veturboio_ext()
28 | IOHelper = veturboio_ext.IOHelper
29 | POSIXFile = veturboio_ext.POSIXFile
30 | except ImportError:
31 | POSIXFile = None
32 | logger.warning("veturboio_ext not found, fallback to pure python implementation")
33 |
34 |
def posix_read_file(
    file_path: str,
    arr: np.ndarray,
    length: int,
    offset: int,
    num_thread: Optional[int] = 1,
    cipher_info: CipherInfo = CipherInfo(False),
    use_direct_io: bool = False,
) -> int:
    """Read ``length`` bytes at ``offset`` of ``file_path`` into ``arr``.

    Delegates to the native POSIXFile reader; returns the byte count the
    native side reports.
    """
    header_size = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
    reader = POSIXFile(
        file_path,
        cipher_info.use_cipher,
        cipher_info.key,
        cipher_info.iv,
        header_size,
    )
    return reader.read_file_to_array(arr, length, offset, num_thread, use_direct_io)
52 |
--------------------------------------------------------------------------------
/veturboio/safetensors.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import json
18 | import os
19 | import pprint
20 | from multiprocessing import shared_memory
21 | from typing import Callable, Dict, List, Optional
22 |
23 | import numpy as np
24 | import torch
25 | from loguru import logger
26 |
27 | from veturboio.loader import BaseLoader
28 | from veturboio.ops.cipher import CipherInfo
29 | from veturboio.types import FILE_PATH
30 |
31 | # All safetensors file will start with a json string, which is the meta info of the file.
32 | # We use the beginning char to determine whether it is a safetensors file. The beginning
33 | # char is '{' and its ascii code is 123.
34 | SAFETENSORS_FILE_MAGIC_NUM = 123
35 |
36 | _safetensors_dtype_mapper = {
37 | "F64": torch.float64,
38 | "F32": torch.float32,
39 | "F16": torch.float16,
40 | "BF16": torch.bfloat16,
41 | "I64": torch.int64,
42 | "I32": torch.int32,
43 | "I16": torch.int16,
44 | "I8": torch.int8,
45 | "U8": torch.uint8,
46 | "BOOL": torch.bool,
47 | }
48 |
49 |
def only_safetensors_property(func: Callable):
    """Guard a SafetensorsFile accessor so it only runs on valid files.

    Accessing the wrapped property on an invalid safetensors file logs a
    warning (attributed to the original accessor name) and yields ``None``
    instead of calling ``func``.
    """
    name = func.__name__
    warning_msg = "This safetensors file is invalid, will take it as a normal torch file."

    def guarded(self, *args, **kwargs):
        if self.is_valid:
            return func(self, *args, **kwargs)
        # Patch the log record so the warning points at the real accessor,
        # not this wrapper.
        logger.patch(lambda r: r.update(function=name)).warning(warning_msg)
        return None

    return guarded
61 |
62 |
class TensorMeta:
    """Metadata for one tensor inside a safetensors file: name, torch dtype,
    shape and [begin, end) byte offsets within the payload."""

    def __init__(self, name: str, dtype: str, shape: List[int], data_offsets: List[int]) -> None:
        self._name = name
        # Translate the safetensors dtype tag (e.g. "F32") into a torch.dtype.
        self._dtype = _safetensors_dtype_mapper[dtype]
        self._shape = shape
        self._data_offsets = data_offsets

    @property
    def name(self) -> str:
        return self._name

    @property
    def dtype(self) -> torch.dtype:
        return self._dtype

    @property
    def shape(self) -> List[int]:
        return self._shape

    @property
    def data_offsets(self) -> List[int]:
        return self._data_offsets

    def __str__(self) -> str:
        fields = {
            "name": self._name,
            "dtype": self._dtype,
            "shape": self._shape,
            "data_offsets": self._data_offsets,
        }
        return str(fields)

    def __repr__(self) -> str:
        return str(self)
98 |
99 |
class SafetensorsFile:
    """Lazy view over a (possibly encrypted) safetensors file.

    Parses the optional cipher header, validates the safetensors magic byte
    and loads the JSON metadata. On validation failure the instance marks
    itself invalid and ``load`` falls back to treating the file as a plain
    torch checkpoint.
    """

    def __init__(self, file: FILE_PATH, loader: BaseLoader, use_cipher: Optional[bool] = None) -> None:
        self._file = file
        self._loader = loader

        self._is_valid = True

        # cipher related: enabled explicitly via use_cipher=True, or when
        # use_cipher is None and the VETURBOIO_USE_CIPHER env toggle is "1"
        # (note the precedence: `A or (B and C)`).
        self._cipher_info = CipherInfo(False)
        if use_cipher == True or use_cipher == None and os.getenv("VETURBOIO_USE_CIPHER", "0") == "1":
            header_bytes = loader.load_to_bytes(offset=0, count=CipherInfo.HEADER_SIZE)
            self._cipher_info = CipherInfo(True, header_bytes, os.path.abspath(self.file))

        # All subsequent offsets skip the plaintext cipher header, if present.
        if self._cipher_info.use_header:
            h_off = CipherInfo.HEADER_SIZE
        else:
            h_off = 0

        # Byte 8 (right after the little-endian metadata length) must be '{'
        # (ASCII 123) for a valid safetensors file.
        magic_number = loader.load_to_bytes(offset=8 + h_off, count=1, cipher_info=self._cipher_info)[0]
        if magic_number != SAFETENSORS_FILE_MAGIC_NUM:
            self._is_valid = False
            return

        # First 8 bytes: length of the JSON metadata block.
        self._meta_size = np.frombuffer(
            loader.load_to_bytes(offset=h_off, count=8, cipher_info=self._cipher_info), dtype=np.int64
        )[0]
        meta_bytes = loader.load_to_bytes(offset=8 + h_off, count=self._meta_size, cipher_info=self._cipher_info)
        meta_dict = json.loads(meta_bytes.decode("utf-8"))

        # "__metadata__" entries whose value names another tensor in the file
        # describe shared tensors (aliases); everything else is remembered but
        # otherwise ignored.
        self._shared_tensor = {}
        self._ignored_meta = {}
        if "__metadata__" in meta_dict:
            meta_data = meta_dict.pop("__metadata__")
            for key, value in meta_data.items():
                if value not in meta_dict:
                    self._ignored_meta[key] = value
                else:
                    self._shared_tensor[key] = value

        self._meta = {}
        for key in meta_dict:
            self._meta[key] = TensorMeta(
                name=key,
                dtype=meta_dict[key]["dtype"],
                shape=meta_dict[key]["shape"],
                data_offsets=meta_dict[key]["data_offsets"],
            )

        # record the offset of the tensor data
        self._tensor_offset = np.dtype(np.int64).itemsize + self._meta_size + h_off

    @staticmethod
    def split_tensor_to_state_dict(
        total_tensor: torch.Tensor, safetensor_file: "SafetensorsFile"
    ) -> Dict[str, torch.Tensor]:
        """Slice the flat payload tensor into named tensors per the metadata."""
        state_dict = {}

        for tensor_meta in safetensor_file.meta.values():
            tensor = total_tensor[tensor_meta.data_offsets[0] : tensor_meta.data_offsets[1]]
            tensor = tensor.view(dtype=tensor_meta.dtype)
            tensor = tensor.reshape(tensor_meta.shape)
            state_dict[tensor_meta.name] = tensor

        # Shared tensors alias the entry of their target tensor.
        for src_tensor_key, tgt_tensor_key in safetensor_file.shared_tensor.items():
            state_dict[src_tensor_key] = state_dict[tgt_tensor_key]
        return state_dict

    @property
    def file(self) -> FILE_PATH:
        return self._file

    @property
    def is_valid(self) -> bool:
        return self._is_valid

    @property
    @only_safetensors_property
    def meta_size(self) -> int:
        return self._meta_size

    @property
    @only_safetensors_property
    def meta(self) -> Dict[str, TensorMeta]:
        return self._meta

    @property
    @only_safetensors_property
    def tensor_offset(self) -> int:
        return self._tensor_offset

    @property
    @only_safetensors_property
    def shared_tensor(self) -> Dict[str, str]:
        return self._shared_tensor

    def __str__(self) -> str:
        if not self._is_valid:
            return f"{self.file} is not a valid safetensors file."
        return pprint.pformat(
            {
                "file": self._file,
                "meta_size": self._meta_size,
                "meta": self._meta,
                "tensor_offset": self._tensor_offset,
            }
        )

    def __repr__(self) -> str:
        return self.__str__()

    def load(self, map_location: str = "cpu", state_dict: Dict[str, torch.Tensor] = None) -> Dict[str, torch.Tensor]:
        # Invalid files are handed to the torch-checkpoint fallback path.
        if not self._is_valid:
            return self._loader.load_pt(map_location, self._cipher_info)
        else:
            return self._loader.load_safetensors(self, map_location, state_dict)

    def load_to_shmem(self) -> shared_memory.SharedMemory:
        return self._loader.load_to_shmem(self._cipher_info)
218 |
219 |
def parse_state_dict(state_dict: Dict[str, torch.Tensor]):
    """Flatten a state dict into safetensors layout.

    Returns ``(meta, tensors, sizes, offsets)``: ``meta`` maps tensor names to
    their dtype/shape/data_offsets entries; the three lists hold, for every
    non-empty tensor, the tensor itself, its byte size and its starting byte
    offset in the serialized payload.

    Bool tensors are intentionally laid out AFTER all other tensors,
    preserving the original serialization order.
    """
    meta = {}
    tensors = []
    sizes = []
    offsets = []

    dtype_to_tag = {v: k for k, v in _safetensors_dtype_mapper.items()}

    # Ordering contract: non-bool tensors first (in state_dict order), then
    # every bool tensor.
    non_bool = [(k, t) for k, t in state_dict.items() if t.dtype != torch.bool]
    bools = [(k, t) for k, t in state_dict.items() if t.dtype == torch.bool]

    data_offset_begin = 0
    for key, tensor in non_bool + bools:
        # numel() * element_size() replaces the old finfo/iinfo probing, which
        # shadowed the builtin `bytes` and relied on a bare `except:`; it
        # yields the same byte count for every supported dtype (bool is 1B).
        size = tensor.numel() * tensor.element_size()
        data_offset_end = data_offset_begin + size
        meta[key] = {
            "dtype": dtype_to_tag[tensor.dtype],
            "shape": tensor.shape,
            "data_offsets": [data_offset_begin, data_offset_end],
        }
        # Zero-sized tensors appear in the metadata but carry no payload.
        if size > 0:
            tensors.append(tensor)
            sizes.append(size)
            offsets.append(data_offset_begin)
        data_offset_begin = data_offset_end

    return meta, tensors, sizes, offsets
274 |
--------------------------------------------------------------------------------
/veturboio/saver/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | from veturboio.saver.base_saver import BaseSaver, PosixSaver
18 | from veturboio.saver.sfcs_client_saver import SfcsClientSaver
19 |
20 | __all__ = ["BaseSaver", "PosixSaver", "SfcsClientSaver"]
21 |
--------------------------------------------------------------------------------
/veturboio/saver/base_saver.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import os
18 | import tempfile
19 | from typing import Any, Dict
20 |
21 | import numpy as np
22 | import torch
23 | from safetensors.torch import save_file as safetenors_save_file
24 | from safetensors.torch import save_model as safetensors_save_model
25 |
26 | from veturboio.ops.cipher import CipherInfo, CipherMode, create_cipher_with_header, encrypt
27 | from veturboio.ops.io_utils import IOHelper
28 | from veturboio.ops.io_utils import save_file as fast_save_file
29 | from veturboio.types import FILE_PATH
30 |
31 |
class BaseSaver:
    """Common interface for checkpoint savers.

    Concrete subclasses implement the actual persistence backend; this base
    class only records which backend *method* is in use.
    """

    def __init__(self, method: str) -> None:
        # Name of the saving backend, e.g. "posix" or "client".
        self.method = method

    def save_file(self, state_dict: Dict[str, torch.Tensor], file: FILE_PATH, metadata: Dict[str, str] = None) -> None:
        """Write *state_dict* to *file*. Subclasses must override."""
        raise NotImplementedError

    def save_model(self, model: torch.nn.Module, file: FILE_PATH) -> None:
        """Write *model*'s tensors to *file*. Subclasses must override."""
        raise NotImplementedError
41 |
42 |
class PosixSaver(BaseSaver):
    """Saver that writes checkpoints to a local POSIX filesystem path.

    Encryption is enabled either by the *use_cipher* constructor argument or
    by setting VETURBOIO_USE_CIPHER=1; when VETURBOIO_CIPHER_HEADER=1 is also
    set, a serialized cipher header is prepended to the output file.
    """

    def __init__(self, file: FILE_PATH, helper: IOHelper = None, use_cipher: bool = False) -> None:
        super().__init__(method="posix")
        self.file = file
        # Environment variables may force encryption on even when the caller
        # did not request it explicitly.
        use_cipher = use_cipher or os.getenv("VETURBOIO_USE_CIPHER", "0") == "1"
        use_header = use_cipher and os.getenv("VETURBOIO_CIPHER_HEADER", "0") == "1"
        if use_header:
            self.cipher_info = create_cipher_with_header(CipherMode.CTR_128, os.path.abspath(self.file))
        else:
            self.cipher_info = CipherInfo(use_cipher, None, os.path.abspath(self.file))

        self.helper = helper

    def _encrypt_tmp_to_file(self, tmp_file_path: FILE_PATH) -> None:
        """Encrypt the plaintext staging file at *tmp_file_path* into self.file.

        Prepends the serialized cipher header when self.cipher_info.use_header
        is set; the encrypted payload then starts at offset HEADER_SIZE.
        """
        tmp_file_size = os.path.getsize(tmp_file_path)
        tmp_file_bytes = np.memmap(tmp_file_path, dtype=np.uint8, mode='r', shape=tmp_file_size)
        h_off = CipherInfo.HEADER_SIZE if self.cipher_info.use_header else 0
        file_bytes = np.memmap(self.file, dtype=np.uint8, mode='w+', shape=tmp_file_size + h_off)
        encrypt(self.cipher_info, tmp_file_bytes, file_bytes[h_off:], 0)
        if h_off:
            file_bytes[:h_off] = np.frombuffer(self.cipher_info.to_header_bytes(), dtype=np.uint8)
        file_bytes.flush()

    def save_file(
        self, state_dict: Dict[str, torch.Tensor], metadata: Dict[str, str] = None, enable_fast_mode: bool = False
    ) -> None:
        """Save *state_dict* in safetensors format to self.file.

        With *enable_fast_mode* the native fast writer handles encryption
        itself; otherwise the file is serialized first and, if encryption is
        on, staged in /dev/shm and encrypted into the destination.
        """
        if enable_fast_mode:
            fast_save_file(
                state_dict,
                self.file,
                helper=self.helper,
                metadata=metadata,
                cipher_info=self.cipher_info,
            )
        elif self.cipher_info.use_cipher:
            # Stage the plaintext in /dev/shm so it never touches persistent
            # storage, then encrypt it into the destination file.
            with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
                safetenors_save_file(state_dict, tmpfile.name, metadata=metadata)
                self._encrypt_tmp_to_file(tmpfile.name)
        else:
            safetenors_save_file(state_dict, self.file, metadata=metadata)

    def save_model(self, model: torch.nn.Module) -> None:
        """Save *model*'s tensors in safetensors format to self.file."""
        if self.cipher_info.use_cipher:
            with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
                safetensors_save_model(model, tmpfile.name)
                self._encrypt_tmp_to_file(tmpfile.name)
        else:
            safetensors_save_model(model, self.file)

    def save_pt(self, state_dict: Dict[str, torch.Tensor]) -> None:
        """Save *state_dict* with torch.save (pickle format) to self.file."""
        if self.cipher_info.use_cipher:
            with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
                torch.save(state_dict, tmpfile.name)
                self._encrypt_tmp_to_file(tmpfile.name)
        else:
            torch.save(state_dict, self.file)
114 |
--------------------------------------------------------------------------------
/veturboio/saver/sfcs_client_saver.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import os
18 | import tempfile
19 | from typing import Any, Dict
20 |
21 | import numpy as np
22 | import torch
23 | from safetensors.torch import save_file as safetenors_save_file
24 | from safetensors.torch import save_model as safetensors_save_model
25 |
26 | from veturboio.ops.cipher import CipherInfo, CipherMode, create_cipher_with_header
27 | from veturboio.ops.io_utils import IOHelper
28 | from veturboio.ops.io_utils import save_file as fast_save_file
29 | from veturboio.ops.sfcs_utils import (
30 | init_sfcs_conf,
31 | path_mapper,
32 | sfcs_delete_file,
33 | sfcs_write_file,
34 | sfcs_write_file_in_parallel,
35 | )
36 | from veturboio.saver.base_saver import BaseSaver
37 | from veturboio.types import FILE_PATH
38 |
39 |
class SfcsClientSaver(BaseSaver):
    """Saver that writes checkpoints to SFCS through the client SDK.

    Checkpoints are serialized to a /dev/shm staging file and then handed to
    the SFCS write path together with self.cipher_info.
    """

    def __init__(
        self,
        file: FILE_PATH,
        helper: IOHelper = None,
        use_cipher: bool = False,
    ) -> None:
        super().__init__(method="client")

        self.file = file
        self.helper = helper

        # Map the user-visible path to the path understood by the SFCS client.
        mount_path = init_sfcs_conf(file)
        self.sfcs_valid_path = path_mapper(self.file, mount_path)

        # Environment variables may force encryption on even when the caller
        # did not request it explicitly.
        use_cipher = use_cipher or os.getenv("VETURBOIO_USE_CIPHER", "0") == "1"
        use_header = use_cipher and os.getenv("VETURBOIO_CIPHER_HEADER", "0") == "1"
        if use_header:
            self.cipher_info = create_cipher_with_header(CipherMode.CTR_128, os.path.abspath(self.file))
        else:
            self.cipher_info = CipherInfo(use_cipher, None, os.path.abspath(self.file))

    def _upload_staged_file(self, file_path: FILE_PATH) -> None:
        """Send the locally staged file at *file_path* to SFCS.

        When a cipher header is in use, the header bytes are prepended in
        memory before the upload; otherwise the staged file is memory-mapped
        and passed to the SFCS client directly.
        """
        file_size = os.path.getsize(file_path)
        if self.cipher_info.use_header:
            h_off = CipherInfo.HEADER_SIZE
            file_bytes = np.empty(file_size + h_off, dtype=np.byte)
            file_bytes[:h_off] = np.frombuffer(self.cipher_info.to_header_bytes(), dtype=np.byte)
            file_bytes[h_off:] = np.fromfile(file_path, dtype=np.byte, count=file_size)
        else:
            # NOTE(review): mode 'r+' permits in-place mutation of the staged
            # file — presumably sfcs_write_file may encrypt the buffer in
            # place; confirm before downgrading this to read-only 'r'.
            file_bytes = np.memmap(file_path, dtype=np.byte, mode='r+', shape=file_size)
        sfcs_write_file(self.sfcs_valid_path, file_bytes, len(file_bytes), self.cipher_info)

    def save_file(
        self, state_dict: Dict[str, torch.Tensor], metadata: Dict[str, str] = None, enable_fast_mode: bool = False
    ) -> None:
        """Save *state_dict* in safetensors format to the SFCS path.

        With *enable_fast_mode* the native fast writer talks to the SFCS SDK
        directly; otherwise the file is staged in /dev/shm and uploaded.
        """
        if enable_fast_mode:
            fast_save_file(
                state_dict,
                self.sfcs_valid_path,
                helper=self.helper,
                metadata=metadata,
                cipher_info=self.cipher_info,
                use_sfcs_sdk=True,
            )
        else:
            with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
                safetenors_save_file(state_dict, tmpfile.name, metadata=metadata)
                self._upload_staged_file(tmpfile.name)

    def save_model(self, model: torch.nn.Module) -> None:
        """Save *model*'s tensors in safetensors format to the SFCS path."""
        with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
            safetensors_save_model(model, tmpfile.name)
            self._upload_staged_file(tmpfile.name)

    def save_pt(self, state_dict: Dict[str, torch.Tensor]) -> None:
        """Save *state_dict* with torch.save (pickle format) to the SFCS path."""
        with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
            torch.save(state_dict, tmpfile.name)
            self._upload_staged_file(tmpfile.name)
118 |
--------------------------------------------------------------------------------
/veturboio/types.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import os
18 | from typing import Union
19 |
# Any value accepted as a filesystem path by this package: a str, raw
# bytes, or an os.PathLike object (e.g. pathlib.Path).
FILE_PATH = Union[str, bytes, os.PathLike]
21 |
--------------------------------------------------------------------------------
/veturboio/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/volcengine/veTurboIO/0ed05516013da4332433784660660ca4d1904505/veturboio/utils/__init__.py
--------------------------------------------------------------------------------
/veturboio/utils/load_veturboio_ext.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
17 | import os
18 |
19 | from loguru import logger
20 |
21 | LIBCFS_DEFAULT_URL = "https://veturbo-cn-beijing.tos-cn-beijing.volces.com/veturboio/libcfs/libcloudfs.so"
22 | LIBCFS_DEFAULT_PATH = "/usr/lib/libcloudfs.so"
23 |
24 |
def load_libcfs():
    """Ensure libcloudfs.so exists locally, downloading it if missing.

    The target path defaults to LIBCFS_DEFAULT_PATH and can be overridden
    with the LIBCFS_PATH env var; likewise LIBCFS_URL overrides the
    download URL.

    Raises:
        requests.HTTPError: if the download URL returns an HTTP error status.
    """
    libcfs_path = os.getenv("LIBCFS_PATH", LIBCFS_DEFAULT_PATH)
    if not os.path.isfile(libcfs_path):
        # libcfs_path not exist, download from url.
        # Lazy import: requests is only needed on the download path.
        import requests

        libcfs_url = os.getenv("LIBCFS_URL", LIBCFS_DEFAULT_URL)
        logger.info(f"download libcloudfs.so from {libcfs_url}, save to {libcfs_path}")
        r = requests.get(libcfs_url, timeout=60)
        # Fail loudly on HTTP errors instead of silently writing an error
        # page into the shared-library file.
        r.raise_for_status()
        with open(libcfs_path, 'wb') as f:
            f.write(r.content)
36 |
37 |
def load_veturboio_ext():
    """Import and return the compiled ``veturboio_ext`` extension module.

    ``load_libcfs`` runs first so that libcloudfs.so is on disk before the
    extension is imported (presumably a runtime dependency of the
    extension — confirm against the native build).
    """
    load_libcfs()
    import veturboio_ext

    return veturboio_ext
43 |
--------------------------------------------------------------------------------
/veturboio/version.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd.
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | '''
16 |
# Single source of the package version string (PEP 440; "rc" marks a
# release candidate).
__version__ = "0.1.3rc4"
18 |
--------------------------------------------------------------------------------