├── .clang-format ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── README.zh.md ├── bench ├── io_bench.py └── io_bench.sh ├── docs ├── encrypt_model.md ├── imgs │ └── SFCS.png ├── index.md └── pin_mem.md ├── mkdocs.yml ├── pyproject.toml ├── setup.py ├── tests ├── test_assert_exceptions.py ├── test_convert.py ├── test_fetch_cipher.py ├── test_load_op.py ├── test_save_op.py ├── test_sfcs_sdk_op.py └── test_share_tensor_cpu.py └── veturboio ├── __init__.py ├── convert.py ├── io.py ├── loader ├── __init__.py ├── base_loader.py ├── faster_posix_loader.py └── sfcs_client_loader.py ├── ops ├── __init__.py ├── cipher.py ├── consts.py ├── csrc │ ├── cipher.cpp │ ├── include │ │ ├── cfs.h │ │ ├── cfsaio.h │ │ ├── cipher.h │ │ ├── common.h │ │ ├── fastcrypto.h │ │ ├── io_helper.h │ │ ├── logging.h │ │ ├── posix.h │ │ └── sfcs.h │ ├── io_helper.cu │ ├── io_helper_cpu.cpp │ ├── io_helper_cpu_common.cpp │ ├── io_helper_npu.cpp │ ├── lib │ │ └── libfastcrypto_gpu.so.0.3 │ ├── posix.cpp │ ├── pybind.cpp │ └── sfcs.cpp ├── io_utils.py ├── posix_utils.py └── sfcs_utils.py ├── safetensors.py ├── saver ├── __init__.py ├── base_saver.py └── sfcs_client_saver.py ├── types.py ├── utils ├── __init__.py └── load_veturboio_ext.py └── version.py /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Microsoft 4 | AccessModifierOffset: -2 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveMacros: false 7 | AlignConsecutiveAssignments: false 8 | AlignConsecutiveDeclarations: false 9 | AlignEscapedNewlines: Right 10 | AlignOperands: true 11 | AlignTrailingComments: true 12 | AllowAllArgumentsOnNextLine: true 13 | AllowAllConstructorInitializersOnNextLine: true 14 | AllowAllParametersOfDeclarationOnNextLine: true 15 | AllowShortBlocksOnASingleLine: Never 16 | AllowShortCaseLabelsOnASingleLine: false 17 | AllowShortFunctionsOnASingleLine: None 18 | 
AllowShortLambdasOnASingleLine: All 19 | AllowShortIfStatementsOnASingleLine: Never 20 | AllowShortLoopsOnASingleLine: false 21 | AlwaysBreakAfterDefinitionReturnType: None 22 | AlwaysBreakAfterReturnType: None 23 | AlwaysBreakBeforeMultilineStrings: false 24 | AlwaysBreakTemplateDeclarations: MultiLine 25 | BinPackArguments: true 26 | BinPackParameters: true 27 | BraceWrapping: 28 | AfterCaseLabel: false 29 | AfterClass: true 30 | AfterControlStatement: true 31 | AfterEnum: true 32 | AfterFunction: true 33 | AfterNamespace: true 34 | AfterObjCDeclaration: true 35 | AfterStruct: true 36 | AfterUnion: false 37 | AfterExternBlock: true 38 | BeforeCatch: true 39 | BeforeElse: true 40 | IndentBraces: false 41 | SplitEmptyFunction: true 42 | SplitEmptyRecord: true 43 | SplitEmptyNamespace: true 44 | BreakBeforeBinaryOperators: None 45 | BreakBeforeBraces: Custom 46 | BreakBeforeInheritanceComma: false 47 | BreakInheritanceList: BeforeColon 48 | BreakBeforeTernaryOperators: true 49 | BreakConstructorInitializersBeforeComma: false 50 | BreakConstructorInitializers: BeforeColon 51 | BreakAfterJavaFieldAnnotations: false 52 | BreakStringLiterals: true 53 | ColumnLimit: 120 54 | CommentPragmas: '^ IWYU pragma:' 55 | CompactNamespaces: false 56 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 57 | ConstructorInitializerIndentWidth: 4 58 | ContinuationIndentWidth: 4 59 | Cpp11BracedListStyle: true 60 | DeriveLineEnding: true 61 | DerivePointerAlignment: false 62 | DisableFormat: false 63 | ExperimentalAutoDetectBinPacking: false 64 | FixNamespaceComments: true 65 | ForEachMacros: 66 | - foreach 67 | - Q_FOREACH 68 | - BOOST_FOREACH 69 | IncludeBlocks: Preserve 70 | IncludeCategories: 71 | - Regex: '^"(llvm|llvm-c|clang|clang-c)/' 72 | Priority: 2 73 | SortPriority: 0 74 | - Regex: '^(<|"(gtest|gmock|isl|json)/)' 75 | Priority: 3 76 | SortPriority: 0 77 | - Regex: '.*' 78 | Priority: 1 79 | SortPriority: 0 80 | IncludeIsMainRegex: '(Test)?$' 81 | 
IncludeIsMainSourceRegex: '' 82 | IndentCaseLabels: false 83 | IndentGotoLabels: true 84 | IndentPPDirectives: None 85 | IndentWidth: 4 86 | IndentWrappedFunctionNames: false 87 | JavaScriptQuotes: Leave 88 | JavaScriptWrapImports: true 89 | KeepEmptyLinesAtTheStartOfBlocks: true 90 | MacroBlockBegin: '' 91 | MacroBlockEnd: '' 92 | MaxEmptyLinesToKeep: 1 93 | NamespaceIndentation: None 94 | ObjCBinPackProtocolList: Auto 95 | ObjCBlockIndentWidth: 2 96 | ObjCSpaceAfterProperty: false 97 | ObjCSpaceBeforeProtocolList: true 98 | PenaltyBreakAssignment: 2 99 | PenaltyBreakBeforeFirstCallParameter: 19 100 | PenaltyBreakComment: 300 101 | PenaltyBreakFirstLessLess: 120 102 | PenaltyBreakString: 1000 103 | PenaltyBreakTemplateDeclaration: 10 104 | PenaltyExcessCharacter: 1000000 105 | PenaltyReturnTypeOnItsOwnLine: 1000 106 | PointerAlignment: Right 107 | ReflowComments: true 108 | SortIncludes: false 109 | SortUsingDeclarations: true 110 | SpaceAfterCStyleCast: false 111 | SpaceAfterLogicalNot: false 112 | SpaceAfterTemplateKeyword: true 113 | SpaceBeforeAssignmentOperators: true 114 | SpaceBeforeCpp11BracedList: false 115 | SpaceBeforeCtorInitializerColon: true 116 | SpaceBeforeInheritanceColon: true 117 | SpaceBeforeParens: ControlStatements 118 | SpaceBeforeRangeBasedForLoopColon: true 119 | SpaceInEmptyBlock: false 120 | SpaceInEmptyParentheses: false 121 | SpacesBeforeTrailingComments: 1 122 | SpacesInAngles: false 123 | SpacesInConditionalStatement: false 124 | SpacesInContainerLiterals: true 125 | SpacesInCStyleCastParentheses: false 126 | SpacesInParentheses: false 127 | SpacesInSquareBrackets: false 128 | SpaceBeforeSquareBrackets: false 129 | Standard: Latest 130 | StatementMacros: 131 | - Q_UNUSED 132 | - QT_REQUIRE_VERSION 133 | TabWidth: 4 134 | UseCRLF: false 135 | UseTab: Never 136 | ... 
137 | 138 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into 
this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | # local build cache 163 | build 164 | dist 165 | *.pt 166 | veturbo/ops/lib/ 167 | veturbo/lego_pipeline/lib/ 168 | 169 | # cmake 170 | CMakeFiles/ 171 | CMakeCache.txt 172 | CMakeScripts/ 173 | CMakeTmp/ 174 | cmake_install.cmake 175 | Makefile 176 | cmake-build-debug/ 177 | cmake-build-release/ 178 | cmake-build-relwithdebinfo/ 179 | cmake-build-minsize/ 180 | 181 | # library 182 | !veturboio/ops/csrc/lib/ 183 | !veturboio/ops/csrc/lib/*.so 184 | 185 | # vscode 186 | .vscode 187 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. See [conventional commits](https://www.conventionalcommits.org/) for commit guidelines. 4 | 5 | --- 6 | ## [0.1.3] - 2024-04-26 7 | 8 | ### Bug Fixes 9 | 10 | - **(load)** set cuda device in each thread 11 | - **(security)** fix typo in comments 12 | - **(ut)** clean files in ut 13 | - directIO segmentation fault 14 | - modify aksk less real 15 | 16 | ### Features 17 | 18 | - **(security)** compat with cipher header and use cipher in posix 19 | - **(sfcs)** set sfcs sync interval by environ 20 | - **(sfcs)** reduce memcpy 21 | - **(sfcs)** parse sfcs confs from environ in json format 22 | - add clone mode for shared tensor 23 | - get cfs task id from env 24 | 25 | ### Miscellaneous Chores 26 | 27 | - **(security)** clarify cipher readme 28 | - cpp coding style 29 | 30 | ### License 31 | 32 | - add license file and header 33 | 34 | ## [0.1.2] - 2024-01-25 35 | 36 | ### Bug Fixes 37 | 38 | - **(saver)** add return to remove repetitive writing 39 | - **(security)** socket path and ut bug 40 | - MANIFEST does not contain all fastcrypto lib files 41 | 42 | ### Documentation 
43 | 44 | - update readme 45 | 46 | ### Features 47 | 48 | - **(security)** fetch key and iv 49 | - **(security)** get and refresh sfcs aksk from datapipe 50 | - **(security)** get namenode ip from datapipe and fix write xml bug 51 | - **(sfcs)** decide load use sfcs sdk from environ 52 | 53 | ## [0.1.1] - 2023-11-17 54 | 55 | ### Bug Fixes 56 | 57 | - **(sfcs)** keep in consistent with reading when open for writing 58 | - **(ut)** delete potential residual test file before testing 59 | - fix ci release and update readme for pip install 60 | 61 | ### Documentation 62 | 63 | - use index-url as default install method 64 | 65 | ### Features 66 | 67 | - **(ci)** add import format tool in ci 68 | - **(saver)** introduce saver class to aggregate save operations 69 | - **(security)** add cipher in sfcs sdk 70 | - **(sfcs)** load and save pt 71 | - load pt file in parallel from sfcs 72 | 73 | ### Miscellaneous Chores 74 | 75 | - bump version to v0.1.0 76 | - bump version to v0.1.1 77 | 78 | ### Performance 79 | 80 | - make the read usage with good alignment. 81 | 82 | 83 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include veturboio/ops/csrc/lib/*.so 2 | include veturboio/ops/csrc/lib/*.so.* 3 | include veturboio/ops/csrc/include/*.h 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # veTurboIO 2 | 3 | 4 | [En](./README.md) | [中文](./README.zh.md) 5 | 6 | 7 | A Python library for high-performance reading and writing of PyTorch model files 8 | developed by Volcano Engine. This library mainly implements based on the safetensors 9 | file format to achieve efficient storage and reading of tensor data. 10 | 11 | ## Install 12 | 13 | It can be installed directly through the following way: 14 | ```bash 15 | cd veturboio 16 | python setup.py get_libcfs 17 | python setup.py install 18 | ``` 19 | 20 | Tips: This instruction will preferentially download the whl file that matches the 21 | current Python and PyTorch versions. 
If no matching whl file is found, it will 22 | automatically download the source code for compilation and installation. 23 | 24 | 25 | If the installation fails, you can also try to install by downloading the source code, 26 | and then compile and install it manually. 27 | 28 | ```bash 29 | # CUDA ops, default 30 | python setup.py install --cuda_ext 31 | 32 | # NPU ops 33 | python setup.py install --npu_ext 34 | 35 | # CPU only 36 | python setup.py install --cpu_ext 37 | ``` 38 | 39 | 40 | ## Quick Start 41 | 42 | ### Read and write model files 43 | 44 | 45 | ```python 46 | import torch 47 | import veturboio 48 | 49 | tensors = { 50 | "weight1": torch.zeros((1024, 1024)), 51 | "weight2": torch.zeros((1024, 1024)) 52 | } 53 | 54 | veturboio.save_file(tensors, "model.safetensors") 55 | 56 | new_tensors = veturboio.load("model.safetensors") 57 | 58 | # check if the tensors are the same 59 | for k, v in tensors.items(): 60 | assert torch.allclose(v, new_tensors[k]) 61 | ``` 62 | 63 | ### Convert existing PyTorch files 64 | 65 | ```bash 66 | python -m veturboio.convert -i model.pt -o model.safetensors 67 | ``` 68 | 69 | ## Performance test 70 | 71 | Run directly: 72 | ```bash 73 | bash bench/io_bench.sh 74 | ``` 75 | Then, you can get the following results: 76 | ``` 77 | fs_name tensor_size veturboio load_time(s) torch load_time(s) 78 | shm 1073741824 0.08 0.63 79 | shm 2147483648 0.19 1.26 80 | shm 4294967296 0.36 2.32 81 | ``` 82 | 83 | Also, you can run the following command to get more options: 84 | ```bash 85 | python bench/io_bench.py -h 86 | ``` 87 | 88 | ## Advance Features 89 | 90 | ### Using veMLP to accelerate reading and writing 91 | Volcano Engine Machine Learning Platform (veMLP) provides a distributed cache file system 92 | based on the physical disks of the GPU cluster. 93 | 94 |

95 | 96 |

97 | 98 | When a cluster-level task needs to read 99 | a model file, the caching system can efficiently distribute the model file between GPU 100 | machines via RDMA transfer, thus avoiding network transfer bottlenecks. When using this 101 | system, veTurboIO can maximize its performance advantages. 102 | 103 | ### Encrypt and decrypt model files 104 | veTurboIO supports encryption and decryption of model files. You can read the [tutorial](./docs/encrypt_model.md) 105 | to learn how to keep your model files secure. When you use GPU as target device, veTurboIO can decrypt the model file on the fly. 106 | 107 | 108 | ## License 109 | 110 | [Apache License 2.0](./LICENSE) 111 | 112 | -------------------------------------------------------------------------------- /README.zh.md: -------------------------------------------------------------------------------- 1 | # veTurboIO 2 | 3 | 4 | [en](./README.md) | [中文](./README.zh.md) 5 | 6 | 7 | 一个由 Volcano Engine 开发的用于高性能读写 PyTorch 模型文件的 Python 库。该库主要基于 safetensors 文件格式实现,以实现对张量数据的高效存储和读取。 8 | 9 | ## 安装 10 | 11 | 可以直接通过以下方式安装: 12 | ```bash 13 | pip install veturboio -f https://veturbo-cn-beijing.tos-cn-beijing.volces.com/veturboio/index.html --no-build-isolation 14 | ``` 15 | 16 | 提示:此指令会优先下载与当前 Python 和 PyTorch 版本匹配的 whl 文件,如果没有找到匹配的 whl 文件,会自动下载源码进行编译安装。 17 | 18 | 如果安装失败,也可以尝试通过下载源码安装,然后手动编译安装。 19 | ```bash 20 | # CUDA ops, default 21 | python setup.py install --cuda_ext 22 | 23 | # NPU ops 24 | python setup.py install --npu_ext 25 | 26 | # CPU only 27 | python setup.py install --cpu_ext 28 | ``` 29 | 30 | ## 快速开始 31 | 32 | ### 读写模型文件 33 | 34 | 35 | ```python 36 | import torch 37 | import veturboio 38 | 39 | tensors = { 40 | "weight1": torch.zeros((1024, 1024)), 41 | "weight2": torch.zeros((1024, 1024)) 42 | } 43 | 44 | veturboio.save_file(tensors, "model.safetensors") 45 | 46 | new_tensors = veturboio.load("model.safetensors") 47 | 48 | # check if the tensors are the same 49 | for k, v in tensors.items(): 50 | assert 
torch.allclose(v, new_tensors[k]) 51 | ``` 52 | 53 | ## 转换已有 PyTorch 文件 54 | 55 | ```bash 56 | python -m veturboio.convert -i model.pt -o model.safetensors 57 | ``` 58 | 59 | ## 性能测试 60 | 61 | 直接运行: 62 | ```bash 63 | bash bench/io_bench.sh 64 | ``` 65 | 66 | 接下来,你可以获得如下的结果: 67 | ``` 68 | fs_name tensor_size veturboio load_time(s) torch load_time(s) 69 | shm 1073741824 0.08 0.63 70 | shm 2147483648 0.19 1.26 71 | shm 4294967296 0.36 2.32 72 | ``` 73 | 74 | ## 进阶功能 75 | 76 | ### 使用 veMLP 加速读写 77 | Volcano Engine Machine Learning Platform (veMLP) 提供了基于 GPU 集群的物理磁盘的分布式缓存文件系统。 78 | 79 |

80 | 81 |

82 | 83 | 当集群级任务需要读取模型文件时,缓存系统可以通过 RDMA 传输高效地在 GPU 机器之间分发模型文件,从而避免网络传输瓶颈。使用此系统时,veTurboIO 可以最大化其性能优势。 84 | 85 | 86 | ### 加密和解密模型文件 87 | 88 | veTurboIO 支持模型文件的加密和解密。您可以阅读[教程]([tutorial](./docs/encrypt_model.md))以了解如何保护您的模型文件。当您使用 GPU 作为目标设备时,veTurboIO 可以实时解密模型文件。 89 | 90 | ## 许可证 91 | 92 | [Apache License 2.0](./LICENSE) 93 | -------------------------------------------------------------------------------- /bench/io_bench.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import argparse 18 | import os 19 | import time 20 | from functools import lru_cache 21 | 22 | import numpy as np 23 | import torch 24 | 25 | import veturboio 26 | 27 | 28 | def human_read_to_byte(size): 29 | factors = { 30 | 'B': 1, 31 | 'KB': 1024, 32 | 'MB': 1048576, 33 | 'GB': 1073741824, 34 | 'TB': 1099511627776, 35 | 'PB': 1125899906842624, 36 | 'EB': 1152921504606846976, 37 | 'ZB': 1180591620717411303424, 38 | 'YB': 1208925819614629174706176, 39 | } 40 | if size[-2:] in factors: 41 | return factors[size[-2:]] * int(size[:-2]) 42 | elif size[-1:] in factors: 43 | return int(size[:-1]) 44 | else: 45 | return int(size) 46 | 47 | 48 | def parse_args(): 49 | parser = argparse.ArgumentParser( 50 | description='benchmark veturboio, notice to clear page cache manually when benchmarking for existing file' 51 | ) 52 | parser.add_argument( 53 | '--begin', 54 | default='1048576', 55 | dest='begin', 56 | help='specify the minimum file size to benchmark in bytes or in format like xxKB/MB/GB', 57 | ) 58 | parser.add_argument( 59 | '--end', 60 | default='1048576', 61 | dest='end', 62 | help='specify the maximum file size to benchmark in bytes or in format like xxKB/MB/GB', 63 | ) 64 | parser.add_argument('--base_dir', dest='base_dir', help='specify the the base dir of files to be benchmarked') 65 | parser.add_argument('--fs_name', default='local_fs', help='file system name that would be displayed in the result') 66 | parser.add_argument('--gen_data', default=False, action=argparse.BooleanOptionalAction, dest='gen_data') 67 | parser.add_argument( 68 | '--map_location', default='cpu', dest='map_location', help='map location of tensor to be loaded' 69 | ) 70 | parser.add_argument('--use_pinmem', default=False, action=argparse.BooleanOptionalAction, dest='use_pinmem') 71 | parser.add_argument( 72 | '--load_mode', default='veturboio', dest='load_mode', help='load modes specified, seperated by comma' 73 | ) 74 | 75 | args = parser.parse_args() 76 | return args 
77 | 78 | 79 | def print_header(load_modes): 80 | mode_list = list(map(lambda mode: f"{mode}{' load_time(s)' + ' ':<25}", load_modes)) 81 | print(f"{'fs_name' + ' ':<10} {'tensor_size' + ' ':<15}", ' '.join(mode_list)) 82 | 83 | 84 | def print_load_time(fs_name, tensor_size, load_times): 85 | load_times = list(map(lambda load_time: f"{load_time}{' ':<30}", load_times)) 86 | print(f"{fs_name:<10} {str(tensor_size):<15}", ' '.join(load_times)) 87 | 88 | 89 | def sfcs_env(): 90 | os.environ['SFCS_FSNAME'] = 'byted-cpu-sfcs' 91 | os.environ['SFCS_REGION'] = 'cn-beijing' 92 | os.environ['SFCS_ACCESS_KEY'] = os.environ['CI_SFCS_AK'] 93 | os.environ['SFCS_SECRET_KEY'] = os.environ['CI_SFCS_SK'] 94 | os.environ['SFCS_AUTHENTICATION_SERVICE_NAME'] = 'cfs' 95 | os.environ['SFCS_NS_ID'] = '18014398509481988' 96 | os.environ['SFCS_UFS_PATH'] = 'tos://yinzq-bucket/' 97 | os.environ['SFCS_MULTI_NIC_WHITELIST'] = 'eth0' 98 | os.environ['SFCS_NETWORK_SEGMENT'] = '172.31.128.0/17' 99 | os.environ['SFCS_NAMENODE_ENDPOINT_ADDRESS'] = '100.67.19.231' 100 | os.environ['SFCS_LOG_SEVERITY'] = 'ERROR' 101 | 102 | 103 | def main(): 104 | args = parse_args() 105 | if args.base_dir.startswith('sfcs://'): 106 | sfcs_env() 107 | load_modes = args.load_mode.split(',') 108 | # warmup GPU otherwise the first case would be slow 109 | device = torch.device(args.map_location) 110 | if device.type == "cuda": 111 | file_path = os.path.join(args.base_dir if args.base_dir else "", 'warmup.safetensors') 112 | tensors = {"weight": torch.randn(10)} 113 | veturboio.save_file(tensors, file_path) 114 | veturboio.load(file_path, map_location=args.map_location, use_pinmem=args.use_pinmem) 115 | print_header(load_modes) 116 | tensor_size = human_read_to_byte(args.begin) 117 | end_size = human_read_to_byte(args.end) 118 | while tensor_size <= end_size: 119 | if args.gen_data: 120 | numel = tensor_size // np.dtype(float).itemsize * 2 121 | tensors = {"weight": torch.randn(numel)} 122 | load_times = [] 123 | for 
mode in load_modes: 124 | if mode == 'veturboio': 125 | file_path = os.path.join(args.base_dir if args.base_dir else "", f'{tensor_size}.safetensors') 126 | if args.gen_data: 127 | veturboio.save_file(tensors, file_path) 128 | 129 | start = time.time() 130 | loaded_tensor = veturboio.load(file_path, map_location=args.map_location, use_pinmem=args.use_pinmem) 131 | if mode == 'torch': 132 | file_path = os.path.join(args.base_dir if args.base_dir else "", f'{tensor_size}.pt') 133 | if args.gen_data: 134 | veturboio.save_pt(tensors, file_path) 135 | 136 | start = time.time() 137 | 138 | loaded_tensor = veturboio.load(file_path, map_location=args.map_location) 139 | end = time.time() 140 | load_times.append("%.2f" % (end - start)) 141 | 142 | if device.type == "cuda": 143 | del loaded_tensor 144 | torch.cuda.empty_cache() 145 | 146 | print_load_time(args.fs_name, tensor_size, load_times) 147 | tensor_size = tensor_size * 2 148 | 149 | 150 | if __name__ == '__main__': 151 | main() 152 | -------------------------------------------------------------------------------- /bench/io_bench.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | ### 16 | 17 | # shm 18 | mkdir -p /dev/shm/test_files 19 | python bench/io_bench.py --load_mode=veturboio,torch --base_dir=/dev/shm/test_files --begin=1GB --end=4GB --gen_data --fs_name=shm 20 | 21 | # sfcs 22 | python bench/io_bench.py --load_mode=veturboio,torch --base_dir=sfcs:// --begin=1GB --end=4GB --gen_data --fs_name=sfcs 23 | -------------------------------------------------------------------------------- /docs/encrypt_model.md: -------------------------------------------------------------------------------- 1 | # 加解密模型文件 2 | 3 | 该库底层通过两种接口读写:SFCS SDK 和 POSIX。如果文件路径前缀为 `sfcs://` 就视为使用 SFCS SDK,所需的鉴权信息可以从火山引擎可信服务的 `unix domain socket` 获取或者设置以下三个环境变量: 4 | 5 | | 环境变量名 | 含义 | 6 | | ------------------------------ | --------------------------------- | 7 | | SFCS_ACCESS_KEY | SFCS 文件系统的 AK | 8 | | SFCS_SECRET_KEY | SFCS 文件系统的 SK | 9 | | SFCS_NAMENODE_ENDPOINT_ADDRESS | SFCS 文件系统 NameNode 地址 | 10 | 11 | 12 | 加解密读写模型文件需要 data key 和 iv,有 3 种获取方式,读取优先级按照下列顺序: 13 | - [1] 加密的 data key 和 iv 存放在密文模型文件的 header 中,使用火山引擎 KMS 解密得到明文的 data key。 14 | - [1.1] 访问 KMS 所需的 AK/SK/ST 从火山引擎可信服务的 unix domain socket 获取,需要额外挂载。 15 | - [1.2] 访问 KMS 所需的 AK/SK/ST 从环境变量获取。 16 | - [2] 访问火山引擎可信服务的 unix domain socket 直接获取 data key 和 iv,需要额外挂载。 17 | - [3] 通过环境变量直接设置 data key 和 iv。 18 | 19 | 不同方式需要设置的环境变量如下: 20 | 21 | | 环境变量名 | 含义 | 22 | | ------------------------------ | --------------------------------- | 23 | | VETURBOIO_KMS_HOST | [1] KMS 服务地址,默认值 open.volcengineapi.com| 24 | | VETURBOIO_KMS_REGION | [1] KMS 服务所在区域,默认值 cn-beijing | 25 | | VETURBOIO_KMS_KEYRING_NAME | [1] KMS 服务解密 data key 的钥匙环名 | 26 | | VETURBOIO_KMS_KEY_NAME | [1] KMS 服务解密 data key 的主密钥名 | 27 | | DATAPIPE_SOCKET_PATH | [1.1][2] 可信服务 uds 的路径 | 28 | | VETURBOIO_KMS_ACCESS_KEY | [1.2] KMS 鉴权的 AK | 29 | | VETURBOIO_KMS_SECRET_KEY | [1.2] KMS 鉴权的 SK | 30 | | VETURBOIO_KMS_SESSION_TOKEN | [1.2] KMS 鉴权的临时令牌,非必需| 31 | | VETURBOIO_KEY | [3] 加解密的 128 位数据密钥的 base64 编码 | 32 | | VETURBOIO_IV | [3] 加解密的 128 位初始向量的 base64 编码 | 33 | 
34 | 35 | 按照上述三种方式设置好后,可以参考下面代码在读写模型文件时启用加解密: 36 | ```python 37 | import torch 38 | import veturboio 39 | 40 | tensors = { 41 | "weight1": torch.zeros((1024, 1024)), 42 | "weight2": torch.zeros((1024, 1024)) 43 | } 44 | 45 | # use cpu to encrypt 46 | veturboio.save_file(tensors, "sfcs://model.safetensors", use_cipher=True) 47 | 48 | # use cpu to decrypt if map_location is cpu 49 | reloaded_tensor1 = veturboio.load("sfcs://model.safetensors", map_location="cpu", use_cipher=True) 50 | 51 | # use gpu to decrypt if map_location is cuda 52 | reloaded_tensor2 = veturboio.load("sfcs://model.safetensors", map_location="cuda:0", use_cipher=True) 53 | 54 | # check if the tensors are the same 55 | for k, v in tensors.items(): 56 | assert torch.allclose(v, reloaded_tensor1[k]) 57 | for k, v in tensors.items(): 58 | assert torch.allclose(v, reloaded_tensor2[k]) 59 | ``` 60 | 61 | -------------------------------------------------------------------------------- /docs/imgs/SFCS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/veTurboIO/0ed05516013da4332433784660660ca4d1904505/docs/imgs/SFCS.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # veTurboIO 2 | 3 | 火山引擎研发的一款用于高性能读写 PyTorch 模型文件的 Python 库。该库实现了主要基于 safetensors 文件格式,实现高效的存储与读取张量数据。 4 | 5 | ## 安装 6 | 7 | ```bash 8 | cd veturboio 9 | python setup.py install 10 | ``` 11 | 12 | ## 快速开始 13 | 14 | ```python 15 | import torch 16 | import veturboio 17 | 18 | tensors = { 19 | "weight1": torch.zeros((1024, 1024)), 20 | "weight2": torch.zeros((1024, 1024)) 21 | } 22 | 23 | veturboio.save_file(tensors, "model.safetensors") 24 | 25 | reloaded_tensor = veturboio.load("model.safetensors", map_location="cpu") 26 | 27 | # check if the tensors are the same 28 | for k, v in tensors.items(): 29 | assert torch.allclose(v, 
reloaded_tensor[k]) 30 | ``` 31 | 32 | ### 使用锁页内存加速连续加载数据到GPU 33 | ```python 34 | import torch 35 | import veturboio 36 | 37 | tensors1 = { 38 | "weight1": torch.zeros((1024, 1024)), 39 | "weight2": torch.zeros((1024, 1024)) 40 | } 41 | 42 | veturboio.save_file(tensors1, "model1.safetensors") 43 | 44 | tensors2 = { 45 | "weight1": torch.zeros((1024, 1024)), 46 | "weight2": torch.zeros((1024, 1024)) 47 | } 48 | 49 | veturboio.save_file(tensors2, "model2.safetensors") 50 | 51 | helper = veturboio.init_io_helper() 52 | reloaded_tensor1 = veturboio.load("model1.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper) 53 | # the map_location may be different 54 | reloaded_tensor2 = veturboio.load("model2.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper) 55 | 56 | # check if the tensors are the same 57 | for k, v in tensors1.items(): 58 | assert torch.allclose(v.cuda(), reloaded_tensor1[k]) 59 | for k, v in tensors2.items(): 60 | assert torch.allclose(v.cuda(), reloaded_tensor2[k]) 61 | ``` 62 | 63 | ### 转换现有的 PyTorch 文件 64 | ```bash 65 | python -m veturboio.convert -i model.pt -o model.safetensors 66 | ``` 67 | 68 | 69 | ## 特性 70 | 71 | - 多线程读取文件; 72 | - zero-copy 读取,不额外花费内存; 73 | - 支持直接加载到 CUDA; 74 | - BFloat16 数值支持; 75 | - 固定 pinmem 用于快速反复读取; 76 | - 兼容 PyTorch 标准格式(无性能提升); 77 | - 兼容 safetensors 格式; 78 | 79 | ## 收益 80 | 81 | 标准的 PyTorch 模型文件会经过 zip 与 pickle 两次操作,这两个操作极大的抑制了读取的速度,同时 unpickle 也会带来潜在的不安全性。我们使用一种自定义的模型格式来存储 tensor 数据,希望可以改善 PyTorch 标准格式所存在的这些问题。目前已经实现的优点有: 82 | 83 | - 多线程读取:当前文件对象主要的存放点为云端存储,单一进程无法达到云存储的带宽上限,必须使用多线程读取才能达到最大的读取速度。PyTorch 标准格式的读取速度受限于 pickle 解析速度,远无法达到云存储的速度上限; 84 | - 云端适配:基于火山引擎的云端存储(vePFS、SFCS)特性,最大化的利用了云端存储的带宽; 85 | - 安全性:不再使用 pickle 对象,避免了 pickle 的安全性问题; 86 | 87 | -------------------------------------------------------------------------------- /docs/pin_mem.md: -------------------------------------------------------------------------------- 1 | ### 使用锁页内存加速连续加载数据到GPU 2 | ```python 3 | import torch 4 | 
import veturboio 5 | 6 | tensors1 = { 7 | "weight1": torch.zeros((1024, 1024)), 8 | "weight2": torch.zeros((1024, 1024)) 9 | } 10 | 11 | veturboio.save_file(tensors1, "model1.safetensors") 12 | 13 | tensors2 = { 14 | "weight1": torch.zeros((1024, 1024)), 15 | "weight2": torch.zeros((1024, 1024)) 16 | } 17 | 18 | veturboio.save_file(tensors2, "model2.safetensors") 19 | 20 | helper = veturboio.init_io_helper() 21 | reloaded_tensor1 = veturboio.load("model1.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper) 22 | # the map_location may be different 23 | reloaded_tensor2 = veturboio.load("model2.safetensors", map_location="cuda:0", use_pinmem=True, helper=helper) 24 | 25 | # check if the tensors are the same 26 | for k, v in tensors1.items(): 27 | assert torch.allclose(v.cuda(), reloaded_tensor1[k]) 28 | for k, v in tensors2.items(): 29 | assert torch.allclose(v.cuda(), reloaded_tensor2[k]) 30 | ``` 31 | 32 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: "veTurboIO" 2 | 3 | theme: 4 | name: "material" 5 | 6 | docs_dir: docs 7 | 8 | nav: 9 | - 首页: index.md 10 | - 最佳实践: 11 | - 动态加载: dynamic_load.md 12 | - SFCS 加载优化: sfcs_support.md 13 | - API: api.md 14 | - 发布日志: release.md 15 | 16 | plugins: 17 | - mkdocstrings: 18 | default_handler: python -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | 2 | [tool.isort] 3 | profile = "black" # black-compatible 4 | line_length = 119 # should match black parameters 5 | py_version = 310 # python 3.10 as a target version 6 | sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"] 7 | default_section = "THIRDPARTY" 8 | 9 | 10 | [tool.black] 11 | line_length = 119 12 | skip_string_normalization = true 13 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | import os 18 | import platform 19 | import sys 20 | 21 | import requests 22 | import setuptools 23 | import torch 24 | from pkg_resources import parse_version 25 | from setuptools import Extension, find_packages, setup 26 | from torch.utils.cpp_extension import BuildExtension, CppExtension, include_paths 27 | 28 | # initialize variables for compilation 29 | IS_LINUX = platform.system() == "Linux" 30 | IS_DARWIN = platform.system() == "Darwin" 31 | IS_WINDOWS = platform.system() == "Windows" 32 | 33 | this_dir = os.path.dirname(os.path.abspath(__file__)) 34 | 35 | 36 | def get_option(): 37 | if os.getenv("NPU_EXTENSION_ENABLED", "0") == "1": 38 | sys.argv.append("--npu_ext") 39 | elif "--cuda_ext" not in sys.argv and "--npu_ext" not in sys.argv and "--cpu_ext" not in sys.argv: 40 | print( 41 | '''No known extension specified, default to use --cuda_ext. 
Currently supported: 42 | --cuda_ext 43 | --npu_ext 44 | --cpu_ext''' 45 | ) 46 | sys.argv.append("--cuda_ext") 47 | 48 | 49 | def get_version(): 50 | import importlib.util 51 | 52 | spec = importlib.util.spec_from_file_location("version", os.path.join("veturboio", "version.py")) 53 | m = importlib.util.module_from_spec(spec) 54 | spec.loader.exec_module(m) 55 | 56 | if "--cpu_ext" in sys.argv: 57 | return m.__version__ + "+cpu" 58 | elif "--npu_ext" in sys.argv: 59 | return m.__version__ + "+npu" 60 | else: 61 | return m.__version__ 62 | 63 | 64 | def make_relative_rpath(path): 65 | if IS_DARWIN: 66 | return '-Wl,-rpath,@loader_path/' + path 67 | elif IS_WINDOWS: 68 | return '' 69 | else: 70 | return '-Wl,-rpath,$ORIGIN/' + path 71 | 72 | 73 | def get_veturboio_extension(): 74 | get_option() 75 | # prevent ninja from using too many resources 76 | try: 77 | import psutil 78 | 79 | num_cpu = len(psutil.Process().cpu_affinity()) 80 | cpu_use = max(4, num_cpu - 1) 81 | except (ModuleNotFoundError, AttributeError): 82 | cpu_use = 4 83 | 84 | os.environ.setdefault("MAX_JOBS", str(cpu_use)) 85 | # os.environ.setdefault("TORCH_CUDA_ARCH_LIST", "8.0;8.6") 86 | os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6" 87 | 88 | define_macros = [] 89 | 90 | # Before PyTorch1.8.0, when compiling CUDA code, `cxx` is a 91 | # required key passed to PyTorch. Even if there is no flag passed 92 | # to cxx, users also need to pass an empty list to PyTorch. 93 | # Since PyTorch1.8.0, it has a default value so users do not need 94 | # to pass an empty list anymore. 
95 | # More details at https://github.com/pytorch/pytorch/pull/45956 96 | extra_compile_args = {'cxx': ['-fvisibility=hidden'], 'nvcc': ['-O3']} 97 | 98 | if parse_version(torch.__version__) <= parse_version('1.12.1'): 99 | extra_compile_args['cxx'].append('-std=c++14') 100 | else: 101 | extra_compile_args['cxx'].append('-std=c++17') 102 | 103 | name = "veturboio_ext" 104 | 105 | sources = [ 106 | "veturboio/ops/csrc/pybind.cpp", 107 | "veturboio/ops/csrc/posix.cpp", 108 | "veturboio/ops/csrc/sfcs.cpp", 109 | "veturboio/ops/csrc/io_helper_cpu_common.cpp", 110 | "veturboio/ops/csrc/cipher.cpp", 111 | ] 112 | 113 | include_dirs = include_paths() 114 | include_dirs.append("veturboio/ops/csrc/include") 115 | 116 | torch_dir = os.path.join(os.path.dirname(torch.__file__), "lib") 117 | library_dirs = [torch_dir] 118 | library_dirs.append("veturboio/ops/csrc/lib") 119 | 120 | libraries = ["cloudfs", ":libfastcrypto_gpu.so.0.3"] 121 | 122 | extra_link_args = [make_relative_rpath("veturboio/ops/csrc/lib")] 123 | 124 | # Refer to: https://github.com/pytorch/pytorch/blob/main/torch/utils/cpp_extension.py#L918 125 | # In torch 2.0, this flag is False, and the *.so lib set this flag as False when building. 126 | # In newer torch, this flag is True, to keep compatibility with *.so lib, we set it False 127 | # to generate g++ flags '-D_GLIBCXX_USE_CXX11_ABI=0' when building veturboio_ext, otherwise 128 | # some 'undefine symbol' error of std::string will be thrown. 
129 | torch._C._GLIBCXX_USE_CXX11_ABI = False 130 | 131 | if "--cuda_ext" in sys.argv: 132 | sys.argv.remove("--cuda_ext") 133 | 134 | extra_compile_args['nvcc'].append('-O3') 135 | 136 | sources.append("veturboio/ops/csrc/io_helper.cu") 137 | 138 | define_macros.append(("USE_CUDA", "1")) 139 | 140 | from torch.utils.cpp_extension import CUDAExtension 141 | 142 | return CUDAExtension( 143 | name=name, 144 | sources=sources, 145 | define_macros=define_macros, 146 | include_dirs=include_dirs, 147 | library_dirs=library_dirs, 148 | libraries=libraries, 149 | extra_compile_args=extra_compile_args, 150 | extra_link_args=extra_link_args, 151 | ) 152 | else: 153 | extra_compile_args['cxx'].append('-O3') 154 | 155 | libraries.append("torch_cpu") 156 | libraries.append("torch_python") 157 | 158 | extra_link_args.append(f"-Wl,--rpath={torch_dir},--enable-new-dtags") 159 | 160 | if "--npu_ext" in sys.argv: 161 | sys.argv.remove("--npu_ext") 162 | 163 | sources.append("veturboio/ops/csrc/io_helper_npu.cpp") 164 | define_macros.append(("USE_NPU", "1")) 165 | 166 | return Extension( 167 | name=name, 168 | sources=sources, 169 | define_macros=define_macros, 170 | include_dirs=include_dirs, 171 | library_dirs=library_dirs, 172 | libraries=libraries, 173 | extra_compile_args=extra_compile_args, 174 | extra_link_args=extra_link_args, 175 | ) 176 | elif "--cpu_ext" in sys.argv: 177 | sys.argv.remove("--cpu_ext") 178 | 179 | sources.append("veturboio/ops/csrc/io_helper_cpu.cpp") 180 | 181 | return Extension( 182 | name=name, 183 | sources=sources, 184 | define_macros=define_macros, 185 | include_dirs=include_dirs, 186 | library_dirs=library_dirs, 187 | libraries=libraries, 188 | extra_compile_args=extra_compile_args, 189 | extra_link_args=extra_link_args, 190 | ) 191 | 192 | 193 | class GetLibCfsCommand(setuptools.Command): 194 | """get libcfs from url""" 195 | 196 | description = 'get libcfs from url' 197 | user_options = [('src=', 's', 'source url of libcloudfs.so'), ('dst=', 'd', 
'dest filepath of libcloudfs.so')] 198 | 199 | def initialize_options(self): 200 | from veturboio.utils.load_veturboio_ext import LIBCFS_DEFAULT_PATH, LIBCFS_DEFAULT_URL 201 | 202 | self.src = LIBCFS_DEFAULT_URL 203 | self.dst = LIBCFS_DEFAULT_PATH 204 | 205 | def finalize_options(self): 206 | pass 207 | 208 | def run(self): 209 | print(f"download libcloudfs.so from {self.src}, save to {self.dst}") 210 | r = requests.get(self.src, timeout=60) 211 | with open(self.dst, 'wb') as f: 212 | f.write(r.content) 213 | 214 | 215 | setup( 216 | name="veturboio", 217 | version=get_version(), 218 | description="Effcient PyTorch IO libraray on Volcanic Engine", 219 | author="AML Team", 220 | ext_modules=[get_veturboio_extension()], 221 | packages=find_packages(exclude=("veturboio.ops.csrc.common.sfcs.lib")), 222 | install_requires=[ 223 | "safetensors", 224 | "numpy", 225 | "netifaces", 226 | "loguru", 227 | "requests-unixsocket", 228 | "requests", 229 | ], 230 | include_package_data=True, 231 | cmdclass={"get_libcfs": GetLibCfsCommand, "build_ext": BuildExtension}, 232 | dependency_links=['https://mirrors.ivolces.com/pypi/'], 233 | ) 234 | -------------------------------------------------------------------------------- /tests/test_assert_exceptions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import os 18 | import tempfile 19 | import unittest 20 | from unittest import TestCase 21 | 22 | import torch 23 | 24 | import veturboio 25 | 26 | 27 | class TestAssertException(TestCase): 28 | @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") 29 | def test_modify_use_pinmem_attr(self): 30 | helper = veturboio.init_io_helper() 31 | with tempfile.TemporaryDirectory() as tmpdirname: 32 | filepath = os.path.join(tmpdirname, "model.safetensors") 33 | veturboio.save_file(self.tensors, filepath) 34 | 35 | with self.assertRaises(Exception) as context: 36 | veturboio.load(filepath, map_location="cuda:0", use_pinmem=False, helper=helper) 37 | veturboio.load(filepath, map_location="cuda:0", use_pinmem=True, helper=helper) 38 | self.assertTrue( 39 | 'use_pinmem attribute of an exising IOHelper should not be changed' in str(context.exception) 40 | ) 41 | 42 | @classmethod 43 | def setUpClass(cls): 44 | cls.tensors = { 45 | "weight1": torch.randn(20, 10), 46 | "weight2": torch.randn(20, 10), 47 | } 48 | -------------------------------------------------------------------------------- /tests/test_convert.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import os 18 | import tempfile 19 | from unittest import TestCase 20 | 21 | import torch 22 | 23 | import veturboio 24 | 25 | 26 | class TestConvertUtil(TestCase): 27 | def test_convert(self): 28 | with tempfile.TemporaryDirectory() as tmpdirname: 29 | filepath = os.path.join(tmpdirname, "model.pt") 30 | torch.save(self.tensors, filepath) 31 | convertpath = os.path.join(tmpdirname, "model.safetensors") 32 | 33 | print(f"python -m veturboio.convert -i {filepath} -o {convertpath}") 34 | os.system(f"python -m veturboio.convert -i {filepath} -o {convertpath}") 35 | 36 | loaded_tensors = veturboio.load(convertpath) 37 | for key in self.tensors.keys(): 38 | self.assertTrue(torch.allclose(self.tensors[key], loaded_tensors[key])) 39 | 40 | @classmethod 41 | def setUpClass(cls): 42 | cls.tensors = { 43 | "weight1": torch.randn(20, 10), 44 | "weight2": torch.randn(20, 10), 45 | } 46 | -------------------------------------------------------------------------------- /tests/test_fetch_cipher.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import base64 18 | import http.server 19 | import json 20 | import os 21 | import socketserver 22 | import tempfile 23 | import threading 24 | from datetime import datetime, timedelta 25 | from time import sleep 26 | from unittest import TestCase 27 | 28 | import numpy as np 29 | 30 | from veturboio.ops.cipher import CipherInfo, DataPipeClient 31 | from veturboio.ops.sfcs_utils import ( 32 | SFCS_OPT_ENV_LIST, 33 | SFCS_REQ_ENV_LIST, 34 | credentials_helper, 35 | generate_sfcs_conf_xml, 36 | init_sfcs_conf, 37 | ) 38 | 39 | 40 | class UnixSocketHttpServer(socketserver.UnixStreamServer): 41 | def get_request(self): 42 | request, client_address = super().get_request() 43 | return (request, ["local", 0]) 44 | 45 | 46 | class DatapipeHandler(http.server.SimpleHTTPRequestHandler): 47 | def do_POST(self): 48 | action = self.headers.get('X-Datapipe-Task-Type') 49 | if action == 'top': 50 | # mock kms response 51 | self.send_response(200) 52 | self.send_header('Content-Type', 'application/json') 53 | self.end_headers() 54 | res = {'Result': {'Plaintext': base64.b64encode(b'abcdefgh87654321').decode('ascii')}} 55 | self.wfile.write(bytes(json.dumps(res), encoding='ascii')) 56 | return 57 | self.send_response(400) 58 | self.end_headers() 59 | return 60 | 61 | def do_GET(self): 62 | action = self.headers.get('X-Datapipe-Task-Type') 63 | if action == 'ping': 64 | self.send_response(200) 65 | self.send_header('Content-Type', 'application/json') 66 | self.end_headers() 67 | self.wfile.write(bytes(json.dumps({'message': 'pong'}), encoding='ascii')) 68 | return 69 | if action == 'encrypt-key': 70 | self.send_response(200) 71 | self.send_header('Content-Type', 'application/json') 72 | self.end_headers() 73 | self.wfile.write( 74 | bytes( 75 | json.dumps({'Key': 'YWJjZGVmZ2gxMjM0NTY3OA==', 'IV': 'MTIzNDU2Nzg4NzY1NDMyMQ=='}), encoding='ascii' 76 | ) 77 | ) 78 | return 79 | if action == 'sfcs-sts': 80 | self.send_response(200) 81 | self.send_header('Content-Type', 
'application/json') 82 | self.end_headers() 83 | date_now = datetime.now() 84 | date_exp = date_now + timedelta(seconds=4) 85 | res = { 86 | 'Cred': { 87 | 'CurrentTime': date_now.isoformat(), 88 | 'ExpiredTime': date_exp.isoformat(), 89 | 'AccessKeyId': 'A' * 12, 90 | 'SecretAccessKey': 'S' * 12, 91 | 'SessionToken': 'ST' * 12, # fake SessionToken real one is longer 92 | }, 93 | 'SfcsNameNodeAddress': '100.67.19.231', 94 | } 95 | self.wfile.write(bytes(json.dumps(res), encoding='ascii')) 96 | return 97 | if action == 'kms-sts': 98 | self.send_response(200) 99 | self.send_header('Content-Type', 'application/json') 100 | self.end_headers() 101 | res = { 102 | 'Cred': { 103 | 'AccessKeyId': os.environ['CI_VENDOR_AK'], 104 | 'SecretAccessKey': os.environ['CI_VENDOR_AK'], 105 | 'SessionToken': '', 106 | }, 107 | } 108 | self.wfile.write(bytes(json.dumps(res), encoding='ascii')) 109 | return 110 | self.send_response(400) 111 | self.end_headers() 112 | return 113 | 114 | 115 | class TestCipherInfo(TestCase): 116 | @classmethod 117 | def setUpClass(cls): 118 | cls.sock_dir = tempfile.TemporaryDirectory() 119 | cls.server_address = os.path.join(cls.sock_dir.name, 'datapipe.sock') 120 | cls.server = UnixSocketHttpServer(cls.server_address, DatapipeHandler, bind_and_activate=True) 121 | 122 | def run(): 123 | cls.server.serve_forever() 124 | 125 | cls.thread = threading.Thread(target=run) 126 | cls.thread.start() 127 | cls.target_key = np.frombuffer(b'abcdefgh12345678', dtype=np.byte) 128 | cls.target_key_2 = np.frombuffer(b'abcdefgh87654321', dtype=np.byte) 129 | cls.target_iv = np.frombuffer(b'1234567887654321', dtype=np.byte) 130 | 131 | def test_fetch_from_file_header(self): 132 | os.environ.pop('VETURBOIO_KEY', None) 133 | os.environ.pop('VETURBOIO_IV', None) 134 | DataPipeClient.DATAPIPE_SOCKET_PATH = '/path/not/exist' 135 | 136 | header_dict = { 137 | 'mode': 'CTR-128', 138 | 'iv': 'MTIzNDU2Nzg4NzY1NDMyMQ==', 139 | 'meta_data_key': 
'bl2htKYLQ2+CjyyJ84Q3twAA9ZpCbFxwznRb0NkR9zGGRp1RK5Mb9u8NNOiahY+0yVrxNw3IVQ9Wgn6PDscw77Cb3eImjVn14hNBJRlwtSyQ7tRZLOsZBEHv5cWwDQ==', 140 | } 141 | header_bytes = bytearray(256 * 1024) 142 | header_str = 'Byte3ncryptM0del' + json.dumps(header_dict) 143 | header_bytes[: len(header_str)] = header_str.encode('utf-8') 144 | 145 | # case1: get kms cred from env 146 | ENV_KMS_HOST = 'VETURBOIO_KMS_HOST' 147 | ENV_KMS_REGION = 'VETURBOIO_KMS_REGION' 148 | ENV_KMS_AK = 'VETURBOIO_KMS_ACCESS_KEY' 149 | ENV_KMS_SK = 'VETURBOIO_KMS_SECRET_KEY' 150 | ENV_KMS_KEYRING = 'VETURBOIO_KMS_KEYRING_NAME' 151 | ENV_KMS_KEY = 'VETURBOIO_KMS_KEY_NAME' 152 | os.environ[ENV_KMS_HOST] = 'open.volcengineapi.com' 153 | os.environ[ENV_KMS_REGION] = 'cn-beijing' 154 | os.environ[ENV_KMS_AK] = os.environ['CI_VENDOR_AK'] 155 | os.environ[ENV_KMS_SK] = os.environ['CI_VENDOR_SK'] 156 | os.environ[ENV_KMS_KEYRING] = 'datapipe_keyring' 157 | os.environ[ENV_KMS_KEY] = 'datapipe_key_ml_maas' 158 | info = CipherInfo(True, header_bytes) 159 | self.assertTrue(info.use_cipher) 160 | self.assertTrue(info.use_header) 161 | self.assertTrue(np.array_equal(info.key, self.target_key)) 162 | self.assertTrue(np.array_equal(info.iv, self.target_iv)) 163 | 164 | # case2: get kms cred from datapipe and access kms with datapipe proxy 165 | os.environ.pop(ENV_KMS_HOST, None) 166 | os.environ.pop(ENV_KMS_REGION, None) 167 | os.environ.pop(ENV_KMS_AK, None) 168 | os.environ.pop(ENV_KMS_SK, None) 169 | DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address 170 | info = CipherInfo(True, header_bytes) 171 | self.assertTrue(info.use_cipher) 172 | self.assertTrue(info.use_header) 173 | self.assertTrue(np.array_equal(info.key, self.target_key_2)) 174 | self.assertTrue(np.array_equal(info.iv, self.target_iv)) 175 | 176 | def test_fetch_from_datapipe(self): 177 | DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address 178 | DataPipeClient.ENCRYPT_HEADER['X-Encrypt-Caller-Pod'] = 'test-pod-name' 179 | info = CipherInfo(True, 
class TestCredentials(TestCase):
    """Integration tests for SFCS credential fetching/refreshing.

    A fake datapipe service is served over a Unix-domain socket
    (``UnixSocketHttpServer`` + ``DatapipeHandler``, both defined earlier in
    this file) so ``DataPipeClient`` and ``init_sfcs_conf`` can be exercised
    without the real control plane.
    """

    @classmethod
    def setUpClass(cls):
        # Spin up the fake datapipe HTTP server on a temp Unix socket and
        # serve it from a background thread for the whole test class.
        cls.sock_dir = tempfile.TemporaryDirectory()
        cls.server_address = os.path.join(cls.sock_dir.name, 'datapipe.sock')
        cls.server = UnixSocketHttpServer(cls.server_address, DatapipeHandler, bind_and_activate=True)

        def run():
            cls.server.serve_forever()

        cls.thread = threading.Thread(target=run)
        cls.thread.start()

    def test_sfcs_sts(self):
        """DataPipeClient returns the STS credential served by the fake datapipe."""
        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
        client = DataPipeClient()
        cred = client.get_sfcs_ak_sk_st()
        self.assertIsNotNone(cred)
        self.assertEqual(cred['SfcsNameNodeAddress'], '100.67.19.231')
        cred = cred['Cred']
        self.assertEqual(cred['AccessKeyId'], 'A' * 12)
        self.assertEqual(cred['SecretAccessKey'], 'S' * 12)
        self.assertEqual(cred['SessionToken'], 'ST' * 12)

    def test_sfcs_conf(self):
        """init_sfcs_conf writes the XML conf, either from env vars or via datapipe refresh."""
        for e in SFCS_REQ_ENV_LIST:
            os.environ[e] = 'test-value'
        # case 1: env SFCS_ACCESS_KEY and SFCS_SECRET_KEY and SFCS_NAMENODE_ENDPOINT_ADDRESS exists
        os.environ['SFCS_ACCESS_KEY'] = 'A' * 12
        os.environ['SFCS_SECRET_KEY'] = 'S' * 12
        os.environ['SFCS_NAMENODE_ENDPOINT_ADDRESS'] = '100.67.19.231'
        sfcs_conf = os.path.join(os.getcwd(), 'base_model2.xml')
        if os.path.exists(sfcs_conf):
            os.remove(sfcs_conf)
        init_sfcs_conf('/base_model2/tensor.pt')
        self.assertEqual(os.environ['LIBCLOUDFS_CONF'], sfcs_conf)
        # no background refresh threads should be started when creds come from env
        self.assertEqual(len(credentials_helper.threads), 0)
        self.assertEqual(len(credentials_helper.running), 0)
        self.assertTrue(os.path.exists(sfcs_conf))
        os.remove(sfcs_conf)

        # case 2: use datapipe socket to get and refresh ak, sk, st and namenode_ip
        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
        os.environ.pop('SFCS_ACCESS_KEY', None)
        os.environ.pop('SFCS_SECRET_KEY', None)
        os.environ.pop('SFCS_NAMENODE_ENDPOINT_ADDRESS', None)
        sfcs_conf3 = os.path.join(os.getcwd(), 'base_model3.xml')
        sfcs_conf4 = os.path.join(os.getcwd(), 'base_model4.xml')
        if os.path.exists(sfcs_conf3):
            os.remove(sfcs_conf3)
        if os.path.exists(sfcs_conf4):
            os.remove(sfcs_conf4)
        init_sfcs_conf('/base_model3/tensor.pt')
        init_sfcs_conf('/base_model4/tensor.pt')
        self.assertTrue('base_model3' in credentials_helper.threads)
        self.assertTrue('base_model4' in credentials_helper.threads)
        self.assertTrue(credentials_helper.running['base_model3'])
        self.assertTrue(credentials_helper.running['base_model4'])
        self.assertTrue(os.path.exists(sfcs_conf3))
        self.assertTrue(os.path.exists(sfcs_conf4))
        # the refresher threads should re-create the conf files after deletion;
        # NOTE(review): this polls for up to ~15s and assumes the refresh period
        # is shorter than 3s — confirm against credentials_helper's interval.
        for i in range(5):
            os.remove(sfcs_conf3)
            os.remove(sfcs_conf4)
            sleep(3)
            self.assertTrue(os.path.exists(sfcs_conf3))
            self.assertTrue(os.path.exists(sfcs_conf4))
        print(credentials_helper.threads)
        os.remove(sfcs_conf3)
        os.remove(sfcs_conf4)

    def test_sfcs_conf_json(self):
        """Same as case 2 above, but SFCS_* env vars hold per-model JSON maps."""
        for e in SFCS_REQ_ENV_LIST:
            os.environ[e] = 'test-value'
        os.environ['SFCS_FSNAME'] = json.dumps({'base_model1': 'test-value1', 'base_model2': 'test-value2'})
        os.environ['SFCS_NS_ID'] = json.dumps({'base_model1': 'test-value1', 'base_model2': 'test-value2'})
        os.environ['SFCS_UFS_PATH'] = json.dumps({'base_model1': 'test-value1', 'base_model2': 'test-value2'})
        DataPipeClient.DATAPIPE_SOCKET_PATH = self.server_address
        os.environ.pop('SFCS_ACCESS_KEY', None)
        os.environ.pop('SFCS_SECRET_KEY', None)
        os.environ.pop('SFCS_NAMENODE_ENDPOINT_ADDRESS', None)
        sfcs_conf1 = os.path.join(os.getcwd(), 'base_model1.xml')
        sfcs_conf2 = os.path.join(os.getcwd(), 'base_model2.xml')
        if os.path.exists(sfcs_conf1):
            os.remove(sfcs_conf1)
        if os.path.exists(sfcs_conf2):
            os.remove(sfcs_conf2)
        init_sfcs_conf('/base_model1/tensor.pt')
        init_sfcs_conf('/base_model2/tensor.pt')
        self.assertTrue('base_model1' in credentials_helper.threads)
        self.assertTrue('base_model2' in credentials_helper.threads)
        self.assertTrue(credentials_helper.running['base_model1'])
        self.assertTrue(credentials_helper.running['base_model2'])
        self.assertTrue(os.path.exists(sfcs_conf1))
        self.assertTrue(os.path.exists(sfcs_conf2))
        # conf files must survive several refresh cycles
        for i in range(5):
            sleep(3)
            self.assertTrue(os.path.exists(sfcs_conf1))
            self.assertTrue(os.path.exists(sfcs_conf2))
        print(credentials_helper.threads)
        os.remove(sfcs_conf1)
        os.remove(sfcs_conf2)

    @classmethod
    def tearDownClass(cls):
        # Stop refresher threads first, then clear env and shut the fake server down.
        credentials_helper.stop()
        os.environ.pop('LIBCLOUDFS_CONF', None)
        for e in SFCS_REQ_ENV_LIST:
            os.environ.pop(e, None)
        for e in SFCS_OPT_ENV_LIST:
            os.environ.pop(e, None)
        cls.server.shutdown()
        cls.server.server_close()
        cls.thread.join()
        cls.sock_dir.cleanup()
class TestLoad(TestCase):
    """End-to-end load tests for veturboio.load over safetensors/pt files,
    plain and encrypted (with and without cipher header), on CPU and CUDA.
    """

    @classmethod
    def setUpClass(cls):
        # KMS credentials come from the CI environment (CI_VENDOR_AK/SK);
        # they are required for the cipher-enabled save/load paths below.
        ENV_KMS_HOST = 'VETURBOIO_KMS_HOST'
        ENV_KMS_REGION = 'VETURBOIO_KMS_REGION'
        ENV_KMS_AK = 'VETURBOIO_KMS_ACCESS_KEY'
        ENV_KMS_SK = 'VETURBOIO_KMS_SECRET_KEY'
        ENV_KMS_KEYRING = 'VETURBOIO_KMS_KEYRING_NAME'
        ENV_KMS_KEY = 'VETURBOIO_KMS_KEY_NAME'
        os.environ[ENV_KMS_HOST] = 'open.volcengineapi.com'
        os.environ[ENV_KMS_REGION] = 'cn-beijing'
        os.environ[ENV_KMS_AK] = os.environ['CI_VENDOR_AK']
        os.environ[ENV_KMS_SK] = os.environ['CI_VENDOR_SK']
        os.environ[ENV_KMS_KEYRING] = 'datapipe_keyring'
        os.environ[ENV_KMS_KEY] = 'datapipe_key_ml_maas'

        cls.tempdir = tempfile.TemporaryDirectory()

        cls.tensors_0 = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
        }

        cls.tensors_1 = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
            "weight3": torch.BoolTensor(2000, 10),
        }

        # plain safetensors fixtures (normal and fast-mode writers)
        cls.filepath_0 = os.path.join(cls.tempdir.name, "model_0.safetensors")
        cls.filepath_1 = os.path.join(cls.tempdir.name, "model_1.safetensors")
        veturboio.save_file(cls.tensors_0, cls.filepath_0)
        veturboio.save_file(cls.tensors_1, cls.filepath_1, enable_fast_mode=True)

        # plain pytorch checkpoint fixture
        cls.pt_filepath = os.path.join(cls.tempdir.name, "model.pt")
        torch.save(cls.tensors_0, cls.pt_filepath)

        # cipher
        os.environ["VETURBOIO_KEY"] = base64.b64encode(b"abcdefgh12345678").decode("ascii")
        os.environ["VETURBOIO_IV"] = base64.b64encode(b"1234567887654321").decode("ascii")

        cls.filepath_0_enc = os.path.join(cls.tempdir.name, "model_0_enc.safetensors")
        cls.filepath_1_enc = os.path.join(cls.tempdir.name, "model_1_enc.safetensors")
        veturboio.save_file(cls.tensors_0, cls.filepath_0_enc, use_cipher=True)
        veturboio.save_file(cls.tensors_1, cls.filepath_1_enc, use_cipher=True, enable_fast_mode=True)

        cls.pt_filepath_enc = os.path.join(cls.tempdir.name, "model_enc.pt")
        veturboio.save_pt(cls.tensors_0, cls.pt_filepath_enc, use_cipher=True)

        # cipher with header
        os.environ["VETURBOIO_CIPHER_HEADER"] = "1"
        cls.filepath_0_enc_h = os.path.join(cls.tempdir.name, "model_0_enc_h.safetensors")
        veturboio.save_file(cls.tensors_0, cls.filepath_0_enc_h, use_cipher=True)

        cls.pt_filepath_enc_h = os.path.join(cls.tempdir.name, "model_enc_h.pt")
        veturboio.save_pt(cls.tensors_0, cls.pt_filepath_enc_h, use_cipher=True)
        del os.environ["VETURBOIO_CIPHER_HEADER"]

        # CUDA copies of the reference tensors, only when a GPU is present
        if torch.cuda.is_available():
            cls.cuda_tensors_0 = deepcopy(cls.tensors_0)
            cls.cuda_tensors_1 = deepcopy(cls.tensors_1)

            for key in cls.cuda_tensors_0.keys():
                cls.cuda_tensors_0[key] = cls.cuda_tensors_0[key].cuda()
            for key in cls.cuda_tensors_1.keys():
                cls.cuda_tensors_1[key] = cls.cuda_tensors_1[key].cuda()

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    def _run_pipeline(self, tensors, filepath, map_location, use_cipher, enable_fast_mode=True, state_dict=None):
        """Load `filepath` via veturboio and assert every tensor matches `tensors`.

        Returns the loaded state dict so callers can run extra assertions.
        """
        loaded_tensors = veturboio.load(
            filepath,
            map_location=map_location,
            use_cipher=use_cipher,
            enable_fast_mode=enable_fast_mode,
            state_dict=state_dict,
        )
        for key in tensors.keys():
            self.assertTrue(torch.allclose(tensors[key], loaded_tensors[key]))
        return loaded_tensors

    def test_pipeline_cpu(self):
        """CPU load: plain/encrypted x fast/slow mode x fresh/pre-allocated state dict."""
        self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False)
        self._run_pipeline(self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True)
        self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False, enable_fast_mode=False)
        self._run_pipeline(self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True, enable_fast_mode=False)
        pre_allocated_tensors = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
        }
        self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False, state_dict=pre_allocated_tensors)
        self._run_pipeline(
            self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True, state_dict=pre_allocated_tensors
        )
        self._run_pipeline(
            self.tensors_0,
            self.filepath_0,
            "cpu",
            use_cipher=False,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )
        self._run_pipeline(
            self.tensors_0,
            self.filepath_0_enc,
            "cpu",
            use_cipher=True,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_pipeline_cuda(self):
        """Same matrix as test_pipeline_cpu but loading onto cuda:0."""
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False)
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True)
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False, enable_fast_mode=False)
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True, enable_fast_mode=False)
        pre_allocated_tensors = {
            "weight1": torch.randn(2000, 10).cuda(),
            "weight2": torch.IntTensor(2000, 10).cuda(),
        }
        self._run_pipeline(
            self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False, state_dict=pre_allocated_tensors
        )
        self._run_pipeline(
            self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True, state_dict=pre_allocated_tensors
        )
        self._run_pipeline(
            self.cuda_tensors_0,
            self.filepath_0,
            "cuda:0",
            use_cipher=False,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )
        self._run_pipeline(
            self.cuda_tensors_0,
            self.filepath_0_enc,
            "cuda:0",
            use_cipher=True,
            enable_fast_mode=False,
            state_dict=pre_allocated_tensors,
        )

    def test_read_multi_state_dict_cpu(self):
        """Loading different files returns state dicts with the right key counts."""
        load_tensor_0 = self._run_pipeline(self.tensors_0, self.filepath_0, "cpu", use_cipher=False)
        load_tensor_1 = self._run_pipeline(self.tensors_1, self.filepath_1, "cpu", use_cipher=False)

        self.assertEqual(len(load_tensor_0), 2)
        self.assertEqual(len(load_tensor_1), 3)

        load_tensor_0_enc = self._run_pipeline(self.tensors_0, self.filepath_0_enc, "cpu", use_cipher=True)
        load_tensor_1_enc = self._run_pipeline(self.tensors_1, self.filepath_1_enc, "cpu", use_cipher=True)

        self.assertEqual(len(load_tensor_0_enc), 2)
        self.assertEqual(len(load_tensor_1_enc), 3)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_read_multi_state_dict_cuda(self):
        """CUDA variant of test_read_multi_state_dict_cpu."""
        load_tensor_0 = self._run_pipeline(self.cuda_tensors_0, self.filepath_0, "cuda:0", use_cipher=False)
        load_tensor_1 = self._run_pipeline(self.cuda_tensors_1, self.filepath_1, "cuda:0", use_cipher=False)

        self.assertEqual(len(load_tensor_0), 2)
        self.assertEqual(len(load_tensor_1), 3)

        load_tensor_0_enc = self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc, "cuda:0", use_cipher=True)
        load_tensor_1_enc = self._run_pipeline(self.cuda_tensors_1, self.filepath_1_enc, "cuda:0", use_cipher=True)

        self.assertEqual(len(load_tensor_0_enc), 2)
        self.assertEqual(len(load_tensor_1_enc), 3)

    def test_load_pt_cpu(self):
        """veturboio.load also reads torch-saved .pt files, plain and encrypted."""
        loaded_tensors = veturboio.load(self.pt_filepath, map_location="cpu", use_cipher=False)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))

        loaded_tensors_enc = veturboio.load(self.pt_filepath_enc, map_location="cpu", use_cipher=True)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors_enc[key]))

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_load_pt_cuda(self):
        """CUDA variant of test_load_pt_cpu."""
        loaded_tensors = veturboio.load(self.pt_filepath, map_location="cuda:0", use_cipher=False)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.cuda_tensors_0[key], loaded_tensors[key]))

        loaded_tensors_enc = veturboio.load(self.pt_filepath_enc, map_location="cuda:0", use_cipher=True)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.cuda_tensors_0[key], loaded_tensors_enc[key]))

    def test_load_cipher_header_cpu(self):
        """Files written with VETURBOIO_CIPHER_HEADER=1 load back correctly on CPU."""
        self._run_pipeline(self.tensors_0, self.filepath_0_enc_h, "cpu", use_cipher=True)
        self._run_pipeline(self.tensors_0, self.pt_filepath_enc_h, "cpu", use_cipher=True)
        self._run_pipeline(self.tensors_0, self.filepath_0_enc_h, "cpu", use_cipher=True, enable_fast_mode=False)
        self._run_pipeline(self.tensors_0, self.pt_filepath_enc_h, "cpu", use_cipher=True, enable_fast_mode=False)

    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
    def test_load_cipher_header_cuda(self):
        """CUDA variant of test_load_cipher_header_cpu."""
        self._run_pipeline(self.cuda_tensors_0, self.filepath_0_enc_h, "cuda:0", use_cipher=True)
        self._run_pipeline(self.cuda_tensors_0, self.pt_filepath_enc_h, "cuda:0", use_cipher=True)
        self._run_pipeline(
            self.cuda_tensors_0, self.filepath_0_enc_h, "cuda:0", use_cipher=True, enable_fast_mode=False
        )
        self._run_pipeline(
            self.cuda_tensors_0, self.pt_filepath_enc_h, "cuda:0", use_cipher=True, enable_fast_mode=False
        )

    def test_load_directIO_fall_back(self):
        """Direct I/O on a tmpfs file (/dev/shm) must fall back gracefully."""
        with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpFile:
            veturboio.save_file(self.tensors_0, tmpFile.name)
            tmpFile.flush()
            loaded_tensors = veturboio.load(tmpFile.name, map_location="cpu", use_direct_io=True)
            for key in self.tensors_0.keys():
                self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))

    def test_load_to_shmem(self):
        """load_to_shmem copies (and decrypts) a file into shared memory; the
        shm copy is then loadable as a plain safetensors file."""
        shmem = veturboio.load_to_shmem(self.filepath_0, use_cipher=False)
        loaded_tensors = veturboio.load(
            os.path.join("/dev/shm/", shmem.name), map_location="cpu", enable_fast_mode=False, use_cipher=False
        )
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))
        shmem.close()
        shmem.unlink()

        # encrypted source: the shm copy is decrypted, so use_cipher=False on reload
        shmem = veturboio.load_to_shmem(self.filepath_0_enc, use_cipher=True)
        loaded_tensors = veturboio.load(
            os.path.join("/dev/shm/", shmem.name), map_location="cpu", enable_fast_mode=False, use_cipher=False
        )
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))
        shmem.close()
        shmem.unlink()
class TestSave(TestCase):
    """Tests for veturboio's save entry points (save_file/save_model/save_pt),
    cross-checked against the upstream safetensors reader and torch.load.
    """

    @classmethod
    def setUpClass(cls):
        cls.tensors_0 = {
            "weight1": torch.randn(2000, 10),
            "weight2": torch.IntTensor(2000, 10),
            "weight3": torch.BoolTensor(2000, 10),
        }

        # minimal module whose state_dict we round-trip in test_save_model
        class MockModel(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()

                self.linear1 = torch.nn.Linear(100, 50)
                self.linear2 = torch.nn.Linear(100, 50)

        cls.model = MockModel()

        cls.tempdir = tempfile.TemporaryDirectory()
        cls.filepath_0 = os.path.join(cls.tempdir.name, "model_0.safetensors")
        cls.filepath_1 = os.path.join(cls.tempdir.name, "model_0.pt")
        cls.filepath_2 = os.path.join(cls.tempdir.name, "model_0_fast.safetensors")
        cls.filepath_3 = os.path.join(cls.tempdir.name, "model_1.safetensors")

    @classmethod
    def tearDownClass(cls):
        cls.tempdir.cleanup()

    def test_save_file(self):
        """Files written by save_file are readable by the reference safetensors reader."""
        veturboio.save_file(self.tensors_0, self.filepath_0)
        with safe_open(self.filepath_0, framework="pt", device="cpu") as f:
            assert len(f.keys()) == 3
            for key in f.keys():
                self.assertTrue(torch.allclose(self.tensors_0[key], f.get_tensor(key)))

        # enable fast mode
        veturboio.save_file(self.tensors_0, self.filepath_2, enable_fast_mode=True)
        with safe_open(self.filepath_2, framework="pt", device="cpu") as f:
            assert len(f.keys()) == 3
            for key in f.keys():
                self.assertTrue(torch.allclose(self.tensors_0[key], f.get_tensor(key)))

    def test_save_file_for_clone_share_tensors(self):
        """force_clone_shared_tensor writes every alias of a shared tensor
        without mutating the caller's dict."""
        share_dict = {"key1": self.tensors_0["weight1"], "key2": self.tensors_0["weight1"]}
        veturboio.save_file(share_dict, self.filepath_0, force_save_shared_tensor=True, force_clone_shared_tensor=True)
        assert len(share_dict) == 2  # assert save_file won't change user's state_dict.
        with safe_open(self.filepath_0, framework="pt", device="cpu") as f:
            for key in f.keys():
                assert key in share_dict
                self.assertTrue(torch.allclose(share_dict[key], f.get_tensor(key)))

    def test_save_model(self):
        """save_model + load (with cipher) round-trips a module's state_dict."""
        veturboio.save_model(self.model, self.filepath_3, use_cipher=True)
        loaded_tensors = veturboio.load(self.filepath_3, map_location="cpu", use_cipher=True)
        state_dict = self.model.state_dict()
        for key in state_dict.keys():
            self.assertTrue(torch.allclose(state_dict[key], loaded_tensors[key]))

    def test_save_pt(self):
        """Files written by save_pt are readable by plain torch.load."""
        veturboio.save_pt(self.tensors_0, self.filepath_1)
        loaded_tensors = torch.load(self.filepath_1)
        for key in self.tensors_0.keys():
            self.assertTrue(torch.allclose(self.tensors_0[key], loaded_tensors[key]))
class TestSharedTensorLoad(TestCase):
    """Round-trip tests for models whose state dict contains shared tensors
    (here, ``linear3`` aliases ``linear2``)."""

    @classmethod
    def setUpClass(cls):
        class MockModel(torch.nn.Module):
            def __init__(self) -> None:
                super().__init__()

                self.linear1 = torch.nn.Linear(10, 20)
                self.linear2 = torch.nn.Linear(20, 10)
                # deliberate aliasing: linear3 shares parameters with linear2
                self.linear3 = self.linear2

        cls.model = MockModel()

    def test_pipeline(self):
        """save_model -> load round-trips every entry of the state dict."""
        with tempfile.TemporaryDirectory() as workdir:
            model_path = os.path.join(workdir, "model.safetensors")
            veturboio.save_model(self.model, model_path)
            reloaded = veturboio.load(model_path, map_location="cpu")

            for name, expected in self.model.state_dict().items():
                self.assertTrue(torch.allclose(expected, reloaded[name]))

    def test_save_file(self):
        """save_file with force_save_shared_tensor keeps aliased entries intact."""
        with tempfile.TemporaryDirectory() as workdir:
            model_path = os.path.join(workdir, "model.safetensors")
            veturboio.save_file(self.model.state_dict(), model_path, force_save_shared_tensor=True)
            reloaded = veturboio.load(model_path, map_location="cpu")

            for name, expected in self.model.state_dict().items():
                self.assertTrue(torch.allclose(expected, reloaded[name]))
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | from veturboio.io import load, load_to_shmem, save_file, save_model, save_pt 18 | from veturboio.ops.io_utils import init_io_helper 19 | 20 | __all__ = ["load", "load_to_shmem", "save_file", "save_model", "init_io_helper", "save_pt"] 21 | -------------------------------------------------------------------------------- /veturboio/convert.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
def to_valid_state_dict(state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
    """Return a copy of ``state_dict`` that can be safely saved as safetensors.

    - Entries whose value is not a ``torch.Tensor`` are dropped (with a warning).
    - For each group of shared tensors (as reported by
      ``safetensors.torch._find_shared_tensors``), all names are mapped to a
      single tensor; if that tensor is not "complete" it is cloned first so
      the serialized data is self-contained.

    Args:
        state_dict: mapping of parameter name -> tensor (non-tensor values allowed).

    Returns:
        A new dict with the same keys (minus non-tensor entries).

    Raises:
        Exception: if tensors in a shared group differ in data pointer or shape.
    """
    invalid_key = [k for k, v in state_dict.items() if not isinstance(v, torch.Tensor)]
    if len(invalid_key) > 0:
        # BUGFIX: the module-level `logger` is only created under the
        # `if __name__ == "__main__"` guard, so referencing it here raised
        # NameError when this module was imported. Use a locally obtained
        # module logger instead.
        logging.getLogger(__name__).warning(f"invalid keys to removed: {invalid_key}")
        state_dict = {k: v for k, v in state_dict.items() if k not in invalid_key}

    result = {}
    shared_tensor_groups = _find_shared_tensors(state_dict)
    for group in shared_tensor_groups:
        # check if all share tensors have the same data ptr, same shape, and same size
        shared_tensors = [state_dict[k] for k in group]
        data_ptrs = [t.data_ptr() for t in shared_tensors]
        shapes = [t.shape for t in shared_tensors]
        if len(set(data_ptrs)) != 1 or len(set(shapes)) != 1:
            raise Exception(f"shared tensors {group} are not equal")
        # make sure these tensors are complete and identical
        converted_tensor = shared_tensors[0]
        if not _is_complete(converted_tensor):
            converted_tensor = converted_tensor.clone()
        for t in group:
            result[t] = converted_tensor
    # pass through all remaining (non-shared) tensors unchanged
    for k, v in state_dict.items():
        if k not in result:
            result[k] = v
    return result
input_state_dict.items() if isinstance(v, torch.Tensor)} 70 | output_state_dict = {k: v for k, v in output_state_dict.items() if isinstance(v, torch.Tensor)} 71 | 72 | input_key_set = set(input_state_dict.keys()) 73 | output_key_set = set(output_state_dict.keys()) 74 | 75 | if input_key_set != output_key_set: 76 | not_in_output_key_set = input_key_set - output_key_set 77 | not_in_input_key_set = output_key_set - input_key_set 78 | raise Exception( 79 | f"key set not equal, not in output key set: {not_in_output_key_set}, not in input key set: {not_in_input_key_set}" 80 | ) 81 | 82 | not_equal_tensor = [] 83 | for key in input_state_dict: 84 | if not torch.allclose(input_state_dict[key], output_state_dict[key]): 85 | not_equal_tensor.append(key) 86 | if len(not_equal_tensor) > 0: 87 | raise Exception(f"result is not valid, not equal tensors: {not_equal_tensor}") 88 | 89 | logger.info(f"all {len(input_key_set)} keys in state dict are equal") 90 | 91 | 92 | def _get_available_cpu() -> int: 93 | avail_cpu = os.cpu_count() 94 | if os.path.isfile('/sys/fs/cgroup/cpu/cpu.cfs_quota_us'): 95 | cpu_quota = int(open('/sys/fs/cgroup/cpu/cpu.cfs_quota_us').read().rstrip()) 96 | if cpu_quota != -1 and os.path.isfile('/sys/fs/cgroup/cpu/cpu.cfs_period_us'): 97 | cpu_period = int(open('/sys/fs/cgroup/cpu/cpu.cfs_period_us').read().rstrip()) 98 | avail_cpu = int(cpu_quota / cpu_period) 99 | logger.info(f"get veturboio thread {avail_cpu} from cgroup info") 100 | return avail_cpu 101 | 102 | 103 | class Pt2SafeTensorConverter: 104 | def __init__( 105 | self, 106 | input_path: str, 107 | output_path: str, 108 | dry_run: bool, 109 | enable_to_valid_state_dict: bool, 110 | overwrite: bool, 111 | use_direct_io: bool, 112 | ): 113 | self.input_path = input_path 114 | self.output_path = output_path 115 | self.dry_run = dry_run 116 | self.enable_to_valid_state_dict = enable_to_valid_state_dict 117 | self.use_direct_io = use_direct_io 118 | if self.input_path.startswith("sfcs://"): 119 | try: 
    def convert(self):
        """Load the input .pt checkpoint and write it back out as .safetensors.

        Honors ``self.dry_run`` (log only, no I/O) and, when
        ``self.enable_to_valid_state_dict`` is set, sanitizes the state dict
        via ``to_valid_state_dict`` before saving. Timing for both phases is
        logged.
        """
        # NOTE(review): `logger` is the module-level logger created under the
        # __main__ guard below — this method assumes it runs as a script;
        # importing this class elsewhere and calling convert() would raise
        # NameError. Confirm intended usage.
        logger.info(f"converting {self.input_path} to {self.output_path}")
        available_cpus = _get_available_cpu()
        ext_name = self.output_path.split(".")[-1]
        state_dict = {}
        if ext_name != "safetensors":
            raise ValueError("output file should be safetensors file")
        logger.info(f"start loading the pt file, the pt file has size of {self.input_file_size // 1000 // 1000}MB")
        start_time = datetime.now()
        if self.dry_run:
            logger.info("dry run finished for veturboio.load_pt_file")
        else:
            # fast-mode load with one thread per available CPU
            state_dict = veturboio.load(
                self.input_path, num_thread=available_cpus, use_direct_io=self.use_direct_io, enable_fast_mode=True
            )
        end_time = datetime.now()
        logger.info(f"finish loading the pt file with duration {end_time - start_time}")
        logger.info("start saving the safetensors file")
        start_time = datetime.now()
        if self.dry_run:
            logger.info("dry run finished for veturboio.save_safetensors_file")
        else:
            if self.enable_to_valid_state_dict:
                state_dict = to_valid_state_dict(state_dict)
            veturboio.save_file(state_dict, self.output_path, force_save_shared_tensor=True)
        end_time = datetime.now()
        logger.info(f"finish saving the safetensors file with duration {end_time - start_time}")

        # release the (potentially huge) state dict before returning
        del state_dict
        gc.collect()
        logger.info(f"gc finished")
parser.add_argument( 212 | "--output", 213 | "-o", 214 | type=str, 215 | required=False, 216 | help="indicate the path of .safeTensor file, both " 217 | "posix path and sfcs prefix are supported." 218 | "will be placed into the same dir of the .pt " 219 | "file if left empty", 220 | ) 221 | parser.add_argument("--dry-run", "-d", action="store_true", help="just dry run, not really convert") 222 | parser.add_argument("--overwrite", action="store_true", help="overwrite the output file if it exists") 223 | parser.add_argument( 224 | "--enable-to-valid-state-dict", 225 | action="store_true", 226 | help="execute to_valid_state_dict function before save to .safetensors", 227 | ) 228 | parser.add_argument("--validate-result", action="store_true", help="validate result", default=False) 229 | parser.add_argument("--use-direct-io", action="store_true", help="use direct io to load file", default=False) 230 | args = parser.parse_args() 231 | 232 | instance = Pt2SafeTensorConverter( 233 | args.input, args.output, args.dry_run, args.enable_to_valid_state_dict, args.overwrite, args.use_direct_io 234 | ) 235 | try: 236 | instance.convert() 237 | if args.validate_result: 238 | instance.validate() 239 | except Exception as e: 240 | logger.error(f"convert failed.") 241 | traceback.print_exc() 242 | exit(1) 243 | -------------------------------------------------------------------------------- /veturboio/io.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
def is_sfcs_path(file: "FILE_PATH"):
    """Classify a path and normalize it for the chosen I/O backend.

    Args:
        file: raw path; may carry an ``sfcs://`` scheme prefix.

    Returns:
        Tuple ``(use_sfcs_sdk, path)``. For ``sfcs://...`` the scheme is
        stripped down to a leading ``/`` and the SFCS SDK is selected;
        ``/dev/shm/...`` always uses plain POSIX I/O; otherwise the
        ``VETURBOIO_USE_SFCS_SDK`` environment variable decides.
    """
    # The len() guards preserve the original behavior for the bare prefixes
    # "sfcs://" and "/dev/shm/" themselves (they fall through to the env check
    # / default branch).
    if file.startswith("sfcs://") and len(file) > 7:
        # Drop "sfcs:/" so the remainder keeps its leading "/".
        return True, file[6:]
    if file.startswith("/dev/shm/") and len(file) > 9:
        # Shared-memory files are read directly via POSIX.
        return False, file
    if os.environ.get("VETURBOIO_USE_SFCS_SDK", "0") == "1":
        return True, file
    return False, file
def load(
    file: FILE_PATH,
    map_location: Optional[str] = "cpu",
    enable_fast_mode: Optional[bool] = True,
    num_thread: Optional[int] = 32,
    helper: Optional[IOHelper] = None,
    use_pinmem: Optional[bool] = False,
    use_direct_io: Optional[bool] = False,
    use_cipher: Optional[bool] = None,
    state_dict: Dict[str, torch.Tensor] = None,
) -> Dict:
    """Load state dict object from checkpoint file. The file can be both safetensors file and pytorch file.
    If the file is safetensors file, it will be loaded by veturboio and the loading speed will be accelerated.

    Args:
        file (FILE_PATH): file path
        map_location (str, optional): map location. Defaults to "cpu".
        enable_fast_mode (bool, optional): enable fast mode. Defaults to True.
        num_thread (int, optional): number of threads. Defaults to 32.
        helper (IOHelper, optional): use IOHelper. Defaults to None.
        use_pinmem (bool, optional): use pin memory. Defaults to False.
        use_direct_io (bool, optional): open file in direct io mode. Defaults to False.
        use_cipher (bool, optional): decrypt file. Defaults to None. Note: cipher is
            disabled by force when use_cipher set to False. Otherwise, when use_cipher
            set to True or environ VETURBOIO_USE_CIPHER set to '1', cipher is enabled.
        state_dict (Dict): pre allocated state dict. Defaults to None.

    Returns:
        state_dict (Dict): state dict

    Examples:
        ```
        import veturboio
        state_dict = veturboio.load("model.safetensors")
        ```
    """

    if IOHelper is None:
        # Native extension unavailable: fall back to the plain POSIX loader.
        enable_fast_mode = False
    elif helper is None:
        helper = IOHelper()

    use_sfcs_sdk, file = is_sfcs_path(file)
    if not enable_fast_mode:
        loader = PosixLoader(file)
    elif use_sfcs_sdk:
        loader = SfcsClientLoader(
            helper=helper,
            file=file,
            num_thread=num_thread,
            use_pinmem=use_pinmem,
            use_direct_io=use_direct_io,
        )
    else:
        loader = FasterPosixLoader(
            file,
            helper,
            num_thread=num_thread,
            use_pinmem=use_pinmem,
            use_direct_io=use_direct_io,
        )

    safetensors_file = SafetensorsFile(file, loader, use_cipher)
    return safetensors_file.load(map_location=map_location, state_dict=state_dict)
def load_to_shmem(
    file: FILE_PATH,
    num_thread: Optional[int] = 32,
    helper: Optional[IOHelper] = None,
    use_direct_io: Optional[bool] = False,
    use_cipher: Optional[bool] = None,
) -> shared_memory.SharedMemory:
    """Read a checkpoint file into a POSIX shared-memory segment.

    Args:
        file (FILE_PATH): file path
        num_thread (int, optional): number of threads. Defaults to 32.
        helper (IOHelper, optional): use IOHelper. Defaults to None.
        use_direct_io (bool, optional): open file in direct io mode. Defaults to False.
        use_cipher (bool, optional): decrypt file. Defaults to None. Note: cipher is
            disabled by force when use_cipher set to False. Otherwise, when use_cipher
            set to True or environ VETURBOIO_USE_CIPHER set to '1', cipher is enabled.

    Returns:
        shmem (shared_memory.SharedMemory): shared memory object.

    Examples:
        ```
        import veturboio
        shmem_file = veturboio.load_to_shmem("sfcs://model.safetensors")
        ```
    """

    helper = IOHelper() if helper is None else helper

    use_sfcs_sdk, file = is_sfcs_path(file)
    if use_sfcs_sdk:
        # SFCS SDK path; direct I/O does not apply here.
        loader = SfcsClientLoader(
            helper=helper,
            file=file,
            num_thread=num_thread,
        )
    else:
        loader = FasterPosixLoader(
            file,
            helper,
            num_thread=num_thread,
            use_direct_io=use_direct_io,
        )

    return SafetensorsFile(file, loader, use_cipher).load_to_shmem()
def save_file(
    state_dict: Dict[str, torch.Tensor],
    file: FILE_PATH,
    force_contiguous: bool = True,
    force_save_shared_tensor: bool = False,
    force_clone_shared_tensor: bool = False,
    metadata: Dict[str, str] = None,
    use_cipher: Optional[bool] = False,
    helper: Optional[IOHelper] = None,
    enable_fast_mode: Optional[bool] = False,
) -> None:
    """Save state dict object to safetensors file.

    Args:
        state_dict (Dict): state dict
        file (FILE_PATH): file path
        force_contiguous (bool, optional): force contiguous. Defaults to True.
        force_save_shared_tensor (bool, optional): force save shared tensor. Defaults to False.
        force_clone_shared_tensor (bool, optional): force to clone shared tensor rather than delete
            when force_save_shared_tensor is enabled. Defaults to False.
        metadata (Dict[str, str], optional): metadata. Defaults to None.
        use_cipher (bool, optional): decrypt file. Defaults to False.
        helper (IOHelper, optional): use IOHelper. Defaults to None.
        enable_fast_mode (bool, optional): enable fast mode. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio

        state_dict = {"weight": torch.randn(10, 10)}
        veturboio.save_file(state_dict, "model.safetensors")
        ```
    """
    if helper is None:
        helper = IOHelper()

    use_sfcs_sdk, file = is_sfcs_path(file)
    if use_sfcs_sdk:
        saver = SfcsClientSaver(file=file, use_cipher=use_cipher, helper=helper)
    else:
        saver = PosixSaver(file=file, use_cipher=use_cipher, helper=helper)

    # TODO: there are some bugs while state_dict is loaded from veturboio
    if not force_save_shared_tensor:
        if force_clone_shared_tensor:
            logger.warning("force_clone_shared_tensor won't take any effect while force_save_shared_tensor is False;")
        # Previously a ValueError raised by the saver was caught and re-raised
        # as a bare ValueError(str(e)), discarding the subclass and chaining;
        # callers catching ValueError still work if it propagates unchanged.
        return saver.save_file(state_dict, metadata=metadata, enable_fast_mode=enable_fast_mode)

    # Deduplicate tensors that share storage; record the kept name in metadata
    # so the mapping survives the round-trip.
    to_removes = _remove_duplicate_names(state_dict)

    for kept_name, to_remove_group in to_removes.items():
        for to_remove in to_remove_group:
            if metadata is None:
                metadata = {}

            if to_remove not in metadata:
                # Do not override user data
                metadata[to_remove] = kept_name
            if force_clone_shared_tensor:
                # Keep the caller's dict intact by materializing a private copy.
                state_dict[to_remove] = state_dict[to_remove].clone()
            else:
                del state_dict[to_remove]
    if force_contiguous:
        state_dict = {k: v.contiguous() for k, v in state_dict.items()}

    return saver.save_file(state_dict, metadata=metadata, enable_fast_mode=enable_fast_mode)
def save_model(model: torch.nn.Module, file: FILE_PATH, use_cipher: Optional[bool] = False) -> None:
    """Save model state dict to safetensors file.

    Args:
        model (torch.nn.Module): model
        file (FILE_PATH): file path
        use_cipher (bool, optional): decrypt file. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio

        model = torch.nn.Linear(10, 10)
        veturboio.save_model(model, "model.safetensors")
        ```
    """

    use_sfcs_sdk, file = is_sfcs_path(file)
    # Pick the saver implementation that matches the path scheme.
    saver_cls = SfcsClientSaver if use_sfcs_sdk else PosixSaver
    return saver_cls(file=file, use_cipher=use_cipher).save_model(model)


def save_pt(state_dict: Dict[str, torch.Tensor], file: FILE_PATH, use_cipher: Optional[bool] = False) -> None:
    """Save state dict object to pytorch file.

    Args:
        state_dict (Dict): state dict
        file (FILE_PATH): file path
        use_cipher (bool, optional): encrypt file. Defaults to False.

    Examples:
        ```
        import torch
        import veturboio

        state_dict = {"weight": torch.randn(10, 10)}
        veturboio.save_pt(state_dict, "model.pt")
        ```
    """
    use_sfcs_sdk, file = is_sfcs_path(file)
    saver_cls = SfcsClientSaver if use_sfcs_sdk else PosixSaver
    return saver_cls(file=file, use_cipher=use_cipher).save_pt(state_dict)
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | from veturboio.loader.base_loader import BaseLoader, PosixLoader 18 | from veturboio.loader.faster_posix_loader import FasterPosixLoader 19 | from veturboio.loader.sfcs_client_loader import SfcsClientLoader 20 | 21 | __all__ = ["BaseLoader", "PosixLoader", "FasterPosixLoader", "SfcsClientLoader"] 22 | -------------------------------------------------------------------------------- /veturboio/loader/base_loader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
# Magic number used to identify safetensors files (read elsewhere in the package).
SAFETENSORS_FILE_MAGIC_NUM = 123
# Alignment granularity for O_DIRECT-style buffers.
BUF_ALIGN_SIZE = 4096


class BaseLoader:
    """Abstract loader: concrete subclasses implement a specific I/O backend."""

    def __init__(self, method: str) -> None:
        # Backend identifier, e.g. "posix" or "client".
        self.method = method

    def load_to_bytes(self, offset: int, count: int, cipher_info: CipherInfo = CipherInfo(False)) -> bytes:
        """Read `count` raw bytes starting at `offset`; subclasses must override."""
        raise NotImplementedError

    def load_safetensors(
        self,
        safetensors_file: Any,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Materialize all tensors of a safetensors file; subclasses must override."""
        raise NotImplementedError

    def init_aligned_tensor(self, device, device_id: int, file_size, base_offset: int) -> torch.Tensor:
        """Allocate a flat uint8 tensor of size (file_size - base_offset).

        On CPU (device_id == -1) the buffer start is chosen so that its address
        is congruent to base_offset modulo BUF_ALIGN_SIZE, which aligned/direct
        reads require.
        """
        if device_id != -1:
            # CUDA path. (A previous version caught RuntimeError here only to
            # re-raise RuntimeError(str(e)), which dropped the original
            # traceback chaining and subclass — let it propagate as-is.)
            total_tensor = torch.empty(file_size - base_offset, dtype=torch.uint8, device=device)
        else:
            # Over-allocate by one alignment unit, then slice so that
            # (buffer_address + align) % BUF_ALIGN_SIZE == base_offset % BUF_ALIGN_SIZE.
            array = np.empty(file_size - base_offset + BUF_ALIGN_SIZE, dtype=np.uint8)
            offset1 = array.ctypes.data % BUF_ALIGN_SIZE
            offset2 = base_offset % BUF_ALIGN_SIZE
            if offset1 > offset2:
                align = BUF_ALIGN_SIZE - offset1 + offset2
            else:
                align = offset2 - offset1

            sub_array = array[align : align + file_size - base_offset].view(dtype=np.uint8)
            # torch shares memory with the numpy slice; `array` stays alive
            # through the tensor's base reference.
            total_tensor = torch.from_numpy(sub_array)
        return total_tensor
class PosixLoader(BaseLoader):
    """Loader that reads checkpoint files with plain POSIX/numpy file I/O."""

    def __init__(self, file: FILE_PATH) -> None:
        super().__init__(method="posix")
        # Path of the checkpoint file on a locally mounted filesystem.
        self.file = file

    def load_to_bytes(self, offset: int, count: int, cipher_info: CipherInfo = CipherInfo(False)) -> bytes:
        """Read `count` bytes at absolute file `offset`, decrypting in place if enabled."""
        arr = np.fromfile(self.file, dtype=np.uint8, offset=offset, count=count)
        if cipher_info.use_cipher:
            # The cipher counter is relative to the end of the optional cipher
            # header, hence the offset correction.
            h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
            decrypt(cipher_info, arr, arr, offset - h_off)
        return arr.tobytes()

    def load_safetensors(
        self,
        safetensors_file: Any,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Map each tensor region of the file and build (or fill) a state dict.

        Tensors are memory-mapped copy-on-write, optionally decrypted in the
        mapped pages, then viewed with their recorded dtype/shape.
        """
        if not state_dict:
            state_dict = {}

        # All data offsets in the metadata are relative to this base.
        base_offset = safetensors_file.tensor_offset
        device = torch.device(map_location)

        cipher_info = safetensors_file._cipher_info
        for tensor_meta in safetensors_file.meta.values():
            # mode="c" (copy-on-write) keeps the on-disk file unmodified even
            # though decrypt() writes into the mapped buffer.
            tensor_bytes = np.memmap(
                safetensors_file.file,
                dtype=np.uint8,
                mode="c",
                offset=base_offset + tensor_meta.data_offsets[0],
                shape=tensor_meta.data_offsets[1] - tensor_meta.data_offsets[0],
            )
            if cipher_info.use_cipher:
                h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
                decrypt(cipher_info, tensor_bytes, tensor_bytes, base_offset + tensor_meta.data_offsets[0] - h_off)
            tensor = torch.frombuffer(tensor_bytes, dtype=tensor_meta.dtype)
            tensor = tensor.view(tensor_meta.shape)
            if device.type == "cuda":
                # Stage through pinned memory for an async host-to-device copy.
                state_dict[tensor_meta.name] = tensor.pin_memory().to(device=device, non_blocking=True)
            else:
                state_dict[tensor_meta.name] = tensor

        return state_dict

    def load_pt(
        self, map_location: str = "cpu", cipher_info: CipherInfo = CipherInfo(False)
    ) -> Dict[str, torch.Tensor]:
        """Load a pickled torch checkpoint, decrypting the whole file first if needed."""
        if cipher_info.use_cipher:
            h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
            arr = np.fromfile(self.file, dtype=np.uint8, offset=h_off, count=-1)
            decrypt(cipher_info, arr, arr, 0)
            return torch.load(io.BytesIO(arr.data), map_location=map_location)

        return torch.load(self.file, map_location=map_location)
class FasterPosixLoader(PosixLoader):
    """POSIX loader accelerated by the native IOHelper extension (multi-threaded
    reads, optional pinned memory and direct I/O)."""

    def __init__(
        self,
        file: FILE_PATH,
        helper: IOHelper,
        num_thread: int = 32,
        use_pinmem: bool = False,
        use_direct_io: bool = False,
    ) -> None:
        super().__init__(file)
        self.helper = helper
        self.num_thread = num_thread
        self.use_pinmem = use_pinmem
        self.use_direct_io = use_direct_io

    def load_safetensors(
        self,
        safetensors_file: SafetensorsFile,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Load tensors via the native helper.

        With a pre-allocated `state_dict`, each tensor is read directly into
        its destination buffer; otherwise one flat aligned tensor is filled and
        then split according to the file metadata.
        """
        file_size = os.path.getsize(safetensors_file.file)
        base_offset = safetensors_file.tensor_offset
        device = torch.device(map_location)
        if device.type == "cuda":
            device_id = device.index if device.index is not None else torch.cuda.current_device()
        else:
            device_id = -1

        if state_dict:
            for tensor_meta in safetensors_file._meta.values():
                tensor = state_dict[tensor_meta.name]
                # The native reader writes into the raw buffer, so the
                # destination must be contiguous, type-matched and big enough.
                if not tensor.is_contiguous():
                    raise RuntimeError("allocated tensor not contiguous")
                if not tensor.dtype == tensor_meta.dtype:
                    raise RuntimeError("allocated tensor dtype not match")

                offset = tensor_meta.data_offsets[0]
                length = tensor_meta.data_offsets[1] - tensor_meta.data_offsets[0]
                tensor_length = torch.numel(tensor) * tensor.element_size()
                if tensor_length < length:
                    raise RuntimeError("allocated tensor size not enough")

                load_file_to_tensor(
                    file_path=safetensors_file.file,
                    total_tensor=tensor,
                    length=length,
                    offset=base_offset + offset,
                    helper=self.helper,
                    device_id=device_id,
                    num_thread=self.num_thread,
                    use_pinmem=self.use_pinmem,
                    use_sfcs_sdk=False,
                    use_direct_io=self.use_direct_io,
                    cipher_info=safetensors_file._cipher_info,
                )
                tensor = tensor.resize_(tensor_meta.shape)
                state_dict[tensor_meta.name] = tensor
            return state_dict
        else:
            total_tensor = self.init_aligned_tensor(device, device_id, file_size, base_offset)
            load_file_to_tensor(
                file_path=safetensors_file.file,
                total_tensor=total_tensor,
                offset=base_offset,
                helper=self.helper,
                device_id=device_id,
                num_thread=self.num_thread,
                use_pinmem=self.use_pinmem,
                use_sfcs_sdk=False,
                use_direct_io=self.use_direct_io,
                cipher_info=safetensors_file._cipher_info,
            )

            return SafetensorsFile.split_tensor_to_state_dict(total_tensor, safetensors_file)

    def load_to_shmem(self, cipher_info: CipherInfo = CipherInfo(False)) -> shared_memory.SharedMemory:
        """Read the whole file into a freshly created shared-memory segment.

        The caller owns the returned segment and must close/unlink it.
        """
        file_size = os.path.getsize(self.file)
        # Random 10-letter segment name; collisions would raise FileExistsError.
        file_name = ''.join(random.sample(string.ascii_lowercase + string.ascii_uppercase, 10))
        shm = shared_memory.SharedMemory(name=file_name, create=True, size=file_size)

        # Skip the cipher header (if any); its size worth of trailing bytes in
        # the segment stays unused.
        h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
        candidate = np.frombuffer(shm.buf, dtype=np.byte)
        posix_read_file(
            self.file,
            candidate,
            length=file_size - h_off,
            offset=h_off,
            num_thread=self.num_thread,
            cipher_info=cipher_info,
        use_direct_io=self.use_direct_io,
        )
        return shm

    # NOTE: a load_pt() override used to live here, but it was byte-for-byte
    # identical to the inherited PosixLoader.load_pt, so it was removed.
class SfcsClientLoader(BaseLoader):
    """Loader that reads checkpoint files through the SFCS client SDK."""

    def __init__(
        self,
        file: FILE_PATH,
        helper: IOHelper,
        num_thread: int = 32,
        use_pinmem: bool = False,
        use_direct_io: bool = False,
    ) -> None:
        super().__init__(method="client")

        self.file = file
        self.helper = helper
        self.num_thread = num_thread
        self.use_pinmem = use_pinmem
        self.use_direct_io = use_direct_io
        # Resolve the SFCS mount point once and map the user path into it.
        self._mount_path = init_sfcs_conf(file)
        self._sfcs_valid_path = path_mapper(self.file, self._mount_path)

    def load_to_bytes(self, offset: int, count: int, cipher_info: CipherInfo = CipherInfo(False)) -> bytes:
        """Read up to `count` bytes at `offset` (clamped to the file size)."""
        file_size = sfcs_get_file_size(self._sfcs_valid_path)
        if offset + count > file_size:
            count = file_size - offset

        # Read into a mutable bytearray. The previous implementation wrapped an
        # immutable `bytes` object with np.frombuffer (a read-only view) and
        # wrote through it anyway, which relies on CPython implementation
        # details; a bytearray gives a legitimately writable buffer.
        buf = bytearray(count)
        candidate = np.frombuffer(buf, dtype=np.byte)
        sfcs_read_file(
            self._sfcs_valid_path,
            candidate,
            length=count,
            offset=offset,
            num_thread=self.num_thread,
            cipher_info=cipher_info,
        )
        return bytes(buf)

    def load_to_shmem(self, cipher_info: CipherInfo = CipherInfo(False)) -> shared_memory.SharedMemory:
        """Read the whole file into a freshly created shared-memory segment."""
        file_size = sfcs_get_file_size(self._sfcs_valid_path)
        # Random 10-letter segment name; collisions would raise FileExistsError.
        file_name = ''.join(random.sample(string.ascii_lowercase + string.ascii_uppercase, 10))
        shm = shared_memory.SharedMemory(name=file_name, create=True, size=file_size)

        # Skip the cipher header (if any); that many trailing bytes in the
        # segment stay unused.
        h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
        candidate = np.frombuffer(shm.buf, dtype=np.byte)
        sfcs_read_file(
            self._sfcs_valid_path,
            candidate,
            length=file_size - h_off,
            offset=h_off,
            num_thread=self.num_thread,
            cipher_info=cipher_info,
        )
        return shm

    def load_safetensors(
        self,
        safetensors_file: SafetensorsFile,
        map_location: str = "cpu",
        state_dict: Dict[str, torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """Load tensors via the native helper using the SFCS SDK backend.

        With a pre-allocated `state_dict`, each tensor is read directly into
        its destination buffer; otherwise one flat aligned tensor is filled and
        then split according to the file metadata.
        """
        # TODO should be the same as self.loader
        sfcs_valid_path = path_mapper(safetensors_file.file, self._mount_path)
        file_size = sfcs_get_file_size(sfcs_valid_path)
        base_offset = safetensors_file.tensor_offset
        device = torch.device(map_location)
        if device.type == "cuda":
            device_id = device.index if device.index is not None else torch.cuda.current_device()
        else:
            device_id = -1

        if state_dict:
            for tensor_meta in safetensors_file._meta.values():
                tensor = state_dict[tensor_meta.name]
                # The native reader writes into the raw buffer, so the
                # destination must be contiguous, type-matched and big enough.
                if not tensor.is_contiguous():
                    raise RuntimeError("allocated tensor not contiguous")
                if not tensor.dtype == tensor_meta.dtype:
                    raise RuntimeError("allocated tensor dtype not match")

                offset = tensor_meta.data_offsets[0]
                length = tensor_meta.data_offsets[1] - tensor_meta.data_offsets[0]
                tensor_length = torch.numel(tensor) * tensor.element_size()
                if tensor_length < length:
                    raise RuntimeError("allocated tensor size not enough")

                load_file_to_tensor(
                    file_path=sfcs_valid_path,
                    total_tensor=tensor,
                    length=length,
                    offset=base_offset + offset,
                    helper=self.helper,
                    device_id=device_id,
                    num_thread=self.num_thread,
                    use_pinmem=self.use_pinmem,
                    use_sfcs_sdk=True,
                    use_direct_io=self.use_direct_io,
                    cipher_info=safetensors_file._cipher_info,
                )
                tensor = tensor.resize_(tensor_meta.shape)
                state_dict[tensor_meta.name] = tensor
            return state_dict
        else:
            total_tensor = self.init_aligned_tensor(device, device_id, file_size, base_offset)
            load_file_to_tensor(
                file_path=sfcs_valid_path,
                total_tensor=total_tensor,
                offset=base_offset,
                helper=self.helper,
                device_id=device_id,
                num_thread=self.num_thread,
                use_pinmem=self.use_pinmem,
                use_sfcs_sdk=True,
                use_direct_io=self.use_direct_io,
                cipher_info=safetensors_file._cipher_info,
            )

            return SafetensorsFile.split_tensor_to_state_dict(total_tensor, safetensors_file)

    def load_pt(
        self, map_location: str = "cpu", cipher_info: CipherInfo = CipherInfo(False)
    ) -> Dict[str, torch.Tensor]:
        """Load a pickled torch checkpoint from SFCS, decrypting if needed."""
        file_size = sfcs_get_file_size(self._sfcs_valid_path)
        h_off = CipherInfo.HEADER_SIZE if cipher_info.use_header else 0
        file_bytes = self.load_to_bytes(offset=h_off, count=file_size - h_off, cipher_info=cipher_info)
        return torch.load(BytesIO(file_bytes), map_location=map_location)
"/root/.volc/SFCSConfiguration.json" 7 | 8 | RDMA_NIC_ENV = "MLP_RDMA_NIC_NAMES" 9 | DEFAULT_NIC_NAME = "eth0" 10 | RDMA_SEGMENT_ENV = "MLP_RDMA_NETWORK_SEGMENT" 11 | DEFAULT_CREDENTIAL_PATH_ENV = "CREDENTIAL_PATH" 12 | DEFAULT_CREDENTIAL_PATH = "/mlplatform/.credential/" 13 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/cipher.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include 17 | #include 18 | #include "include/cipher.h" 19 | #include 20 | 21 | CipherInfo::CipherInfo(bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr, 22 | size_t header_size) 23 | : use_cipher(use_cipher), header_size(header_size) 24 | { 25 | if (use_cipher) 26 | { 27 | pybind11::buffer_info key_info = key_arr.request(); 28 | size_t key_size = key_info.size; 29 | if (key_size == 16) 30 | { 31 | mode = "CTR-128"; 32 | } 33 | else if (key_size == 32) 34 | { 35 | mode = "CTR-256"; 36 | } 37 | else 38 | { 39 | throw std::runtime_error("Cipher Exception: key length invalid"); 40 | } 41 | key = reinterpret_cast(key_info.ptr); 42 | 43 | pybind11::buffer_info iv_info = iv_arr.request(); 44 | if ((size_t)iv_info.size != AES_BLOCK_SIZE) 45 | { 46 | throw std::runtime_error("Cipher Exception: iv length invalid"); 47 | } 48 | iv = reinterpret_cast(iv_info.ptr); 49 | } 50 | } 51 | 52 | CtrEncWrap::CtrEncWrap(std::string mode, pybind11::array_t key_arr, 53 | pybind11::array_t iv_arr, size_t global_offset) 54 | { 55 | pybind11::buffer_info key_info = key_arr.request(); 56 | pybind11::buffer_info iv_info = iv_arr.request(); 57 | enc_.reset(new CtrEncrypter(mode, (unsigned char *)key_info.ptr, (unsigned char *)iv_info.ptr, global_offset)); 58 | } 59 | 60 | size_t CtrEncWrap::encrypt_update(pybind11::array_t pt, pybind11::array_t ct) 61 | { 62 | pybind11::buffer_info pt_info = pt.request(); 63 | pybind11::buffer_info ct_info = ct.request(); 64 | unsigned char *pt_ptr = (unsigned char *)pt_info.ptr; 65 | unsigned char *ct_ptr = (unsigned char *)ct_info.ptr; 66 | return enc_->encrypt_update(pt_ptr, pt_info.size, ct_ptr); 67 | } 68 | 69 | CtrDecWrap::CtrDecWrap(std::string mode, pybind11::array_t key_arr, 70 | pybind11::array_t iv_arr, size_t global_offset) 71 | { 72 | pybind11::buffer_info key_info = key_arr.request(); 73 | pybind11::buffer_info iv_info = iv_arr.request(); 74 | dec_.reset(new CtrDecrypter(mode, (unsigned char *)key_info.ptr, 
#ifndef _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_
#define _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_

/* NOTE(review): the include target was stripped by text extraction; restored
 * as <stdint.h> per the original trailing comment "for uint64_t, etc." */
#include <stdint.h> /* for uint64_t, etc. */

#ifdef __cplusplus
extern "C"
{
#endif
    /**
     * Some utility decls used in libcfs.
     */
    typedef int32_t tSize; /// size of data for read/write io ops
    typedef int64_t tOffset; /// offset within the file

    /* Opaque handles: definitions live inside the client library. */
    struct CfsFileSystemInternalWrapper;
    typedef struct CfsFileSystemInternalWrapper *cfsFS;

    struct CfsFileInternalWrapper;
    typedef struct CfsFileInternalWrapper *cfsFile;

    typedef enum cfsStatus
    {
        STATUS_OK = 0,
        STATUS_MISSING_BLOCK = -1002,
        STATUS_TIMEOUT = -1003,
        STATUS_INVALID_RANGE = -1004,
        STATUS_CONNECTION_CLOSED = -1005,
        STATUS_WRITE_FAILED = -1006,
        STATUS_IO_BUSY = -1007,
        STATUS_INVALID_PARAMETER = -1098,
        STATUS_UNSUPPORTED_OP = -1099,
        STATUS_UNKNOWN_ERR = -1100,
    } cfsStatus;

    typedef void (*cfsWriteCallback)(cfsStatus status, void *args);

    typedef void (*cfsReadCallback)(cfsStatus status, int32_t readLength, char *buffer, void *args);

    /* Per-request context: exactly one callback fires on completion. */
    typedef struct cfsAsyncContext
    {
        cfsReadCallback readCallback;
        cfsWriteCallback writeCallback;
        char *buffer;
        void *args;
    } cfsAsyncContext;

    /**
     * cfsAsyncPRead - Async positional read of data from an open file.
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param offset Position from which to read.
     * @param length The length of the buffer.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncPRead(cfsFS fs, cfsFile file, tSize length, tOffset offset, cfsAsyncContext *context);

    /**
     * cfsAsyncWrite - Write data to the internal buffer of outputstream,
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param buffer The buffer to copy write bytes into.
     * @param length The length of the buffer.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncWrite(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context);

    /**
     * cfsAsyncFlush - Wait for data is acked by remote dn.
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncFlush(cfsFS fs, cfsFile file, cfsAsyncContext *context);

    /**
     * cfsAsyncWriteAndFlush - Write data to remote datanode and wait for ack.
     *
     * @param fs The configured filesystem handle.
     * @param file The file handle.
     * @param buffer The buffer to copy write bytes into.
     * @param length The length of the buffer.
     * @param context The callback context passed by user.
     * @return Status of Async method.
     */
    cfsStatus cfsAsyncWriteAndFlush(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context);

#ifdef __cplusplus
}
#endif

#endif /* _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_ */
84 | */ 85 | cfsStatus cfsAsyncWrite(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context); 86 | 87 | /** 88 | * cfsAsyncFlush - Wait for data is acked by remote dn. 89 | * 90 | * @param fs The configured filesystem handle. 91 | * @param file The file handle. 92 | * @param context The callback context passed by user. 93 | * @return Status of Async method. 94 | */ 95 | cfsStatus cfsAsyncFlush(cfsFS fs, cfsFile file, cfsAsyncContext *context); 96 | 97 | /** 98 | * cfsAsyncWriteAndFlush - Write data to remote datanode and wait for ack. 99 | * 100 | * @param fs The configured filesystem handle. 101 | * @param file The file handle. 102 | * @param buffer The buffer to copy write bytes into. 103 | * @param length The length of the buffer. 104 | * @param context The callback context passed by user. 105 | * @return Status of Async method. 106 | */ 107 | cfsStatus cfsAsyncWriteAndFlush(cfsFS fs, cfsFile file, const void *buffer, tSize length, cfsAsyncContext *context); 108 | 109 | #ifdef __cplusplus 110 | } 111 | #endif 112 | 113 | #endif /* _CLOUDFS_LIBCFS3_CLIENT_CFS_AIO_H_ */ 114 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/cipher.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #ifndef VETURBOIO_CIPHER_H 17 | #define VETURBOIO_CIPHER_H 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "fastcrypto.h" 24 | 25 | class CipherInfo 26 | { 27 | public: 28 | bool use_cipher = false; 29 | std::string mode = "CTR-128"; 30 | size_t header_size = 0; 31 | unsigned char *key = NULL; 32 | unsigned char *iv = NULL; 33 | CipherInfo(bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr, size_t header_size); 34 | CipherInfo() = default; 35 | }; 36 | 37 | class CtrEncWrap 38 | { 39 | private: 40 | std::unique_ptr enc_; 41 | 42 | public: 43 | CtrEncWrap() = default; 44 | CtrEncWrap(std::string mode, pybind11::array_t key_arr, pybind11::array_t iv_arr, 45 | size_t global_offset); 46 | size_t encrypt_update(pybind11::array_t pt, pybind11::array_t ct); 47 | }; 48 | 49 | class CtrDecWrap 50 | { 51 | private: 52 | std::unique_ptr dec_; 53 | 54 | public: 55 | CtrDecWrap() = default; 56 | CtrDecWrap(std::string mode, pybind11::array_t key_arr, pybind11::array_t iv_arr, 57 | size_t global_offset); 58 | size_t decrypt_update(pybind11::array_t ct, pybind11::array_t pt); 59 | }; 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef COMMON_H 17 | #define COMMON_H 18 | 19 | #include 20 | #include 21 | #if defined(USE_CUDA) 22 | #include 23 | #endif 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "cfs.h" 31 | #include "logging.h" 32 | #include "sfcs.h" 33 | 34 | #define THREAD_NICE_ADJ -10 35 | #define BUF_ALIGN_SIZE (size_t)4096 36 | 37 | using namespace std; 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/fastcrypto.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #ifndef VETURBOIO_FASTCRYPTO_H 17 | #define VETURBOIO_FASTCRYPTO_H 18 | 19 | #include 20 | #include 21 | 22 | #define EVP_UPDATE_MAX 0x7ffffff0 23 | #define AES_BLOCK_SIZE 16 24 | #define AES_BUF_MAX_SIZE 32 25 | #define MAX_CTR_KEY_SIZE 32 26 | #define FASTCRYPTO_MAGIC_SIZE 16 27 | 28 | inline void counter_inc_by(unsigned char *counter, size_t n, size_t c) 29 | { 30 | do 31 | { 32 | --n; 33 | c += counter[n]; 34 | counter[n] = static_cast(c); 35 | c >>= 8; 36 | } while (n); 37 | } 38 | 39 | typedef struct evp_cipher_ctx_st EVP_CIPHER_CTX; 40 | typedef struct evp_cipher_st EVP_CIPHER; 41 | typedef struct evp_mac_ctx_st EVP_MAC_CTX; 42 | typedef struct evp_mac_st EVP_MAC; 43 | 44 | class CtrEncrypter 45 | { 46 | private: 47 | EVP_CIPHER_CTX *ctx = NULL; 48 | EVP_CIPHER *cipher = NULL; 49 | 50 | public: 51 | CtrEncrypter() = default; 52 | CtrEncrypter(std::string algo, const unsigned char *key, const unsigned char *iv, size_t global_offset); 53 | ~CtrEncrypter(); 54 | int encrypt_update(unsigned char *pt, size_t pt_size, unsigned char *ct); 55 | }; 56 | 57 | class CtrDecrypter 58 | { 59 | private: 60 | EVP_CIPHER_CTX *ctx = NULL; 61 | EVP_CIPHER *cipher = NULL; 62 | 63 | public: 64 | CtrDecrypter() = default; 65 | CtrDecrypter(std::string algo, const unsigned char *key, const unsigned char *iv, size_t global_offset); 66 | ~CtrDecrypter(); 67 | int decrypt_update(unsigned char *ct, size_t ct_size, unsigned char *pt); 68 | }; 69 | 70 | // Both encrypt and decrypt require length of ct and pt multiple of 16 71 | int ctr_encrypt_gpu(std::string algo, const unsigned char *key, const unsigned char *iv, unsigned char *pt, 72 | size_t pt_size, unsigned char *ct); 73 | 74 | int ctr_decrypt_gpu(std::string algo, const unsigned char *key, const unsigned char *iv, unsigned char *ct, 75 | size_t ct_size, unsigned char *pt); 76 | #endif -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/io_helper.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef IO_HELPER_H 17 | #define IO_HELPER_H 18 | 19 | #include "posix.h" 20 | #include "sfcs.h" 21 | 22 | class IOHelper 23 | { 24 | private: 25 | char *pin_mem = NULL; 26 | bool use_pinmem_ = false; 27 | size_t buffer_size_ = 0; 28 | 29 | public: 30 | ~IOHelper(); 31 | void load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 32 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 33 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 34 | pybind11::array_t iv_arr, int64_t header_size); 35 | void save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 36 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 37 | pybind11::array_t iv_arr, int64_t header_size); 38 | void save_tensor_to_file_cpu(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 39 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 40 | pybind11::array_t iv_arr, int64_t header_size); 41 | void init_buffer(string file_path, int64_t file_size, bool use_pinmem, bool use_sfcs_sdk); 42 | void free_buffer(); 43 | }; 44 | 45 | size_t get_file_size(const char *file_name, bool 
use_sfcs_sdk); 46 | 47 | void read_file(string file_path, char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size, 48 | size_t global_offset, bool use_sfcs_sdk, bool use_direct_io, CipherInfo cipher_info); 49 | 50 | void load_file_to_tensor_cpu(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 51 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 52 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 53 | pybind11::array_t iv_arr, int64_t header_size); 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/logging.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef LOGGER_H 17 | #define LOGGER_H 18 | 19 | #include 20 | using namespace std; 21 | 22 | #define PR std::cout 23 | #define ENDL std::endl 24 | #define FILE_INFO "[" << __FUNCTION__ << " at " << __FILE__ << ":" << __LINE__ << "] " 25 | 26 | #define ARG_COUNT_PRIVATE(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...) N 27 | #define ARG_COUNT(...) 
ARG_COUNT_PRIVATE(0, __VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) 28 | 29 | #define FUN_COUNT_GLUE(M, count) M##count 30 | #define FUN_JOIN_COUNT(M, count) FUN_COUNT_GLUE(M, count) 31 | #define FUN_JOIN_ARGS(x, y) x y 32 | #define CallSomeOne(fn, ...) FUN_JOIN_ARGS(FUN_JOIN_COUNT(fn, ARG_COUNT(__VA_ARGS__)), (__VA_ARGS__)) 33 | 34 | #define param1(a) a 35 | #define param2(a, b) a << ", " #b ":" << b 36 | #define param3(a, b, c) a << ", " #b ":" << b << ", " #c ":" << c 37 | #define param4(a, b, c, d) a << ", " #b ":" << b << ", " #c ":" << c << ", " #d ":" << d 38 | #define param5(a, b, c, d, e) a << ", " #b ":" << b << ", " #c ":" << c << ", " #d ":" << d << ", " #e ":" << e 39 | 40 | #define pr1(...) param1(__VA_ARGS__) 41 | #define pr2(...) param2(__VA_ARGS__) 42 | #define pr3(...) param3(__VA_ARGS__) 43 | #define pr4(...) param4(__VA_ARGS__) 44 | #define pr5(...) param5(__VA_ARGS__) 45 | 46 | #define logDebug(...) PR << "VETURBOIO_CPP_DEBUG " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL 47 | #define logInfo(...) PR << "VETURBOIO_CPP_INFO " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL 48 | #define logWarn(...) PR << "VETURBOIO_CPP_WARN " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL 49 | #define logError(...) PR << "VETURBOIO_CPP_ERROR " << FILE_INFO << CallSomeOne(pr, __VA_ARGS__) << ENDL 50 | #endif // LOGGER_H -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/posix.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #ifndef LOAD_UTILS_H 17 | #define LOAD_UTILS_H 18 | 19 | #include "common.h" 20 | #include "cipher.h" 21 | 22 | class POSIXFile 23 | { 24 | public: 25 | std::string file_path; 26 | // cipher related 27 | CipherInfo cipher_info; 28 | 29 | POSIXFile(std::string file_path); 30 | POSIXFile(std::string file_path, CipherInfo cipher_info); 31 | POSIXFile(std::string file_path, bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr, 32 | size_t header_size); 33 | 34 | size_t read_file_to_address_parallel(char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size, 35 | size_t global_offset, bool use_direct_io); 36 | size_t read_file_to_array(pybind11::array_t arr, size_t length, size_t offset, int num_thread, 37 | bool use_direct_io); 38 | size_t write_file_from_addr(char *addr, size_t length, bool append); 39 | 40 | private: 41 | void read_file_to_address_thread(int thread_id, char *addr, int device_id, char *dev_mem, size_t block_size, 42 | size_t total_size, size_t global_offset, bool use_direct_io, 43 | CipherInfo cipher_info); 44 | }; 45 | 46 | #endif -------------------------------------------------------------------------------- /veturboio/ops/csrc/include/sfcs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef SFCS_H 18 | #define SFCS_H 19 | 20 | #include 21 | #include 22 | #include "common.h" 23 | #include "cfs.h" 24 | #include "logging.h" 25 | #include "cipher.h" 26 | 27 | #define SFCS_NAME_NODE "default" 28 | #define SFCS_USER_NAME "demo-user" 29 | 30 | using namespace std; 31 | 32 | class SFCSFs 33 | { 34 | public: 35 | cfsFS fs; 36 | 37 | SFCSFs(); 38 | ~SFCSFs(); 39 | void concat_files(std::string file_name, vector file_paths); 40 | void rename_file(const char *file_path, const char *file_name); 41 | void mkdir(std::string file_path); 42 | int64_t get_block_size(); 43 | size_t read_file_to_addr(std::string file_name, CipherInfo cipher_info, char *addr, size_t length, size_t offset); 44 | size_t write_file_from_addr(std::string file_name, CipherInfo cipher_info, char *addr, size_t length, 45 | size_t offset); 46 | void read_multi_files(pybind11::list file_paths, pybind11::list tensors, pybind11::list lengths, 47 | pybind11::list offsets, int num_thread, bool use_cipher, pybind11::array_t key_arr, 48 | pybind11::array_t iv_arr, size_t header_size); 49 | void write_multi_files(pybind11::list file_paths, pybind11::list tensors, pybind11::list lengths, 50 | pybind11::list offsets, int num_thread, bool use_cipher, pybind11::array_t key_arr, 51 | pybind11::array_t iv_arr, size_t header_size); 52 | void get_file_size(std::string file_name, size_t *size); 53 | void get_multi_file_size(pybind11::list file_paths, pybind11::list sizes, int num_thread); 54 | }; 55 | 56 | class SFCSFile 57 | { 58 | public: 59 | 
cfsFS fs; 60 | bool fs_owner; 61 | SFCSFs *sfcs_fs; 62 | std::string file_path; 63 | // cipher related 64 | CipherInfo cipher_info; 65 | 66 | SFCSFile(std::string file_path); 67 | SFCSFile(std::string path, SFCSFs *sfcs_fs); 68 | SFCSFile(std::string file_path, bool use_cipher, pybind11::array_t key_arr, pybind11::array_t iv_arr, 69 | size_t header_size); 70 | SFCSFile(std::string file_path, CipherInfo cipher_info); 71 | SFCSFile(std::string file_path, SFCSFs *sfcs_fs, CipherInfo cipher_info); 72 | ~SFCSFile(); 73 | size_t get_file_size(); 74 | size_t read_file_to_address_parallel(char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size, 75 | size_t global_offset); 76 | size_t read_file_to_addr(char *addr, size_t length, size_t offset); 77 | size_t read_file_to_array(pybind11::array_t arr, size_t length, size_t offset, int num_thread); 78 | size_t write_file_from_array(pybind11::array_t arr, size_t length, bool append); 79 | size_t write_file_from_tensors(pybind11::list tensors, pybind11::list sizes, pybind11::list offsets, 80 | std::string concat_dir, std::string concat_file); 81 | size_t write_file_from_addr(char *addr, size_t length, size_t offset, bool append); 82 | void delete_file(); 83 | 84 | private: 85 | void read_file_to_address_thread(int thread_id, char *addr, int device_id, char *dev_mem, size_t block_size, 86 | size_t total_size, size_t global_offset); 87 | void write_file_from_tensor(torch::Tensor tensor, size_t length, size_t offset, std::string file_name); 88 | }; 89 | 90 | #endif -------------------------------------------------------------------------------- /veturboio/ops/csrc/io_helper.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "include/io_helper.h" 17 | #include "include/cipher.h" 18 | #include "include/fastcrypto.h" 19 | 20 | IOHelper::~IOHelper() 21 | { 22 | free_buffer(); 23 | } 24 | 25 | // init buffer with given positive size or the size of the file in specified 26 | // path 27 | void IOHelper::init_buffer(string file_path, int64_t buffer_size, bool use_pinmem, bool use_sfcs_sdk) 28 | { 29 | if (buffer_size <= 0) 30 | { 31 | buffer_size = get_file_size(file_path.c_str(), use_sfcs_sdk); 32 | } 33 | 34 | if (buffer_size_ > 0) 35 | { 36 | free_buffer(); 37 | } 38 | 39 | buffer_size_ = buffer_size; 40 | if (use_pinmem) 41 | { 42 | use_pinmem_ = true; 43 | cudaMallocHost(&pin_mem, buffer_size, cudaHostAllocMapped); 44 | } 45 | else 46 | { 47 | pin_mem = (char *)mmap(NULL, buffer_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); 48 | madvise(pin_mem, buffer_size, MADV_HUGEPAGE); 49 | } 50 | } 51 | 52 | void IOHelper::free_buffer() 53 | { 54 | if (pin_mem != NULL) 55 | { 56 | if (use_pinmem_) 57 | cudaFreeHost(pin_mem); 58 | else 59 | munmap(pin_mem, buffer_size_); 60 | } 61 | } 62 | 63 | void read_unaligned_part_gpu(std::string file_path, torch::Tensor res_tensor, int64_t *offset, int64_t device_id, 64 | size_t *total_size, bool use_sfcs_sdk, bool use_direct_io, size_t *read_unaligned_size, 65 | CipherInfo cipher_info) 66 | { 67 | // cpu align only read head part, while gpu align read both head and tail part 68 | if (device_id < 0) 69 | { 70 | throw std::runtime_error("read_unaligned_part_gpu only support gpu 
device"); 71 | } 72 | size_t end_offset = *offset + *total_size; 73 | // both head and tail are aligned 74 | if ((*offset & (BUF_ALIGN_SIZE - 1)) == 0 && ((end_offset) & (BUF_ALIGN_SIZE - 1)) == 0) 75 | { 76 | return; 77 | } 78 | char tmp_buf_head[BUF_ALIGN_SIZE] = {}; 79 | char tmp_buf_tail[BUF_ALIGN_SIZE] = {}; 80 | // read head unaligned 81 | cudaSetDevice(device_id); 82 | if ((*offset & (BUF_ALIGN_SIZE - 1)) != 0) 83 | { 84 | size_t read_head_size = min(BUF_ALIGN_SIZE - (*offset & (BUF_ALIGN_SIZE - 1)), *total_size); 85 | read_file(file_path, tmp_buf_head, device_id, (char *)res_tensor.data_ptr(), 1, read_head_size, *offset, 86 | use_sfcs_sdk, use_direct_io, cipher_info); 87 | *read_unaligned_size = read_head_size; 88 | *offset += read_head_size; 89 | *total_size -= read_head_size; 90 | } 91 | // read tail unaligned 92 | if (*total_size > 0 && (end_offset & (BUF_ALIGN_SIZE - 1)) != 0) 93 | { 94 | size_t tail_offset = end_offset - (end_offset & (BUF_ALIGN_SIZE - 1)); 95 | size_t tensor_offset = tail_offset - *offset + *read_unaligned_size; 96 | read_file(file_path, tmp_buf_tail, device_id, (char *)res_tensor.data_ptr() + tensor_offset, 1, 97 | end_offset - tail_offset, tail_offset, use_sfcs_sdk, use_direct_io, cipher_info); 98 | *total_size -= end_offset - tail_offset; 99 | } 100 | cudaDeviceSynchronize(); 101 | } 102 | 103 | void IOHelper::load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 104 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 105 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 106 | pybind11::array_t iv_arr, int64_t header_size) 107 | { 108 | size_t file_size = get_file_size(file_path.c_str(), use_sfcs_sdk); 109 | size_t read_unaligned_size = 0; 110 | size_t total_size = length > 0 ? 
length : file_size - offset; 111 | // set cipher 112 | CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size); 113 | if (device_id < 0) 114 | { 115 | read_file(file_path, (char *)res_tensor.data_ptr() + read_unaligned_size, device_id, NULL, num_thread, 116 | total_size, offset, use_sfcs_sdk, use_direct_io, cipher_info); 117 | } 118 | else 119 | { 120 | // read unaligned part first, since GPU can only decrypt data in integral multiple of 16 Bytes 121 | read_unaligned_part_gpu(file_path, res_tensor, &offset, device_id, &total_size, use_sfcs_sdk, use_direct_io, 122 | &read_unaligned_size, cipher_info); 123 | 124 | // change use_pinmem attribute may introduce ambiguity 125 | if (buffer_size_ > 0 && use_pinmem != use_pinmem_) 126 | { 127 | throw std::runtime_error("use_pinmem attribute of an exising IOHelper should not be changed"); 128 | } 129 | 130 | // TODO: HPA might be slow 131 | // only use pin_mem as buffer for copying data to device memory 132 | // the lifecycle of the pin_mem is the same as helper 133 | if (pin_mem == NULL || total_size > buffer_size_) 134 | { 135 | init_buffer(file_path, total_size, use_pinmem, use_sfcs_sdk); 136 | } 137 | cudaSetDevice(device_id); 138 | read_file(file_path, pin_mem, device_id, (char *)res_tensor.data_ptr() + read_unaligned_size, num_thread, 139 | total_size, offset, use_sfcs_sdk, use_direct_io, CipherInfo()); 140 | cudaDeviceSynchronize(); 141 | // decrypt with gpu 142 | if (cipher_info.use_cipher && total_size > 0) 143 | { 144 | if (offset % AES_BLOCK_SIZE != 0 || total_size % AES_BLOCK_SIZE != 0) 145 | { 146 | throw std::runtime_error("cannot decrypt because gpu read is not aligned"); 147 | } 148 | unsigned char iv[AES_BLOCK_SIZE]; 149 | for (size_t i = 0; i < AES_BLOCK_SIZE; i++) 150 | { 151 | iv[i] = cipher_info.iv[i]; 152 | } 153 | counter_inc_by(iv, AES_BLOCK_SIZE, (offset - cipher_info.header_size) / AES_BLOCK_SIZE); 154 | unsigned char *iv_gpu = NULL; 155 | cudaMalloc((void **)&iv_gpu, AES_BLOCK_SIZE); 156 | 
if (iv_gpu == NULL) 157 | { 158 | throw std::runtime_error("iv_gpu cannot be allocated"); 159 | } 160 | cudaMemcpy(iv_gpu, iv, AES_BLOCK_SIZE, cudaMemcpyHostToDevice); 161 | unsigned char *ct = reinterpret_cast(res_tensor.data_ptr()) + read_unaligned_size; 162 | int cipher_ret = ctr_decrypt_gpu(cipher_info.mode, cipher_info.key, iv_gpu, ct, total_size, ct); 163 | if (!cipher_ret) 164 | { 165 | throw std::runtime_error("Cipher Exception: gpu decrypt fail"); 166 | } 167 | cudaDeviceSynchronize(); 168 | cudaFree(iv_gpu); 169 | } 170 | } 171 | } 172 | 173 | void IOHelper::save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 174 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 175 | pybind11::array_t iv_arr, int64_t header_size) 176 | { 177 | char *buf; 178 | 179 | CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size); 180 | if (tensor.device().is_cuda() || use_cipher) 181 | { 182 | // change use_pinmem attribute may introduce ambiguity 183 | if (buffer_size_ > 0 && use_pinmem != use_pinmem_) 184 | { 185 | throw std::runtime_error("use_pinmem attribute of an exising IOHelper should not be changed"); 186 | } 187 | 188 | if (pin_mem == NULL || length > buffer_size_) 189 | { 190 | init_buffer(file_path, length, use_pinmem, use_sfcs_sdk); 191 | } 192 | 193 | buf = pin_mem; 194 | if (tensor.device().is_cuda()) 195 | { 196 | cudaSetDevice(tensor.device().index()); 197 | cudaMemcpyAsync(buf, (char *)tensor.data_ptr(), length, cudaMemcpyDeviceToHost); 198 | cudaDeviceSynchronize(); 199 | } 200 | else 201 | { 202 | memcpy(buf, (char *)tensor.data_ptr(), length); 203 | } 204 | } 205 | else 206 | { 207 | buf = (char *)tensor.data_ptr(); 208 | } 209 | 210 | if (use_sfcs_sdk) 211 | { 212 | SFCSFile sfcs_file(file_path, cipher_info); 213 | sfcs_file.write_file_from_addr(buf, length, 0, true); 214 | } 215 | else 216 | { 217 | POSIXFile posix_file(file_path, cipher_info); 218 | posix_file.write_file_from_addr(buf, 
length, true); 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/io_helper_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include "include/io_helper.h" 2 | #include "include/cipher.h" 3 | 4 | IOHelper::~IOHelper() 5 | { 6 | } 7 | 8 | // init buffer with given positive size or the size of the file in specified 9 | // path 10 | void IOHelper::init_buffer(string file_path, int64_t buffer_size, bool use_pinmem, bool use_sfcs_sdk) 11 | { 12 | } 13 | 14 | void IOHelper::free_buffer() 15 | { 16 | } 17 | 18 | void IOHelper::load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 19 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 20 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 21 | pybind11::array_t iv_arr, int64_t header_size) 22 | { 23 | load_file_to_tensor_cpu(file_path, res_tensor, length, offset, device_id, num_thread, use_pinmem, use_sfcs_sdk, 24 | use_direct_io, use_cipher, key_arr, iv_arr, header_size); 25 | } 26 | 27 | void IOHelper::save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 28 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 29 | pybind11::array_t iv_arr, int64_t header_size) 30 | { 31 | save_tensor_to_file_cpu(tensor, file_path, length, use_pinmem, use_sfcs_sdk, use_cipher, key_arr, iv_arr, 32 | header_size); 33 | } 34 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/io_helper_cpu_common.cpp: -------------------------------------------------------------------------------- 1 | #include "include/io_helper.h" 2 | #include "include/cipher.h" 3 | 4 | size_t get_file_size(const char *file_name, bool use_sfcs_sdk) 5 | { 6 | if (use_sfcs_sdk) 7 | { 8 | SFCSFile sfcs_file(file_name); 9 | return sfcs_file.get_file_size(); 10 | } 11 | else 
12 | { 13 | struct stat st; 14 | stat(file_name, &st); 15 | return st.st_size; 16 | } 17 | } 18 | 19 | void read_file(string file_path, char *addr, int device_id, char *dev_mem, int num_thread, size_t total_size, 20 | size_t global_offset, bool use_sfcs_sdk, bool use_direct_io, CipherInfo cipher_info) 21 | { 22 | if (total_size == 0) 23 | { 24 | return; 25 | } 26 | 27 | if (use_sfcs_sdk) 28 | { 29 | SFCSFile sfcs_file(file_path, cipher_info); 30 | sfcs_file.read_file_to_address_parallel(addr, device_id, dev_mem, num_thread, total_size, global_offset); 31 | } 32 | else 33 | { 34 | POSIXFile posix_file(file_path, cipher_info); 35 | posix_file.read_file_to_address_parallel(addr, device_id, dev_mem, num_thread, total_size, global_offset, 36 | use_direct_io); 37 | } 38 | } 39 | 40 | void load_file_to_tensor_cpu(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 41 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 42 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 43 | pybind11::array_t iv_arr, int64_t header_size) 44 | { 45 | size_t file_size = get_file_size(file_path.c_str(), use_sfcs_sdk); 46 | size_t read_unaligned_size = 0; 47 | size_t total_size = length > 0 ? 
length : file_size - offset; 48 | // set cipher 49 | CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size); 50 | if (device_id < 0) 51 | { 52 | read_file(file_path, (char *)res_tensor.data_ptr() + read_unaligned_size, device_id, NULL, num_thread, 53 | total_size, offset, use_sfcs_sdk, use_direct_io, cipher_info); 54 | } 55 | } 56 | 57 | void IOHelper::save_tensor_to_file_cpu(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 58 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 59 | pybind11::array_t iv_arr, int64_t header_size) 60 | { 61 | char *buf; 62 | 63 | CipherInfo cipher_info(use_cipher, key_arr, iv_arr, header_size); 64 | if (use_cipher) 65 | { 66 | // change use_pinmem attribute may introduce ambiguity 67 | if (buffer_size_ > 0 && use_pinmem != use_pinmem_) 68 | { 69 | throw std::runtime_error("use_pinmem attribute of an exising IOHelper should not be changed"); 70 | } 71 | 72 | if (pin_mem == NULL || length > buffer_size_) 73 | { 74 | init_buffer(file_path, length, use_pinmem, use_sfcs_sdk); 75 | } 76 | 77 | buf = pin_mem; 78 | memcpy(buf, (char *)tensor.data_ptr(), length); 79 | } 80 | else 81 | { 82 | buf = (char *)tensor.data_ptr(); 83 | } 84 | 85 | if (use_sfcs_sdk) 86 | { 87 | SFCSFile sfcs_file(file_path, cipher_info); 88 | sfcs_file.write_file_from_addr(buf, length, 0, true); 89 | } 90 | else 91 | { 92 | POSIXFile posix_file(file_path, cipher_info); 93 | posix_file.write_file_from_addr(buf, length, true); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/io_helper_npu.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "include/io_helper.h" 17 | #include "include/cipher.h" 18 | 19 | IOHelper::~IOHelper() 20 | { 21 | } 22 | 23 | // init buffer with given positive size or the size of the file in specified 24 | // path 25 | void IOHelper::init_buffer(string file_path, int64_t buffer_size, bool use_pinmem, bool use_sfcs_sdk) 26 | { 27 | } 28 | 29 | void IOHelper::free_buffer() 30 | { 31 | } 32 | 33 | void IOHelper::load_file_to_tensor(std::string file_path, torch::Tensor res_tensor, size_t length, int64_t offset, 34 | int64_t device_id, int64_t num_thread, bool use_pinmem, bool use_sfcs_sdk, 35 | bool use_direct_io, bool use_cipher, pybind11::array_t key_arr, 36 | pybind11::array_t iv_arr, int64_t header_size) 37 | { 38 | load_file_to_tensor_cpu(file_path, res_tensor, length, offset, device_id, num_thread, use_pinmem, use_sfcs_sdk, 39 | use_direct_io, use_cipher, key_arr, iv_arr, header_size); 40 | } 41 | 42 | void IOHelper::save_tensor_to_file(torch::Tensor tensor, std::string file_path, size_t length, bool use_pinmem, 43 | bool use_sfcs_sdk, bool use_cipher, pybind11::array_t key_arr, 44 | pybind11::array_t iv_arr, int64_t header_size) 45 | { 46 | save_tensor_to_file_cpu(tensor, file_path, length, use_pinmem, use_sfcs_sdk, use_cipher, key_arr, iv_arr, 47 | header_size); 48 | } 49 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/lib/libfastcrypto_gpu.so.0.3: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/volcengine/veTurboIO/0ed05516013da4332433784660660ca4d1904505/veturboio/ops/csrc/lib/libfastcrypto_gpu.so.0.3 -------------------------------------------------------------------------------- /veturboio/ops/csrc/posix.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include "include/posix.h" 17 | #include "include/logging.h" 18 | #include "include/cipher.h" 19 | #include "include/fastcrypto.h" 20 | #include 21 | 22 | POSIXFile::POSIXFile(std::string file_path) 23 | { 24 | this->file_path = file_path; 25 | } 26 | 27 | POSIXFile::POSIXFile(std::string file_path, CipherInfo cipher_info) 28 | { 29 | this->file_path = file_path; 30 | this->cipher_info = cipher_info; 31 | } 32 | 33 | POSIXFile::POSIXFile(std::string file_path, bool use_cipher, pybind11::array_t key_arr, 34 | pybind11::array_t iv_arr, size_t header_size) 35 | : POSIXFile(file_path) 36 | { 37 | this->cipher_info = CipherInfo(use_cipher, key_arr, iv_arr, header_size); 38 | } 39 | 40 | void POSIXFile::read_file_to_address_thread(int thread_id, char *addr, int device_id, char *dev_mem, size_t block_size, 41 | size_t total_size, size_t global_offset, bool use_direct_io, 42 | CipherInfo cipher_info) 43 | { 44 | size_t offset = thread_id * block_size; 45 | size_t read_size = block_size; 46 | int fd = -1; 47 | int ret = 0; 48 | size_t size_read = 0; 49 | 50 | if (offset + read_size >= total_size) 51 | { 52 | read_size = (total_size > offset) ? 
total_size - offset : 0; 53 | } 54 | // TODO: use_direct_io if sfcs file detected 55 | if (use_direct_io) 56 | { 57 | if ((fd = open(file_path.c_str(), O_RDONLY | O_DIRECT)) < 0) 58 | { 59 | if (errno == EINVAL) 60 | { 61 | logWarn("open file using directIO failed, fall back to bufferIO", file_path.c_str(), 62 | std::strerror(EINVAL)); 63 | } 64 | else 65 | { 66 | logError("open file using directIO failed", file_path.c_str(), std::strerror(errno)); 67 | throw std::runtime_error("veTurboIO Exception: can't apply open operation"); 68 | } 69 | } 70 | } 71 | 72 | if (fd == -1) 73 | { 74 | if ((fd = open(file_path.c_str(), O_RDONLY)) < 0) 75 | { 76 | logError("open file using bufferIO failed", file_path.c_str(), std::strerror(errno)); 77 | throw std::runtime_error("veTurboIO Exception: can't apply open operation"); 78 | } 79 | } 80 | 81 | FILE *fp = fdopen(fd, "rb"); 82 | if (fp == NULL) 83 | { 84 | logError("can't apply fdopen to file", file_path.c_str(), std::strerror(errno)); 85 | throw std::runtime_error("veTurboIO Exception: can't apply fdopen operation"); 86 | } 87 | 88 | if ((ret = fseek(fp, global_offset + offset, SEEK_SET)) < 0) 89 | { 90 | logError("can't apply fseek to file", file_path.c_str(), std::strerror(errno)); 91 | throw std::runtime_error("veTurboIO Exception: can't apply fseek operation"); 92 | } 93 | 94 | if ((size_read = fread(addr + offset, 1, read_size, fp)) == 0) 95 | { 96 | logWarn("read file with 0 bytes returned", file_path.c_str(), offset, read_size); 97 | } 98 | 99 | if ((ret = fclose(fp)) < 0) 100 | { 101 | logError("can't apply fclose to file", file_path.c_str(), std::strerror(errno)); 102 | throw std::runtime_error("veTurboIO Exception: can't apply fclose operation"); 103 | } 104 | 105 | // Decrypt if use_cipher is true 106 | if (cipher_info.use_cipher) 107 | { 108 | CtrDecrypter dec(cipher_info.mode, cipher_info.key, cipher_info.iv, 109 | global_offset + offset - cipher_info.header_size); 110 | unsigned char *ct = reinterpret_cast(addr 
+ offset); 111 | int cipher_ret = dec.decrypt_update(ct, read_size, ct); 112 | if (!cipher_ret) 113 | { 114 | throw std::runtime_error("Cipher Exception: decrypt fail"); 115 | } 116 | } 117 | 118 | #if defined(USE_CUDA) 119 | if (dev_mem != NULL && device_id >= 0) 120 | { 121 | cudaSetDevice(device_id); 122 | cudaMemcpyAsync(dev_mem + offset, addr + offset, read_size, cudaMemcpyHostToDevice); 123 | } 124 | #elif defined(USE_NPU) 125 | #else 126 | #endif 127 | } 128 | 129 | size_t POSIXFile::read_file_to_address_parallel(char *addr, int device_id, char *dev_mem, int num_thread, 130 | size_t total_size, size_t global_offset, bool use_direct_io) 131 | { 132 | vector threads(num_thread); 133 | 134 | size_t block_size = (size_t)ceil((double)total_size / num_thread); 135 | // align the block_size; 136 | block_size = (block_size + BUF_ALIGN_SIZE - 1) / BUF_ALIGN_SIZE * BUF_ALIGN_SIZE; 137 | // re-caculate the real needed thread num; 138 | num_thread = (total_size + block_size - 1) / block_size; 139 | 140 | for (int thread_id = 0; thread_id < num_thread; thread_id++) 141 | { 142 | threads[thread_id] = std::thread(&POSIXFile::read_file_to_address_thread, this, thread_id, addr, device_id, 143 | dev_mem, block_size, total_size, global_offset, use_direct_io, cipher_info); 144 | } 145 | 146 | for (int thread_id = 0; thread_id < num_thread; thread_id++) 147 | { 148 | threads[thread_id].join(); 149 | } 150 | 151 | return total_size; 152 | } 153 | 154 | size_t POSIXFile::read_file_to_array(pybind11::array_t arr, size_t length, size_t offset, int num_thread, 155 | bool use_direct_io) 156 | { 157 | pybind11::buffer_info buf_info = arr.request(); 158 | char *addr = static_cast(buf_info.ptr); 159 | madvise(addr, length, MADV_HUGEPAGE); 160 | return read_file_to_address_parallel(addr, -1, NULL, num_thread, length, offset, use_direct_io); 161 | } 162 | 163 | size_t POSIXFile::write_file_from_addr(char *addr, size_t length, bool append) 164 | { 165 | int fd; 166 | int flags = O_WRONLY; 
167 | size_t ret; 168 | size_t count; 169 | char *src = addr; 170 | size_t offset = 0; 171 | 172 | if (append) 173 | { 174 | struct stat st; 175 | stat(file_path.c_str(), &st); 176 | offset = st.st_size; 177 | flags |= O_APPEND; 178 | } 179 | 180 | if (cipher_info.use_cipher) 181 | { 182 | size_t h_off = cipher_info.header_size; 183 | CtrEncrypter enc(cipher_info.mode, cipher_info.key, cipher_info.iv, offset - h_off); 184 | unsigned char *pt = reinterpret_cast(addr); 185 | int cipher_ret = enc.encrypt_update(pt, length, pt); 186 | if (!cipher_ret) 187 | { 188 | throw std::runtime_error("Cipher Exception: encrypt fail"); 189 | } 190 | } 191 | 192 | fd = open(file_path.c_str(), flags); 193 | if (fd < 0) 194 | { 195 | logError("open failed", file_path.c_str(), std::strerror(errno)); 196 | throw std::runtime_error("veTurboIO Exception: open failed"); 197 | } 198 | 199 | count = length; 200 | while (count > 0) 201 | { 202 | ret = write(fd, src, count); 203 | if (ret < 0) 204 | { 205 | logError("Failed to write file", file_path.c_str()); 206 | throw std::runtime_error("veTurboIO Exception: write file"); 207 | } 208 | count -= ret; 209 | src += ret; 210 | } 211 | close(fd); 212 | return length; 213 | } 214 | -------------------------------------------------------------------------------- /veturboio/ops/csrc/pybind.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include "include/io_helper.h" 17 | #include "include/sfcs.h" 18 | #include "include/cipher.h" 19 | 20 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 21 | { 22 | py::class_(m, "IOHelper") 23 | .def(py::init<>()) 24 | .def("load_file_to_tensor", &IOHelper::load_file_to_tensor) 25 | .def("save_tensor_to_file", &IOHelper::save_tensor_to_file); 26 | 27 | py::class_(m, "POSIXFile") 28 | .def(py::init()) 29 | .def(py::init, pybind11::array_t, size_t>()) 30 | .def("read_file_to_array", &POSIXFile::read_file_to_array); 31 | 32 | py::class_(m, "SFCSFs") 33 | .def(py::init<>()) 34 | .def("mkdir", &SFCSFs::mkdir) 35 | .def("read_multi_files", &SFCSFs::read_multi_files) 36 | .def("write_multi_files", &SFCSFs::write_multi_files) 37 | .def("get_multi_file_size", &SFCSFs::get_multi_file_size); 38 | 39 | py::class_(m, "SFCSFile") 40 | .def(py::init()) 41 | .def(py::init, pybind11::array_t, size_t>()) 42 | .def("get_file_size", &SFCSFile::get_file_size) 43 | .def("read_file_to_array", &SFCSFile::read_file_to_array) 44 | .def("write_file_from_array", &SFCSFile::write_file_from_array) 45 | .def("write_file_from_tensors", &SFCSFile::write_file_from_tensors) 46 | .def("delete_file", &SFCSFile::delete_file); 47 | 48 | py::class_(m, "CtrEncWrap") 49 | .def(py::init, pybind11::array_t, size_t>()) 50 | .def("encrypt_update", &CtrEncWrap::encrypt_update); 51 | 52 | py::class_(m, "CtrDecWrap") 53 | .def(py::init, pybind11::array_t, size_t>()) 54 | .def("decrypt_update", &CtrDecWrap::decrypt_update); 55 | } 56 | -------------------------------------------------------------------------------- /veturboio/ops/io_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | import json 18 | import os 19 | from typing import Dict, Optional 20 | 21 | import numpy as np 22 | import torch 23 | from loguru import logger 24 | from safetensors.torch import save_file as safetensors_save_file 25 | 26 | from veturboio.ops.cipher import CipherInfo, CipherMode, create_cipher_with_header, encrypt 27 | from veturboio.ops.sfcs_utils import sfcs_delete_file, sfcs_write_file, sfcs_write_file_in_parallel 28 | from veturboio.safetensors import parse_state_dict 29 | from veturboio.types import FILE_PATH 30 | 31 | try: 32 | import veturboio_ext 33 | 34 | IOHelper = veturboio_ext.IOHelper 35 | except ImportError: 36 | IOHelper = None 37 | logger.warning("veturboio_ext not found, fallback to pure python implementation") 38 | 39 | 40 | def load_file_to_tensor( 41 | file_path: str, 42 | total_tensor: torch.Tensor, 43 | offset: int, 44 | helper: IOHelper, 45 | length: int = 0, 46 | device_id: Optional[int] = -1, 47 | num_thread: Optional[int] = 32, 48 | use_pinmem: Optional[bool] = False, 49 | use_sfcs_sdk: Optional[bool] = False, 50 | use_direct_io: Optional[bool] = False, 51 | cipher_info: CipherInfo = CipherInfo(False), 52 | ) -> torch.Tensor: 53 | return helper.load_file_to_tensor( 54 | file_path, 55 | total_tensor, 56 | length, 57 | offset, 58 | device_id, 59 | num_thread, 60 | use_pinmem, 61 | use_sfcs_sdk, 62 | use_direct_io, 63 | cipher_info.use_cipher, 64 | 
cipher_info.key, 65 | cipher_info.iv, 66 | CipherInfo.HEADER_SIZE if cipher_info.use_header else 0, 67 | ) 68 | 69 | 70 | def save_tensor_to_file( 71 | tensor: torch.Tensor, 72 | file_path: FILE_PATH, 73 | length: int, 74 | helper: IOHelper, 75 | use_pinmem: Optional[bool] = False, 76 | use_sfcs_sdk: Optional[bool] = False, 77 | cipher_info: CipherInfo = CipherInfo(False), 78 | ): 79 | return helper.save_tensor_to_file( 80 | tensor, 81 | file_path, 82 | length, 83 | use_pinmem, 84 | use_sfcs_sdk, 85 | cipher_info.use_cipher, 86 | cipher_info.key, 87 | cipher_info.iv, 88 | CipherInfo.HEADER_SIZE if cipher_info.use_header else 0, 89 | ) 90 | 91 | 92 | def save_file( 93 | state_dict: Dict[str, torch.Tensor], 94 | filename: FILE_PATH, 95 | helper: IOHelper, 96 | metadata: Optional[Dict[str, str]] = None, 97 | use_sfcs_sdk: bool = False, 98 | cipher_info: CipherInfo = CipherInfo(False), 99 | ): 100 | if helper is None: 101 | if cipher_info.use_cipher: 102 | logger.warning("helper is None, cipher is not supported in pure python implementation") 103 | return safetensors_save_file(state_dict, filename, metadata=metadata) 104 | 105 | meta, tensors, sizes, offsets = parse_state_dict(state_dict) 106 | 107 | if metadata: 108 | meta["__metadata__"] = metadata 109 | 110 | meta_bytes = json.dumps(meta).encode('utf-8') 111 | meta_len = len(meta_bytes) 112 | 113 | # alignment 114 | if not meta_len % 8 == 0: 115 | meta_len_pad = (meta_len + 8) // 8 * 8 116 | meta_bytes += b' ' * (meta_len_pad - meta_len) 117 | meta_len = meta_len_pad 118 | 119 | st_header_bytes = meta_len.to_bytes(8, 'little') + meta_bytes 120 | st_header_len = len(st_header_bytes) 121 | 122 | if use_sfcs_sdk: 123 | sfcs_write_file_in_parallel(filename, tensors, sizes, offsets, st_header_bytes, st_header_len, cipher_info) 124 | else: 125 | with open(filename, "wb") as f: 126 | if cipher_info.use_cipher: 127 | if cipher_info.use_header: 128 | cipher_header_bytes = cipher_info.to_header_bytes() 129 | 
f.write(cipher_header_bytes) 130 | enc_st_header_arr = np.zeros(st_header_len, dtype=np.uint8) 131 | encrypt(cipher_info, np.frombuffer(st_header_bytes, dtype=np.uint8), enc_st_header_arr, 0) 132 | f.write(enc_st_header_arr.tobytes()) 133 | else: 134 | f.write(st_header_bytes) 135 | 136 | for i in range(len(tensors)): 137 | tensor = tensors[i] 138 | size = sizes[i] 139 | save_tensor_to_file( 140 | tensor, 141 | filename, 142 | size, 143 | helper=helper, 144 | use_pinmem=False, 145 | use_sfcs_sdk=use_sfcs_sdk, 146 | cipher_info=cipher_info, 147 | ) 148 | 149 | 150 | def init_io_helper() -> IOHelper: 151 | return IOHelper() 152 | -------------------------------------------------------------------------------- /veturboio/ops/posix_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | from typing import Optional 18 | 19 | import numpy as np 20 | from loguru import logger 21 | 22 | from veturboio.ops.cipher import CipherInfo 23 | 24 | try: 25 | from veturboio.utils.load_veturboio_ext import load_veturboio_ext 26 | 27 | veturboio_ext = load_veturboio_ext() 28 | IOHelper = veturboio_ext.IOHelper 29 | POSIXFile = veturboio_ext.POSIXFile 30 | except ImportError: 31 | POSIXFile = None 32 | logger.warning("veturboio_ext not found, fallback to pure python implementation") 33 | 34 | 35 | def posix_read_file( 36 | file_path: str, 37 | arr: np.ndarray, 38 | length: int, 39 | offset: int, 40 | num_thread: Optional[int] = 1, 41 | cipher_info: CipherInfo = CipherInfo(False), 42 | use_direct_io: bool = False, 43 | ) -> int: 44 | posix_file = POSIXFile( 45 | file_path, 46 | cipher_info.use_cipher, 47 | cipher_info.key, 48 | cipher_info.iv, 49 | CipherInfo.HEADER_SIZE if cipher_info.use_header else 0, 50 | ) 51 | return posix_file.read_file_to_array(arr, length, offset, num_thread, use_direct_io) 52 | -------------------------------------------------------------------------------- /veturboio/safetensors.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import json 18 | import os 19 | import pprint 20 | from multiprocessing import shared_memory 21 | from typing import Callable, Dict, List, Optional 22 | 23 | import numpy as np 24 | import torch 25 | from loguru import logger 26 | 27 | from veturboio.loader import BaseLoader 28 | from veturboio.ops.cipher import CipherInfo 29 | from veturboio.types import FILE_PATH 30 | 31 | # All safetensors file will start with a json string, which is the meta info of the file. 32 | # We use the beginning char to determine whether it is a safetensors file. The beginning 33 | # char is '{' and its ascii code is 123. 34 | SAFETENSORS_FILE_MAGIC_NUM = 123 35 | 36 | _safetensors_dtype_mapper = { 37 | "F64": torch.float64, 38 | "F32": torch.float32, 39 | "F16": torch.float16, 40 | "BF16": torch.bfloat16, 41 | "I64": torch.int64, 42 | "I32": torch.int32, 43 | "I16": torch.int16, 44 | "I8": torch.int8, 45 | "U8": torch.uint8, 46 | "BOOL": torch.bool, 47 | } 48 | 49 | 50 | def only_safetensors_property(func: Callable): 51 | func_name = func.__name__ 52 | warning_msg = "This safetensors file is invalid, will take it as a normal torch file." 
53 | 54 | def wrapper(self, *args, **kwargs): 55 | if not self.is_valid: 56 | logger.patch(lambda r: r.update(function=func_name)).warning(warning_msg) 57 | return None 58 | return func(self, *args, **kwargs) 59 | 60 | return wrapper 61 | 62 | 63 | class TensorMeta: 64 | def __init__(self, name: str, dtype: str, shape: List[int], data_offsets: List[int]) -> None: 65 | self._name = name 66 | self._dtype = _safetensors_dtype_mapper[dtype] 67 | self._shape = shape 68 | self._data_offsets = data_offsets 69 | 70 | @property 71 | def name(self) -> str: 72 | return self._name 73 | 74 | @property 75 | def dtype(self) -> torch.dtype: 76 | return self._dtype 77 | 78 | @property 79 | def shape(self) -> List[int]: 80 | return self._shape 81 | 82 | @property 83 | def data_offsets(self) -> List[int]: 84 | return self._data_offsets 85 | 86 | def __str__(self) -> str: 87 | return str( 88 | { 89 | "name": self._name, 90 | "dtype": self._dtype, 91 | "shape": self._shape, 92 | "data_offsets": self._data_offsets, 93 | } 94 | ) 95 | 96 | def __repr__(self) -> str: 97 | return self.__str__() 98 | 99 | 100 | class SafetensorsFile: 101 | def __init__(self, file: FILE_PATH, loader: BaseLoader, use_cipher: Optional[bool] = None) -> None: 102 | self._file = file 103 | self._loader = loader 104 | 105 | self._is_valid = True 106 | 107 | # cipher related 108 | self._cipher_info = CipherInfo(False) 109 | if use_cipher == True or use_cipher == None and os.getenv("VETURBOIO_USE_CIPHER", "0") == "1": 110 | header_bytes = loader.load_to_bytes(offset=0, count=CipherInfo.HEADER_SIZE) 111 | self._cipher_info = CipherInfo(True, header_bytes, os.path.abspath(self.file)) 112 | 113 | if self._cipher_info.use_header: 114 | h_off = CipherInfo.HEADER_SIZE 115 | else: 116 | h_off = 0 117 | 118 | magic_number = loader.load_to_bytes(offset=8 + h_off, count=1, cipher_info=self._cipher_info)[0] 119 | if magic_number != SAFETENSORS_FILE_MAGIC_NUM: 120 | self._is_valid = False 121 | return 122 | 123 | 
self._meta_size = np.frombuffer( 124 | loader.load_to_bytes(offset=h_off, count=8, cipher_info=self._cipher_info), dtype=np.int64 125 | )[0] 126 | meta_bytes = loader.load_to_bytes(offset=8 + h_off, count=self._meta_size, cipher_info=self._cipher_info) 127 | meta_dict = json.loads(meta_bytes.decode("utf-8")) 128 | 129 | self._shared_tensor = {} 130 | self._ignored_meta = {} 131 | if "__metadata__" in meta_dict: 132 | meta_data = meta_dict.pop("__metadata__") 133 | for key, value in meta_data.items(): 134 | if value not in meta_dict: 135 | self._ignored_meta[key] = value 136 | else: 137 | self._shared_tensor[key] = value 138 | 139 | self._meta = {} 140 | for key in meta_dict: 141 | self._meta[key] = TensorMeta( 142 | name=key, 143 | dtype=meta_dict[key]["dtype"], 144 | shape=meta_dict[key]["shape"], 145 | data_offsets=meta_dict[key]["data_offsets"], 146 | ) 147 | 148 | # record the offset of the tensor data 149 | self._tensor_offset = np.dtype(np.int64).itemsize + self._meta_size + h_off 150 | 151 | @staticmethod 152 | def split_tensor_to_state_dict( 153 | total_tensor: torch.Tensor, safetensor_file: "SafetensorsFile" 154 | ) -> Dict[str, torch.Tensor]: 155 | state_dict = {} 156 | 157 | for tensor_meta in safetensor_file.meta.values(): 158 | tensor = total_tensor[tensor_meta.data_offsets[0] : tensor_meta.data_offsets[1]] 159 | tensor = tensor.view(dtype=tensor_meta.dtype) 160 | tensor = tensor.reshape(tensor_meta.shape) 161 | state_dict[tensor_meta.name] = tensor 162 | 163 | for src_tensor_key, tgt_tensor_key in safetensor_file.shared_tensor.items(): 164 | state_dict[src_tensor_key] = state_dict[tgt_tensor_key] 165 | return state_dict 166 | 167 | @property 168 | def file(self) -> FILE_PATH: 169 | return self._file 170 | 171 | @property 172 | def is_valid(self) -> bool: 173 | return self._is_valid 174 | 175 | @property 176 | @only_safetensors_property 177 | def meta_size(self) -> int: 178 | return self._meta_size 179 | 180 | @property 181 | @only_safetensors_property 
182 | def meta(self) -> Dict[str, TensorMeta]: 183 | return self._meta 184 | 185 | @property 186 | @only_safetensors_property 187 | def tensor_offset(self) -> int: 188 | return self._tensor_offset 189 | 190 | @property 191 | @only_safetensors_property 192 | def shared_tensor(self) -> Dict[str, str]: 193 | return self._shared_tensor 194 | 195 | def __str__(self) -> str: 196 | if not self._is_valid: 197 | return f"{self.file} is not a valid safetensors file." 198 | return pprint.pformat( 199 | { 200 | "file": self._file, 201 | "meta_size": self._meta_size, 202 | "meta": self._meta, 203 | "tensor_offset": self._tensor_offset, 204 | } 205 | ) 206 | 207 | def __repr__(self) -> str: 208 | return self.__str__() 209 | 210 | def load(self, map_location: str = "cpu", state_dict: Dict[str, torch.Tensor] = None) -> Dict[str, torch.Tensor]: 211 | if not self._is_valid: 212 | return self._loader.load_pt(map_location, self._cipher_info) 213 | else: 214 | return self._loader.load_safetensors(self, map_location, state_dict) 215 | 216 | def load_to_shmem(self) -> shared_memory.SharedMemory: 217 | return self._loader.load_to_shmem(self._cipher_info) 218 | 219 | 220 | def parse_state_dict(state_dict: Dict[str, torch.Tensor]): 221 | meta = {} 222 | tensors = [] 223 | sizes = [] 224 | offsets = [] 225 | 226 | data_offset_begin = 0 227 | data_offset_end = 0 228 | _safetensors_dtype_str = {v: k for k, v in _safetensors_dtype_mapper.items()} 229 | bool_state_dict = {} 230 | for key, tensor in state_dict.items(): 231 | if tensor.dtype == torch.bool: 232 | bool_state_dict[key] = tensor 233 | continue 234 | else: 235 | size = 1 236 | for d in range(tensor.dim()): 237 | size *= tensor.shape[d] 238 | 239 | try: 240 | bytes = torch.finfo(tensor.dtype).bits // 8 241 | except: 242 | bytes = torch.iinfo(tensor.dtype).bits // 8 243 | size *= bytes 244 | 245 | data_offset_end = data_offset_begin + size 246 | meta[key] = { 247 | "dtype": _safetensors_dtype_str[tensor.dtype], 248 | "shape": 
tensor.shape, 249 | "data_offsets": [data_offset_begin, data_offset_end], 250 | } 251 | if size > 0: 252 | tensors.append(tensor) 253 | sizes.append(size) 254 | offsets.append(data_offset_begin) 255 | data_offset_begin = data_offset_end 256 | 257 | for key, tensor in bool_state_dict.items(): 258 | size = 1 259 | for d in range(tensor.dim()): 260 | size *= tensor.shape[d] 261 | 262 | data_offset_end = data_offset_begin + size 263 | meta[key] = { 264 | "dtype": _safetensors_dtype_str[tensor.dtype], 265 | "shape": tensor.shape, 266 | "data_offsets": [data_offset_begin, data_offset_end], 267 | } 268 | if size > 0: 269 | tensors.append(tensor) 270 | sizes.append(size) 271 | offsets.append(data_offset_begin) 272 | data_offset_begin = data_offset_end 273 | return meta, tensors, sizes, offsets 274 | -------------------------------------------------------------------------------- /veturboio/saver/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | from veturboio.saver.base_saver import BaseSaver, PosixSaver 18 | from veturboio.saver.sfcs_client_saver import SfcsClientSaver 19 | 20 | __all__ = ["BaseSaver", "PosixSaver", "SfcsClientSaver"] 21 | -------------------------------------------------------------------------------- /veturboio/saver/base_saver.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | ''' 16 | 17 | import os 18 | import tempfile 19 | from typing import Any, Dict 20 | 21 | import numpy as np 22 | import torch 23 | from safetensors.torch import save_file as safetenors_save_file 24 | from safetensors.torch import save_model as safetensors_save_model 25 | 26 | from veturboio.ops.cipher import CipherInfo, CipherMode, create_cipher_with_header, encrypt 27 | from veturboio.ops.io_utils import IOHelper 28 | from veturboio.ops.io_utils import save_file as fast_save_file 29 | from veturboio.types import FILE_PATH 30 | 31 | 32 | class BaseSaver: 33 | def __init__(self, method: str) -> None: 34 | self.method = method 35 | 36 | def save_file(self, state_dict: Dict[str, torch.Tensor], file: FILE_PATH, metadata: Dict[str, str] = None) -> None: 37 | raise NotImplementedError 38 | 39 | def save_model(self, model: torch.nn.Module, file: FILE_PATH) -> None: 40 | raise NotImplementedError 41 | 42 | 43 | class PosixSaver(BaseSaver): 44 | def __init__(self, file: FILE_PATH, helper: IOHelper = None, use_cipher: bool = False) -> None: 45 | super().__init__(method="posix") 46 | self.file = file 47 | use_cipher = use_cipher or os.getenv("VETURBOIO_USE_CIPHER", "0") == "1" 48 | use_header = use_cipher and os.getenv("VETURBOIO_CIPHER_HEADER", "0") == "1" 49 | if use_header: 50 | self.cipher_info = create_cipher_with_header(CipherMode.CTR_128, os.path.abspath(self.file)) 51 | else: 52 | self.cipher_info = CipherInfo(use_cipher, None, os.path.abspath(self.file)) 53 | 54 | self.helper = helper 55 | 56 | def save_file( 57 | self, state_dict: Dict[str, torch.Tensor], metadata: Dict[str, str] = None, enable_fast_mode: bool = False 58 | ) -> None: 59 | if enable_fast_mode: 60 | fast_save_file( 61 | state_dict, 62 | self.file, 63 | helper=self.helper, 64 | metadata=metadata, 65 | cipher_info=self.cipher_info, 66 | ) 67 | else: 68 | if self.cipher_info.use_cipher: 69 | with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile: 70 | tmp_file_path = tmpfile.name 71 | 
safetenors_save_file(state_dict, tmp_file_path, metadata=metadata) 72 | tmp_file_size = os.path.getsize(tmp_file_path) 73 | tmp_file_bytes = np.memmap(tmp_file_path, dtype=np.uint8, mode='r', shape=tmp_file_size) 74 | h_off = CipherInfo.HEADER_SIZE if self.cipher_info.use_header else 0 75 | file_bytes = np.memmap(self.file, dtype=np.uint8, mode='w+', shape=tmp_file_size + h_off) 76 | encrypt(self.cipher_info, tmp_file_bytes, file_bytes[h_off:], 0) 77 | if h_off: 78 | file_bytes[:h_off] = np.frombuffer(self.cipher_info.to_header_bytes(), dtype=np.uint8) 79 | file_bytes.flush() 80 | else: 81 | safetenors_save_file(state_dict, self.file, metadata=metadata) 82 | 83 | def save_model(self, model: torch.nn.Module) -> None: 84 | if self.cipher_info.use_cipher: 85 | with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile: 86 | tmp_file_path = tmpfile.name 87 | safetensors_save_model(model, tmp_file_path) 88 | tmp_file_size = os.path.getsize(tmp_file_path) 89 | tmp_file_bytes = np.memmap(tmp_file_path, dtype=np.uint8, mode='r', shape=tmp_file_size) 90 | h_off = CipherInfo.HEADER_SIZE if self.cipher_info.use_header else 0 91 | file_bytes = np.memmap(self.file, dtype=np.uint8, mode='w+', shape=tmp_file_size + h_off) 92 | encrypt(self.cipher_info, tmp_file_bytes, file_bytes[h_off:], 0) 93 | if h_off: 94 | file_bytes[:h_off] = np.frombuffer(self.cipher_info.to_header_bytes(), dtype=np.uint8) 95 | file_bytes.flush() 96 | else: 97 | safetensors_save_model(model, self.file) 98 | 99 | def save_pt(self, state_dict: Dict[str, torch.Tensor]) -> None: 100 | if self.cipher_info.use_cipher: 101 | with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile: 102 | tmp_file_path = tmpfile.name 103 | torch.save(state_dict, tmp_file_path) 104 | tmp_file_size = os.path.getsize(tmp_file_path) 105 | tmp_file_bytes = np.memmap(tmp_file_path, dtype=np.uint8, mode='r', shape=tmp_file_size) 106 | h_off = CipherInfo.HEADER_SIZE if self.cipher_info.use_header else 0 107 | file_bytes = 
np.memmap(self.file, dtype=np.uint8, mode='w+', shape=tmp_file_size + h_off) 108 | encrypt(self.cipher_info, tmp_file_bytes, file_bytes[h_off:], 0) 109 | if h_off: 110 | file_bytes[:h_off] = np.frombuffer(self.cipher_info.to_header_bytes(), dtype=np.uint8) 111 | file_bytes.flush() 112 | else: 113 | torch.save(state_dict, self.file) 114 | -------------------------------------------------------------------------------- /veturboio/saver/sfcs_client_saver.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) 2024 Beijing Volcano Engine Technology Ltd. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
class SfcsClientSaver(BaseSaver):
    """Saver that writes checkpoints to SFCS through the client SDK.

    Ciphering can be forced via the ``VETURBOIO_USE_CIPHER`` env var, and a
    cipher header is prepended when ``VETURBOIO_CIPHER_HEADER`` is also set.
    """

    def __init__(
        self,
        file: FILE_PATH,
        helper: IOHelper = None,
        use_cipher: bool = False,
    ) -> None:
        """Create a client-mode saver for *file*.

        Args:
            file: destination path; mapped onto the SFCS mount point.
            helper: optional IOHelper used by the fast save path.
            use_cipher: enable payload encryption (may also be forced on
                by the ``VETURBOIO_USE_CIPHER`` environment variable).
        """
        super().__init__(method="client")

        self.file = file
        self.helper = helper

        # Map the user-visible path onto the configured SFCS mount point.
        mount_path = init_sfcs_conf(file)
        self.sfcs_valid_path = path_mapper(self.file, mount_path)

        # Environment variables can turn ciphering on even when the caller
        # did not request it explicitly.
        use_cipher = use_cipher or os.getenv("VETURBOIO_USE_CIPHER", "0") == "1"
        use_header = use_cipher and os.getenv("VETURBOIO_CIPHER_HEADER", "0") == "1"
        if use_header:
            self.cipher_info = create_cipher_with_header(CipherMode.CTR_128, os.path.abspath(self.file))
        else:
            self.cipher_info = CipherInfo(use_cipher, None, os.path.abspath(self.file))

    def _upload_local_file(self, file_path: FILE_PATH) -> None:
        # Shared tail of the temp-file save paths: load the serialized file
        # (prepending the cipher header when enabled) and hand the bytes to
        # sfcs_write_file. Extracted because this logic was duplicated
        # verbatim in save_file/save_model/save_pt.
        file_size = os.path.getsize(file_path)
        if self.cipher_info.use_header:
            h_off = CipherInfo.HEADER_SIZE
            file_bytes = np.empty(file_size + h_off, dtype=np.byte)
            file_bytes[:h_off] = np.frombuffer(self.cipher_info.to_header_bytes(), dtype=np.byte)
            file_bytes[h_off:] = np.fromfile(file_path, dtype=np.byte, count=file_size)
        else:
            # NOTE(review): 'r+' (writable) is kept on purpose — sfcs_write_file
            # receives cipher_info and may encrypt the buffer in place; confirm
            # before downgrading this to read-only mode 'r'.
            file_bytes = np.memmap(file_path, dtype=np.byte, mode='r+', shape=file_size)
        sfcs_write_file(self.sfcs_valid_path, file_bytes, len(file_bytes), self.cipher_info)

    def save_file(
        self, state_dict: Dict[str, torch.Tensor], metadata: Dict[str, str] = None, enable_fast_mode: bool = False
    ) -> None:
        """Save *state_dict* in safetensors format to SFCS.

        Args:
            state_dict: tensors to serialize.
            metadata: optional safetensors metadata dictionary.
            enable_fast_mode: when True, write directly through the SFCS SDK
                instead of staging in a tmpfs-backed temp file.
        """
        if enable_fast_mode:
            fast_save_file(
                state_dict,
                self.sfcs_valid_path,
                helper=self.helper,
                metadata=metadata,
                cipher_info=self.cipher_info,
                use_sfcs_sdk=True,
            )
        else:
            # /dev/shm keeps the intermediate file in memory, and the context
            # manager removes it even if serialization fails.
            with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
                file_path = tmpfile.name
                safetenors_save_file(state_dict, file_path, metadata=metadata)
                self._upload_local_file(file_path)

    def save_model(self, model: torch.nn.Module) -> None:
        """Serialize *model* with safetensors and upload it to SFCS."""
        with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
            file_path = tmpfile.name
            safetensors_save_model(model, file_path)
            self._upload_local_file(file_path)

    def save_pt(self, state_dict: Dict[str, torch.Tensor]) -> None:
        """Serialize *state_dict* with ``torch.save`` and upload it to SFCS."""
        with tempfile.NamedTemporaryFile(dir="/dev/shm") as tmpfile:
            file_path = tmpfile.name
            torch.save(state_dict, file_path)
            self._upload_local_file(file_path)
LIBCFS_DEFAULT_URL = "https://veturbo-cn-beijing.tos-cn-beijing.volces.com/veturboio/libcfs/libcloudfs.so"
LIBCFS_DEFAULT_PATH = "/usr/lib/libcloudfs.so"


def load_libcfs():
    """Ensure libcloudfs.so exists locally, downloading it when missing.

    The target path comes from ``LIBCFS_PATH`` (default
    ``/usr/lib/libcloudfs.so``) and the download URL from ``LIBCFS_URL``.

    Raises:
        requests.HTTPError: if the download responds with an error status.
    """
    libcfs_path = os.getenv("LIBCFS_PATH", LIBCFS_DEFAULT_PATH)
    if not os.path.isfile(libcfs_path):
        # libcfs_path not exist, download from url
        import requests

        libcfs_url = os.getenv("LIBCFS_URL", LIBCFS_DEFAULT_URL)
        logger.info(f"download libcloudfs.so from {libcfs_url}, save to {libcfs_path}")
        r = requests.get(libcfs_url, timeout=60)
        # Fail loudly on HTTP errors instead of silently writing an error
        # page to the shared-library path.
        r.raise_for_status()
        # Write atomically: a partially written file must never be left at
        # libcfs_path, where the isfile() check above would treat it as a
        # valid library on the next run.
        tmp_path = libcfs_path + ".tmp"
        with open(tmp_path, 'wb') as f:
            f.write(r.content)
        os.replace(tmp_path, libcfs_path)


def load_veturboio_ext():
    """Import and return the compiled veturboio extension module,
    fetching libcloudfs.so first if it is not already present."""
    load_libcfs()
    import veturboio_ext

    return veturboio_ext
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | __version__ = "0.1.3rc4" 18 | --------------------------------------------------------------------------------