├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── .gitmodules
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── Makefile
├── README-ZH.MD
├── README.MD
├── benches
    ├── block_device
    │   └── main.rs
    ├── grpc
    │   ├── client.rs
    │   ├── main.rs
    │   ├── mod.rs
    │   └── server.rs
    ├── local_storage
    │   └── main.rs
    ├── mod.rs
    └── rpc
    │   ├── client.rs
    │   ├── main.rs
    │   ├── mod.rs
    │   └── server.rs
├── build.rs
├── docker
    ├── client
    │   └── Dockerfile
    ├── manager
    │   └── Dockerfile
    └── server
    │   └── Dockerfile
├── docs
    ├── RDMA.md
    ├── README-ZH.MD
    ├── README.MD
    ├── images
    │   └── architecture.jpg
    └── specification.md
├── examples
    ├── hello_client.rs
    ├── hello_server.rs
    ├── manager.yaml
    ├── rdma_client.rs
    └── rdma_server.rs
├── intercept
    ├── Cargo.toml
    ├── build.rs
    └── src
    │   ├── client.rs
    │   ├── file_desc.rs
    │   ├── lib.rs
    │   ├── path.rs
    │   ├── syscall_intercept.rs
    │   └── test_log.rs
├── proto
    └── test.proto
├── scripts
    ├── add_node.sh
    ├── close_all_instances.sh
    ├── delete_node.sh
    ├── read_files.sh
    ├── test.sh
    └── test_run_all.sh
├── src
    ├── bin
    │   ├── client.rs
    │   ├── manager.rs
    │   └── server.rs
    ├── client
    │   ├── daemon.rs
    │   ├── fuse_client.rs
    │   └── mod.rs
    ├── common
    │   ├── byte.rs
    │   ├── cache.rs
    │   ├── errors.rs
    │   ├── hash_ring.rs
    │   ├── info_syncer.rs
    │   ├── mod.rs
    │   ├── sender.rs
    │   ├── serialization.rs
    │   └── util.rs
    ├── lib.rs
    ├── manager
    │   ├── core.rs
    │   ├── manager_service.rs
    │   └── mod.rs
    ├── rpc
    │   ├── callback.rs
    │   ├── client.rs
    │   ├── connection.rs
    │   ├── mod.rs
    │   ├── protocol.rs
    │   ├── rdma
    │   │   ├── client.rs
    │   │   ├── mod.rs
    │   │   └── server.rs
    │   └── server.rs
    └── server
    │   ├── distributed_engine.rs
    │   ├── mod.rs
    │   ├── storage_engine
    │       ├── block_engine
    │       │   ├── allocator.rs
    │       │   ├── index.rs
    │       │   ├── io.rs
    │       │   └── mod.rs
    │       ├── file_engine.rs
    │       ├── meta_engine.rs
    │       └── mod.rs
    │   └── transfer_manager.rs
└── test_io500.sh


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: Continuous integration
  2 | 
  3 | on:
  4 |   push:
  5 |     branches: [ "main" ]
  6 |     paths-ignore:
  7 |       - "docs/**"
  8 |       - "**/*.[mM][dD]"  # filters are case-sensitive; the READMEs here end in .MD
  9 |       - "**/*.yaml"
 10 |   pull_request:
 11 |     branches: [ "main" ]
 12 |     paths-ignore:
 13 |       - "docs/**"
 14 |       - "**/*.[mM][dD]"  # filters are case-sensitive; the READMEs here end in .MD
 15 |       - "**/*.yaml"
 16 | 
 17 | env:
 18 |   CARGO_TERM_COLOR: always
 19 |   RUST_TOOLCHAIN: 1.68.0
 20 | 
 21 | jobs:
 22 |   build:
 23 |     name: Continuous integration
 24 |     runs-on: self-hosted  # the cache steps below rely on this runner's /data paths
 25 |     steps:
 26 |       - uses: actions/checkout@v3
 27 |         with:
 28 |           submodules: true
 29 |       - uses: actions-rs/toolchain@v1.0.6
 30 |         with:
 31 |           profile: minimal
 32 |           toolchain: ${{ env.RUST_TOOLCHAIN }}
 33 |           override: true
 34 | 
 35 |       - name: Install dependencies
 36 |         id: install_deps
 37 |         run: make install_deps
 38 | 
 39 |       - name: Copy cache  # restore target/debug saved by the "Backup Temporary" step
 40 |         run: |
 41 |           mkdir -p /data/action/_work/sealfs/sealfs/target
 42 |           if [ -d /data/backup/debug ]; then
 43 |             mv /data/backup/debug /data/action/_work/sealfs/sealfs/target
 44 |           fi
 45 | 
 46 |       - name: Build
 47 |         id: make-build
 48 |         run: |
 49 |           make build
 50 |           # make build features=mem-db
 51 |         continue-on-error: true  # record the outcome; the Summary step fails the job
 52 | 
 53 |       - name: Check
 54 |         id: cargo-check
 55 |         run: |
 56 |           cargo check --features=disk-db
 57 |           # cargo check --features=mem-db
 58 |         continue-on-error: true
 59 | 
 60 |       # Unit tests first, then test_io500.sh against /data.
 61 |       - name: Test Suite
 62 |         id: make-test
 63 |         run: |
 64 |           sudo rm -rf /tmp/test*
 65 |           make test
 66 |           ./test_io500.sh /data
 67 |           # make test features=mem-db
 68 |         continue-on-error: true
 69 | 
 70 |       - name: Clippy
 71 |         id: cargo-clippy
 72 |         run: |
 73 |           rustup component add clippy
 74 |           cargo clippy --features=disk-db -- -D warnings
 75 |           # cargo clippy --features=mem-db -- -D warnings
 76 |         continue-on-error: true
 77 | 
 78 |       - name: Rustfmt
 79 |         id: cargo-fmt
 80 |         run: |
 81 |           rustup component add rustfmt
 82 |           cargo fmt --all -- --check
 83 |         continue-on-error: true
 84 | 
 85 |       - if: always()  # save the build cache even when an earlier step failed
 86 |         name: Backup Temporary
 87 |         run: |
 88 |           if [ -d /data/action/_work/sealfs/sealfs/target/debug ]; then
 89 |             if [ -d /data/backup/debug ]; then
 90 |               rm -rf /data/backup/debug
 91 |             fi
 92 |             mkdir -p /data/backup && mv /data/action/_work/sealfs/sealfs/target/debug /data/backup
 93 |           fi
 94 | 
 95 |       - if: ${{ steps.install_deps.outcome == 'success' }}
 96 |         name: Summary  # the only step that can fail the job for build/test/lint results
 97 |         run: |
 98 |           echo "Build: ${{ steps.make-build.outcome }}"
 99 |           echo "Check: ${{ steps.cargo-check.outcome }}"
100 |           echo "Test: ${{ steps.make-test.outcome }}"
101 |           echo "Clippy: ${{ steps.cargo-clippy.outcome }}"
102 |           echo "Rustfmt: ${{ steps.cargo-fmt.outcome }}"
103 |           if [ "${{ steps.make-build.outcome }}" != "success" ] || [ "${{ steps.cargo-check.outcome }}" != "success" ] || [ "${{ steps.make-test.outcome }}" != "success" ] || [ "${{ steps.cargo-clippy.outcome }}" != "success" ] || [ "${{ steps.cargo-fmt.outcome }}" != "success" ]; then
104 |             exit 1
105 |           fi


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /.vscode


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "syscall_intercept"]
2 | 	path = syscall_intercept
3 | 	url = https://github.com/pmem/syscall_intercept.git
4 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "sealfs"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | authors = ["The Sealfs Developers"]
 6 | license = "Apache-2.0"
 7 | 
 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 9 | 
10 | [dependencies]
11 | tokio = { version = "1.21.2", features = ["full"] }
12 | lazy_static = "1.4.0"
13 | anyhow = "1.0.66"
14 | thiserror = "1.0.37"
15 | log = "0.4.17"
16 | clap = { version = "=4.0.18", features = ["derive"] }
17 | env_logger = "0.9.1"
18 | prost = "0.11.0"
19 | serde = { version = "1", features = ["derive"] }
20 | serde_yaml = "0.9.14"
21 | # tonic-health = "0.7.1"
22 | dashmap = "5.4.0"
23 | async-trait = "0.1.73"
24 | nix = "0.26.1"
25 | rocksdb = "0.19.0"
26 | bincode = "1.3.3"
27 | ahash = "0.8.3"
28 | parking_lot = "0.12.1"
29 | fuser = "0.11.1"
30 | libc = "0.2"
31 | wyhash = "0.5.0"
32 | kanal = "0.1.0-pre8"
33 | rand = "0.8.5"
34 | pegasusdb = { git = "https://github.com/uran0sH/pegasusdb.git" }
35 | bytes = "1.4.0"
36 | ibv = { git = "https://github.com/mond77/ibv.git" }
37 | conhash = '0.5.0'
38 | spin = "0.5"
39 | 
40 | [build-dependencies]
41 | tonic-build = "0.8"
42 | 
43 | [dev-dependencies]
44 | tonic = "0.8.2"
45 | core_affinity = "0.8.0"
46 | criterion = "0.4"
47 | 
48 | [[bin]]
49 | name = "client"
50 | path = "src/bin/client.rs"
51 | 
52 | [[bin]]
53 | name = "server"
54 | path = "src/bin/server.rs"
55 | 
56 | [[bin]]
57 | name = "manager"
58 | path = "src/bin/manager.rs"
59 | 
60 | [workspace]
61 | members = [
62 |     "intercept",
63 | ]
64 | 
65 | [features]
66 | disk-db = []
67 | mem-db = []
68 | 
69 | [[bench]]
70 | name = "rpc"
71 | harness = false
72 | 
73 | [[bench]]
74 | name = "grpc"
75 | harness = false
76 | 
77 | [[bench]]
78 | name = "local_storage"
79 | harness = false
80 | 
81 | # benches/block_device/main.rs defines its entry point with `criterion_main!`,
82 | # so the default libtest harness has to be disabled for it as well.
83 | [[bench]]
84 | name = "block_device"
85 | harness = false
86 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | target_dir = target/debug
 2 | features := disk-db
 3 | flags += --workspace --verbose --features=$(features)
 4 | deps = 	pkg-config protobuf-compiler clang libfuse-dev libcapstone-dev 	\
 5 | 		iproute2 perftest build-essential net-tools cmake pandoc 		\
 6 | 		libnl-3-dev libnl-route-3-dev libibverbs-dev
 7 | 
 8 | all_release: install_deps release
 9 | all_debug: install_deps build  # no 'debug' target exists; 'build' is the non-release cargo build
10 | 
11 | install_deps:
12 | 	sudo apt update && sudo apt-get install $(deps) -y
13 | 
14 | release: 
15 | 	cargo build $(flags) --release
16 | 
17 | build:
18 | 	cargo build $(flags)
19 | 
20 | test:
21 | 	cargo test --features=$(features)
22 | 
23 | images: manager-image server-image client-image
24 | 
25 | manager-image:
26 | 	docker build -t manager -f docker/manager/Dockerfile . --no-cache
27 | 
28 | server-image:
29 | 	docker build -t server -f docker/server/Dockerfile .
30 | 
31 | client-image:
32 | 	docker build -t client -f docker/client/Dockerfile .


--------------------------------------------------------------------------------
/README-ZH.MD:
--------------------------------------------------------------------------------
 1 | # SEALFS
 2 | 
 3 | [sealos](https://github.com/labring/sealos)的存储底座，我们希望构建一个适用于云原生的高性能，高可靠，弹性伸缩的分布式文件存储
 4 | 
 5 | ## 系统架构
 6 | sealfs的架构为无中心架构，且无独立的元数据节点，我们希望尽最大可能提升读写性能和解决存储海量小文件问题
 7 | 
 8 | - server负责文件以及元数据存储，元数据无疑是分布式文件系统的热点文件，所以我们采用分挂盘的方式对数据和元数据进行存储，用户可以选择更好的硬件对元数据进行存储。
 9 | - client实现用户态的文件系统，对文件请求进行拦截并通过哈希算法进行存储寻址。
10 | - manager负责协调集群。
11 | 
12 | 设计图如下：
13 | ![](docs/images/architecture.jpg)
14 | 
15 | ### 全链路用户态
16 | 我们希望结合特定硬件从客户端文件请求劫持到网络到存储打造一个全链路用户态的分布式文件存储系统，从而获得极致的性能体验。
17 | 
18 | 更多设计可以参考：
19 | ### 设计文档
20 | [设计文档](https://github.com/labring/sealfs/blob/main/docs/README-ZH.MD)
21 | 
22 | ## 实现计划
23 | 目前我们致力于全链路性能提升，对于其他方面的建设如高可靠性以及高可用性的优先级会低一些
24 | - 第一版功能实现：
25 |   - 客户端:
26 |     - [ ] fuse文件系统接口
27 |     - [ ] 系统调用劫持（用户态文件系统）
28 |     - [ ] 选址算法
29 |     - [ ] 批处理
30 | 
31 |   - 服务端:
32 |     - [ ] 绕过本地文件系统
33 |     - [ ] 磁盘管理
34 |     - [ ] 目录管理
35 |     - [ ] 元数据持久化内存存储
36 |     - [ ] 文件索引
37 |     - [ ] 文件锁
38 |     - [ ] 持久化数据结构
39 |     
40 |   - 协调节点:
41 |     - [ ] 心跳管理
42 |     
43 |   - 网络:
44 |     - [ ] RDMA
45 |     - [ ] socket网络通信
46 | 
47 |   - 测试
48 |     - [ ] IO500
49 |     - [ ] 功能测试
50 | 
51 | ## 编译
52 | 
53 | rust版本 1.68
54 | 
55 | ```bash
56 | cargo build
57 | ```
58 | 
59 | ## 快速使用
60 | 
61 | ### 启动管理节点
62 | 
63 | ```bash
64 | # edit manager.yaml
65 | vi examples/manager.yaml
66 | 
67 | # start manager with manager.yaml
68 | SEALFS_CONFIG_PATH=./examples ./target/debug/manager &
69 | ```
70 | 
71 | ### 启动一个服务器
72 | 
73 | ```bash
74 | ./target/debug/server --manager-address <manager_ip>:<manager_port> --server-address <server_ip>:<server_port> --database-path <local_database_dir> --storage-path <local_storage_dir> --log-level warn &
75 | ```
76 | 
77 | ### 启动一个客户端
78 | 
79 | ```bash
80 | ./target/debug/client --manager-address <manager_ip>:<manager_port> --log-level warn daemon
81 | ```
82 | 
83 | ### 创建并挂载磁盘
84 | 
85 | ```bash
86 | ./target/debug/client --manager-address <manager_ip>:<manager_port> --log-level warn create test1 100000
87 | ./target/debug/client --log-level warn mount ~/fs test1
88 | ```
89 | 
90 | ## 开源协议
91 | [Apache License 2.0](https://github.com/labring/sealfs/blob/main/LICENSE)


--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
  1 | # SEALFS
  2 | [English](https://github.com/labring/sealfs/blob/main/README.MD) |
  3 | [简体中文](https://github.com/labring/sealfs/blob/main/README-ZH.MD)
  4 | 
  5 | The storage system of [sealos](https://github.com/labring/sealos),
  6 | aiming to be a high-performance, highly reliable and auto-scalable
  7 | distributed file system which fits the cloud native environment.
  8 | 
  9 | ## System Architecture
 10 | 
 11 | The architecture of sealfs is decentralized, and there is no single
 12 | metadata node. sealfs hopes to improve the read and write performance
 13 | as much as possible and solve the problems of storing large amounts of
 14 | small files.
 15 | 
 16 | ### Main Components
 17 | Sealfs consists of the following three components:
 18 | 
 19 | #### Server
 20 | 
 21 | The server component is responsible for storing files and metadata. sealfs
 22 | stores data and metadata on separate disks, since metadata is undoubtedly
 23 | the hottest data in a distributed file system. This way, users can choose
 24 | better hardware for storing metadata.
 25 | 
 26 | #### Client
 27 | 
 28 | The client component implements the file system in user mode. It
 29 | intercepts file requests and uses hash algorithms to locate where
 30 | each file is stored.
 31 | 
 32 | #### Manager
 33 | 
 34 | Manager component is responsible for coordinating the cluster.
 35 | 
 36 | 
 37 | The system architecture is shown below:
 38 | ![](docs/images/architecture.jpg)
 39 | 
 40 | ### User Mode All The Way
 41 | 
 42 | With specific hardware, sealfs aims to run entirely in user mode,
 43 | from file-request interception on the client side, through the network,
 44 | down to the storage layer, for maximum performance.
 45 | 
 46 | For more design details, see:
 47 | ### Design Document
 48 | [design document](https://github.com/labring/sealfs/blob/main/docs/README.MD)
 49 | 
 50 | ## RoadMap
 51 | Currently, we are committed to improving the performance
 52 | thoroughly. For other design aspects, such as high reliability and
 53 | high availability, the priority would be lower.
 54 | 
 55 | - First version features:
 56 |   - Client:
 57 |     - [ ] fuse file system interface
 58 |     - [ ] system call hijacking (user-mode file system)
 59 |     - [ ] location algorithm
 60 |     - [ ] batch process
 61 | 
 62 |   - Server:
 63 |     - [ ] bypass the local file system
 64 |     - [ ] file Storage
 65 |     - [ ] disk management
 66 |     - [ ] directory management
 67 |     - [ ] Metadata persistent memory storage
 68 |     - [ ] file index
 69 |     - [ ] file lock
 70 |     - [ ] Persistent data structure
 71 | 
 72 |   - Manager:
 73 |     - [ ] heartbeat management
 74 | 
 75 |   - Network:
 76 |     - [ ] RDMA
 77 |     - [ ] socket network
 78 | 
 79 |   - Test
 80 |     - [ ] IO500
 81 |     - [ ] function test
 82 | 
 83 | ## Compile
 84 | 
 85 | rust version 1.68
 86 | 
 87 | ```bash
 88 | make build
 89 | ```
 90 | 
 91 | ## Quick Start
 92 | 
 93 | ### Start Manager
 94 | 
 95 | ```bash
 96 | # edit manager.yaml
 97 | vi examples/manager.yaml
 98 | 
 99 | # start manager with manager.yaml
100 | SEALFS_CONFIG_PATH=./examples ./target/debug/manager &
101 | ```
102 | 
103 | ### Start Servers on a Node
104 | 
105 | ```bash
106 | ./target/debug/server --manager-address <manager_ip>:<manager_port> --server-address <server_ip>:<server_port> --database-path <local_database_dir> --storage-path <local_storage_dir> --log-level warn &
107 | ```
108 | 
109 | ### Start Client on a Node
110 | 
111 | ```bash
112 | ./target/debug/client --log-level warn daemon
113 | ```
114 | 
115 | ### Create & Mount Disk
116 | 
117 | ```bash
118 | ./target/debug/client --log-level warn create test1 100000
119 | ./target/debug/client --log-level warn mount ~/fs test1
120 | ```
121 | 
122 | ## LICENSE
123 | [Apache License 2.0](https://github.com/labring/sealfs/blob/main/LICENSE)
124 | 


--------------------------------------------------------------------------------
/benches/block_device/main.rs:
--------------------------------------------------------------------------------
 1 | //! run the benchmark with:
 2 | //!     cargo bench --bench block_device --features=disk-db
 3 | 
 4 | use std::sync::Arc;
 5 | 
 6 | use criterion::{criterion_group, criterion_main, Criterion};
 7 | use sealfs::server::storage_engine::{block_engine::BlockEngine, meta_engine, StorageEngine};
 8 | use std::process::Command;
 9 | 
10 | fn write_file(engine: &BlockEngine, n: isize) {
11 |     (0..n).for_each(|_| {
12 |         let bytes = vec![1u8; 10240];
13 |         engine.write_file("test", bytes.as_slice(), 0).unwrap();
14 |     })
15 | }
16 | 
17 | fn read_file(engine: &BlockEngine, n: isize) {
18 |     (0..n * 10).for_each(|_| {
19 |         engine.read_file("test", 10240, 0).unwrap();
20 |     })
21 | }
22 | 
23 | /// Benchmarks `BlockEngine` writes and reads against a 4 MiB loop device.
24 | ///
25 | /// Setup creates the backing file `node1` and attaches it to `/dev/loop8`
26 | /// (`losetup` typically needs root); both are released again after the run.
27 | fn criterion_benchmark(c: &mut Criterion) {
28 |     // Run `cmd` through bash, returning whether it exited successfully.
29 |     let shell = |cmd: &str| {
30 |         let out = Command::new("bash").arg("-c").arg(cmd).output().unwrap();
31 |         if !out.status.success() {
32 |             eprintln!("`{cmd}` failed: {}", String::from_utf8_lossy(&out.stderr));
33 |         }
34 |         out.status.success()
35 |     };
36 |     // Fail fast with the real cause instead of erroring later inside the engine.
37 |     assert!(shell("dd if=/dev/zero of=node1 bs=4M count=1"), "cannot create backing file");
38 |     assert!(shell("losetup /dev/loop8 node1"), "cannot attach /dev/loop8");
39 | 
40 |     let meta_engine = Arc::new(meta_engine::MetaEngine::new(
41 |         "/tmp/bench/db",
42 |         128 << 20,
43 |         128 * 1024 * 1024,
44 |     ));
45 |     let engine = BlockEngine::new("/dev/loop8", meta_engine);
46 | 
47 |     c.bench_function("block device test", |b| {
48 |         b.iter(|| {
49 |             write_file(&engine, 512);
50 |             read_file(&engine, 512);
51 |         })
52 |     });
53 | 
54 |     // Teardown is best-effort: failures are reported by `shell` but are not fatal.
55 |     shell("losetup -d /dev/loop8");
56 |     shell("rm node1");
57 | }
58 | 
59 | criterion_group!(benches, criterion_benchmark);
60 | criterion_main!(benches);
61 | 


--------------------------------------------------------------------------------
/benches/grpc/client.rs:
--------------------------------------------------------------------------------
 1 | #![allow(unused)]
 2 | 
 3 | use hello_world::greeter_client::GreeterClient;
 4 | use hello_world::HelloRequest;
 5 | 
 6 | use tonic::{transport::Server, Request, Response, Status};
 7 | 
 8 | pub mod hello_world {
 9 |     tonic::include_proto!("helloworld");
10 | }
11 | 
12 | /// Fires `total` concurrent `say_hello` requests at `http://[::1]:50051` and
13 | /// blocks until all of them have completed; bad replies print "Error reply".
14 | pub fn gcli(total: u32) {
15 |     let rt = tokio::runtime::Runtime::new().unwrap();
16 |     let client = rt
17 |         .block_on(GreeterClient::connect("http://[::1]:50051"))
18 |         .unwrap();
19 | 
20 |     // Every request carries the same 50-byte, all-NUL string payload.
21 |     let payload = "\0".repeat(50);
22 |     // One join handle per request, so size the vector by `total`, not a constant.
23 |     let mut handles = Vec::with_capacity(total as usize);
24 |     for i in 0..total {
25 |         let data = payload.clone();
26 |         let mut task_client = client.clone();
27 |         handles.push(rt.spawn(async move {
28 |             let request = tonic::Request::new(HelloRequest {
29 |                 id: i,
30 |                 r#type: 0,
31 |                 flags: 0,
32 |                 filename: "".to_string(),
33 |                 meta_data: "".to_string(),
34 |                 data,
35 |             });
36 |             let reply = task_client.say_hello(request).await.unwrap().into_inner();
37 |             if reply.id != i || reply.status != 1 {
38 |                 println!("Error reply")
39 |             }
40 |         }));
41 |     }
42 |     for handle in handles {
43 |         rt.block_on(handle).unwrap();
44 |     }
45 | }
46 | 


--------------------------------------------------------------------------------
/benches/grpc/main.rs:
--------------------------------------------------------------------------------
 1 | //! run the benchmark with:
 2 | //!     cargo bench --bench grpc
 3 | 
 4 | mod client;
 5 | mod server;
 6 | 
 7 | use client::gcli;
 8 | use criterion::{criterion_group, criterion_main, Criterion};
 9 | use server::server;
10 | 
11 | extern crate core_affinity;
12 | 
13 | /// Starts the gRPC server on a thread pinned to the first core, then benchmarks `gcli`.
14 | fn grpc_benchmark(c: &mut Criterion) {
15 |     std::thread::spawn(|| {
16 |         let core_ids = core_affinity::get_core_ids().unwrap();
17 |         core_affinity::set_for_current(core_ids[0]);
18 |         match server() {
19 |             Ok(()) => println!("server start succeed!"),
20 |             Err(e) => println!("server start failed! {}", e), // keep the cause visible
21 |         };
22 |     });
23 |     // wait for server to start
24 |     std::thread::sleep(std::time::Duration::from_secs(5));
25 |     // c.bench_function("grpc_bench0", |b| b.iter(|| gcli(0)));
26 |     // c.bench_function("grpc_bench10", |b| b.iter(|| gcli(10)));
27 |     // c.bench_function("grpc_bench100", |b| b.iter(|| gcli(100)));
28 |     // c.bench_function("grpc_bench1000", |b| b.iter(|| gcli(1000)));
29 |     // c.bench_function("grpc_bench10000", |b| b.iter(|| gcli(10000)));
30 |     c.bench_function("grpc_bench100000", |b| b.iter(|| gcli(100000)));
31 | }
32 | 
33 | criterion_group!(
34 |     name=benches;
35 |     config=Criterion::default().significance_level(0.1).sample_size(10); // significance level 0.1, 10 samples per benchmark
36 |     targets = grpc_benchmark
37 | );
38 | criterion_main!(benches); // expands to the `main` function that runs the `benches` group
39 | 


--------------------------------------------------------------------------------
/benches/grpc/mod.rs:
--------------------------------------------------------------------------------
1 | mod client;
2 | mod server;
3 | 


--------------------------------------------------------------------------------
/benches/grpc/server.rs:
--------------------------------------------------------------------------------
 1 | #![allow(unused)]
 2 | use hello_world::greeter_server::{Greeter, GreeterServer};
 3 | use hello_world::{HelloReply, HelloRequest};
 4 | 
 5 | use tonic::{transport::Server, Request, Response, Status};
 6 | 
 7 | pub mod hello_world {
 8 |     tonic::include_proto!("helloworld"); // pulls in the code generated for the `helloworld` proto package
 9 | }
10 | 
11 | /// Stateless `Greeter` implementation that the gRPC benchmark client talks to.
12 | #[derive(Debug, Default)]
13 | pub struct MyGreeter {}
14 | 
15 | #[tonic::async_trait]
16 | impl Greeter for MyGreeter {
17 |     /// Answers every request with a reply that echoes the request `id`,
18 |     /// sets `status` to 1 and leaves the remaining fields zero/empty.
19 |     async fn say_hello(
20 |         &self,
21 |         request: Request<HelloRequest>,
22 |     ) -> Result<Response<HelloReply>, Status> {
23 |         let HelloRequest { id, .. } = request.into_inner();
24 | 
25 |         Ok(Response::new(HelloReply {
26 |             id,
27 |             status: 1,
28 |             flags: 0,
29 |             meta_data: String::new(),
30 |             data: String::new(),
31 |         }))
32 |     }
33 | }
34 | 
35 | /// Serves `MyGreeter` on `[::1]:50051`; returns once serving ends or fails.
36 | #[tokio::main]
37 | pub async fn server() -> Result<(), Box<dyn std::error::Error>> {
38 |     // An unparsable address is returned to the caller before anything is built.
39 |     let listen_addr = "[::1]:50051".parse()?;
40 |     let service = GreeterServer::new(MyGreeter::default());
41 | 
42 |     let serving = Server::builder().add_service(service).serve(listen_addr);
43 |     serving.await?;
44 | 
45 |     Ok(())
46 | }
47 | 


--------------------------------------------------------------------------------
/benches/local_storage/main.rs:
--------------------------------------------------------------------------------
 1 | //! run the benchmark with:
 2 | //!     cargo bench --bench local_storage
 3 | 
 4 | use std::sync::Arc;
 5 | 
 6 | use criterion::{criterion_group, criterion_main, Criterion};
 7 | use rand::prelude::*;
 8 | use sealfs::server::storage_engine::{
 9 |     file_engine::{self, FileEngine},
10 |     meta_engine, StorageEngine,
11 | };
12 | 
13 | /// Creates `n` files named `0..n`, passing `O_CREAT | O_RDWR` and mode `0o777`.
14 | fn create_file(engine: &FileEngine, n: isize) {
15 |     let open_flags = libc::O_CREAT | libc::O_RDWR;
16 |     let permissions = 0o777;
17 |     for index in 0..n {
18 |         let name = index.to_string();
19 |         engine.create_file(name.as_str(), open_flags, 0, permissions).unwrap();
20 |     }
21 | }
22 | 
23 | fn delete_file(engine: &FileEngine, n: isize) {
24 |     (0..n).for_each(|i| {
25 |         engine.delete_file(i.to_string().as_str()).unwrap();
26 |     })
27 | }
28 | 
29 | /// Writes a 10240-byte buffer of `1`s to a randomly chosen file in `0..n`, `n` times.
30 | fn write_file(engine: &FileEngine, n: isize) {
31 |     // Loop-invariant setup: allocate the payload and fetch the RNG handle once
32 |     // so each iteration exercises the engine rather than the allocator.
33 |     let (bytes, mut rng) = (vec![1u8; 10240], rand::thread_rng());
34 |     for _ in 0..n {
35 |         let name = (rng.gen::<usize>() % n as usize).to_string();
36 |         engine.write_file(name.as_str(), bytes.as_slice(), 0).unwrap();
37 |     }
38 | }
39 | 
40 | /// Performs `n * 10` reads (length argument 10240) on random files in `0..n`.
41 | fn read_file(engine: &FileEngine, n: isize) {
42 |     for _ in 0..n * 10 {
43 |         let name = (rand::thread_rng().gen::<usize>() % n as usize).to_string();
44 |         let _ = engine.read_file(name.as_str(), 10240, 0).unwrap();
45 |     }
46 | }
47 | 
48 | /// Benchmarks a create/write/read/delete cycle over 5120 files on a `FileEngine`
49 | /// built with `/tmp/bench/root`, whose `MetaEngine` is built with `/tmp/bench/db`.
50 | fn criterion_benchmark(c: &mut Criterion) {
51 |     // NOTE(review): the benchmark id says 512, but every phase runs on 5120 files.
52 |     const FILE_COUNT: isize = 5120;
53 | 
54 |     let db = meta_engine::MetaEngine::new("/tmp/bench/db", 128 << 20, 128 * 1024 * 1024);
55 |     let engine = file_engine::FileEngine::new("/tmp/bench/root", Arc::new(db));
56 |     let run_cycle = || {
57 |         create_file(&engine, FILE_COUNT);
58 |         write_file(&engine, FILE_COUNT);
59 |         read_file(&engine, FILE_COUNT);
60 |         delete_file(&engine, FILE_COUNT);
61 |     };
62 | 
63 |     c.bench_function("default engine file 512", |b| b.iter(&run_cycle));
64 | }
65 | 
66 | criterion_group!(benches, criterion_benchmark); // bundles `criterion_benchmark` into the `benches` group
67 | criterion_main!(benches); // expands to the `main` function that runs the `benches` group
68 | 


--------------------------------------------------------------------------------
/benches/mod.rs:
--------------------------------------------------------------------------------
1 | mod grpc;
2 | mod rpc;
3 | 


--------------------------------------------------------------------------------
/benches/rpc/client.rs:
--------------------------------------------------------------------------------
  1 | #![allow(unused)]
  2 | 
  3 | use sealfs::rpc::client::{RpcClient, TcpStreamCreator};
  4 | use std::{sync::Arc, time::Duration};
  5 | 
  6 | /// Runs `run_cli_without_data(total)` to completion on a fresh multi-threaded runtime.
  7 | pub fn cli(total: u32) {
  8 |     let mut builder = tokio::runtime::Builder::new_multi_thread();
  9 |     let runtime = builder.enable_all().build().unwrap();
 10 | 
 11 |     runtime.block_on(run_cli_without_data(total));
 12 | }
 13 | 
 14 | /// Runs `run_cli_with_data_size(total, size)` on a fresh multi-threaded runtime.
 15 | pub fn cli_size(total: u32, size: usize) {
 16 |     let mut builder = tokio::runtime::Builder::new_multi_thread();
 17 |     let runtime = builder.enable_all().build().unwrap();
 18 | 
 19 |     runtime.block_on(run_cli_with_data_size(total, size));
 20 | }
 21 | 
 22 | /// Issues `total` concurrent `call_remote` requests with empty payload slices
 23 | /// to `127.0.0.1:50052`, waits for all of them, then closes the client.
 24 | ///
 25 | /// Failed calls, non-zero statuses and failed tasks are printed to stdout.
 26 | pub async fn run_cli_without_data(total: u32) {
 27 |     let rt = tokio::runtime::Handle::current();
 28 |     let mut handles = Vec::with_capacity(total as usize);
 29 | 
 30 |     let server_address = "127.0.0.1:50052";
 31 |     let client: Arc<
 32 |         RpcClient<
 33 |             tokio::net::tcp::OwnedReadHalf,
 34 |             tokio::net::tcp::OwnedWriteHalf,
 35 |             TcpStreamCreator,
 36 |         >,
 37 |     > = Arc::new(RpcClient::new());
 38 |     client.add_connection(server_address).await;
 39 | 
 40 |     // One task per request; every task shares the same client through the `Arc`.
 41 |     for i in 0..total {
 42 |         let new_client = client.clone();
 43 |         handles.push(rt.spawn(async move {
 44 |             // Buffers handed to `call_remote` by mutable reference.
 45 |             let mut status = 0;
 46 |             let mut rsp_flags = 0;
 47 |             let mut recv_meta_data_length = 0;
 48 |             let mut recv_data_length = 0;
 49 |             let mut recv_meta_data = vec![];
 50 |             let mut recv_data = vec![];
 51 |             let result = new_client
 52 |                 .call_remote(
 53 |                     server_address,
 54 |                     0,
 55 |                     i,
 56 |                     "",
 57 |                     &[],
 58 |                     &[],
 59 |                     &mut status,
 60 |                     &mut rsp_flags,
 61 |                     &mut recv_meta_data_length,
 62 |                     &mut recv_data_length,
 63 |                     &mut recv_meta_data,
 64 |                     &mut recv_data,
 65 |                     Duration::from_secs(10),
 66 |                 )
 67 |                 .await;
 68 |             match result {
 69 |                 Ok(_) if status == 0 => {}
 70 |                 Ok(_) => println!("Error: {}", status),
 71 |                 Err(e) => println!("Error: {}", e),
 72 |             }
 73 |         }));
 74 |     }
 75 |     for handle in handles {
 76 |         // A `JoinError` means the task panicked or was cancelled; report it
 77 |         // so lost requests are visible in the benchmark output.
 78 |         if let Err(e) = handle.await {
 79 |             println!("Error: {}", e);
 80 |         }
 81 |     }
 82 |     client.close();
 83 | }
 84 | /// Issues `total` concurrent `call_remote` requests, each carrying `size`
 85 | /// zero bytes, to `127.0.0.1:50052`, waits for them, then closes the client.
 86 | ///
 87 | /// Failed calls, non-zero statuses and failed tasks are printed to stdout.
 88 | async fn run_cli_with_data_size(total: u32, size: usize) {
 89 |     let rt = tokio::runtime::Handle::current();
 90 |     let mut handles = Vec::with_capacity(total as usize);
 91 | 
 92 |     let server_address = "127.0.0.1:50052";
 93 |     let client: Arc<
 94 |         RpcClient<
 95 |             tokio::net::tcp::OwnedReadHalf,
 96 |             tokio::net::tcp::OwnedWriteHalf,
 97 |             TcpStreamCreator,
 98 |         >,
 99 |     > = Arc::new(RpcClient::new());
100 |     client.add_connection(server_address).await;
101 |     // Share one payload allocation between all tasks instead of deep-copying
102 |     // `size` bytes per request.
103 |     let data = Arc::new(vec![0u8; size]);
104 |     for i in 0..total {
105 |         let new_client = client.clone();
106 |         let data = Arc::clone(&data);
107 |         handles.push(rt.spawn(async move {
108 |             // Buffers handed to `call_remote` by mutable reference.
109 |             let mut status = 0;
110 |             let mut rsp_flags = 0;
111 |             let mut recv_meta_data_length = 0;
112 |             let mut recv_data_length = 0;
113 |             let mut recv_meta_data = vec![];
114 |             let mut recv_data = vec![];
115 |             let result = new_client
116 |                 .call_remote(
117 |                     server_address,
118 |                     0,
119 |                     i,
120 |                     "",
121 |                     &[],
122 |                     data.as_slice(),
123 |                     &mut status,
124 |                     &mut rsp_flags,
125 |                     &mut recv_meta_data_length,
126 |                     &mut recv_data_length,
127 |                     &mut recv_meta_data,
128 |                     &mut recv_data,
129 |                     Duration::from_secs(10),
130 |                 )
131 |                 .await;
132 |             match result {
133 |                 Ok(_) if status == 0 => {}
134 |                 Ok(_) => println!("Error: {}", status),
135 |                 Err(e) => println!("Error: {}", e),
136 |             }
137 |         }));
138 |     }
139 |     for handle in handles {
140 |         // A `JoinError` means the task panicked or was cancelled; report it.
141 |         if let Err(e) = handle.await {
142 |             println!("Error: {}", e);
143 |         }
144 |     }
145 |     client.close();
146 | }
147 | 


--------------------------------------------------------------------------------
/benches/rpc/main.rs:
--------------------------------------------------------------------------------
 1 | //! run the benchmark with:
 2 | //!     cargo bench --bench rpc
 3 | 
 4 | #![allow(unused)]
 5 | mod client;
 6 | mod server;
 7 | use client::{cli, cli_size};
 8 | use criterion::{criterion_group, criterion_main, Criterion};
 9 | 
10 | use server::server;
11 | /// Starts the RPC server on a thread pinned to the first core, then benchmarks `cli`.
12 | fn rpc_benchmark(c: &mut Criterion) {
13 |     std::thread::spawn(|| {
14 |         let core_ids = core_affinity::get_core_ids().unwrap();
15 |         core_affinity::set_for_current(core_ids[0]);
16 |         match server() {
17 |             Ok(()) => println!("server start succeed!"),
18 |             Err(e) => println!("server start failed! {}", e), // keep the cause visible
19 |         };
20 |     });
21 |     // wait for server to start
22 |     std::thread::sleep(std::time::Duration::from_secs(5));
23 |     // c.bench_function("rpc_bench0", |b| b.iter(|| cli(0)));
24 |     // c.bench_function("rpc_bench10", |b| b.iter(|| cli(10)));
25 |     // c.bench_function("rpc_bench100", |b| b.iter(|| cli(100)));
26 |     // c.bench_function("rpc_bench1000", |b| b.iter(|| cli(1000)));
27 |     c.bench_function("rpc_bench100000", |b| b.iter(|| cli(100000)));
28 |     // c.bench_function("rpc_bench100000_without_data", |b| b.iter(|| cli(100000)));
29 |     // c.bench_function("rpc_bench100000_data_size_1024", |b| {
30 |     //     b.iter(|| cli_size(100000, 1024))
31 |     // });
32 |     // c.bench_function("rpc_bench100000_data_size_1024_4", |b| {
33 |     //     b.iter(|| cli_size(100000, 1024 * 4))
34 |     // });
35 |     // c.bench_function("rpc_bench100000_data_size_1024_16", |b| {
36 |     //     b.iter(|| cli_size(100000, 1024 * 16))
37 |     // });
38 |     // c.bench_function("rpc_bench100000_data_size_1024_64", |b| {
39 |     //     b.iter(|| cli_size(100000, 1024 * 64))
40 |     // });
41 |     // c.bench_function("rpc_bench100000_data_size_1024_256", |b| {
42 |     //     b.iter(|| cli_size(100000,1024*256))
43 |     // });
44 |     // c.bench_function("rpc_bench100000_data_size_1024_1024", |b| {
45 |     //     b.iter(|| cli_size(100000,1024*1024))
46 |     // });
47 | }
48 | 
49 | criterion_group!(
50 |     name=benches;
51 |     config=Criterion::default().significance_level(0.1).sample_size(10); // significance level 0.1, 10 samples per benchmark
52 |     targets = rpc_benchmark
53 | );
54 | criterion_main!(benches); // expands to the `main` function that runs the `benches` group
55 | 


--------------------------------------------------------------------------------
/benches/rpc/mod.rs:
--------------------------------------------------------------------------------
1 | mod client;
2 | mod server;
3 | 


--------------------------------------------------------------------------------
/benches/rpc/server.rs:
--------------------------------------------------------------------------------
 1 | #![allow(unused)]
 2 | 
 3 | use async_trait::async_trait;
 4 | use sealfs::rpc::server::{Handler, RpcServer};
 5 | use std::{sync::Arc, vec};
 6 | use tokio::sync::Mutex;
 7 | pub struct HelloHandler {} // benchmark handler; it has no fields
 8 | 
 9 | impl HelloHandler {
10 |     pub fn new() -> HelloHandler { // builds the field-less handler
11 |         HelloHandler {}
12 |     }
13 | }
14 | 
15 | // lazy_static::lazy_static! {
16 | //     static ref HELLO_COUNT: Arc<Mutex<u32>> = Arc::new(Mutex::new(0));
17 | // }
18 | 
19 | #[async_trait]
20 | impl Handler for HelloHandler {
21 |     /// Answers operation type `0` with a tuple of zeros and empty buffers.
22 |     ///
23 |     /// Any other operation type yields an error rather than a panic.
24 |     async fn dispatch(
25 |         &self,
26 |         _conn_id: u32,
27 |         operation_type: u32,
28 |         _flags: u32,
29 |         _path: Vec<u8>,
30 |         _data: Vec<u8>,
31 |         _metadata: Vec<u8>,
32 |     ) -> anyhow::Result<(i32, u32, usize, usize, Vec<u8>, Vec<u8>)> {
33 |         match operation_type {
34 |             0 => Ok((0, 0, 0, 0, vec![], vec![])),
35 |             // `todo!()` would panic the task serving this request as soon as
36 |             // an unexpected operation arrives; hand the caller an error instead.
37 |             other => Err(anyhow::anyhow!(
38 |                 "unsupported operation type: {}",
39 |                 other
40 |             )),
41 |         }
42 |     }
43 | }
44 | 
45 | /// Runs an `RpcServer` constructed with a `HelloHandler` and `127.0.0.1:50052`.
46 | ///
47 | /// Returns the error from `run()` if it fails, and `Ok(())` once it returns cleanly.
48 | #[tokio::main]
49 | pub async fn server() -> anyhow::Result<()> {
50 |     let handler = Arc::new(HelloHandler::new());
51 |     let rpc_server = RpcServer::new(handler, "127.0.0.1:50052");
52 | 
53 |     // `?` hands a failed run back to the caller.
54 |     rpc_server.run().await?;
55 |     Ok(())
56 | }
57 | 


--------------------------------------------------------------------------------
/build.rs:
--------------------------------------------------------------------------------
1 | fn main() -> Result<(), Box<dyn std::error::Error>> {
2 |     tonic_build::compile_protos("proto/test.proto")?;
3 |     Ok(())
4 | }
5 | 


--------------------------------------------------------------------------------
/docker/client/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Runtime image for the client binary; expects a prebuilt target/debug/client.
 2 | FROM debian:bullseye-20221205
 3 | 
 4 | # apt-get instead of apt: apt's command-line interface is not stable for scripts.
 5 | RUN apt-get update && apt-get upgrade -y --with-new-pkgs && apt-mark unhold libcap2 && \
 6 |     apt-get install -y fuse3 libfuse3-3 libfuse2 libibverbs1 && \
 7 |     apt-get clean && \
 8 |     rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 9 | 
10 | COPY target/debug/client /usr/local/bin/client
11 | 
12 | ENTRYPOINT ["/usr/local/bin/client"]
13 | 


--------------------------------------------------------------------------------
/docker/manager/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Runtime image for the manager binary; expects a prebuilt target/debug/manager.
 2 | FROM debian:bullseye-20221205
 3 | 
 4 | # apt-get instead of apt: apt's command-line interface is not stable for scripts.
 5 | RUN apt-get update && apt-get upgrade -y --with-new-pkgs && apt-mark unhold libcap2 && \
 6 |     apt-get install -y libfuse3-3 libfuse2 libibverbs1 && \
 7 |     apt-get clean && \
 8 |     rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 9 | 
10 | COPY target/debug/manager /usr/local/bin/manager
11 | 
12 | ENTRYPOINT ["/usr/local/bin/manager"]
13 | 


--------------------------------------------------------------------------------
/docker/server/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Runtime image for the server binary; expects a prebuilt target/debug/server.
 2 | FROM debian:bullseye-20221205
 3 | 
 4 | # apt-get instead of apt: apt's command-line interface is not stable for scripts.
 5 | RUN apt-get update && apt-get upgrade -y --with-new-pkgs && apt-mark unhold libcap2 && \
 6 |     apt-get install -y libfuse3-3 libfuse2 libibverbs1 && \
 7 |     apt-get clean && \
 8 |     rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 9 | 
10 | COPY target/debug/server /usr/local/bin/server
11 | 
12 | ENTRYPOINT ["/usr/local/bin/server"]
13 | 


--------------------------------------------------------------------------------
/docs/RDMA.md:
--------------------------------------------------------------------------------
 1 | ## Communication Principle of RDMA in sealfs
 2 | 
 3 | GitHub Repository: https://github.com/mond77/ibv.git
 4 | 
 5 | ### Connection Establishment
 6 | 
 7 | The endpoint device addresses and RecvBuffer memory addresses are exchanged via TCP.
 8 | 
 9 | RemoteBufManager is the allocator of the remote RecvBuffer.
10 | 
11 | Both SendBuffer and RecvBuffer are memory regions that have been registered with `ibv_reg_mr`.
12 | 
13 | ### send_msg() Method
14 | 
15 | `fn send_msg(&self, msg: &[IoSlice<'_>]) -> io::Result<()> `
16 | 
17 | The main process of the send_msg() method includes:
18 | 
19 | 1. Allocation of SendBuffer
20 | 2. Locking
21 | 3. Allocation of RemoteBuf
22 | 4. Issuing of WR (work request)
23 | 5. Unlocking
24 | 
25 | Each request/response corresponds to a write_with_imm operation, which generates a WC (work completion) on both the local and remote ends. write_with_imm consumes one RQE (Receive Queue Element) on the remote end. The WC type of the remote end is write_with_imm, and the WC type of the local end is write.
26 | 
27 | ### recv_msg() Method
28 | 
29 | `fn recv_msg(&self) -> io::Result<&[u8]> `
30 | 
31 | The main process of the recv_msg() method includes:
32 | 
33 | 1. A background polling task polls the CQ (Completion Queue) and notifies the task blocked in recv_msg() when a WC of type write_with_imm is received.
34 | 2. The task then reads the data of this write.
35 | 3. The returned `&[u8]` points to the data located on the RecvBuffer.
36 | 
37 | ### Memory Management
38 | 
39 | #### SendBuffer
40 | 
41 | SendBuffer is linearly allocated, and when released, it is marked with AtomicBool, greatly reducing the complexity of memory management. A release_task is used to maintain the linear release order of each allocated memory block and determine whether it is released or not using AtomicBool.
42 | 
43 | #### RecvBuffer
44 | 
45 | RecvBuffer is linearly allocated and released.
46 | 
47 | 


--------------------------------------------------------------------------------
/docs/README-ZH.MD:
--------------------------------------------------------------------------------
  1 | # sealfs设计文档
  2 | 
  3 | ## 系统架构
  4 | sealfs的架构为无中心架构，且无独立的元数据节点
  5 | sealfs包含三个组件，
  6 | - server负责文件以及元数据存储，元数据无疑是分布式文件系统的热点文件，所以我们采用分挂盘的方式对数据和元数据进行存储，用户可以选择更好的硬件对元数据进行存储。
  7 | - client实现用户态的文件系统，对文件请求进行拦截并通过哈希算法进行存储寻址。
  8 | - manager负责协调集群。
  9 | 设计图如下：
 10 | ![](images/architecture.jpg)
 11 | 
 12 | ### 全链路用户态
 13 | 我们希望结合特定硬件从客户端文件请求劫持到网络到存储打造一个全链路用户态的分布式文件存储系统，从而获得极致的性能体验。
 14 | 
 15 | ## 客户端
 16 | 在客户端处，我们支持了两种类型的文件系统，一种是比较常见的fuse，另一种是用户态的文件系统，我们希望以这种方式提升性能。
 17 | 
 18 | ### fuse
 19 | #### 内核文件系统
 20 | 为了减少调用次数，一种实现方案则是直接使用内核态实现文件系统
 21 | 
 22 | 1. 用户态请求
 23 | 2. VFS
 24 | 3. 内核态文件系统
 25 | 4. 网络传输
 26 | 
 27 | 这种方案可以将内核态和用户态切换次数减少，但缺点也显而易见：
 28 | 1. 内核编程的调试复杂
 29 | 2. 需要为客户端安装额外内核模块
 30 | 3. 文件系统崩溃影响其他进程
 31 | 
 32 | #### fuse
 33 | 为了避免上述问题，第一种方式我们采用容易实现且易于使用的fuse。
 34 | 
 35 | ![alt fuse](https://imgconvert.csdnimg.cn/aHR0cHM6Ly9tbWJpei5xcGljLmNuL21tYml6X3BuZy9kNGhvWUpseE9qTnNvaWNRQkUwM01aRDBrWjNmY3VpYWVRZzJmV1RlNFlWV3RUYko5aWN1cG1iZ1IwZGd1RUlrTTloTzZzaWJQdU80VTlFNzlpYWczWWljdlE4US82NDA?x-oss-process=image/format,png)
 36 | 
 37 | 在fuse中，通过网络进行文件存储的一次调用流程包括：
 38 | 1. 用户态请求
 39 | 2. VFS
 40 | 3. fuse driver
 41 | 4. fuse library
 42 | 5. 网络传输
 43 | 
 44 | 需要注意的是，在避免上述问题的同时，fuse也降低了文件系统的性能。
 45 | 
 46 | 
 47 | ### 系统调用劫持
 48 | 这是我们实现的第二种方案，即实现一个用户态的文件系统。在上图中，可以看到，用户请求并不是直接交给linux内核的，而是经过了glibc(或其他libc库)来提交系统调用，这意味着可以在libc层替换系统调用的地址，实现系统调用劫持。  
 49 | 1. 用户态请求
 50 | 2. 系统调用劫持
 51 | 3. 网络传输
 52 | 
 53 | ## 网络
 54 | 对于网络部分，同样提供了两种网络传输方式，一种是RPC的方式，一种是通过RDMA的方式。
 55 | 
 56 | ### RPC
 57 | ![image](https://user-images.githubusercontent.com/14962503/189853670-d10c29e8-34d7-468e-baa6-36c8fa65a3c9.png)
 58 | 
 59 | #### 寻址算法
 60 | 一个文件请求会被客户端使用一致性哈希算法映射到一个服务器，通过长链接进行传输。
 61 | 
 62 | #### 请求流程
 63 | 
 64 | 1. client接收请求，创建处理线程。创建处理线程的工作是由libfuse实现的，sealfs实现的函数可以认为已经是独立线程。
 65 | 2. 计算文件所在的服务器。
 66 | 3. 向server发送文件请求，hold线程。发送请求的过程要考虑多个请求并行处理的情况。为每个请求建立一个socket是最简单的实现，但创建连接的延迟过高，网络连接数也可能会过多。保持多个长连接保证了创建连接的延迟问题，但在大并发的情况下，依旧无法解决网络连接数量过多，同时代码的实现也稍显复杂。所以采用了一个长连接共享多个文件请求的方式。一个线程发送请求需要包含该请求的id与数据长度，同时需要实现一个额外的线程安全的队列用于保存发送请求后的线程的锁。
 67 | 4. server处理文件请求，并将请求结果返回给client。处理过程中始终保持了请求的id。
 68 | 5. client接收数据，激活请求线程并处理返回值。一个（或有限多个）独立的线程用于接收请求结果，其中包含了请求的id，需要在队列中查询请求id所对应的线程锁，写入结果并释放该线程锁，激活原请求线程并将结果返回给应用。
 69 | 
 70 | #### 内存拷贝
 71 | 
 72 | 采用了多个请求共享同一线程的方式，socket发送请求时，由于数据不定长，需要提前发送一个长度变量，才能避免粘包。这里有两种不同的方案:  
 73 | 一种是使用多个socket实现连接池，每次发送一个请求使用一个socket，该方案不存在数据包连续性的问题，可以多次发送。  
 74 | 另一种是用同一个socket，但是要保证数据连续性，需要进行字符串拼接，涉及内存拷贝，开销会变大，那为了避免这个问题，每次发送数据需要给线程加锁，这个是第一个阶段的实现方案。
 75 | 
 76 | ### RDMA
 77 | 
 78 | ## 管理节点
 79 | 管理节点用于管理server集群。
 80 | 
 81 | ### 心跳管理
 82 | server节点上下线的时候，会将心跳信息上报给管理节点，客户端会订阅心跳信息用于选址计算，同时，服务端也会订阅心跳信息用于数据迁移等方面。
 83 | 
 84 | ## 服务端
 85 | server主要存储两种类型的数据，一种是文件的元数据信息，另一种是文件本身的内容。
 86 | 
 87 | 对于分布式文件存储而言，元数据无疑是热点数据，因此，我们采用分开挂盘的方式，将元数据和文件数据挂载在不同的盘中，一种经济的办法是将元数据数据挂载在SSD盘中，而对普通的文件数据存储在hdd中。当然，可以随意搭配。
 88 | 
 89 | ### 元数据管理
 90 | 
 91 | >在大数据环境下，元数据的体量也非常大，元数据的存取性能是整个分布式文件系统性能的关键。常见的元数据管理可以分为集中式和分布式元数据管理架构。集中式元数据管理架构采用单一的元数据服务器，实现简单．但是存在单点故障等问题。分布式元数据管理架构则将元数据分散在多个结点上．进而解决了元数据服务器的性能瓶颈等问题．并提高了元数据管理架构的可扩展性，但实现较为复杂，并引入了元数据一致性的问题。另外，还有一种无元数据服务器的分布式架构，通过在线算法组织数据，不需要专用的元数据服务器。但是该架构对数据一致性的保障很困难．实现较为复杂。文件目录遍历操作效率低下，并且缺乏文件系统全局监控管理功能。
 92 | 
 93 | sealfs目前选择的是无独立元数据节点的架构，即避免了元数据节点单点故障的问题，但是对于元数据的遍历成为了一个难题。
 94 | 
 95 | #### 元数据持久化内存存储
 96 | 
 97 | 为了提高元数据的性能，我们打算结合支持持久化内存的硬件对元数据进行存储设计。
 98 | 
 99 | ### 数据存储
100 | 
101 | #### 绕过本地文件系统
102 | 为了提升性能，sealfs直接跨过文件系统进行文件存储。当然这会带来更多的复杂性。
103 | 
104 | #### 适配不同的硬件
105 | 对于不同的固态硬盘有不同的特性，我们会对不同的硬件都进行适配，设计不同的数据结构，希望对用户使用的每一种硬件都达到比较好的效果。
106 | 
107 | ## 一些其他的扩展
108 | 这些拓展点暂时不在第一版计划实现中
109 | - 数据可靠性与高可用  
110 |   - 多副本
111 |     多副本暂时计划采用raft协议，由一致性hash算法计算副本位置分布到多个节点上实现replica
112 |   - 纠删码
113 | 
114 | - 数据扩缩容  
115 | 基于一致性hash实现扩缩容，具体细节暂时先不讲。需要明确的是添加或删除节点后集群会进行rebalance，这是一致性hash本身需要做的，无需额外设计。rebalance期间会导致集群性能下降，且可能耗费较长时间，但集群仍可以持续对外提供服务。在rebalance期间需要做的工作如下：
116 | 
117 | | 开始扩容 | 迁移数据 | 扩容完成 |
118 | | ---- | ---- | ---- |
119 | | 更新集群元数据 | client进行二次请求，确认迁移后数据和迁移前数据一致性，并将数据写于新节点;同时迁移任务进行数据迁移和同步 | 确认集群元数据 |
120 | 
121 | - 租户管理  
122 | 对于不同的client申请挂载的磁盘，进行容量限制隔离
123 | 
124 | 
125 | 
126 | 
127 | 


--------------------------------------------------------------------------------
/docs/README.MD:
--------------------------------------------------------------------------------
  1 | # Sealfs Design Document
  2 | 
  3 | ## System Architecture
  4 | The architecture of sealfs is decentralized, and there is no dedicated metadata node.
  5 | 
  6 | Sealfs consists of three components:
  7 | - Server: Responsible for storing files and metadata. Metadata is undoubtedly the hot data of a distributed file system, so we store data and metadata on separate disks. Users can choose better hardware to store metadata.
  8 | - Client: Implements the file system in user mode, intercepts file requests, and locates their storage through a hash algorithm.
  9 | - Manager: Responsible for coordinating the cluster.
 10 | 
 11 | The system architecture is shown below:
 12 | ![](images/architecture.jpg)
 13 | 
 14 | ### User Mode in Overall Chain 
 15 | We hope to create an overall chain user mode distributed file storage system from client file request hijacking to network to storage with specific hardware, so as to obtain the ultimate performance experience.
 16 | 
 17 | ## Client
 18 | On the client side, we support two types of file systems: the more common FUSE file system, and a user-mode file system with which we hope to improve performance.
 19 | 
 20 | ### Fuse
 21 | 
 22 | #### Kernel File System
 23 | In order to reduce the number of calls, an implementation scheme is to directly implement the file system in the kernel state, and network requests are implemented in the kernel state.
 24 | 1. User-mode request
 25 | 2. VFS
 26 | 3. Kernel file system
 27 | 4. network
 28 | 
 29 | This scheme reduces the number of user/kernel mode switches to 2, but its disadvantages are also obvious:
 30 | 1. The debugging of kernel programming is complex
 31 | 2. You need to install additional kernel modules for the client
 32 | 3. File system crash affects other processes
 33 | 
 34 | #### fuse
 35 | There are different ways to implement network file storage, and fuse is an easy way to implement and use.
 36 | 
 37 | ![alt fuse](https://imgconvert.csdnimg.cn/aHR0cHM6Ly9tbWJpei5xcGljLmNuL21tYml6X3BuZy9kNGhvWUpseE9qTnNvaWNRQkUwM01aRDBrWjNmY3VpYWVRZzJmV1RlNFlWV3RUYko5aWN1cG1iZ1IwZGd1RUlrTTloTzZzaWJQdU80VTlFNzlpYWczWWljdlE4US82NDA?x-oss-process=image/format,png)
 38 | 
 39 | In fuse, a call process for file storage through the network includes:
 40 | 1. User-mode request
 41 | 2. VFS (switching)
 42 | 3. fuse driver
 43 | 4. fuse library
 44 | 5. network
 45 | 
 46 | It should be noted that fuse also reduces the performance of the file system while avoiding the above problems.
 47 | 
 48 | ### System Call Hijacking
 49 | This is the second scheme we implemented, namely, to implement a user mode file system. In the figure in the previous section, we can see that user requests are not directly handed over to the Linux kernel, but are submitted through glibc (or other libc libraries). This means that the address of system calls can be replaced at the libc layer to achieve system call hijacking.
 50 | 1. User-mode request
 51 | 2. System call hijacking client
 52 | 3. network
 53 | 
 54 | ## Network 
 55 | For the network part, two network transmission modes are also provided, RPC and RDMA.
 56 | 
 57 | ### RPC
 58 | ![image](https://user-images.githubusercontent.com/14962503/189853670-d10c29e8-34d7-468e-baa6-36c8fa65a3c9.png)
 59 | 
 60 | #### Location algorithm
 61 | A file request will be mapped to a server by the client using a hash algorithm and transmitted through the socket link.
 62 | 
 63 | #### Request Process
 64 | 
 65 | 1. The client receives the request and creates a processing thread. The work of creating processing threads is implemented by libfuse, and the functions implemented by sealfs can be considered as independent threads.
 66 | 2. Calculate the server where the file is located. The details belong to metadata management and are not covered in this section.
 67 | 3. Send a file request to the server and hold the thread. The process of sending requests should consider the parallel processing of multiple requests. Setting up a socket for each request is the simplest implementation, but the connection creation delay is too high, and the number of network connections may be too large. Maintaining multiple long-lived connections avoids the connection-creation delay. However, under high concurrency it still cannot solve the problem of too many network connections, and the code becomes somewhat more complicated. Therefore, a single long-lived connection is shared by multiple file requests. When a thread sends a request, it needs to include the request ID and data length. At the same time, it needs to implement an additional thread-safe queue to store the lock of the thread after sending the request.
 68 | 4. The server processes the file request and returns the request result to the client. The requested id is always maintained during processing.
 69 | 5. The client receives data, activates the request thread, and processes the return value. One (or a limited number of) independent threads are used to receive the request result, which contains the request ID, so it is necessary to query the thread lock corresponding to the request ID in the queue, write the result and release the thread lock, activate the original request thread and return the result to the application.
 70 | 
 71 | #### Memory Copy
 72 | 
 73 | The method that multiple requests share the same thread is adopted. When the socket sends a request, it needs to send a length variable in advance to avoid packet sticking due to the variable length of the data. There are two different solutions:
 74 | One is to use multiple sockets to realize the connection pool. One socket is used to send one request each time. This scheme does not have the problem of packet continuity and can be sent multiple times.
 75 | The other is to use the same socket, but to ensure data continuity, string splicing is required. When memory copying is involved, the overhead will increase. To avoid this problem, threads need to be locked each time data is sent. This is the implementation scheme of the first phase.
 76 | 
 77 | ### RDMA
 78 | 
 79 | ## Manager
 80 | The manager is used to manage the server cluster.
 81 | 
 82 | ### Heartbeat Management
 83 | When the server node is online or offline, it will report the heartbeat information to the management node. The client will subscribe to the heartbeat information for location calculation. At the same time, the server will subscribe to the heartbeat information for data migration and other aspects.
 84 | 
 85 | ## Server
 86 | The server mainly stores two types of data: the metadata of each file and the content of the file itself.
 87 | 
 88 | As far as distributed file storage is concerned, metadata is undoubtedly hot data. Therefore, metadata and file data are mounted on separate disks. One economical option is to keep metadata on SSDs while ordinary file data is stored on HDDs. Of course, any other combination can be used as well.
 89 | 
 90 | ### Metadata Management
 91 | 
 92 | >In the big data environment, the volume of metadata is also very large, and the access performance of metadata is the key to the performance of the entire distributed file system. Common metadata management can be divided into centralized and distributed metadata management architectures. The centralized metadata management architecture uses a single metadata server, which is simple to implement, but has a single point of failure and other problems. The distributed metadata management architecture disperses metadata on multiple nodes, thus solving the performance bottleneck of the metadata server and improving the scalability of the metadata management architecture, but the implementation is more complex and introduces the problem of metadata consistency. In addition, there is a distributed architecture without metadata server, which organizes data through online algorithms, and does not require a dedicated metadata server. However, it is difficult to guarantee the data consistency of this architecture. The implementation is more complex. The file directory traversal operation is inefficient and lacks the global monitoring and management function of the file system.
 93 | 
 94 | At present, sealfs chooses the architecture without a dedicated metadata node, which avoids the single point of failure of a metadata node, but makes metadata traversal a difficult problem.
 95 | 
 96 | #### Metadata persistent memory storage
 97 | 
 98 | In order to improve the performance of metadata, we plan to combine the hardware supporting persistent memory to design metadata storage.
 99 | 
100 | ### Data storage
101 | 
102 | #### Bypass local file system
103 | To improve performance, sealfs stores file data directly, bypassing the local file system. Of course, this brings more complexity.
104 | 
105 | #### Adapt to different hardware
106 | Different SSDs have different characteristics. We will adapt different hardware, design different data structures, and hope to achieve better results for each type of hardware used by users.
107 | 
108 | ## Some Other Extensions
109 | These extension points are not implemented in the first version of the plan:
110 | - Data reliability and high availability
111 |   - multi-replica
112 |     
113 |     Multi-replica support is tentatively planned to use the Raft protocol; the consistent hashing algorithm is used to calculate the replica locations and distribute the replicas across multiple nodes.
114 |   - erasure coding
115 | - Data expansion
116 | 
117 | Capacity expansion and reduction are implemented on top of consistent hashing. The details will not be discussed for the time being. What needs to be clear is that after nodes are added or deleted, the cluster is rebalanced. Rebalancing is inherent to consistent hashing and needs no additional design. It degrades cluster performance and may take a long time, but the cluster can keep serving requests throughout. The work to be done during a rebalance is as follows:
118 | 
119 | | Start capacity expansion | Migrate data | Complete capacity expansion |
120 | | ---- | ---- | ---- |
121 | |Updating cluster metadata | The client makes a second request to confirm the consistency of the data after migration and the data before migration, and writes the data to the new node; Simultaneous migration task for data migration and synchronization | Confirm cluster metadata|
122 | 
123 | - Tenant Management
124 | 
125 |   For the disks that different clients apply to mount, perform capacity limitation isolation
126 | 
127 | 


--------------------------------------------------------------------------------
/docs/images/architecture.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/labring/sealfs/41e1ad08ab587cb78e8b7610a74a2eb373172f13/docs/images/architecture.jpg


--------------------------------------------------------------------------------
/docs/specification.md:
--------------------------------------------------------------------------------
 1 | # specifications
 2 | 
 3 | ## benchmark
 4 | 
 5 | We use the [criterion](https://github.com/bheisler/criterion.rs) library for benchmarks.
 6 | 
 7 | Add a benchmark like this:
 8 | 
 9 | ```toml
10 | # Cargo.toml
11 | 
12 | [[bench]]
13 | name = "rpc"
14 | harness = false
15 | ```
16 | 
17 | create the file benches/rpc/main.rs or benches/rpc.rs.
18 | 
19 | ```rust
20 | // benches/rpc/main.rs or benches/rpc.rs
21 | // ......
22 | use criterion::{criterion_group, criterion_main, Criterion};
23 | 
24 | fn rpc_benchmark(c: &mut Criterion) {
25 |     // add your bench like below.
26 |     c.bench_function("rpc_bench100000", |b| b.iter(|| cli(100000)));
27 | }
28 | 
29 | //define benchmark configuration like below.
30 | criterion_group!(
31 |     name=benches;
32 |     config=Criterion::default().significance_level(0.1).sample_size(10);
33 |     targets = rpc_benchmark
34 | );
35 | criterion_main!(benches);
36 | ```
37 | 
38 | to run benchmark
39 | `cargo bench --bench <bench-name>`
40 | 
41 | ```shell
42 | cargo bench --bench rpc
43 | ```
44 | 
45 | ## log
46 | 
47 | We use the [env_logger](https://docs.rs/env_logger/0.10.0/env_logger/) library, which provides five log levels: "ERROR", "WARN", "INFO", "DEBUG", "TRACE".
48 | 
49 | For flexible usage, you can specify the log level by `./target/debug/server --log-level info`. The default log level is set in `examples/*.yaml`
50 | 
51 | Logging principles:
52 | 
53 | 1. Log key state changes;
54 | 2. Apply the right log level;
55 | 3. Avoid duplicating log information;
56 | 4. ......
57 | 
58 | Details about log level :
59 | 
60 | **error** : Designates very serious errors.
61 | 
62 | An error log generally records program-level errors, or serious business errors that do not stop the program from running.
63 | 
64 | **warn** : Designates hazardous situations.
65 | 
66 | A warn log indicates something that needs attention, although it is not certain that an error occurred. For example: a user connection is closed abnormally, the relevant configuration cannot be found and only the default configuration can be used, a retry is scheduled after XX seconds, etc.
67 | 
68 | **info** : Designates useful information.
69 | 
70 | An info log is often used to record information about the operation of a program, such as user operations, changes in status, and connection establishment and termination.
71 | 
72 | **debug** : Designates lower priority information.
73 | 
74 | A debug log is used for detailed information, such as tracing the details of a user request or the configuration that was read.
75 | 
76 | **trace** : Designates very low priority, often extremely verbose, information.
77 | 


--------------------------------------------------------------------------------
/examples/hello_client.rs:
--------------------------------------------------------------------------------
 1 | //! hello_client and hello_server demos show how rpc process the message sent by client
 2 | //! and the usage of 'call_remote' and 'dispatch' APIs.
 3 | //!
 4 | //! After starting server:
 5 | //!
 6 | //!     cargo run --example hello_server --features=disk-db
 7 | //!
 8 | //! You can try this example by running:
 9 | //!
10 | //!     cargo run --example hello_client --features=disk-db
11 | 
12 | use log::debug;
13 | use sealfs::rpc::client::{RpcClient, TcpStreamCreator};
14 | use std::sync::Arc;
15 | use std::time::Duration;
16 | 
17 | #[tokio::main]
18 | pub async fn main() {
19 |     let mut builder = env_logger::Builder::from_default_env();
20 |     builder
21 |         .format_timestamp(None)
22 |         .filter(None, log::LevelFilter::Info);
23 |     builder.init();
24 |     let total = 10000;
25 |     let elapsed = cli(total).await;
26 |     println!("elapsed: {:?}", elapsed);
27 | }
28 | 
29 | pub async fn cli(total: u32) -> Duration {
30 |     let client: Arc<
31 |         RpcClient<
32 |             tokio::net::tcp::OwnedReadHalf,
33 |             tokio::net::tcp::OwnedWriteHalf,
34 |             TcpStreamCreator,
35 |         >,
36 |     > = Arc::new(RpcClient::default());
37 |     let server_address = "127.0.0.1:50051";
38 |     client.add_connection(server_address).await.unwrap();
39 |     // sleep for 1 second to wait for server to start
40 |     // tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
41 |     let mut handles = vec![];
42 |     let start = tokio::time::Instant::now();
43 |     for _ in 0..total {
44 |         let new_client = client.clone();
45 |         handles.push(tokio::spawn(async move {
46 |             let mut status = 0;
47 |             let mut rsp_flags = 0;
48 |             let mut recv_meta_data_length = 0;
49 |             let mut recv_data_length = 0;
50 |             let mut recv_meta_data = vec![0u8; 4];
51 |             let mut recv_data = vec![0u8; 4];
52 |             debug!("call_remote, start");
53 |             let result = new_client
54 |                 .call_remote(
55 |                     server_address,
56 |                     0,
57 |                     0,
58 |                     "",
59 |                     &[],
60 |                     &[0u8; 10],
61 |                     &mut status,
62 |                     &mut rsp_flags,
63 |                     &mut recv_meta_data_length,
64 |                     &mut recv_data_length,
65 |                     &mut recv_meta_data,
66 |                     &mut recv_data,
67 |                     Duration::from_secs(10),
68 |                 )
69 |                 .await;
70 |             debug!("call_remote, result: {:?}", result);
71 |             match result {
72 |                 Ok(_) => {
73 |                     if status == 0 {
74 |                         println!("Success");
75 |                     } else {
76 |                         println!("Error: {}", status);
77 |                     }
78 |                 }
79 |                 Err(e) => {
80 |                     println!("Error: {}", e);
81 |                 }
82 |             }
83 |         }));
84 |     }
85 |     for handle in handles {
86 |         if let Err(e) = handle.await {
87 |             println!("Error: {}", e);
88 |         }
89 |     }
90 |     let elapsed = start.elapsed();
91 |     client.close();
92 |     elapsed
93 | }
94 | 


--------------------------------------------------------------------------------
/examples/hello_server.rs:
--------------------------------------------------------------------------------
 1 | //! hello_client and hello_server demos show how rpc process the message sent by client
 2 | //! and the usage of 'call_remote' and 'dispatch' APIs.
 3 | //!
 4 | //! You can try this example by running:
 5 | //!
 6 | //!     cargo run --example hello_server --features=disk-db
 7 | //!
 8 | //! And then start client in another terminal by running:
 9 | //!
10 | //!     cargo run --example hello_client --features=disk-db
11 | 
12 | #![allow(unused)]
13 | use async_trait::async_trait;
14 | use log::debug;
15 | use sealfs::rpc::server::{Handler, RpcServer};
16 | use std::sync::Arc;
17 | use tokio::sync::Mutex;
/// Stateless request handler used by the hello example server.
pub struct HelloHandler {}

impl HelloHandler {
    /// Creates a handler; it carries no state.
    pub fn new() -> Self {
        HelloHandler {}
    }
}
25 | 
26 | // lazy_static::lazy_static! {
27 | //     static ref HELLO_COUNT: Arc<Mutex<u32>> = Arc::new(Mutex::new(0));
28 | // }
29 | 
30 | #[async_trait]
31 | impl Handler for HelloHandler {
32 |     async fn dispatch(
33 |         &self,
34 |         _conn_id: u32,
35 |         operation_type: u32,
36 |         _flags: u32,
37 |         path: Vec<u8>,
38 |         data: Vec<u8>,
39 |         _metadata: Vec<u8>,
40 |     ) -> anyhow::Result<(i32, u32, usize, usize, Vec<u8>, Vec<u8>)> {
41 |         // debug!("dispatch, operation_type: {}", operation_type);
42 |         // debug!("dispatch, path: {:?}", path);
43 |         // debug!("dispatch, data: {:?}", data);
44 |         match operation_type {
45 |             0 => {
46 |                 // let mut count = HELLO_COUNT.lock().await;
47 |                 // let buf = format!("Hello, {}!", count).into_bytes();
48 |                 // *count += 1;
49 |                 Ok((0, 0, 4, 4, vec![1, 2, 3, 4], vec![5, 6, 7, 8]))
50 |             }
51 |             _ => {
52 |                 todo!()
53 |             }
54 |         }
55 |     }
56 | }
57 | 
58 | #[tokio::main]
59 | pub async fn main() -> anyhow::Result<()> {
60 |     let mut builder = env_logger::Builder::from_default_env();
61 |     builder
62 |         .format_timestamp(None)
63 |         .filter(None, log::LevelFilter::Info);
64 |     builder.init();
65 |     let server = RpcServer::new(Arc::new(HelloHandler::new()), "127.0.0.1:50051");
66 |     server.run().await?;
67 |     Ok(())
68 | }
69 | 


--------------------------------------------------------------------------------
/examples/manager.yaml:
--------------------------------------------------------------------------------
 1 | address:
 2 |   127.0.0.1:8081
 3 | all_servers_address:
 4 |  - 127.0.0.1:8085
 5 |  - 127.0.0.1:8086
 6 |  - 127.0.0.1:8087
 7 |  - 127.0.0.1:8088
 8 |  - 127.0.0.1:8089
 9 | virtual_nodes:
10 |   100
11 | log_level:
12 |   warn
13 | 


--------------------------------------------------------------------------------
/examples/rdma_client.rs:
--------------------------------------------------------------------------------
 1 | //! cargo run --example rdma_client --features=disk-db
 2 | //!
 3 | 
 4 | use log::debug;
 5 | use sealfs::rpc::rdma::client::Client;
 6 | use std::{sync::Arc, time::Duration};
 7 | 
 8 | #[tokio::main]
 9 | pub async fn main() {
10 |     let mut builder = env_logger::Builder::from_default_env();
11 |     builder
12 |         .format_timestamp(None)
13 |         .filter(None, log::LevelFilter::Info);
14 |     builder.init();
15 |     let total = 10000;
16 |     let elapsed = cli(total).await;
17 |     println!("elapsed: {:?}", elapsed);
18 | }
19 | 
20 | pub async fn cli(total: u32) -> Duration {
21 |     let client = Arc::new(Client::new());
22 |     let server_address = "127.0.0.1:7777";
23 |     client.add_connection(server_address).await;
24 |     // sleep for 1 second to wait for server to start
25 |     // tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
26 |     let mut handles = vec![];
27 |     let start = tokio::time::Instant::now();
28 |     for _ in 0..total {
29 |         let new_client = client.clone();
30 |         handles.push(tokio::spawn(async move {
31 |             let mut status = 0;
32 |             let mut rsp_flags = 0;
33 |             let mut recv_meta_data_length = 0;
34 |             let mut recv_data_length = 0;
35 |             let mut recv_meta_data = vec![0u8; 4];
36 |             let mut recv_data = vec![0u8; 4];
37 |             debug!("call_remote, start");
38 |             let result = new_client
39 |                 .call_remote(
40 |                     server_address,
41 |                     0,
42 |                     0,
43 |                     "",
44 |                     &[],
45 |                     &[0u8; 10],
46 |                     &mut status,
47 |                     &mut rsp_flags,
48 |                     &mut recv_meta_data_length,
49 |                     &mut recv_data_length,
50 |                     &mut recv_meta_data,
51 |                     &mut recv_data,
52 |                     Duration::from_secs(10),
53 |                 )
54 |                 .await;
55 |             debug!("call_remote, result: {:?}", result);
56 |             match result {
57 |                 Ok(_) => {
58 |                     if status == 0 {
59 |                         // // print recv_metadata and recv_data
60 |                         // println!(
61 |                         //     "result: {}, recv_meta_data: {:?}, recv_data: {:?}",
62 |                         //     i, recv_meta_data, recv_data
63 |                         // );
64 |                     } else {
65 |                         println!("Error: {}", status);
66 |                     }
67 |                 }
68 |                 Err(e) => {
69 |                     println!("Error: {}", e);
70 |                 }
71 |             }
72 |         }));
73 |     }
74 |     for handle in handles {
75 |         if let Err(e) = handle.await {
76 |             println!("Error: {}", e);
77 |         }
78 |     }
79 |     let elapsed = start.elapsed();
80 |     client.close();
81 |     elapsed
82 | }
83 | 


--------------------------------------------------------------------------------
/examples/rdma_server.rs:
--------------------------------------------------------------------------------
 1 | //! cargo run --example rdma_server --features=disk-db
 2 | //!
 3 | 
 4 | use async_trait::async_trait;
 5 | use sealfs::rpc::{rdma::server::Server, server::Handler};
 6 | use std::sync::Arc;
 7 | pub struct HelloHandler {}
 8 | 
 9 | impl HelloHandler {
10 |     pub fn new() -> Self {
11 |         Self {}
12 |     }
13 | }
14 | 
15 | // lazy_static::lazy_static! {
16 | //     static ref HELLO_COUNT: Arc<Mutex<u32>> = Arc::new(Mutex::new(0));
17 | // }
18 | 
19 | #[async_trait]
20 | impl Handler for HelloHandler {
21 |     async fn dispatch(
22 |         &self,
23 |         _conn_id: u32,
24 |         operation_type: u32,
25 |         _flags: u32,
26 |         _path: Vec<u8>,
27 |         _data: Vec<u8>,
28 |         _metadata: Vec<u8>,
29 |     ) -> anyhow::Result<(i32, u32, usize, usize, Vec<u8>, Vec<u8>)> {
30 |         // println!("metadata: {:?}", metadata);
31 |         // println!("data: {:?}", data);
32 |         match operation_type {
33 |             0 => Ok((0, 0, 4, 4, vec![1, 2, 3, 4], vec![5, 6, 7, 8])),
34 |             _ => {
35 |                 todo!()
36 |             }
37 |         }
38 |     }
39 | }
40 | 
41 | #[tokio::main]
42 | pub async fn main() -> anyhow::Result<()> {
43 |     let server = Server::new("127.0.0.1:7777".to_string(), Arc::new(HelloHandler::new())).await;
44 |     server.run().await?;
45 |     Ok(())
46 | }
47 | 


--------------------------------------------------------------------------------
/intercept/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "intercept"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | authors = ["The Sealfs Developers"]
 6 | license = "Apache-2.0"
 7 | 
 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 9 | 
10 | [dependencies]
11 | libc = "0.2"
12 | tokio = {version = "1.22.0", features = ["full"]}
13 | dashmap = "5.4.0"
14 | lazy_static = "1.4.0"
15 | crossbeam-channel = "0.5"
16 | log = "0.4.17"
17 | env_logger = "0.9.1"
18 | bincode = "1.3.3"
19 | nix = "0.26.1"
20 | serde = { version = "1", features = ["derive"] }
21 | serde_yaml = "0.9.14"
22 | async-trait = "0.1.59"
23 | spin = "0.5"
24 | sealfs = { path = "../" }
25 | 
26 | [build-dependencies]
27 | cmake = "0.1"
28 | 
29 | [dev-dependencies]
30 | libc = "0.2"
31 | 
32 | [lib]
33 | crate-type = ["cdylib"]


--------------------------------------------------------------------------------
/intercept/build.rs:
--------------------------------------------------------------------------------
1 | fn main() {
2 |     let dst = cmake::build("../syscall_intercept");
3 | 
4 |     println!("cargo:rustc-link-search=native={}/lib", dst.display());
5 |     println!("cargo:rustc-link-lib=static=syscall_intercept");
6 |     println!("cargo:rustc-link-lib=capstone");
7 | }
8 | 


--------------------------------------------------------------------------------
/intercept/src/file_desc.rs:
--------------------------------------------------------------------------------
 1 | use crossbeam_channel::{bounded, Receiver, Sender};
 2 | use dashmap::DashMap;
 3 | 
 4 | #[derive(PartialEq, Debug, Clone)]
 5 | pub enum FdType {
 6 |     File,
 7 |     Dir,
 8 | }
 9 | 
10 | #[derive(Clone)]
11 | pub struct FdAttr {
12 |     pub pathname: String,
13 |     pub r#type: FdType,
14 |     pub offset: i64,
15 |     pub flags: i32,
16 | }
17 | 
18 | lazy_static::lazy_static! {
19 |     static ref IDLE_FD: (Sender<i32>, Receiver<i32>) = {
20 |         let (s, r) = bounded(1024);
21 |         for i in 10000..11024 {
22 |             s.send(i).unwrap();
23 |         }
24 |         (s, r)
25 |     };
26 |     static ref FD_TB: DashMap<i32, FdAttr> = DashMap::new();
27 | }
28 | 
29 | pub fn insert_attr(attr: FdAttr) -> Option<i32> {
30 |     let fd = match IDLE_FD.1.recv() {
31 |         Ok(value) => value,
32 |         Err(_) => return None,
33 |     };
34 | 
35 |     FD_TB.insert(fd, attr);
36 |     return Some(fd);
37 | }
38 | 
39 | pub fn remove_attr(fd: i32) -> bool {
40 |     match FD_TB.remove(&fd) {
41 |         Some(_) => {
42 |             IDLE_FD.0.send(fd).unwrap();
43 |             true
44 |         }
45 |         None => false,
46 |     }
47 | }
48 | 
49 | pub fn get_attr(fd: i32) -> Option<FdAttr> {
50 |     match FD_TB.get(&fd) {
51 |         Some(value) => Some((*value).clone()),
52 |         None => None,
53 |     }
54 | }
55 | 
56 | pub fn set_attr(fd: i32, attr: FdAttr) -> bool {
57 |     match FD_TB.get_mut(&fd) {
58 |         Some(mut value) => {
59 |             *value = attr;
60 |             true
61 |         }
62 |         None => false,
63 |     }
64 | }
65 | 
66 | pub fn set_offset(fd: i32, offset: i64) {
67 |     FD_TB.get_mut(&fd).unwrap().offset = offset as i64
68 | }
69 | 


--------------------------------------------------------------------------------
/intercept/src/path.rs:
--------------------------------------------------------------------------------
 1 | lazy_static::lazy_static! {
 2 |     pub static ref CURRENT_DIR: String = std::env::current_dir()
 3 |         .unwrap()
 4 |         .to_str()
 5 |         .unwrap()
 6 |         .to_string();
 7 |     pub static ref MOUNT_POINT: String = {
 8 |         let mut value =
 9 |             std::env::var("SEALFS_MOUNT_POINT").unwrap_or_else(|_| "/mnt/fs".to_string());
10 |         if value.ends_with('/') {
11 |             value.pop();
12 |         }
13 |         value
14 |     };
15 |     pub static ref VOLUME_NAME: String = {
16 |         std::env::var("SEALFS_VOLUME_NAME").unwrap_or_else(|_| "sealfs".to_string())
17 |     };
18 | }
19 | 
20 | fn get_realpath(path: &str) -> Option<String> {
21 |     // An absolute pathname
22 |     let path = if path.starts_with('/') {
23 |         path.to_string()
24 |     } else {
25 |         let mut cwd = CURRENT_DIR.to_string();
26 |         cwd.push('/');
27 |         cwd.push_str(path);
28 |         cwd
29 |     };
30 |     let mut start = 0;
31 |     let mut end;
32 |     let path_bytes = path.as_bytes();
33 |     let mut result = String::new();
34 |     while start < path_bytes.len() {
35 |         while start < path_bytes.len() && path_bytes[start] == b'/' {
36 |             start += 1;
37 |         }
38 |         end = start;
39 |         while end < path_bytes.len() && path_bytes[end] != b'/' {
40 |             end += 1;
41 |         }
42 |         let len = end - start;
43 |         if len == 0 {
44 |             break;
45 |         } else if len == 1 && path_bytes[start] == b'.' {
46 |             /* nothing */
47 |         } else if len == 2 && path_bytes[start] == b'.' && path_bytes[start + 1] == b'.' {
48 |             while result.len() > 0 && !result.ends_with('/') {
49 |                 result.pop();
50 |             }
51 |             if result.len() == 0 {
52 |                 return None;
53 |             }
54 |             result.pop();
55 |         } else {
56 |             result.push('/');
57 |             result.push_str(&String::from_utf8(path_bytes[start..end].to_vec()).unwrap());
58 |         }
59 |         start = end;
60 |     }
61 |     Some(result)
62 | }
63 | 
64 | pub fn get_absolutepath(dir_path: &str, file_path: &str) -> Result<String, i32> {
65 |     // An absolute pathname
66 |     if file_path.starts_with('/') {
67 |         match get_realpath(file_path) {
68 |             Some(value) => return Ok(value),
69 |             None => return Err(0),
70 |         }
71 |     }
72 | 
73 |     // By file descriptor
74 |     if file_path.is_empty() {
75 |         match get_realpath(dir_path) {
76 |             Some(value) => return Ok(value),
77 |             None => return Err(0),
78 |         }
79 |     }
80 | 
81 |     // By file descriptor
82 |     match get_realpath(&(dir_path.to_string() + "/" + &file_path)) {
83 |         Some(value) => Ok(value),
84 |         None => Err(0),
85 |     }
86 | }
87 | 
88 | pub fn get_remotepath(path: &str) -> Option<String> {
89 |     if path.starts_with(MOUNT_POINT.as_str()) {
90 |         let mut remotepath = VOLUME_NAME.clone();
91 |         remotepath.push_str(&path[MOUNT_POINT.len()..]);
92 |         if remotepath.len() > 1 && remotepath.ends_with('/') {
93 |             remotepath.pop();
94 |         }
95 |         return Some(remotepath);
96 |     }
97 |     None
98 | }
99 | 


--------------------------------------------------------------------------------
/intercept/src/syscall_intercept.rs:
--------------------------------------------------------------------------------
 1 | #[link(name = "syscall_intercept")]
 2 | extern "C" {
 3 |     static mut intercept_hook_point: Option<HookFn>;
 4 | 
 5 |     pub fn syscall_no_intercept(num: isize, ...) -> isize;
 6 | }
 7 | 
 8 | /// Set syscall intercept hook function.
 9 | ///
10 | /// # Safety
11 | ///
12 | /// This function will change all syscall behavior!
13 | pub unsafe fn set_hook_fn(f: HookFn) {
14 |     intercept_hook_point = Some(f);
15 | }
16 | 
17 | /// Clear syscall intercept hook function.
18 | ///
19 | /// # Safety
20 | ///
21 | /// This function will change all syscall behavior!
22 | pub unsafe fn unset_hook_fn() {
23 |     intercept_hook_point = None;
24 | }
25 | 
/// Verdict returned by a hook function.
#[repr(i32)]
pub enum InterceptResult {
    /// The hook handled the system call itself; its return value must have
    /// been stored through the hook's `result` argument.
    Hook = 0,
    /// The hook ignored the system call; the original syscall should be
    /// executed.
    Forward = 1,
}

/// Signature of a hook function: the syscall number, its six raw arguments,
/// and an out-slot for the return value when the hook takes over the call.
pub type HookFn = extern "C" fn(
    num: isize,
    arg0: isize,
    arg1: isize,
    arg2: isize,
    arg3: isize,
    arg4: isize,
    arg5: isize,
    result: &mut isize,
) -> InterceptResult;
46 | 


--------------------------------------------------------------------------------
/intercept/src/test_log.rs:
--------------------------------------------------------------------------------
 1 | use libc::SYS_write;
 2 | 
 3 | use crate::syscall_intercept::syscall_no_intercept;
 4 | 
 5 | struct CStrPointer {
 6 |     _p: *const u8,
 7 | }
 8 | 
 9 | unsafe impl std::marker::Sync for CStrPointer {}
10 | unsafe impl std::marker::Send for CStrPointer {}
11 | lazy_static::lazy_static! {
12 |     static ref LOG_BUF: Vec<u8> = vec![0u8; 10];
13 | }
14 | 
15 | pub unsafe fn _print_log(mut num: i32, pre_char: char, suf_char: char) {
16 |     let c = LOG_BUF.as_slice().as_ptr() as *mut u8;
17 |     let mut cnt = 1;
18 |     *c = pre_char as u8;
19 |     while num != 0 {
20 |         *c.offset(cnt) = ((num % 10) + 48) as u8;
21 |         num /= 10;
22 |         cnt += 1;
23 |     }
24 |     *c.offset(cnt) = suf_char as u8;
25 |     *c.offset(cnt + 1) = 0 as u8;
26 |     syscall_no_intercept(SYS_write as isize, 1, c, cnt + 2);
27 | }
28 | 


--------------------------------------------------------------------------------
/proto/test.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | package helloworld;
 3 | service Greeter {
 4 |     rpc SayHello (HelloRequest) returns (HelloReply);
 5 | }
 6 | 
 7 | message HelloRequest {
 8 |     uint32 id = 1;
 9 |     uint32 type = 2;
10 |     uint32 flags = 3;
11 |     string filename = 4;
12 |     string meta_data = 5;
13 |     string data = 6;
14 | }
15 | 
16 | message HelloReply {
17 |     uint32 id = 1;
18 |     int32 status = 2;
19 |     uint32 flags = 3;
20 |     string meta_data = 5;
21 |     string data = 6;
22 | }


--------------------------------------------------------------------------------
/scripts/add_node.sh:
--------------------------------------------------------------------------------
 1 | for ((i=1; i<=10; i++))
 2 | do
 3 |     mkdir ~/fs/test_rm$i
 4 |     echo "test" >> ~/fs/test_rm$i/test.log
 5 |     sleep 0.1
 6 | done
 7 | 
 8 | target/debug/client --log-level info add 127.0.0.1:8090
 9 | ./target/debug/server --server-address 127.0.0.1:8090 --database-path /data/database5/ --storage-path /data/storage5/ --log-level info
10 | 
11 | 
12 | for ((i=1; i<=10; i++))
13 | do
14 |     rm ~/fs/test_rm$i/test.log
15 |     echo heart
16 |     rm -r ~/fs/test_rm$i
17 |     sleep 0.1
18 | done


--------------------------------------------------------------------------------
/scripts/close_all_instances.sh:
--------------------------------------------------------------------------------
 1 | set +e
 2 | # ps and kill the process start by this command "target/debug/server"
 3 | ps -ef | grep "target/debug/server" | grep -v grep | awk '{print $2}' | xargs kill -9
 4 | # ps and kill the process start by this command "target/debug/manager"
 5 | ps -ef | grep "target/debug/manager" | grep -v grep | awk '{print $2}' | xargs kill -9
 6 | # ps and kill the process start by this command "target/debug/client"
 7 | ps -ef | grep "target/debug/client" | grep -v grep | awk '{print $2}' | xargs kill -9
 8 | 
 9 | # ps and kill the process start by this command "target/release/server"
10 | ps -ef | grep "target/release/server" | grep -v grep | awk '{print $2}' | xargs kill -9
11 | # ps and kill the process start by this command "target/release/manager"
12 | ps -ef | grep "target/release/manager" | grep -v grep | awk '{print $2}' | xargs kill -9
13 | # ps and kill the process start by this command "target/release/client"
14 | ps -ef | grep "target/release/client" | grep -v grep | awk '{print $2}' | xargs kill -9
15 | 
16 | # ps and kill the process start by this command "target/release/client"
17 | ps -ef | grep "run_all" | grep -v grep | awk '{print $2}' | xargs kill -9


--------------------------------------------------------------------------------
/scripts/delete_node.sh:
--------------------------------------------------------------------------------
# Asks the cluster, through the debug client build, to delete the node 127.0.0.1:8089.
target/debug/client \
    --log-level info \
    delete 127.0.0.1:8089


--------------------------------------------------------------------------------
/scripts/read_files.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | for ((l=1; l<=100; l++))
 3 | do 
 4 |     for ((i=1; i<=10; i++))
 5 |     do
 6 |         mkdir ~/fs/test_rm$i
 7 |         echo "test" >> ~/fs/test_rm$i/test.log
 8 |         sleep 0.1
 9 |     done
10 | 
11 |     for ((i=1; i<=10; i++))
12 |     do
13 |         cat ~/fs/test_rm$i/test.log
14 |         echo "test" >> ~/fs/test_rm$i/test.log
15 |         sleep 0.1
16 |     done
17 | 
18 |     for ((i=1; i<=10; i++))
19 |     do
20 |         sleep 0.1
21 |     done
22 | 
23 |     for ((i=1; i<=10; i++))
24 |     do
25 |         rm ~/fs/test_rm$i/test.log
26 |         echo heart
27 |         rm -r ~/fs/test_rm$i
28 |         sleep 0.1
29 |     done
30 | done
31 | 
32 | kill $(jobs -p)


--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | s=(SyncNewHashRing PreTransfer Transferring PreFinish Finishing Idle)
 3 | 
 4 | for i in {0..5}
 5 | do 
 6 |     echo "test $i"
 7 |     echo ""
 8 | 
 9 |     scripts/close_all_instances.sh
10 |     scripts/run_all.sh /data warn&
11 |     sleep 10
12 | 
13 |     mkdir ~/fs/test_rm5
14 | 
15 |     target/debug/client --log-level info delete 127.0.0.1:8089
16 | 
17 |     while true
18 |     do
19 |         status=`target/debug/client status`
20 |         #echo $status+${s[$i]}
21 |         if [[ $status == *${s[$i]}* ]];
22 |         then
23 |             break
24 |         fi
25 |         sleep 0.1
26 |     done
27 | 
28 |     echo "test" >> ~/fs/test_rm5/test.log
29 | 
30 |     rm ~/fs/test_rm5/test.log
31 | 
32 |     while true
33 |     do
34 |         status=`target/debug/client status`
35 |         #echo $status+${s[$i]}
36 |         if [[ $status == *${s[5]}* ]];
37 |         then
38 |             break
39 |         fi
40 |         sleep 0.1
41 |     done
42 | 
43 |     rm -r ~/fs/test_rm5
44 | 
45 |     kill $(jobs -p)
46 | done


--------------------------------------------------------------------------------
/scripts/test_run_all.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | function finish() {
 4 |     set +e
 5 |     trap 'kill $(jobs -p)' EXIT
 6 |     set -e
 7 |     exit $1
 8 | }
 9 | 
10 | trap 'onCtrlC' INT
11 | function onCtrlC () {
12 |     finish 0
13 | }
14 | 
15 | function green_font() {
16 |     echo -e "\033[32m$1\033[0m\c"
17 | }
18 | 
19 | echo "start fuse_client_run"
20 | 
21 | # exit with 1 if no argument
22 | if [ $# -eq 0 ]
23 | then
24 |     echo "no argument"
25 |     exit 1
26 | fi
27 | 
28 | set +e
29 | 
30 | rm /tmp/sealfs.sock
31 | rm /tmp/sealfs.index
32 | sudo umount ~/fs
33 | mkdir -p ~/fs
34 | 
35 | set -e
36 | 
37 | # check if $2 is empty, if empty, let $log_level = warn, else $log_level = $2
38 | if [ -z $2 ]; then
39 |     log_level=info
40 | else
41 |     log_level=$2
42 | fi
43 | 
44 | SEALFS_CONFIG_PATH=./examples ./target/debug/manager --log-level $log_level &
45 | 
46 | sudo rm -rf $1/database*
47 | sudo rm -rf $1/storage*
48 | for ((i=0; i<5; i++))
49 | do
50 |     port=$[8085+$i]
51 |     ./target/debug/server --server-address 127.0.0.1:${port} --database-path $1/database${i}/ --storage-path $1/storage${i}/ --log-level $log_level &
52 | done
53 | 
54 | sleep 3
55 | 
56 | ./target/debug/client --log-level $log_level create-volume test1 100000
57 | 
58 | ./target/debug/client --log-level $log_level daemon&
59 | sleep 3
60 | 
61 | ./target/debug/client --log-level $log_level mount ~/fs test1
62 | sleep 3
63 | 
64 | echo "press ctrl+c to stop"
65 | sleep 100000


--------------------------------------------------------------------------------
/src/bin/client.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | use sealfs::client;
 6 | 
 7 | #[tokio::main]
 8 | async fn main() -> Result<(), Box<dyn std::error::Error>> {
 9 |     if let Err(e) = client::run_command().await {
10 |         println!("Error: {}", e);
11 |         return Err(e);
12 |     }
13 |     Ok(())
14 | }
15 | 


--------------------------------------------------------------------------------
/src/bin/manager.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 labring. All rights reserved.
  2 | //
  3 | // SPDX-License-Identifier: Apache-2.0
  4 | 
  5 | use clap::Parser;
  6 | use env_logger::fmt;
  7 | use log::{error, info, warn};
  8 | use sealfs::manager::manager_service::update_server_status;
  9 | use sealfs::{manager::manager_service::ManagerService, rpc::server::RpcServer};
 10 | use serde::{Deserialize, Serialize};
 11 | use std::fs;
 12 | use std::io::Read;
 13 | use std::str::FromStr;
 14 | use std::{fmt::Debug, sync::Arc};
 15 | 
 16 | #[derive(Parser, Debug)]
 17 | #[command(author, version, about, long_about = None)]
 18 | struct Args {
 19 |     #[arg(long)]
 20 |     address: Option<String>,
 21 |     #[arg(long)]
 22 |     config_file: Option<String>,
 23 |     /// To use customized configuration or not. If this flag is used, please provide a config file through --config_file <path>
 24 |     #[arg(long)]
 25 |     use_config_file: bool,
 26 |     #[arg(long)]
 27 |     log_level: Option<String>,
 28 |     #[arg(long)]
 29 |     all_servers_address: Option<Vec<String>>,
 30 |     #[arg(long)]
 31 |     virtual_nodes: Option<usize>,
 32 | }
 33 | 
/// Effective manager configuration, deserialized from a YAML config file
/// and/or assembled from command-line overrides in `main`.
#[derive(Debug, Serialize, Deserialize)]
struct Properties {
    /// Address the manager's RPC server is created with.
    address: String,
    /// Addresses of all servers handed to the manager service.
    all_servers_address: Vec<String>,
    /// Value paired with every server address when building the server list.
    virtual_nodes: usize,
    /// Log level name, parsed into a `log::LevelFilter`.
    log_level: String,
}
 41 | 
 42 | #[tokio::main]
 43 | async fn main() -> anyhow::Result<()> {
 44 |     let mut builder = env_logger::Builder::from_default_env();
 45 | 
 46 |     // read from default configuration.
 47 |     let config_path = std::env::var("SEALFS_CONFIG_PATH").unwrap_or("~".to_string());
 48 | 
 49 |     let mut config_file = std::fs::File::open(format!("{}/{}", config_path, "manager.yaml"))
 50 |         .expect("manager.yaml open failed!");
 51 | 
 52 |     let mut config_str = String::new();
 53 | 
 54 |     config_file
 55 |         .read_to_string(&mut config_str)
 56 |         .expect("manager.yaml read failed!");
 57 | 
 58 |     let default_properties: Properties =
 59 |         serde_yaml::from_str(&config_str).expect("manager.yaml serializa failed!");
 60 | 
 61 |     // read from command line.
 62 |     let args: Args = Args::parse();
 63 |     let properties: Properties = match args.use_config_file {
 64 |         true => {
 65 |             // read from user-provided config file
 66 |             match args.config_file {
 67 |                 Some(c) => {
 68 |                     let yaml_str = fs::read_to_string(c).expect("Couldn't read from file. The file is either missing or you don't have enough permissions!");
 69 |                     let mut result: Properties =
 70 |                         serde_yaml::from_str(&yaml_str).expect("manager.yaml read failed!");
 71 |                     if args.log_level.is_some() {
 72 |                         result.log_level = args.log_level.unwrap();
 73 |                     }
 74 |                     result
 75 |                 }
 76 |                 _ => {
 77 |                     warn!(
 78 |                         "No custom configuration provided, fallback to the default configuration."
 79 |                     );
 80 |                     default_properties
 81 |                 }
 82 |             }
 83 |         }
 84 |         false => Properties {
 85 |             address: args.address.unwrap_or(default_properties.address),
 86 |             all_servers_address: args
 87 |                 .all_servers_address
 88 |                 .unwrap_or(default_properties.all_servers_address),
 89 |             virtual_nodes: args
 90 |                 .virtual_nodes
 91 |                 .unwrap_or(default_properties.virtual_nodes),
 92 |             log_level: args.log_level.unwrap_or(default_properties.log_level),
 93 |         },
 94 |     };
 95 | 
 96 |     builder
 97 |         .format_timestamp(Some(fmt::TimestampPrecision::Millis))
 98 |         .filter(
 99 |             None,
100 |             log::LevelFilter::from_str(&properties.log_level).unwrap(),
101 |         );
102 |     builder.init();
103 | 
104 |     info!("Starting manager with log level: {}", properties.log_level);
105 | 
106 |     let address = properties.address;
107 | 
108 |     let servers_address = properties
109 |         .all_servers_address
110 |         .iter()
111 |         .map(|s| (s.to_string(), properties.virtual_nodes))
112 |         .collect::<Vec<(String, usize)>>();
113 | 
114 |     info!("All servers address: {:?}", servers_address);
115 | 
116 |     let manager = Arc::new(ManagerService::new(servers_address.clone()));
117 | 
118 |     let server = Arc::new(RpcServer::new(manager.clone(), &address));
119 | 
120 |     info!("Manager started at {}", address);
121 | 
122 |     let new_manager = manager.clone();
123 | 
124 |     tokio::spawn(async move {
125 |         if let Err(e) = server.run().await {
126 |             error!("Manager server error: {}", e);
127 |             new_manager
128 |                 .manager
129 |                 .closed
130 |                 .store(true, std::sync::atomic::Ordering::Relaxed);
131 |         }
132 |     });
133 | 
134 |     update_server_status(manager.manager.clone()).await;
135 | 
136 |     Ok(())
137 | }
138 | 


--------------------------------------------------------------------------------
/src/bin/server.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | use clap::Parser;
 6 | use env_logger::fmt;
 7 | use log::info;
 8 | use sealfs::server;
 9 | use serde::{Deserialize, Serialize};
10 | use std::fmt::Debug;
11 | use std::str::FromStr;
12 | 
13 | const _SERVER_FLAG: u32 = 1;
14 | 
// Command-line arguments of the server binary.
//
// Plain `//` comments are used on purpose: clap turns `///` doc comments into
// help text, which would change `--help` output.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    // Address of the manager; `main` defaults it to 127.0.0.1:8081.
    #[arg(long)]
    manager_address: Option<String>,
    // Address of this server (required).
    #[arg(required = true, long)]
    server_address: Option<String>,
    // Database path passed to `server::run` (required).
    #[arg(required = true, long)]
    database_path: Option<String>,
    // Cache capacity; `main` defaults it to 13421772.
    #[arg(long)]
    cache_capacity: Option<usize>,
    // Write buffer size; `main` defaults it to 0x4000000.
    #[arg(long)]
    write_buffer_size: Option<usize>,
    // Storage path passed to `server::run` (required).
    #[arg(required = true, long)]
    storage_path: Option<String>,
    // Log level name; `main` defaults it to "warn".
    #[arg(long)]
    log_level: Option<String>,
}
33 | 
/// Fully resolved server settings: the command-line arguments with every
/// optional value replaced by its default. Logged at startup and then passed
/// field by field to `server::run`.
#[derive(Debug, Serialize, Deserialize)]
struct Properties {
    manager_address: String,
    server_address: String,
    database_path: String,
    cache_capacity: usize,
    write_buffer_size: usize,
    storage_path: String,
    /// Log level name, parsed into a `log::LevelFilter` (unknown names fall back to `Warn`).
    log_level: String,
}
44 | 
45 | #[tokio::main]
46 | async fn main() -> anyhow::Result<(), Box<dyn std::error::Error>> {
47 |     // read from command line.
48 |     let args: Args = Args::parse();
49 |     // if the user provides the config file, parse it and use the arguments from the config file.
50 |     let properties: Properties = Properties {
51 |         manager_address: args.manager_address.unwrap_or("127.0.0.1:8081".to_owned()),
52 |         server_address: args.server_address.unwrap(),
53 |         database_path: args.database_path.unwrap(),
54 |         cache_capacity: args.cache_capacity.unwrap_or(13421772),
55 |         write_buffer_size: args.write_buffer_size.unwrap_or(0x4000000),
56 |         storage_path: args.storage_path.unwrap(),
57 |         log_level: args.log_level.unwrap_or("warn".to_owned()),
58 |     };
59 | 
60 |     let mut builder = env_logger::Builder::from_default_env();
61 |     builder
62 |         .format_timestamp(Some(fmt::TimestampPrecision::Millis))
63 |         .filter(
64 |             None,
65 |             match log::LevelFilter::from_str(&properties.log_level) {
66 |                 Ok(level) => level,
67 |                 Err(_) => log::LevelFilter::Warn,
68 |             },
69 |         );
70 |     builder.init();
71 | 
72 |     info!("start server with properties: {:?}", properties);
73 | 
74 |     let manager_address = properties.manager_address;
75 |     let server_address = properties.server_address.clone();
76 | 
77 |     server::run(
78 |         properties.database_path,
79 |         properties.storage_path,
80 |         server_address,
81 |         manager_address,
82 |         properties.cache_capacity,
83 |         properties.write_buffer_size,
84 |     )
85 |     .await?;
86 |     Ok(())
87 | }
88 | 


--------------------------------------------------------------------------------
/src/common/byte.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | pub fn array2u32(array: &[u8]) -> u32 {
 6 |     (array[0] as u32)
 7 |         + ((array[1] as u32) << 8)
 8 |         + ((array[2] as u32) << 16)
 9 |         + ((array[3] as u32) << 24)
10 | }
11 | 
12 | pub const CHUNK_SIZE: i64 = 65536;
13 | 


--------------------------------------------------------------------------------
/src/common/cache.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 labring. All rights reserved.
  2 | //
  3 | // SPDX-License-Identifier: Apache-2.0
  4 | 
  5 | use dashmap::DashMap;
  6 | use parking_lot::Mutex;
  7 | use std::sync::atomic::{AtomicUsize, Ordering};
  8 | use std::{marker::PhantomData, mem, ptr::NonNull};
  9 | 
 10 | #[derive(Copy, Clone)]
 11 | struct NodePointer<T>(Option<NonNull<Node<T>>>);
 12 | 
 13 | unsafe impl<T> Send for NodePointer<T> {}
 14 | unsafe impl<T> Sync for NodePointer<T> {}
 15 | 
 16 | impl<T> Default for NodePointer<T> {
 17 |     fn default() -> Self {
 18 |         NodePointer(None)
 19 |     }
 20 | }
 21 | 
 22 | pub struct Node<T> {
 23 |     val: T,
 24 |     next: Mutex<NodePointer<T>>,
 25 |     prev: Mutex<NodePointer<T>>,
 26 | }
 27 | 
 28 | impl<T> Node<T> {
 29 |     fn new(val: T) -> Self {
 30 |         Self {
 31 |             val,
 32 |             next: Mutex::new(NodePointer::default()),
 33 |             prev: Mutex::new(NodePointer::default()),
 34 |         }
 35 |     }
 36 | 
 37 |     fn into_val(self) -> T {
 38 |         self.val
 39 |     }
 40 | }
 41 | 
 42 | pub struct LinkedList<T>
 43 | where
 44 |     T: std::fmt::Debug,
 45 | {
 46 |     length: AtomicUsize,
 47 |     head: Mutex<NodePointer<T>>,
 48 |     tail: Mutex<NodePointer<T>>,
 49 |     _marker: PhantomData<Box<Node<T>>>,
 50 | }
 51 | 
 52 | impl<T> Default for LinkedList<T>
 53 | where
 54 |     T: std::fmt::Debug,
 55 | {
 56 |     fn default() -> Self {
 57 |         Self {
 58 |             length: 0.into(),
 59 |             head: Mutex::new(NodePointer::default()),
 60 |             tail: Mutex::new(NodePointer::default()),
 61 |             _marker: PhantomData,
 62 |         }
 63 |     }
 64 | }
 65 | 
 66 | impl<T> LinkedList<T>
 67 | where
 68 |     T: std::fmt::Debug,
 69 | {
 70 |     pub fn new() -> Self {
 71 |         Self {
 72 |             length: 0.into(),
 73 |             head: Mutex::new(NodePointer::default()),
 74 |             tail: Mutex::new(NodePointer::default()),
 75 |             _marker: PhantomData,
 76 |         }
 77 |     }
 78 | 
 79 |     pub fn insert_front(&self, val: T) {
 80 |         let node = Box::new(Node::new(val));
 81 |         let node = NonNull::new(Box::into_raw(node)).unwrap();
 82 |         self.insert_front_raw(node);
 83 |     }
 84 | 
 85 |     pub fn insert_front_raw(&self, mut node: NonNull<Node<T>>) {
 86 |         let mut head_locked = self.head.lock();
 87 |         unsafe {
 88 |             node.as_mut().next.lock().0 = head_locked.0;
 89 |             node.as_mut().prev = Mutex::new(NodePointer::default());
 90 |         }
 91 | 
 92 |         match head_locked.0 {
 93 |             Some(head) => unsafe {
 94 |                 (*head.as_ptr()).prev.lock().0 = Some(node);
 95 |             },
 96 |             None => {
 97 |                 self.tail.lock().0 = Some(node);
 98 |             }
 99 |         }
100 |         head_locked.0 = Some(node);
101 |         self.length.fetch_add(1, Ordering::Relaxed);
102 |     }
103 | 
104 |     pub fn remove(&self, mut node: NonNull<Node<T>>) -> T {
105 |         let node_mut = unsafe { node.as_mut() };
106 |         self.length.fetch_sub(1, Ordering::Relaxed);
107 |         match node_mut.prev.lock().0 {
108 |             Some(prev) => unsafe { (*prev.as_ptr()).next.lock().0 = node_mut.next.lock().0 },
109 |             None => {
110 |                 self.head.lock().0 = node_mut.next.lock().0;
111 |             }
112 |         }
113 |         match node_mut.next.lock().0 {
114 |             Some(next) => unsafe { (*next.as_ptr()).prev.lock().0 = node_mut.prev.lock().0 },
115 |             None => self.tail.lock().0 = node_mut.prev.lock().0,
116 |         }
117 |         unsafe {
118 |             let n = Box::from_raw(node.as_ptr());
119 |             n.into_val()
120 |         }
121 |     }
122 | 
123 |     pub fn reinsert_front(&self, mut node: NonNull<Node<T>>) {
124 |         {
125 |             let head_locked = self.head.lock();
126 |             if head_locked.0 == Some(node) {
127 |                 return;
128 |             }
129 |         }
130 |         let node_mut = unsafe { node.as_mut() };
131 |         self.length.fetch_sub(1, Ordering::Relaxed);
132 |         match node_mut.prev.lock().0 {
133 |             Some(prev) => unsafe { (*prev.as_ptr()).next.lock().0 = node_mut.next.lock().0 },
134 |             None => {
135 |                 self.head.lock().0 = node_mut.next.lock().0;
136 |             }
137 |         }
138 |         match node_mut.next.lock().0 {
139 |             Some(next) => unsafe { (*next.as_ptr()).prev.lock().0 = node_mut.prev.lock().0 },
140 |             None => self.tail.lock().0 = node_mut.prev.lock().0,
141 |         }
142 |         self.insert_front_raw(node);
143 |     }
144 | 
145 |     pub fn remove_tail(&self) -> Option<T> {
146 |         let mut tail_locked = self.tail.lock();
147 |         self.length.fetch_sub(1, Ordering::Relaxed);
148 |         match tail_locked.0 {
149 |             Some(tail) => unsafe {
150 |                 let node = Box::from_raw(tail.as_ptr());
151 |                 {
152 |                     let prev_node_locked = node.prev.lock();
153 |                     tail_locked.0 = prev_node_locked.0;
154 |                     match tail_locked.0 {
155 |                         Some(t) => {
156 |                             (*t.as_ptr()).next.lock().0 = None;
157 |                         }
158 |                         None => {
159 |                             self.head.lock().0 = None;
160 |                         }
161 |                     }
162 |                 }
163 |                 Some(node.into_val())
164 |             },
165 |             None => {
166 |                 let mut head_locked = self.head.lock();
167 |                 head_locked.0 = None;
168 |                 None
169 |             }
170 |         }
171 |     }
172 | 
173 |     pub fn iter(&self) -> Iter<'_, T> {
174 |         Iter {
175 |             head: NodePointer(self.head.lock().0),
176 |             len: self.length.load(Ordering::Relaxed),
177 |             _marker: PhantomData,
178 |         }
179 |     }
180 | }
181 | 
impl<T> Drop for LinkedList<T>
where
    T: std::fmt::Debug,
{
    /// Frees every remaining node by popping from the tail until the list is
    /// empty.
    fn drop(&mut self) {
        // Guard whose own `Drop` keeps draining the list. It only runs when
        // dropping a value below panics, so the remaining nodes are still
        // released during unwinding.
        struct DropGuard<'a, T>(&'a mut LinkedList<T>)
        where
            T: std::fmt::Debug;
        impl<'a, T> Drop for DropGuard<'a, T>
        where
            T: std::fmt::Debug,
        {
            fn drop(&mut self) {
                while self.0.remove_tail().is_some() {}
            }
        }

        while let Some(node) = self.remove_tail() {
            let guard = DropGuard(self);
            // Dropping the value may run arbitrary user code (and panic).
            drop(node);
            // Normal path: disarm the guard so the outer loop keeps control.
            mem::forget(guard);
        }
    }
}
206 | 
207 | pub struct Iter<'a, T: 'a> {
208 |     head: NodePointer<T>,
209 |     len: usize,
210 |     _marker: PhantomData<&'a Node<T>>,
211 | }
212 | 
213 | impl<'a, T> Iterator for Iter<'a, T> {
214 |     type Item = &'a T;
215 | 
216 |     #[inline]
217 |     fn next(&mut self) -> Option<Self::Item> {
218 |         if self.len == 0 {
219 |             None
220 |         } else {
221 |             self.head.0.map(|node| {
222 |                 self.len -= 1;
223 | 
224 |                 unsafe {
225 |                     let node = &*node.as_ptr();
226 |                     self.head = NodePointer(node.next.lock().0);
227 |                     &node.val
228 |                 }
229 |             })
230 |         }
231 |     }
232 | }
233 | 
234 | impl<T: std::fmt::Debug> std::fmt::Debug for LinkedList<T> {
235 |     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
236 |         for cur in self.iter() {
237 |             write!(f, "{:?} ", cur)?;
238 |         }
239 |         Ok(())
240 |     }
241 | }
242 | 
/// Payload stored in each list node of the cache: the value together with an
/// owned copy of its key, so an evicted entry can also be removed from the map.
struct LRUEntry<T: std::fmt::Debug> {
    key: Vec<u8>,
    value: T,
}

impl<T> LRUEntry<T>
where
    T: std::fmt::Debug,
{
    /// Builds an entry, copying `key` into an owned buffer.
    pub fn new(key: &[u8], value: T) -> Self {
        let key = key.to_owned();
        LRUEntry { key, value }
    }
}

impl<T: std::fmt::Debug> std::fmt::Debug for LRUEntry<T> {
    /// Formats only the value; the key is omitted.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "{:?}", self.value)
    }
}
266 | 
267 | pub struct LRUCache<T>
268 | where
269 |     T: std::fmt::Debug,
270 | {
271 |     map: DashMap<Vec<u8>, NodePointer<LRUEntry<T>>>,
272 |     list: LinkedList<LRUEntry<T>>,
273 |     capacity: usize,
274 |     lock: Mutex<()>,
275 | }
276 | 
277 | impl<T> LRUCache<T>
278 | where
279 |     T: std::fmt::Debug,
280 | {
281 |     pub fn new(capacity: usize) -> Self {
282 |         Self {
283 |             map: DashMap::new(),
284 |             list: LinkedList::new(),
285 |             capacity,
286 |             lock: Mutex::new(()),
287 |         }
288 |     }
289 | 
290 |     pub fn insert(&self, key: &[u8], value: T) -> Option<T> {
291 |         let _l = self.lock.lock();
292 |         let new_node = LRUEntry::new(key, value);
293 |         let new_node = Box::new(Node::new(new_node));
294 |         let new_node = NonNull::new(Box::into_raw(new_node)).unwrap();
295 | 
296 |         let mut val = None;
297 |         match self.map.get(key) {
298 |             Some(entry) => {
299 |                 let entry = entry.0.unwrap();
300 |                 let value = self.list.remove(entry);
301 |                 val = Some(value.value);
302 |                 self.list.insert_front_raw(new_node);
303 |             }
304 |             None => {
305 |                 if self.list.length.load(Ordering::Relaxed) >= self.capacity {
306 |                     // let removed_key = self.list.remove_tail();
307 |                     if let Some(entry) = self.list.remove_tail() {
308 |                         self.map.remove(&entry.key);
309 |                         val = Some(entry.value);
310 |                     }
311 |                 }
312 |                 self.list.insert_front_raw(new_node);
313 |             }
314 |         }
315 |         self.map.insert(key.to_vec(), NodePointer(Some(new_node)));
316 |         val
317 |     }
318 | 
319 |     pub fn get(&self, key: &[u8]) -> Option<&T> {
320 |         let _l = self.lock.lock();
321 |         match self.map.get(key) {
322 |             Some(node) => unsafe {
323 |                 let node = node.0.unwrap();
324 |                 let value = &node.as_ref().val.value;
325 |                 self.list.reinsert_front(node);
326 |                 Some(value)
327 |             },
328 |             None => None,
329 |         }
330 |     }
331 | 
332 |     pub fn remove(&self, key: &[u8]) {
333 |         let _l = self.lock.lock();
334 |         if let Some(node) = self.map.get(key) {
335 |             self.list.remove(node.0.unwrap());
336 |         }
337 |         self.map.remove(key);
338 |     }
339 | }
340 | 
#[cfg(test)]
mod test {
    mod test_linkedlist {
        use super::super::LinkedList;

        /// Front insertion yields the elements in reverse insertion order.
        #[test]
        fn test_insert() {
            let list: LinkedList<i32> = LinkedList::new();
            for value in [2, 3, 4] {
                list.insert_front(value);
            }
            assert_eq!("4 3 2 ", format!("{:?}", list));
        }
    }

    mod test_lru_cache {
        use std::sync::Arc;

        use super::super::LRUCache;
        use rand::prelude::*;

        /// Smoke test: a fixed insert sequence with repeats and evictions,
        /// printing the list after each of the first eleven inserts.
        #[test]
        fn test() {
            let lru = LRUCache::new(5);
            for key in [5_i32, 0, 2, 6, 1, 6, 8, 8, 7, 4, 0] {
                lru.insert(&key.to_le_bytes(), key);
                println!("{:?}", lru.list);
            }
            for key in [0_i32, 2, 1, 0, 2] {
                lru.insert(&key.to_le_bytes(), key);
            }
        }

        /// Smoke test: 50000 random inserts over 100 keys into a 10-entry cache.
        #[test]
        fn test_insert() {
            let lru = LRUCache::new(10);
            let mut rng = rand::thread_rng();
            for _ in 0..50000_usize {
                let n: usize = rng.gen::<usize>() % 100;
                print!("{n},");
                lru.insert(&n.to_le_bytes(), n);
            }
        }

        /// Smoke test: ten threads each do 10000 random inserts on a shared cache.
        #[test]
        fn test_multithread() {
            let lru: Arc<LRUCache<usize>> = Arc::new(LRUCache::new(10));
            let workers: Vec<_> = (0..10usize)
                .map(|_| {
                    let shared = Arc::clone(&lru);
                    std::thread::spawn(move || {
                        let mut rng = rand::thread_rng();
                        for _ in 0..10000_usize {
                            let n = rng.gen::<usize>() % 100;
                            shared.insert(&n.to_le_bytes(), n);
                        }
                    })
                })
                .collect();
            for worker in workers {
                worker.join().unwrap();
            }
        }
    }
}
428 | 


--------------------------------------------------------------------------------
/src/common/errors.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | use std::ffi::CStr;
 6 | 
 7 | use libc::strerror;
 8 | 
 9 | pub const CONNECTION_ERROR: i32 = 10001;
10 | pub const INVALID_CLUSTER_STATUS: i32 = 10002;
11 | pub const DATABASE_ERROR: i32 = 10003;
12 | pub const SERIALIZATION_ERROR: i32 = 10004;
13 | 
14 | pub fn status_to_string(status: i32) -> String {
15 |     match status {
16 |         CONNECTION_ERROR => "CONNECTION_ERROR".to_string(),
17 |         INVALID_CLUSTER_STATUS => "INVALID_CLUSTER_STATUS".to_string(),
18 |         DATABASE_ERROR => "DATABASE_ERROR".to_string(),
19 |         SERIALIZATION_ERROR => "SERIALIZATION_ERROR".to_string(),
20 |         _ => unsafe { CStr::from_ptr(strerror(status)) }
21 |             .to_str()
22 |             .unwrap()
23 |             .to_string(),
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/src/common/hash_ring.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | use std::collections::HashMap;
 6 | 
 7 | use conhash::{ConsistentHash, Node};
 8 | 
 9 | #[derive(Clone)]
10 | pub struct ServerNode {
11 |     pub address: String,
12 | }
13 | 
14 | impl Node for ServerNode {
15 |     fn name(&self) -> String {
16 |         self.address.clone()
17 |     }
18 | }
19 | 
20 | pub struct HashRing {
21 |     pub ring: ConsistentHash<ServerNode>,
22 |     pub servers: HashMap<String, usize>,
23 | }
24 | 
25 | impl Clone for HashRing {
26 |     fn clone(&self) -> Self {
27 |         let servers = self.servers.clone();
28 |         let mut ring = ConsistentHash::<ServerNode>::new();
29 |         for (server, weight) in servers.iter() {
30 |             ring.add(
31 |                 &ServerNode {
32 |                     address: server.clone(),
33 |                 },
34 |                 *weight,
35 |             );
36 |         }
37 |         HashRing { ring, servers }
38 |     }
39 | }
40 | 
41 | impl HashRing {
42 |     pub fn new(servers: Vec<(String, usize)>) -> Self {
43 |         let mut ring = ConsistentHash::<ServerNode>::new();
44 |         let mut servers_map = HashMap::new();
45 |         for (server, weight) in servers {
46 |             ring.add(
47 |                 &ServerNode {
48 |                     address: server.clone(),
49 |                 },
50 |                 weight,
51 |             );
52 |             servers_map.insert(server, weight);
53 |         }
54 |         HashRing {
55 |             ring,
56 |             servers: servers_map,
57 |         }
58 |     }
59 | 
60 |     pub fn get(&self, key: &str) -> Option<&ServerNode> {
61 |         self.ring.get_str(key)
62 |     }
63 | 
64 |     pub fn add(&mut self, server: ServerNode, weight: usize) {
65 |         self.ring.add(&server, weight);
66 |         self.servers.insert(server.address, weight);
67 |     }
68 | 
69 |     pub fn remove(&mut self, server: &ServerNode) {
70 |         self.ring.remove(server);
71 |         self.servers.remove(&server.address);
72 |     }
73 | 
74 |     pub fn contains(&self, server: &str) -> bool {
75 |         self.servers.contains_key(server)
76 |     }
77 | 
78 |     pub fn get_server_lists(&self) -> Vec<String> {
79 |         self.servers.keys().cloned().collect()
80 |     }
81 | }
82 | 


--------------------------------------------------------------------------------
/src/common/info_syncer.rs:
--------------------------------------------------------------------------------
  1 | use std::{
  2 |     sync::{
  3 |         atomic::{AtomicI32, Ordering},
  4 |         Arc,
  5 |     },
  6 |     time::Duration,
  7 | };
  8 | 
  9 | use async_trait::async_trait;
 10 | use log::{debug, error, info};
 11 | use spin::RwLock;
 12 | use tokio::time::sleep;
 13 | 
 14 | use crate::common::errors::{self, status_to_string, CONNECTION_ERROR};
 15 | 
 16 | use super::{hash_ring::HashRing, sender::Sender, serialization::ClusterStatus};
 17 | 
/// Implemented by components that keep a cached copy of the cluster status.
#[async_trait]
pub trait InfoSyncer {
    /// Fetches the current cluster status; the error is an `i32` status code.
    async fn get_cluster_status(&self) -> Result<ClusterStatus, i32>;
    /// Returns the atomic cell holding the cached status as an `i32`.
    fn cluster_status(&self) -> &AtomicI32;
}
 23 | 
 24 | /// Polls the cluster status once per second, forever, and mirrors every
 25 | /// successfully fetched value into `client.cluster_status()`.
 26 | ///
 27 | /// A failed fetch is only logged; the cached value is left as it was.
 28 | async fn sync_cluster_infos<I: InfoSyncer>(client: Arc<I>) {
 29 |     let poll_interval = Duration::from_secs(1);
 30 |     loop {
 31 |         match client.get_cluster_status().await {
 32 |             Err(e) => info!("sync server infos failed, error = {}", e),
 33 |             Ok(status) => {
 34 |                 let latest: i32 = status.into();
 35 |                 let cached = client.cluster_status();
 36 |                 // Skip the store when the cached value is already current.
 37 |                 if cached.load(Ordering::Relaxed) != latest {
 38 |                     cached.store(latest, Ordering::Relaxed);
 39 |                 }
 40 |             }
 41 |         }
 42 |         sleep(poll_interval).await;
 43 |     }
 44 | }
 43 | 
 44 | /// Client-side view of the cluster: resolves which server owns a path via the
 45 | /// hash rings and issues manager requests through `sender()`.
 46 | #[async_trait]
 47 | pub trait ClientStatusMonitor: InfoSyncer {
 48 |     /// Hash ring used for routing; `connect_servers` installs it.
 49 |     fn hash_ring(&self) -> &Arc<RwLock<Option<HashRing>>>;
 50 |     /// Hash ring being migrated to; `None` when no new ring has been synced.
 51 |     fn new_hash_ring(&self) -> &Arc<RwLock<Option<HashRing>>>;
 52 |     /// Sender used for the manager requests issued by this trait.
 53 |     fn sender(&self) -> &Sender;
 54 |     /// Manager address recorded by `connect_to_manager`.
 55 |     fn manager_address(&self) -> &Arc<tokio::sync::Mutex<String>>;
 56 | 
 57 |     /// Returns the address of the server the current hash ring maps `path` to.
 58 |     ///
 59 |     /// # Panics
 60 |     /// Panics if the hash ring is unset or yields no server for `path`.
 61 |     fn get_address(&self, path: &str) -> String {
 62 |         self.hash_ring()
 63 |             .read()
 64 |             .as_ref()
 65 |             .expect("hash ring is not initialized")
 66 |             .get(path)
 67 |             .expect("hash ring has no server for path")
 68 |             .address
 69 |             .clone()
 70 |     }
 71 | 
 72 |     /// Like `get_address`, but consults the new hash ring when one is set and
 73 |     /// falls back to the current ring otherwise.
 74 |     fn get_new_address(&self, path: &str) -> String {
 75 |         match self.new_hash_ring().read().as_ref() {
 76 |             Some(hash_ring) => hash_ring
 77 |                 .get(path)
 78 |                 .expect("new hash ring has no server for path")
 79 |                 .address
 80 |                 .clone(),
 81 |             None => self.get_address(path),
 82 |         }
 83 |     }
 84 | 
 85 |     /// Asks the manager for the `(address, weight)` entries of the current hash ring.
 86 |     async fn get_hash_ring_info(&self) -> Result<Vec<(String, usize)>, i32> {
 87 |         self.sender()
 88 |             .get_hash_ring_info(&self.manager_address().lock().await)
 89 |             .await
 90 |     }
 91 | 
 92 |     /// Asks the manager for the `(address, weight)` entries of the new hash ring.
 93 |     async fn get_new_hash_ring_info(&self) -> Result<Vec<(String, usize)>, i32> {
 94 |         self.sender()
 95 |             .get_new_hash_ring_info(&self.manager_address().lock().await)
 96 |             .await
 97 |     }
 98 | 
 99 |     /// Picks the server to contact for `path` according to the cached cluster status.
100 |     ///
101 |     /// Only `PreFinish` routes through the new hash ring; every other handled
102 |     /// status uses the current one.
103 |     ///
104 |     /// # Panics
105 |     /// Panics while the cluster is `Initializing`, when the cached value does not
106 |     /// decode into a `ClusterStatus`, and (via `todo!()`) for `StatusError`/`Unkown`.
107 |     fn get_connection_address(&self, path: &str) -> String {
108 |         let cluster_status = self.cluster_status().load(Ordering::Acquire);
109 | 
110 |         // Fast path: compare the raw i32 so the common case skips decoding.
111 |         // NOTE(review): 301 is presumably the code of `ClusterStatus::Idle` - confirm.
112 |         if cluster_status == 301 {
113 |             return self.get_address(path);
114 |         }
115 | 
116 |         match cluster_status.try_into().unwrap() {
117 |             ClusterStatus::Initializing => panic!("cluster status is not ready"),
118 |             // `Idle` routes through the current ring instead of hitting `todo!()`,
119 |             // so it stays correct even if the fast path above does not cover it.
120 |             ClusterStatus::Idle
121 |             | ClusterStatus::NodesStarting
122 |             | ClusterStatus::SyncNewHashRing
123 |             | ClusterStatus::PreTransfer
124 |             | ClusterStatus::Transferring
125 |             | ClusterStatus::Finishing => self.get_address(path),
126 |             ClusterStatus::PreFinish => self.get_new_address(path),
127 |             ClusterStatus::StatusError => todo!(),
128 |             ClusterStatus::Unkown => todo!(),
129 |         }
130 |     }
131 | 
132 |     /// Opens a connection to `server_address`; supplied by the implementor.
133 |     async fn add_connection(&self, server_address: &str) -> Result<(), i32>;
134 | 
135 |     /// Records `manager_address` as the manager to talk to and connects to it.
136 |     ///
137 |     /// Any connection failure is logged and reported as `CONNECTION_ERROR`.
138 |     async fn connect_to_manager(&self, manager_address: &str) -> Result<(), i32> {
139 |         {
140 |             // Overwrite rather than append, so a repeated call cannot leave two
141 |             // addresses glued together in the stored string. The guard is
142 |             // released before the connection attempt below.
143 |             let mut stored = self.manager_address().lock().await;
144 |             stored.clear();
145 |             stored.push_str(manager_address);
146 |         }
147 |         self.add_connection(manager_address).await.map_err(|e| {
148 |             error!("add connection failed: {:?}", e);
149 |             CONNECTION_ERROR
150 |         })
151 |     }
152 | 
153 |     /// Forwards `new_servers_info` (`(address, weight)` pairs) to the manager.
154 |     async fn add_new_servers(&self, new_servers_info: Vec<(String, usize)>) -> Result<(), i32> {
155 |         self.sender()
156 |             .add_new_servers(&self.manager_address().lock().await, new_servers_info)
157 |             .await
158 |     }
159 | 
160 |     /// Waits until the cluster is `Idle`, connects to every server of the
161 |     /// manager's hash ring and installs that ring as the current one.
162 |     ///
163 |     /// Polls once per second while the status is `Initializing` or `PreFinish`;
164 |     /// any other status fails with `INVALID_CLUSTER_STATUS`. Errors from the
165 |     /// manager request or from `add_connection` are returned unchanged.
166 |     async fn connect_servers(&self) -> Result<(), i32> {
167 |         debug!("init");
168 | 
169 |         let all_servers_address = loop {
170 |             match self
171 |                 .cluster_status()
172 |                 .load(Ordering::Acquire)
173 |                 .try_into()
174 |                 .unwrap()
175 |             {
176 |                 ClusterStatus::Idle => break self.get_hash_ring_info().await?,
177 |                 ClusterStatus::Initializing => {
178 |                     info!("cluster is initializing, wait for a while");
179 |                     sleep(Duration::from_secs(1)).await;
180 |                 }
181 |                 ClusterStatus::PreFinish => {
182 |                     info!("cluster is in PreFinish, wait for a while");
183 |                     sleep(Duration::from_secs(1)).await;
184 |                 }
185 |                 s => {
186 |                     error!("invalid cluster status: {}", s);
187 |                     return Err(errors::INVALID_CLUSTER_STATUS);
188 |                 }
189 |             }
190 |         };
191 | 
192 |         for (server_address, _weight) in &all_servers_address {
193 |             self.add_connection(server_address).await?;
194 |         }
195 |         // Build the ring before taking the write lock to keep the lock short.
196 |         let ring = HashRing::new(all_servers_address);
197 |         self.hash_ring().write().replace(ring);
198 |         Ok(())
199 |     }
200 | }
167 | 
168 | /// Decodes the status currently cached in `client.cluster_status()`.
169 | ///
170 | /// Panics if the cached i32 does not convert into a `ClusterStatus`.
171 | fn cached_cluster_status<I: ClientStatusMonitor>(client: &I) -> ClusterStatus {
172 |     <i32 as TryInto<ClusterStatus>>::try_into(client.cluster_status().load(Ordering::Relaxed))
173 |         .unwrap()
174 | }
175 | 
176 | /// Sleeps in one-second steps while the cached status stays `from`, then
177 | /// asserts that the status it changed to is `to`.
178 | async fn await_transition<I: ClientStatusMonitor + Sync + Send>(
179 |     client: &I,
180 |     from: ClusterStatus,
181 |     to: ClusterStatus,
182 | ) {
183 |     while cached_cluster_status(client) == from {
184 |         sleep(Duration::from_secs(1)).await;
185 |     }
186 |     let reached = cached_cluster_status(client);
187 |     assert!(
188 |         reached == to,
189 |         "unexpected cluster status after {:?}: expected {:?}, got {:?}",
190 |         from,
191 |         to,
192 |         reached
193 |     );
194 | }
195 | 
196 | /// Background loop that follows the cluster through a hash-ring migration.
197 | ///
198 | /// While the status is `Idle`, `Initializing` or `NodesStarting` it only
199 | /// sleeps. On `SyncNewHashRing` it walks the whole migration in one pass:
200 | /// fetch and install the new ring, wait through `PreTransfer` and
201 | /// `Transferring`, promote the new ring on `PreFinish`, clear it on
202 | /// `Finishing`, and wait for `Idle`.
203 | ///
204 | /// # Panics
205 | /// Panics on any other status, on a failed manager request or connection,
206 | /// and when the cluster skips an expected step of the sequence.
207 | async fn client_watch_status<I: ClientStatusMonitor + std::marker::Sync + std::marker::Send>(
208 |     client: Arc<I>,
209 | ) {
210 |     loop {
211 |         match cached_cluster_status(&*client) {
212 |             ClusterStatus::SyncNewHashRing => {
213 |                 // The whole SyncNewHashRing -> Idle sequence is handled in this one
214 |                 // arm because no persistent per-status flag exists that would tell
215 |                 // a later loop iteration which steps were already completed.
216 |                 info!("Transfer: start to sync new hash ring");
217 |                 let all_servers_address = match client.get_new_hash_ring_info().await {
218 |                     Ok(value) => value,
219 |                     Err(e) => {
220 |                         panic!("Get Hash Ring Info Failed. Error = {}", e);
221 |                     }
222 |                 };
223 |                 info!("Transfer: get new hash ring info");
224 | 
225 |                 for (address, _weight) in all_servers_address.iter() {
226 |                     // Evaluated in its own statement so the read guard is released
227 |                     // before the connection attempt is awaited.
228 |                     let already_known = client
229 |                         .hash_ring()
230 |                         .read()
231 |                         .as_ref()
232 |                         .unwrap()
233 |                         .contains(address);
234 |                     if already_known {
235 |                         continue;
236 |                     }
237 |                     if let Err(e) = client.add_connection(address).await {
238 |                         // TODO: we should rollback the transfer process
239 |                         panic!("Add Connection Failed. Error = {}", e);
240 |                     }
241 |                 }
242 |                 client
243 |                     .new_hash_ring()
244 |                     .write()
245 |                     .replace(HashRing::new(all_servers_address));
246 |                 info!("Transfer: sync new hash ring finished");
247 | 
248 |                 await_transition(
249 |                     &*client,
250 |                     ClusterStatus::SyncNewHashRing,
251 |                     ClusterStatus::PreTransfer,
252 |                 )
253 |                 .await;
254 |                 await_transition(
255 |                     &*client,
256 |                     ClusterStatus::PreTransfer,
257 |                     ClusterStatus::Transferring,
258 |                 )
259 |                 .await;
260 |                 await_transition(
261 |                     &*client,
262 |                     ClusterStatus::Transferring,
263 |                     ClusterStatus::PreFinish,
264 |                 )
265 |                 .await;
266 | 
267 |                 // PreFinish: promote the new ring to be the current one.
268 |                 let promoted = client.new_hash_ring().read().as_ref().unwrap().clone();
269 |                 let _old_hash_ring = client.hash_ring().write().replace(promoted);
270 | 
271 |                 await_transition(
272 |                     &*client,
273 |                     ClusterStatus::PreFinish,
274 |                     ClusterStatus::Finishing,
275 |                 )
276 |                 .await;
277 | 
278 |                 let _ = client.new_hash_ring().write().take();
279 |                 // Connections to old servers are not closed here; for now the
280 |                 // remote servers are left to close them.
281 | 
282 |                 await_transition(&*client, ClusterStatus::Finishing, ClusterStatus::Idle).await;
283 | 
284 |                 info!("transferring data finished");
285 |             }
286 |             ClusterStatus::Idle | ClusterStatus::Initializing | ClusterStatus::NodesStarting => {
287 |                 sleep(Duration::from_secs(1)).await;
288 |             }
289 |             e => {
290 |                 panic!("cluster status error: {:?}", e as u32);
291 |             }
292 |         }
293 |     }
294 | }
320 | 
321 | /// Connects `client` to the manager at `manager_address`, then spawns the
322 | /// two background tasks `sync_cluster_infos` and `client_watch_status`.
323 | ///
324 | /// # Panics
325 | /// Panics if the connection to the manager cannot be established.
326 | pub async fn init_network_connections<I>(manager_address: String, client: Arc<I>)
327 | where
328 |     I: ClientStatusMonitor + Sync + Send + 'static,
329 | {
330 |     client
331 |         .connect_to_manager(&manager_address)
332 |         .await
333 |         .unwrap_or_else(|e| panic!("connect to manager failed, err = {}", status_to_string(e)));
334 |     let status_poller = Arc::clone(&client);
335 |     tokio::spawn(sync_cluster_infos(status_poller));
336 |     tokio::spawn(client_watch_status(client));
337 | }
333 | 


--------------------------------------------------------------------------------
/src/common/mod.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | pub mod byte;
 6 | pub mod cache;
 7 | pub mod errors;
 8 | pub mod hash_ring;
 9 | pub mod info_syncer;
10 | pub mod sender;
11 | pub mod serialization;
12 | pub mod util;
13 | 


--------------------------------------------------------------------------------
/src/common/util.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | use std::time::SystemTime;
 6 | 
 7 | use fuser::{FileAttr, FileType};
 8 | use log::error;
 9 | 
10 | /// Joins `parent` and `name` with a single `/`; a `parent` of `/` yields
11 | /// `/name` rather than `//name`.
12 | pub fn get_full_path(parent: &str, name: &str) -> String {
13 |     match parent {
14 |         "/" => format!("/{}", name),
15 |         _ => format!("{}/{}", parent, name),
16 |     }
17 | }
17 | 
18 | /// Splits `path` into `(parent, name)` at its last `/`.
19 | ///
20 | /// `path` must be non-empty, must contain a `/` and must not end with one;
21 | /// the root `/` itself is rejected as well. Every rejected input is logged
22 | /// and reported as `Err(libc::EINVAL)`. An entry directly under the root
23 | /// gets `/` as its parent.
24 | pub fn path_split(path: &str) -> Result<(String, String), i32> {
25 |     if path.is_empty() {
26 |         error!("path is empty");
27 |         return Err(libc::EINVAL);
28 |     }
29 |     if path == "/" {
30 |         error!("path is root");
31 |         return Err(libc::EINVAL);
32 |     }
33 |     if path.ends_with('/') {
34 |         error!("path ends with /");
35 |         return Err(libc::EINVAL);
36 |     }
37 |     match path.rsplit_once('/') {
38 |         None => {
39 |             error!("path does not contain /");
40 |             Err(libc::EINVAL)
41 |         }
42 |         // The only slash is the leading one: the parent is the root itself.
43 |         Some(("", name)) => Ok(("/".into(), name.into())),
44 |         Some((parent, name)) => Ok((parent.into(), name.into())),
45 |     }
46 | }
44 | 
45 | /// Builds a `FileAttr` of the given `kind` and `size` whose other numeric
46 | /// fields are all zero, stamping the four timestamps with one shared
47 | /// reading of the clock so they are identical.
48 | fn placeholder_attr(kind: FileType, size: u64) -> FileAttr {
49 |     let now = SystemTime::now();
50 |     FileAttr {
51 |         ino: 0,
52 |         size,
53 |         blocks: 0,
54 |         atime: now,
55 |         mtime: now,
56 |         ctime: now,
57 |         crtime: now,
58 |         kind,
59 |         perm: 0,
60 |         nlink: 0,
61 |         uid: 0,
62 |         gid: 0,
63 |         rdev: 0,
64 |         flags: 0,
65 |         blksize: 0,
66 |     }
67 | }
68 | 
69 | /// Placeholder attributes for a regular file: size 0, timestamps set to the
70 | /// time of the call, every other numeric field zero.
71 | pub fn empty_file() -> FileAttr {
72 |     placeholder_attr(FileType::RegularFile, 0)
73 | }
74 | 
75 | /// Placeholder attributes for a directory: like `empty_file`, but with kind
76 | /// `Directory` and `size` set to 4096.
77 | pub fn empty_dir() -> FileAttr {
78 |     placeholder_attr(FileType::Directory, 4096)
79 | }
84 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | pub mod client;
 6 | pub mod common;
 7 | pub mod manager;
 8 | pub mod rpc;
 9 | pub mod server;
10 | 


--------------------------------------------------------------------------------
/src/manager/core.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 labring. All rights reserved.
  2 | //
  3 | // SPDX-License-Identifier: Apache-2.0
  4 | 
  5 | use std::sync::atomic::AtomicBool;
  6 | use std::sync::{Arc, Mutex, RwLock};
  7 | 
  8 | use ahash::{HashMap, HashMapExt};
  9 | use anyhow::Error;
 10 | use dashmap::DashMap;
 11 | use log::{debug, info};
 12 | 
 13 | use crate::common::hash_ring::{HashRing, ServerNode};
 14 | use crate::common::serialization::{ClusterStatus, ServerStatus, ServerType};
 15 | pub struct Manager { // state held by the cluster manager
 16 |     pub hashring: Arc<RwLock<Option<HashRing>>>, // ring currently in effect; built in `Manager::new`
 17 |     pub new_hashring: Arc<RwLock<Option<HashRing>>>, // ring proposed by `add_nodes`/`delete_nodes`; starts as `None`
 18 |     pub servers: Arc<Mutex<HashMap<String, Server>>>, // per-server record, keyed by server address
 19 |     pub cluster_status: Arc<Mutex<ClusterStatus>>, // starts as `ClusterStatus::Initializing`
 20 |     pub closed: AtomicBool, // starts `false`; `update_server_status` leaves its loop once it reads `true`
 21 |     _clients: DashMap<String, String>, // created empty in `new`; not read anywhere in this file
 22 | }
 23 | 
 24 | pub struct Server { // manager-side record for one server
 25 |     pub status: ServerStatus, // `Initializing` on registration; updated by `Manager::set_server_status`
 26 |     r#_type: ServerType, // always `ServerType::Running` where this file constructs it
 27 |     _replicas: usize, // the weight the server was registered with
 28 | }
 29 | 
 30 | impl Manager {
 31 |     /// Creates a manager for the initial `servers`, given as `(address, weight)` pairs.
 32 |     ///
 33 |     /// The hash ring is built from the same list, every server is registered
 34 |     /// as `ServerStatus::Initializing`, and the cluster starts out
 35 |     /// `ClusterStatus::Initializing` with no new hash ring.
 36 |     pub fn new(servers: Vec<(String, usize)>) -> Self {
 37 |         let hashring = HashRing::new(servers.clone());
 38 |         // Fill the registry before it is shared, so no lock is taken per entry.
 39 |         let mut registry = HashMap::new();
 40 |         for (address, weight) in servers {
 41 |             registry.insert(
 42 |                 address,
 43 |                 Server {
 44 |                     status: ServerStatus::Initializing,
 45 |                     r#_type: ServerType::Running,
 46 |                     _replicas: weight,
 47 |                 },
 48 |             );
 49 |         }
 50 | 
 51 |         Manager {
 52 |             hashring: Arc::new(RwLock::new(Some(hashring))),
 53 |             new_hashring: Arc::new(RwLock::new(None)),
 54 |             servers: Arc::new(Mutex::new(registry)),
 55 |             cluster_status: Arc::new(Mutex::new(ClusterStatus::Initializing)),
 56 |             closed: AtomicBool::new(false),
 57 |             _clients: DashMap::new(),
 58 |         }
 59 |     }
 60 | 
 61 |     /// Returns a copy of the current cluster status (and logs it at debug level).
 62 |     pub fn get_cluster_status(&self) -> ClusterStatus {
 63 |         let status = *self.cluster_status.lock().unwrap();
 64 |         debug!("get_cluster_status: {:?}", status);
 65 |         status
 66 |     }
 67 | 
 68 |     /// Returns the `(address, weight)` entries of the current hash ring.
 69 |     ///
 70 |     /// # Panics
 71 |     /// Panics if the lock is poisoned or the ring is `None`.
 72 |     pub fn get_hash_ring_info(&self) -> Vec<(String, usize)> {
 73 |         self.hashring
 74 |             .read()
 75 |             .unwrap()
 76 |             .as_ref()
 77 |             .unwrap()
 78 |             .servers
 79 |             .iter()
 80 |             .map(|(k, v)| (k.clone(), *v))
 81 |             .collect()
 82 |     }
 83 | 
 84 |     /// Returns the `(address, weight)` entries of the new hash ring, or an
 85 |     /// error when no new ring is set.
 86 |     pub fn get_new_hash_ring_info(&self) -> Result<Vec<(String, usize)>, Error> {
 87 |         match self.new_hashring.read().unwrap().as_ref() {
 88 |             Some(new_hashring) => Ok(new_hashring
 89 |                 .servers
 90 |                 .iter()
 91 |                 .map(|(k, v)| (k.clone(), *v))
 92 |                 .collect()),
 93 |             None => Err(anyhow::anyhow!("new hashring is none")),
 94 |         }
 95 |     }
 96 | 
 97 |     /// Starts adding `nodes` (`(address, weight)` pairs) to the cluster.
 98 |     ///
 99 |     /// Only allowed while the cluster is `Idle`. The new ring is a copy of the
100 |     /// current ring plus `nodes`; each node is registered as `Initializing`
101 |     /// and the cluster moves to `NodesStarting`.
102 |     ///
103 |     /// Returns `None` on success and `Some(error)` when the request is rejected.
104 |     pub fn add_nodes(&self, nodes: Vec<(String, usize)>) -> Option<Error> {
105 |         info!("add_nodes: {:?}", nodes);
106 |         let mut cluster_status = self.cluster_status.lock().unwrap();
107 |         if *cluster_status != ClusterStatus::Idle {
108 |             return Some(anyhow::anyhow!("cluster is not idle"));
109 |         }
110 |         let mut new_hashring = self.hashring.read().unwrap().clone().unwrap();
111 |         let mut servers = self.servers.lock().unwrap();
112 |         for (node, weight) in nodes {
113 |             new_hashring.add(
114 |                 ServerNode {
115 |                     address: node.clone(),
116 |                 },
117 |                 weight,
118 |             );
119 |             servers.insert(
120 |                 node,
121 |                 Server {
122 |                     status: ServerStatus::Initializing,
123 |                     r#_type: ServerType::Running,
124 |                     _replicas: weight,
125 |                 },
126 |             );
127 |         }
128 | 
129 |         self.new_hashring.write().unwrap().replace(new_hashring);
130 |         *cluster_status = ClusterStatus::NodesStarting;
131 | 
132 |         None
133 |     }
134 | 
135 |     /// Starts removing `nodes` (server addresses) from the cluster.
136 |     ///
137 |     /// Only allowed while the cluster is `Idle`. The new ring is a copy of the
138 |     /// current ring without any of the listed nodes, and the cluster moves to
139 |     /// `NodesStarting`. An empty `nodes` list is rejected.
140 |     ///
141 |     /// Returns `None` on success and `Some(error)` when the request is rejected.
142 |     pub fn delete_nodes(&self, nodes: Vec<String>) -> Option<Error> {
143 |         let mut cluster_status = self.cluster_status.lock().unwrap();
144 |         if *cluster_status != ClusterStatus::Idle {
145 |             return Some(anyhow::anyhow!("cluster is not idle"));
146 |         }
147 |         if nodes.is_empty() {
148 |             return Some(anyhow::anyhow!("no nodes to delete"));
149 |         }
150 |         let mut new_hashring = self.hashring.read().unwrap().clone().unwrap();
151 |         // Remove every listed node, not just the first one.
152 |         for address in nodes {
153 |             new_hashring.remove(&ServerNode { address });
154 |         }
155 | 
156 |         self.new_hashring.write().unwrap().replace(new_hashring);
157 | 
158 |         *cluster_status = ClusterStatus::NodesStarting;
159 |         None
160 |     }
161 | 
162 |     /// Records that server `server_id` reports `status`, if that step is legal.
163 |     ///
164 |     /// A step is legal when the cluster is in the status that precedes it and
165 |     /// the server itself is in the preceding server status:
166 |     ///
167 |     /// | requested      | cluster must be                             | server must be         |
168 |     /// |----------------|---------------------------------------------|------------------------|
169 |     /// | `PreTransfer`  | `SyncNewHashRing`                           | `Finished`             |
170 |     /// | `Transferring` | `PreTransfer`                               | `PreTransfer`          |
171 |     /// | `PreFinish`    | `Transferring`                              | `Transferring`         |
172 |     /// | `Finishing`    | `PreFinish`                                 | `PreFinish`            |
173 |     /// | `Finished`     | `Finishing`                                 | `Finishing`            |
174 |     /// | `Finished`     | `Initializing` or `NodesStarting`           | `Initializing`         |
175 |     ///
176 |     /// In `NodesStarting` the server must additionally be part of the new hash ring.
177 |     ///
178 |     /// Returns `None` on success and `Some(error)` otherwise. Requesting
179 |     /// `Initializing` or naming an unregistered server is reported as an
180 |     /// error rather than a panic, so a bad request cannot poison the locks.
181 |     pub fn set_server_status(&self, server_id: String, status: ServerStatus) -> Option<Error> {
182 |         // debug : logs all server_name in self.servers
183 |         debug!(
184 |             "set_server_status: {:?}",
185 |             self.servers
186 |                 .lock()
187 |                 .unwrap()
188 |                 .iter()
189 |                 .map(|kv| kv.0.clone())
190 |                 .collect::<Vec<String>>()
191 |         );
192 | 
193 |         info!("set server status: {} {:?}", server_id, status);
194 | 
195 |         // Lock order is cluster status first, then servers; the cluster lock is
196 |         // held until the server record has been updated.
197 |         let cluster_status = self.cluster_status.lock().unwrap();
198 | 
199 |         // For the requested status: the verb used in error messages, the cluster
200 |         // status (with its display name) the step requires, and the server
201 |         // status (with its display name) the server must currently have.
202 |         let (action, cluster_rule, expected_server, expected_name): (
203 |             &str,
204 |             Option<(ClusterStatus, &str)>,
205 |             ServerStatus,
206 |             &str,
207 |         ) = match status {
208 |             ServerStatus::Initializing => {
209 |                 return Some(anyhow::anyhow!("cannot set server status to init"));
210 |             }
211 |             ServerStatus::PreTransfer => (
212 |                 "pretransfer",
213 |                 Some((ClusterStatus::SyncNewHashRing, "SyncNewHashRing")),
214 |                 ServerStatus::Finished,
215 |                 "finish",
216 |             ),
217 |             ServerStatus::Transferring => (
218 |                 "transfer",
219 |                 Some((ClusterStatus::PreTransfer, "PreTransfer")),
220 |                 ServerStatus::PreTransfer,
221 |                 "pretransfer",
222 |             ),
223 |             ServerStatus::PreFinish => (
224 |                 "prefinish",
225 |                 Some((ClusterStatus::Transferring, "Transferring")),
226 |                 ServerStatus::Transferring,
227 |                 "transferring",
228 |             ),
229 |             ServerStatus::Finishing => (
230 |                 "finish",
231 |                 Some((ClusterStatus::PreFinish, "PreFinish")),
232 |                 ServerStatus::PreFinish,
233 |                 "prefinish",
234 |             ),
235 |             // `Finished` is accepted in three cluster states, each with its own
236 |             // expectation about the server, so the cluster check happens here.
237 |             ServerStatus::Finished => match *cluster_status {
238 |                 ClusterStatus::Finishing => ("finish", None, ServerStatus::Finishing, "Finishing"),
239 |                 ClusterStatus::Initializing => {
240 |                     ("finish", None, ServerStatus::Initializing, "Initializing")
241 |                 }
242 |                 ClusterStatus::NodesStarting => {
243 |                     let in_new_ring = self
244 |                         .new_hashring
245 |                         .read()
246 |                         .unwrap()
247 |                         .as_ref()
248 |                         .map_or(false, |ring| ring.contains(&server_id));
249 |                     if !in_new_ring {
250 |                         return Some(anyhow::anyhow!(
251 |                             "cannot finish for server: {}, server is not in new_hashring",
252 |                             server_id
253 |                         ));
254 |                     }
255 |                     ("finish", None, ServerStatus::Initializing, "Initializing")
256 |                 }
257 |                 _ => {
258 |                     return Some(anyhow::anyhow!(
259 |                         "cannot finish for server: {}, cluster is not Finishing, Init or AddNodes: status: {:?}",
260 |                         server_id,
261 |                         *cluster_status
262 |                     ));
263 |                 }
264 |             },
265 |         };
266 | 
267 |         if let Some((required, required_name)) = cluster_rule {
268 |             if *cluster_status != required {
269 |                 return Some(anyhow::anyhow!(
270 |                     "cannot {} for server: {}, cluster is not {}: status: {:?}",
271 |                     action,
272 |                     server_id,
273 |                     required_name,
274 |                     *cluster_status
275 |                 ));
276 |             }
277 |         }
278 | 
279 |         let mut servers = self.servers.lock().unwrap();
280 |         let server = match servers.get_mut(&server_id) {
281 |             Some(server) => server,
282 |             None => {
283 |                 return Some(anyhow::anyhow!(
284 |                     "cannot {} for server: {}, server is not registered",
285 |                     action,
286 |                     server_id
287 |                 ));
288 |             }
289 |         };
290 |         if server.status != expected_server {
291 |             return Some(anyhow::anyhow!(
292 |                 "cannot {} for server: {}, server is not {}: status: {:?}",
293 |                 action,
294 |                 server_id,
295 |                 expected_name,
296 |                 server.status
297 |             ));
298 |         }
299 |         // Every legal step stores exactly the status that was requested.
300 |         server.status = status;
301 |         None
302 |     }
303 | }
281 | 


--------------------------------------------------------------------------------
/src/manager/manager_service.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 labring. All rights reserved.
  2 | //
  3 | // SPDX-License-Identifier: Apache-2.0
  4 | 
  5 | use std::{sync::Arc, time::Duration};
  6 | 
  7 | use crate::{
  8 |     common::serialization::{
  9 |         AddNodesSendMetaData, ClusterStatus, DeleteNodesSendMetaData, GetClusterStatusRecvMetaData,
 10 |         GetHashRingInfoRecvMetaData, ManagerOperationType, ServerStatus,
 11 |     },
 12 |     rpc::server::Handler,
 13 | };
 14 | 
 15 | use super::core::Manager;
 16 | 
 17 | use async_trait::async_trait;
 18 | use log::{debug, error, info};
 19 | use serde::{Deserialize, Serialize};
 20 | 
 21 | pub struct ManagerService { // service wrapper around the shared `Manager`
 22 |     pub manager: Arc<Manager>, // manager state, shared through an `Arc`
 23 | }
 24 | 
 25 | #[derive(Serialize, Deserialize)]
 26 | pub struct SendHeartRequest { // serde-serializable request; presumably a heartbeat message - TODO confirm
 27 |     pub address: String, // NOTE(review): presumably the sender's address - confirm against the caller
 28 |     pub flags: u32, // NOTE(review): flag meanings are not defined in this part of the file
 29 |     pub lifetime: String, // NOTE(review): format and unit are not shown here - confirm
 30 | }
 31 | 
 32 | #[derive(Serialize, Deserialize)]
 33 | pub struct MetadataRequest { // serde-serializable request carrying only a flags word
 34 |     pub flags: u32, // NOTE(review): flag meanings are not defined in this part of the file
 35 | }
 36 | 
 37 | #[derive(Default, Serialize, Deserialize)]
 38 | pub struct MetadataResponse { // serde-serializable response; `Default` gives an empty `instances` list
 39 |     pub instances: Vec<String>, // NOTE(review): presumably instance addresses - confirm against the producer
 40 | }
 41 | 
 42 | /// Returns `true` when every entry in `manager.servers` reports `expected`.
 43 | ///
 44 | /// `Iterator::all` is vacuously `true` on an empty map, so a manager without
 45 | /// any registered server counts as "all ready".
 46 | fn all_servers_in(manager: &Manager, expected: &ServerStatus) -> bool {
 47 |     manager
 48 |         .servers
 49 |         .lock()
 50 |         .unwrap()
 51 |         .iter()
 52 |         .all(|kv| kv.1.status == *expected)
 53 | }
 54 | 
 55 | /// Background loop that advances the cluster status once per second.
 56 | ///
 57 | /// On every tick the current `manager.cluster_status` selects the server status
 58 | /// the loop waits for and the cluster status that follows:
 59 | ///
 60 | /// * `NodesStarting`: all servers `Finished` -> `SyncNewHashRing`
 61 | /// * `SyncNewHashRing`: all servers `PreTransfer` -> `PreTransfer`
 62 | /// * `PreTransfer`: all servers `Transferring` -> `Transferring`
 63 | /// * `Transferring`: all servers `PreFinish` -> `PreFinish`
 64 | /// * `PreFinish`: all servers `Finishing` -> `Finishing`; `hashring` is replaced
 65 | ///   by a clone of `new_hashring`
 66 | /// * `Finishing`: all servers `Finished` -> `Idle`; servers missing from
 67 | ///   `new_hashring` are dropped and `new_hashring` is cleared
 68 | /// * `Initializing`: all servers `Finished` -> `Idle`
 69 | /// * `Idle`: nothing to do
 70 | ///
 71 | /// Once the awaited status is reached the loop sleeps one more second before
 72 | /// applying the transition. It returns when `manager.closed` is set.
 73 | ///
 74 | /// # Panics
 75 | ///
 76 | /// Panics on a cluster status not listed above, or when one of the manager's
 77 | /// locks is poisoned.
 78 | pub async fn update_server_status(manager: Arc<Manager>) {
 79 |     loop {
 80 |         tokio::time::sleep(Duration::from_secs(1)).await;
 81 |         if manager.closed.load(std::sync::atomic::Ordering::Relaxed) {
 82 |             break;
 83 |         }
 84 |         let status = *manager.cluster_status.lock().unwrap();
 85 |         debug!("current cluster status is {:?}", status);
 86 | 
 87 |         // (server status this phase waits for, cluster status that follows)
 88 |         let (awaited, next) = match status {
 89 |             ClusterStatus::Idle => continue,
 90 |             ClusterStatus::NodesStarting => {
 91 |                 (ServerStatus::Finished, ClusterStatus::SyncNewHashRing)
 92 |             }
 93 |             ClusterStatus::SyncNewHashRing => {
 94 |                 (ServerStatus::PreTransfer, ClusterStatus::PreTransfer)
 95 |             }
 96 |             ClusterStatus::PreTransfer => {
 97 |                 (ServerStatus::Transferring, ClusterStatus::Transferring)
 98 |             }
 99 |             ClusterStatus::Transferring => (ServerStatus::PreFinish, ClusterStatus::PreFinish),
100 |             ClusterStatus::PreFinish => (ServerStatus::Finishing, ClusterStatus::Finishing),
101 |             ClusterStatus::Finishing | ClusterStatus::Initializing => {
102 |                 (ServerStatus::Finished, ClusterStatus::Idle)
103 |             }
104 |             s => panic!("update server status failed, invalid cluster status: {}", s),
105 |         };
106 |         if !all_servers_in(&manager, &awaited) {
107 |             continue;
108 |         }
109 |         tokio::time::sleep(Duration::from_secs(1)).await;
110 | 
111 |         // Side effects tied to specific transitions.
112 |         match status {
113 |             ClusterStatus::PreFinish => {
114 |                 // Clone the pending ring *before* taking the write lock on
115 |                 // `hashring`: a missing ring must not unwind while that guard
116 |                 // is held, which would poison the lock for every reader.
117 |                 let pending = manager.new_hashring.read().unwrap().clone();
118 |                 match pending {
119 |                     Some(ring) => {
120 |                         let _ = manager.hashring.write().unwrap().replace(ring);
121 |                     }
122 |                     None => {
123 |                         error!(
124 |                             "new hash ring is not set, cannot change the cluster status to {:?}",
125 |                             next
126 |                         );
127 |                         continue;
128 |                     }
129 |                 }
130 |             }
131 |             ClusterStatus::Finishing => {
132 |                 // `hashring` was already replaced during PreFinish; here the
133 |                 // pending ring is only used to prune servers and then discarded.
134 |                 let mut new_hashring = manager.new_hashring.write().unwrap();
135 |                 match new_hashring.take() {
136 |                     Some(ring) => {
137 |                         manager
138 |                             .servers
139 |                             .lock()
140 |                             .unwrap()
141 |                             .retain(|k, _| ring.contains(k));
142 |                     }
143 |                     None => {
144 |                         error!(
145 |                             "new hash ring is not set, cannot change the cluster status to {:?}",
146 |                             next
147 |                         );
148 |                         continue;
149 |                     }
150 |                 }
151 |             }
152 |             _ => {}
153 |         }
154 | 
155 |         *manager.cluster_status.lock().unwrap() = next;
156 |         info!(
157 |             "all servers is ready, change the cluster status to {:?}",
158 |             next
159 |         );
160 |     }
161 | }
167 | 
168 | impl ManagerService {
169 |     /// Builds a service around a fresh `Manager` created from `servers`.
170 |     pub fn new(servers: Vec<(String, usize)>) -> Self {
171 |         Self {
172 |             manager: Arc::new(Manager::new(servers)),
173 |         }
174 |     }
175 | }
174 | 
175 | /// Reply tuple returned by `dispatch`. The first element is the status code,
176 | /// the third is the metadata length and the fifth the metadata bytes.
177 | /// NOTE(review): the remaining slots look like (flags, data length, data) -
178 | /// confirm against `Handler`.
179 | type DispatchReply = (i32, u32, usize, usize, Vec<u8>, Vec<u8>);
180 | 
181 | /// Builds a reply that carries only a status code and no payload.
182 | fn status_reply(status: i32) -> DispatchReply {
183 |     (status, 0, 0, 0, Vec::new(), Vec::new())
184 | }
185 | 
186 | /// Builds a status-0 reply whose only payload is `meta_data`.
187 | fn meta_data_reply(meta_data: Vec<u8>) -> DispatchReply {
188 |     (0, 0, meta_data.len(), 0, meta_data, Vec::new())
189 | }
190 | 
191 | /// Maps a manager call's outcome (`None` = success) to a reply, logging the
192 | /// error and answering `EIO` on failure.
193 | fn outcome_reply<E: std::fmt::Display>(what: &str, outcome: Option<E>) -> DispatchReply {
194 |     match outcome {
195 |         None => status_reply(0),
196 |         Some(e) => {
197 |             error!("{} error: {}", what, e);
198 |             status_reply(libc::EIO)
199 |         }
200 |     }
201 | }
202 | 
203 | #[async_trait]
204 | impl Handler for ManagerService {
205 |     /// Decodes one manager request and forwards it to `self.manager`.
206 |     ///
207 |     /// `operation_type` selects the `ManagerOperationType`; `metadata` holds
208 |     /// the bincode-encoded arguments and `path` the server id (UTF-8) for
209 |     /// `UpdateServerStatus`. `_flags` and `_data` are ignored.
210 |     ///
211 |     /// Request contents come from the connection, so decoding failures are
212 |     /// reported in the reply status instead of panicking the handler:
213 |     /// `EINVAL` for an unknown operation code or malformed arguments,
214 |     /// `ENOSYS` for an operation this service does not implement, `ENOENT`
215 |     /// when no new hash ring is available and `EIO` when the manager rejects
216 |     /// the request.
217 |     ///
218 |     /// # Errors
219 |     ///
220 |     /// Returns `Err` only when encoding a response with bincode fails.
221 |     async fn dispatch(
222 |         &self,
223 |         id: u32,
224 |         operation_type: u32,
225 |         _flags: u32,
226 |         path: Vec<u8>,
227 |         _data: Vec<u8>,
228 |         metadata: Vec<u8>,
229 |     ) -> anyhow::Result<(i32, u32, usize, usize, Vec<u8>, Vec<u8>)> {
230 |         let operation = match ManagerOperationType::try_from(operation_type) {
231 |             Ok(operation) => operation,
232 |             Err(_) => {
233 |                 error!(
234 |                     "connection {} sent unknown operation type: {}",
235 |                     id, operation_type
236 |                 );
237 |                 return Ok(status_reply(libc::EINVAL));
238 |             }
239 |         };
240 |         match operation {
241 |             ManagerOperationType::GetClusterStatus => {
242 |                 let status = self.manager.get_cluster_status();
243 |                 let response_meta_data =
244 |                     bincode::serialize(&GetClusterStatusRecvMetaData { status })?;
245 | 
246 |                 debug!("connection {} get cluster status: {:?}", id, status);
247 | 
248 |                 Ok(meta_data_reply(response_meta_data))
249 |             }
250 |             ManagerOperationType::GetHashRing => {
251 |                 let hash_ring_info = self.manager.get_hash_ring_info();
252 | 
253 |                 info!("connection {} get hash ring: {:?}", id, hash_ring_info);
254 | 
255 |                 let response_meta_data =
256 |                     bincode::serialize(&GetHashRingInfoRecvMetaData { hash_ring_info })?;
257 |                 Ok(meta_data_reply(response_meta_data))
258 |             }
259 |             ManagerOperationType::GetNewHashRing => match self.manager.get_new_hash_ring_info() {
260 |                 Ok(hash_ring_info) => {
261 |                     info!("connection {} get new hash ring: {:?}", id, hash_ring_info);
262 |                     let response_meta_data =
263 |                         bincode::serialize(&GetHashRingInfoRecvMetaData { hash_ring_info })?;
264 |                     Ok(meta_data_reply(response_meta_data))
265 |                 }
266 |                 Err(e) => {
267 |                     error!("get new hash ring error: {}", e);
268 |                     Ok(status_reply(libc::ENOENT))
269 |                 }
270 |             },
271 |             ManagerOperationType::AddNodes => {
272 |                 let new_servers_info =
273 |                     match bincode::deserialize::<AddNodesSendMetaData>(&metadata) {
274 |                         Ok(request) => request.new_servers_info,
275 |                         Err(e) => {
276 |                             error!("connection {} sent malformed add nodes request: {}", id, e);
277 |                             return Ok(status_reply(libc::EINVAL));
278 |                         }
279 |                     };
280 |                 info!("connection {} add nodes: {:?}", id, new_servers_info);
281 |                 Ok(outcome_reply(
282 |                     "add nodes",
283 |                     self.manager.add_nodes(new_servers_info),
284 |                 ))
285 |             }
286 |             ManagerOperationType::RemoveNodes => {
287 |                 let deleted_servers_info =
288 |                     match bincode::deserialize::<DeleteNodesSendMetaData>(&metadata) {
289 |                         Ok(request) => request.deleted_servers_info,
290 |                         Err(e) => {
291 |                             error!(
292 |                                 "connection {} sent malformed remove nodes request: {}",
293 |                                 id, e
294 |                             );
295 |                             return Ok(status_reply(libc::EINVAL));
296 |                         }
297 |                     };
298 |                 info!("connection {} remove nodes: {:?}", id, deleted_servers_info);
299 |                 Ok(outcome_reply(
300 |                     "remove nodes",
301 |                     self.manager.delete_nodes(deleted_servers_info),
302 |                 ))
303 |             }
304 |             ManagerOperationType::UpdateServerStatus => {
305 |                 info!("connection {} update server status", id);
306 |                 let server_id = match String::from_utf8(path) {
307 |                     Ok(server_id) => server_id,
308 |                     Err(e) => {
309 |                         error!("connection {} sent a non UTF-8 server id: {}", id, e);
310 |                         return Ok(status_reply(libc::EINVAL));
311 |                     }
312 |                 };
313 |                 // The target type is inferred from `set_server_status`'s parameter.
314 |                 let server_status = match bincode::deserialize(&metadata) {
315 |                     Ok(server_status) => server_status,
316 |                     Err(e) => {
317 |                         error!("connection {} sent malformed server status: {}", id, e);
318 |                         return Ok(status_reply(libc::EINVAL));
319 |                     }
320 |                 };
321 |                 Ok(outcome_reply(
322 |                     "update server status",
323 |                     self.manager.set_server_status(server_id, server_status),
324 |                 ))
325 |             }
326 |             _ => {
327 |                 error!(
328 |                     "connection {} requested unsupported operation type: {}",
329 |                     id, operation_type
330 |                 );
331 |                 Ok(status_reply(libc::ENOSYS))
332 |             }
333 |         }
334 |     }
335 | }
284 | 


--------------------------------------------------------------------------------
/src/manager/mod.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2022 labring. All rights reserved.
2 | //
3 | // SPDX-License-Identifier: Apache-2.0
4 | 
5 | pub mod core;
6 | pub mod manager_service;
7 | 


--------------------------------------------------------------------------------
/src/rpc/client.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 labring. All rights reserved.
  2 | //
  3 | // SPDX-License-Identifier: Apache-2.0
  4 | 
  5 | use super::{
  6 |     callback::CallbackPool,
  7 |     connection::ClientConnection,
  8 |     protocol::{CONNECTION_RETRY_TIMES, SEND_RETRY_TIMES},
  9 | };
 10 | use async_trait::async_trait;
 11 | use dashmap::DashMap;
 12 | use log::{error, info, warn};
 13 | use std::{marker::PhantomData, sync::Arc, time::Duration};
 14 | use tokio::io::{AsyncReadExt, AsyncWriteExt};
 15 | 
 16 | /// Factory for the two halves of a transport stream.
 17 | ///
 18 | /// `R` is the reader half and `W` the writer half returned by
 19 | /// `create_stream`. The function takes no `self`, so implementors act as
 20 | /// type-level markers.
 21 | #[async_trait]
 22 | pub trait StreamCreator<R, W>
 23 | where
 24 |     R: AsyncReadExt + Unpin + Send + Sync + 'static,
 25 |     W: AsyncWriteExt + Unpin + Send + Sync + 'static,
 26 | {
 27 |     /// Opens a stream to `server_address` and returns its `(reader, writer)`
 28 |     /// halves, or a human-readable error message.
 29 |     async fn create_stream(server_address: &str) -> Result<(R, W), String>;
 30 | }
 24 | 
 25 | /// `StreamCreator` that connects with `tokio::net::TcpStream`.
 26 | pub struct TcpStreamCreator;
 27 | 
 28 | #[async_trait]
 29 | impl StreamCreator<tokio::net::tcp::OwnedReadHalf, tokio::net::tcp::OwnedWriteHalf>
 30 |     for TcpStreamCreator
 31 | {
 32 |     /// Connects to `server_address` over TCP and splits the stream into its
 33 |     /// owned read and write halves. A failed connect is reported as
 34 |     /// `"connect to <address> error: <cause>"`.
 35 |     async fn create_stream(
 36 |         server_address: &str,
 37 |     ) -> Result<
 38 |         (
 39 |             tokio::net::tcp::OwnedReadHalf,
 40 |             tokio::net::tcp::OwnedWriteHalf,
 41 |         ),
 42 |         String,
 43 |     > {
 44 |         tokio::net::TcpStream::connect(server_address)
 45 |             .await
 46 |             .map(tokio::net::TcpStream::into_split)
 47 |             .map_err(|e| format!("connect to {} error: {}", server_address, e))
 48 |     }
 49 | }
 49 | 
 50 | /// `StreamCreator` that connects with `tokio::net::UnixStream`.
 51 | pub struct UnixStreamCreator;
 52 | 
 53 | #[async_trait]
 54 | impl StreamCreator<tokio::net::unix::OwnedReadHalf, tokio::net::unix::OwnedWriteHalf>
 55 |     for UnixStreamCreator
 56 | {
 57 |     /// Connects to the Unix socket at `server_address` and splits the stream
 58 |     /// into its owned read and write halves. A failed connect is reported as
 59 |     /// `"connect to <address> error: <cause>"`.
 60 |     async fn create_stream(
 61 |         server_address: &str,
 62 |     ) -> Result<
 63 |         (
 64 |             tokio::net::unix::OwnedReadHalf,
 65 |             tokio::net::unix::OwnedWriteHalf,
 66 |         ),
 67 |         String,
 68 |     > {
 69 |         tokio::net::UnixStream::connect(server_address)
 70 |             .await
 71 |             .map(tokio::net::UnixStream::into_split)
 72 |             .map_err(|e| format!("connect to {} error: {}", server_address, e))
 73 |     }
 74 | }
 74 | 
 75 | /// RPC client holding one `ClientConnection` per server address and a
 76 | /// callback pool shared by all of them.
 77 | ///
 78 | /// `R` / `W` are the reader / writer halves produced by the stream creator
 79 | /// `S`, which is only used at the type level.
 80 | pub struct RpcClient<R, W, S>
 81 | where
 82 |     R: AsyncReadExt + Unpin + Send + Sync + 'static,
 83 |     W: AsyncWriteExt + Unpin + Send + Sync + 'static,
 84 |     S: StreamCreator<R, W>,
 85 | {
 86 |     // Keyed by the server address passed to `add_connection`.
 87 |     connections: DashMap<String, Arc<ClientConnection<W, R>>>,
 88 |     // Pending-response slots; shared with every `parse_response` task.
 89 |     pool: Arc<CallbackPool>,
 90 |     // `S` has no runtime state; this only ties the type parameter to the struct.
 91 |     stream_creator: PhantomData<S>,
 92 | }
 84 | 
 85 | impl<R, W, S> Default for RpcClient<R, W, S>
 86 | where
 87 |     R: AsyncReadExt + Unpin + Send + Sync + 'static,
 88 |     W: AsyncWriteExt + Unpin + Send + Sync + 'static,
 89 |     S: StreamCreator<R, W>,
 90 | {
 91 |     /// Same as `RpcClient::new`.
 92 |     fn default() -> Self {
 93 |         RpcClient::new()
 94 |     }
 95 | }
 95 | 
 96 | impl<
 97 |         R: AsyncReadExt + Unpin + std::marker::Sync + std::marker::Send + 'static,
 98 |         W: AsyncWriteExt + Unpin + std::marker::Sync + std::marker::Send + 'static,
 99 |         S: StreamCreator<R, W>,
100 |     > RpcClient<R, W, S>
101 | {
102 |     /// Creates a client with no connections and an initialised callback pool.
103 |     pub fn new() -> Self {
104 |         let mut pool = CallbackPool::new();
105 |         pool.init();
106 |         let pool = Arc::new(pool);
107 |         Self {
108 |             connections: DashMap::new(),
109 |             pool,
110 |             stream_creator: PhantomData,
111 |         }
112 |     }
113 | 
114 |     /// Releases the callback pool by calling `CallbackPool::free`.
115 |     pub fn close(&self) {
116 |         self.pool.free();
117 |     }
118 | 
119 |     /// Connects to `server_address` and starts a `parse_response` task for it.
120 |     ///
121 |     /// Returns `Ok(())` without connecting when a connection for the address
122 |     /// is already registered. Otherwise the connect is attempted up to
123 |     /// `CONNECTION_RETRY_TIMES` times, one second apart.
124 |     ///
125 |     /// # Errors
126 |     ///
127 |     /// Returns a message when every attempt failed.
128 |     pub async fn add_connection(&self, server_address: &str) -> Result<(), String> {
129 |         // Check first so that a known address does not open (and immediately
130 |         // drop) an extra socket to the server.
131 |         if self.connections.contains_key(server_address) {
132 |             warn!("connection already exists: {}", server_address);
133 |             return Ok(());
134 |         }
135 |         for _ in 0..CONNECTION_RETRY_TIMES {
136 |             match S::create_stream(server_address).await {
137 |                 Ok((read_stream, write_stream)) => {
138 |                     // Another task may have registered the address while this
139 |                     // one was connecting.
140 |                     if self.connections.contains_key(server_address) {
141 |                         warn!("connection already exists: {}", server_address);
142 |                         return Ok(());
143 |                     }
144 |                     let connection = Arc::new(ClientConnection::new(server_address, write_stream));
145 |                     tokio::spawn(parse_response(
146 |                         read_stream,
147 |                         connection.clone(),
148 |                         self.pool.clone(),
149 |                     ));
150 |                     self.connections
151 |                         .insert(server_address.to_string(), connection);
152 |                     info!("add connection to {} success", server_address);
153 |                     return Ok(());
154 |                 }
155 |                 Err(e) => {
156 |                     warn!(
157 |                         "connect to {} failed: {}, wait for a while",
158 |                         server_address, e
159 |                     );
160 |                     tokio::time::sleep(Duration::from_secs(1)).await;
161 |                 }
162 |             }
163 |         }
164 |         Err(format!(
165 |             "connect to {} error: connection retry times exceed",
166 |             server_address
167 |         ))
168 |     }
169 | 
170 |     /// Re-establishes the stream of an already registered connection.
171 |     ///
172 |     /// Does nothing when the connection is currently marked connected. A
173 |     /// failed connect attempt is logged, followed by a one second pause, and
174 |     /// still yields `Ok(())` so that `call_remote` moves on to its next retry.
175 |     ///
176 |     /// # Errors
177 |     ///
178 |     /// Returns a message when `server_address` has no registered connection.
179 |     async fn reconnect(&self, server_address: &str) -> Result<(), String> {
180 |         info!("reconnect to {}", server_address);
181 |         // Clone the `Arc` out of the map so the DashMap guard is released
182 |         // before the first `.await`; holding it across an await can block
183 |         // writers to the same shard.
184 |         let connection = match self.connections.get(server_address) {
185 |             Some(entry) => Arc::clone(entry.value()),
186 |             None => return Err(format!("connection not exists: {}", server_address)),
187 |         };
188 |         if connection.is_connected() {
189 |             info!("connection already exists: {}", server_address);
190 |             return Ok(());
191 |         }
192 |         match S::create_stream(server_address).await {
193 |             Ok((read_stream, write_stream)) => {
194 |                 // Install the new write half and mark the connection as
195 |                 // connected *before* starting the reader: `parse_response`
196 |                 // leaves its loop as soon as it sees a disconnected state.
197 |                 connection.reset_connection(write_stream).await;
198 |                 tokio::spawn(parse_response(
199 |                     read_stream,
200 |                     connection.clone(),
201 |                     self.pool.clone(),
202 |                 ));
203 |                 info!("reconnect to {} success", server_address);
204 |                 Ok(())
205 |             }
206 |             Err(e) => {
207 |                 warn!(
208 |                     "reconnect to {} failed: {}, wait for a while",
209 |                     server_address, e
210 |                 );
211 |                 tokio::time::sleep(Duration::from_secs(1)).await;
212 |                 Ok(())
213 |             }
214 |         }
215 |     }
216 | 
217 |     /// Drops the registered connection for `server_address`, if any.
218 |     pub fn remove_connection(&self, server_address: &str) {
219 |         self.connections.remove(server_address);
220 |     }
221 | 
222 |     /// Sends one request to `server_address` and waits for its response.
223 |     ///
224 |     /// The request consists of `operation_type`, `req_flags`, `path`,
225 |     /// `send_meta_data` and `send_data`. The response payload is written into
226 |     /// `recv_meta_data` / `recv_data`; `status`, `rsp_flags`,
227 |     /// `recv_meta_data_length` and `recv_data_length` are set only on success.
228 |     /// Each wait for the response is bounded by `timeout`, and the whole
229 |     /// exchange is attempted up to `SEND_RETRY_TIMES` times; a failed send
230 |     /// triggers a reconnect before the next attempt.
231 |     ///
232 |     /// # Errors
233 |     ///
234 |     /// Returns a message when the address has no registered connection, when
235 |     /// registering the callback fails, or when all attempts were used up.
236 |     #[allow(clippy::too_many_arguments)]
237 |     pub async fn call_remote(
238 |         &self,
239 |         server_address: &str,
240 |         operation_type: u32,
241 |         req_flags: u32,
242 |         path: &str,
243 |         send_meta_data: &[u8],
244 |         send_data: &[u8],
245 |         status: &mut i32,
246 |         rsp_flags: &mut u32,
247 |         recv_meta_data_length: &mut usize,
248 |         recv_data_length: &mut usize,
249 |         recv_meta_data: &mut [u8],
250 |         recv_data: &mut [u8],
251 |         timeout: Duration,
252 |     ) -> Result<(), String> {
253 |         for _ in 0..SEND_RETRY_TIMES {
254 |             // Take an owned handle; the DashMap guard must not live across the
255 |             // awaits below (nor across the nested lookup in `reconnect`).
256 |             let connection = match self.connections.get(server_address) {
257 |                 Some(entry) => Arc::clone(entry.value()),
258 |                 None => {
259 |                     error!("connection not exists: {}", server_address);
260 |                     return Err(format!("connection not exists: {}", server_address));
261 |                 }
262 |             };
263 |             let (batch, id) = self
264 |                 .pool
265 |                 .register_callback(recv_meta_data, recv_data)
266 |                 .await?; // TODO: unregister callback when error
267 | 
268 |             if let Err(e) = connection
269 |                 .send_request(
270 |                     batch,
271 |                     id,
272 |                     operation_type,
273 |                     req_flags,
274 |                     path,
275 |                     send_meta_data,
276 |                     send_data,
277 |                 )
278 |                 .await
279 |             {
280 |                 error!("send request to {} failed: {}", server_address, e);
281 |                 connection.disconnect();
282 |                 // Only one task at a time reconnects a given connection.
283 |                 let _lock = connection.get_reconnecting_lock().await;
284 |                 warn!("connection to {} disconnected", server_address);
285 |                 match self.reconnect(server_address).await {
286 |                     Ok(_) => {
287 |                         continue;
288 |                     }
289 |                     Err(e) => {
290 |                         error!("reconnect to {} failed: {}", server_address, e);
291 |                         return Err(format!("reconnect to {} failed: {}", server_address, e));
292 |                     }
293 |                 }
294 |             }
295 |             match self.pool.wait_for_callback(id, timeout).await {
296 |                 Ok((s, f, meta_data_length, data_length)) => {
297 |                     *status = s;
298 |                     *rsp_flags = f;
299 |                     *recv_meta_data_length = meta_data_length;
300 |                     *recv_data_length = data_length;
301 |                     return Ok(());
302 |                 }
303 |                 Err(e) => {
304 |                     error!("wait for callback failed: {}, batch: {}, id {}, operation type: {}, path: {}", e, batch, id, operation_type, path);
305 |                     continue;
306 |                 }
307 |             }
308 |         }
309 |         Err(format!(
310 |             "send request to {} error: send retry times exceed",
311 |             server_address
312 |         ))
313 |     }
314 | }
264 | 
265 | /// Reader task for one connection: reads response frames from `read_stream`
266 | /// and hands them to the matching callback slot in `pool`.
267 | ///
268 | /// The loop ends when `connection` is no longer marked connected, when a
269 | /// header or payload cannot be read, or when the response cannot be handed
270 | /// back to the pool. A response whose callback can no longer be locked is
271 | /// drained from the stream with `clean_response` and skipped.
272 | ///
273 | /// Read errors never panic the task: the two messages that indicate a closed
274 | /// peer are logged as warnings, anything else as an error.
275 | pub async fn parse_response<W: AsyncWriteExt + Unpin, R: AsyncReadExt + Unpin>(
276 |     mut read_stream: R,
277 |     connection: Arc<ClientConnection<W, R>>,
278 |     pool: Arc<CallbackPool>,
279 | ) {
280 |     while connection.is_connected() {
281 |         let header = match connection.receive_response_header(&mut read_stream).await {
282 |             Ok(header) => header,
283 |             Err(e) => {
284 |                 if e == "early eof" || e == "Connection reset by peer (os error 104)" {
285 |                     warn!("{:?} disconnected: {}", connection.server_address, e);
286 |                 } else {
287 |                     error!(
288 |                         "parse_response header from {:?} error: {}",
289 |                         connection.server_address, e
290 |                     );
291 |                 }
292 |                 break;
293 |             }
294 |         };
295 |         let batch = header.batch;
296 |         let id = header.id;
297 |         let total_length = header.total_length;
298 | 
299 |         // Evaluated in its own statement and reduced to `Result<(), _>` so
300 |         // that whatever the `Ok` side carries is dropped before the next
301 |         // `.await`.
302 |         let lock_result = pool.lock_if_not_timeout(batch, id).map(|_| ());
303 |         if let Err(e) = lock_result {
304 |             error!(
305 |                 "parse_response lock timeout: {}, batch: {}, id: {}",
306 |                 e, batch, id
307 |             );
308 |             // Drain the payload nobody is waiting for so that the stream
309 |             // stays aligned on the next header.
310 |             if let Err(e) = connection
311 |                 .clean_response(&mut read_stream, total_length)
312 |                 .await
313 |             {
314 |                 error!("parse_response clean_response error: {}", e);
315 |                 break;
316 |             }
317 |             continue;
318 |         }
319 | 
320 |         if let Err(e) = connection
321 |             .receive_response(
322 |                 &mut read_stream,
323 |                 pool.get_meta_data_ref(id, header.meta_data_length as usize),
324 |                 pool.get_data_ref(id, header.data_length as usize),
325 |             )
326 |             .await
327 |         {
328 |             error!("Error receiving response: {}", e);
329 |             break;
330 |         };
331 |         if let Err(e) = pool
332 |             .response(
333 |                 id,
334 |                 header.status,
335 |                 header.flags,
336 |                 header.meta_data_length as usize,
337 |                 header.data_length as usize,
338 |             )
339 |             .await
340 |         {
341 |             error!("Error writing response back: {}", e);
342 |             break;
343 |         };
344 |     }
345 | }
346 | 


--------------------------------------------------------------------------------
/src/rpc/connection.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 labring. All rights reserved.
  2 | //
  3 | // SPDX-License-Identifier: Apache-2.0
  4 | 
  5 | use std::{io::IoSlice, marker::PhantomData, sync::atomic::AtomicU32};
  6 | 
  7 | use super::protocol::{
  8 |     RequestHeader, ResponseHeader, MAX_DATA_LENGTH, MAX_FILENAME_LENGTH, MAX_METADATA_LENGTH,
  9 |     REQUEST_HEADER_SIZE, RESPONSE_HEADER_SIZE,
 10 | };
 11 | use log::{error, info};
 12 | use tokio::{
 13 |     io::{AsyncReadExt, AsyncWriteExt},
 14 |     sync::Mutex,
 15 | };
 16 | 
 17 | const CONNECTED: u32 = 0; // `ClientConnection::status` value set by `new` and `reset_connection`
 18 | const DISCONNECTED: u32 = 1; // `ClientConnection::status` value set by `disconnect`
 19 | 
 20 | pub struct ClientConnection<W: AsyncWriteExt + Unpin, R: AsyncReadExt + Unpin> {
 21 |     pub server_address: String,
 22 |     write_stream: Mutex<Option<W>>, // write half; swapped for a new one by `reset_connection`
 23 |     status: AtomicU32, // holds `CONNECTED` or `DISCONNECTED`
 24 |     reconneting_lock: Mutex<()>, // handed out by `get_reconnecting_lock`
 25 | 
 26 |     phantom_data: PhantomData<R>, // ties the reader type `R` to the connection; no value of `R` is stored
 27 | 
 28 |     // Lock originally meant to serialise `send_request`:
 29 |     // requests are sent in parallel, and each request goes out as several
 30 |     // pieces because of the partition of data and header.
 31 |     // It is not used by the code visible here (hence the `_` prefix):
 32 |     // `send_request` keeps the `write_stream` mutex for its whole write loop.
 33 |     _send_lock: Mutex<()>,
 34 | }
 35 | 
 36 | impl<W: AsyncWriteExt + Unpin, R: AsyncReadExt + Unpin> ClientConnection<W, R> {
 37 |     pub fn new(server_address: &str, write_stream: W) -> Self {
 38 |         Self {
 39 |             server_address: server_address.to_string(),
 40 |             write_stream: Mutex::new(Some(write_stream)),
 41 |             status: AtomicU32::new(CONNECTED),
 42 |             reconneting_lock: Mutex::new(()),
 43 |             phantom_data: PhantomData,
 44 |             _send_lock: Mutex::new(()),
 45 |         }
 46 |     }
 47 | 
 48 |     pub fn disconnect(&self) -> bool {
 49 |         info!("disconnecting from server {}", self.server_address);
 50 |         self.status
 51 |             .compare_exchange(
 52 |                 CONNECTED,
 53 |                 DISCONNECTED,
 54 |                 std::sync::atomic::Ordering::SeqCst,
 55 |                 std::sync::atomic::Ordering::SeqCst,
 56 |             )
 57 |             .is_ok()
 58 |     }
 59 | 
 60 |     pub fn is_connected(&self) -> bool {
 61 |         self.status.load(std::sync::atomic::Ordering::Acquire) == CONNECTED
 62 |     }
 63 | 
 64 |     pub async fn get_reconnecting_lock(&self) -> tokio::sync::MutexGuard<'_, ()> {
 65 |         self.reconneting_lock.lock().await
 66 |     }
 67 | 
 68 |     pub async fn reset_connection(&self, write_stream: W) {
 69 |         self.write_stream.lock().await.replace(write_stream);
 70 |         self.status
 71 |             .store(CONNECTED, std::sync::atomic::Ordering::SeqCst);
 72 |     }
 73 | 
 74 |     // request
 75 |     // | batch | id | type | flags | total_length | file_path_length | meta_data_length | data_length | filename | meta_data | data |
 76 |     // | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 1~4kB | 0~ | 0~ |
 77 |     #[allow(clippy::too_many_arguments)]
 78 |     pub async fn send_request(
 79 |         &self,
 80 |         batch: u32,
 81 |         id: u32,
 82 |         operation_type: u32,
 83 |         flags: u32,
 84 |         filename: &str,
 85 |         meta_data: &[u8],
 86 |         data: &[u8],
 87 |     ) -> Result<(), String> {
 88 |         if !self.is_connected() {
 89 |             return Err("connection is not connected".to_string());
 90 |         }
 91 |         let filename_length = filename.len();
 92 |         let meta_data_length = meta_data.len();
 93 |         let data_length = data.len();
 94 |         let total_length = filename_length + meta_data_length + data_length;
 95 |         let mut request = Vec::with_capacity(total_length + REQUEST_HEADER_SIZE);
 96 |         request.extend_from_slice(&batch.to_le_bytes());
 97 |         request.extend_from_slice(&id.to_le_bytes());
 98 |         request.extend_from_slice(&operation_type.to_le_bytes());
 99 |         request.extend_from_slice(&flags.to_le_bytes());
100 |         request.extend_from_slice(&(total_length as u32).to_le_bytes());
101 |         request.extend_from_slice(&(filename_length as u32).to_le_bytes());
102 |         request.extend_from_slice(&(meta_data_length as u32).to_le_bytes());
103 |         request.extend_from_slice(&(data_length as u32).to_le_bytes());
104 |         request.extend_from_slice(filename.as_bytes());
105 |         let mut stream = self.write_stream.lock().await;
106 |         let mut offset = 0;
107 |         loop {
108 |             if offset >= request.len() + meta_data_length + data_length {
109 |                 break;
110 |             }
111 |             if offset < request.len() {
112 |                 let bufs: &[_] = &[
113 |                     IoSlice::new(&request[offset..]),
114 |                     IoSlice::new(meta_data),
115 |                     IoSlice::new(data),
116 |                 ];
117 |                 offset += stream
118 |                     .as_mut()
119 |                     .unwrap()
120 |                     .write_vectored(bufs)
121 |                     .await
122 |                     .map_err(|e| e.to_string())?;
123 |             } else if offset < request.len() + meta_data_length {
124 |                 let bufs: &[_] = &[
125 |                     IoSlice::new(&meta_data[offset - request.len()..]),
126 |                     IoSlice::new(data),
127 |                 ];
128 |                 offset += stream
129 |                     .as_mut()
130 |                     .unwrap()
131 |                     .write_vectored(bufs)
132 |                     .await
133 |                     .map_err(|e| e.to_string())?;
134 |             } else {
135 |                 let bufs: &[_] = &[IoSlice::new(
136 |                     &data[offset - request.len() - meta_data_length..],
137 |                 )];
138 |                 offset += stream
139 |                     .as_mut()
140 |                     .unwrap()
141 |                     .write_vectored(bufs)
142 |                     .await
143 |                     .map_err(|e| e.to_string())?;
144 |             }
145 |         }
146 |         Ok(())
147 |     }
148 | 
149 |     pub async fn receive_response_header(
150 |         &self,
151 |         read_stream: &mut R,
152 |     ) -> Result<ResponseHeader, String> {
153 |         let mut header = [0; RESPONSE_HEADER_SIZE];
154 |         self.receive(read_stream, &mut header).await?;
155 |         let batch = u32::from_le_bytes(header[0..4].try_into().unwrap());
156 |         let id = u32::from_le_bytes(header[4..8].try_into().unwrap());
157 |         let status = i32::from_le_bytes(header[8..12].try_into().unwrap());
158 |         let flags = u32::from_le_bytes(header[12..16].try_into().unwrap());
159 |         let total_length = u32::from_le_bytes(header[16..20].try_into().unwrap());
160 |         let meta_data_length = u32::from_le_bytes(header[20..24].try_into().unwrap());
161 |         let data_length = u32::from_le_bytes(header[24..28].try_into().unwrap());
162 |         Ok(ResponseHeader {
163 |             batch,
164 |             id,
165 |             status,
166 |             flags,
167 |             total_length,
168 |             meta_data_length,
169 |             data_length,
170 |         })
171 |     }
172 | 
173 |     pub async fn receive_response(
174 |         &self,
175 |         read_stream: &mut R,
176 |         meta_data: &mut [u8],
177 |         data: &mut [u8],
178 |     ) -> Result<(), String> {
179 |         let meta_data_length = meta_data.len();
180 |         let data_length = data.len();
181 |         self.receive(read_stream, &mut meta_data[0..meta_data_length])
182 |             .await?;
183 |         self.receive(read_stream, &mut data[0..data_length]).await?;
184 |         Ok(())
185 |     }
186 | 
187 |     pub async fn receive(&self, read_stream: &mut R, data: &mut [u8]) -> Result<(), String> {
188 |         match read_stream.read_exact(data).await {
189 |             Ok(_) => Ok(()),
190 |             Err(e) => Err(e.to_string()),
191 |         }
192 |     }
193 | 
194 |     pub async fn clean_response(
195 |         &self,
196 |         read_stream: &mut R,
197 |         total_length: u32,
198 |     ) -> Result<(), String> {
199 |         let mut buffer = vec![0u8; total_length as usize];
200 |         self.receive(read_stream, &mut buffer).await?;
201 |         Ok(())
202 |     }
203 | }
204 | 
205 | pub struct ServerConnection<W: AsyncWriteExt + Unpin, R: AsyncReadExt + Unpin> {
206 |     pub id: u32,
207 |     name_id: String,
208 |     write_stream: Mutex<W>,
209 | 
210 |     phantom_data: PhantomData<R>,
211 | }
212 | 
213 | impl<W: AsyncWriteExt + Unpin, R: AsyncReadExt + Unpin> ServerConnection<W, R> {
214 |     pub fn new(write_stream: W, name_id: String, id: u32) -> Self {
215 |         ServerConnection {
216 |             id,
217 |             name_id,
218 |             write_stream: Mutex::new(write_stream),
219 | 
220 |             phantom_data: PhantomData,
221 |         }
222 |     }
223 | 
224 |     pub fn name_id(&self) -> String {
225 |         self.name_id.clone()
226 |     }
227 | 
228 |     pub async fn close(&self) -> Result<(), String> {
229 |         let mut stream = self.write_stream.lock().await;
230 |         stream.shutdown().await.map_err(|e| e.to_string())?;
231 |         info!("close connection {}", self.name_id);
232 |         Ok(())
233 |     }
234 | 
235 |     // response
236 |     // | batch | id | status | flags | total_length | meta_data_lenght | data_length | meta_data | data |
237 |     // | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 0~ | 0~ |
238 |     pub async fn send_response(
239 |         &self,
240 |         batch: u32,
241 |         id: u32,
242 |         status: i32,
243 |         flags: u32,
244 |         meta_data: &[u8],
245 |         data: &[u8],
246 |     ) -> Result<(), String> {
247 |         let data_length = data.len();
248 |         let meta_data_length = meta_data.len();
249 |         let total_length = data_length + meta_data_length;
250 |         let mut response = Vec::with_capacity(RESPONSE_HEADER_SIZE + total_length);
251 |         response.extend_from_slice(&batch.to_le_bytes());
252 |         response.extend_from_slice(&id.to_le_bytes());
253 |         response.extend_from_slice(&status.to_le_bytes());
254 |         response.extend_from_slice(&flags.to_le_bytes());
255 |         response.extend_from_slice(&(total_length as u32).to_le_bytes());
256 |         response.extend_from_slice(&(meta_data_length as u32).to_le_bytes());
257 |         response.extend_from_slice(&(data_length as u32).to_le_bytes());
258 |         let mut stream = self.write_stream.lock().await;
259 |         let mut offset = 0;
260 |         loop {
261 |             if offset >= response.len() + meta_data_length + data_length {
262 |                 break;
263 |             }
264 |             if offset < response.len() {
265 |                 let bufs: &[_] = &[
266 |                     IoSlice::new(&response[offset..]),
267 |                     IoSlice::new(meta_data),
268 |                     IoSlice::new(data),
269 |                 ];
270 |                 offset += stream
271 |                     .write_vectored(bufs)
272 |                     .await
273 |                     .map_err(|e| e.to_string())?;
274 |             } else if offset < response.len() + meta_data_length {
275 |                 let bufs: &[_] = &[
276 |                     IoSlice::new(&meta_data[offset - response.len()..]),
277 |                     IoSlice::new(data),
278 |                 ];
279 |                 offset += stream
280 |                     .write_vectored(bufs)
281 |                     .await
282 |                     .map_err(|e| e.to_string())?;
283 |             } else {
284 |                 let bufs: &[_] = &[IoSlice::new(
285 |                     &data[offset - response.len() - meta_data_length..],
286 |                 )];
287 |                 offset += stream
288 |                     .write_vectored(bufs)
289 |                     .await
290 |                     .map_err(|e| e.to_string())?;
291 |             }
292 |         }
293 |         Ok(())
294 |     }
295 | 
296 |     pub async fn receive_request_header(
297 |         &self,
298 |         read_stream: &mut R,
299 |     ) -> Result<RequestHeader, String> {
300 |         let mut header = [0; REQUEST_HEADER_SIZE];
301 |         self.receive(read_stream, &mut header).await?;
302 |         let batch = u32::from_le_bytes(header[0..4].try_into().unwrap());
303 |         let id = u32::from_le_bytes(header[4..8].try_into().unwrap());
304 |         let operation_type = u32::from_le_bytes(header[8..12].try_into().unwrap());
305 |         let flags: u32 = u32::from_le_bytes(header[12..16].try_into().unwrap());
306 |         let total_length = u32::from_le_bytes(header[16..20].try_into().unwrap());
307 |         let file_path_length = u32::from_le_bytes(header[20..24].try_into().unwrap());
308 |         let meta_data_length = u32::from_le_bytes(header[24..28].try_into().unwrap());
309 |         let data_length = u32::from_le_bytes(header[28..32].try_into().unwrap());
310 |         Ok(RequestHeader {
311 |             batch,
312 |             id,
313 |             r#type: operation_type,
314 |             flags,
315 |             total_length,
316 |             file_path_length,
317 |             meta_data_length,
318 |             data_length,
319 |         })
320 |     }
321 | 
322 |     pub async fn receive_request(
323 |         &self,
324 |         read_stream: &mut R,
325 |         header: &RequestHeader,
326 |     ) -> Result<(Vec<u8>, Vec<u8>, Vec<u8>), String> {
327 |         if header.file_path_length as usize > MAX_FILENAME_LENGTH {
328 |             error!("path length is too long: {}", header.file_path_length);
329 |             return Err("path length is too long".into());
330 |         }
331 |         if header.data_length as usize > MAX_DATA_LENGTH {
332 |             error!("data length is too long: {}", header.data_length);
333 |             return Err("data length is too long".into());
334 |         }
335 |         if header.meta_data_length as usize > MAX_METADATA_LENGTH {
336 |             error!("meta data length is too long: {}", header.meta_data_length);
337 |             return Err("meta data length is too long".into());
338 |         }
339 |         let mut path = vec![0u8; header.file_path_length as usize];
340 |         let mut data = vec![0u8; header.data_length as usize];
341 |         let mut meta_data = vec![0u8; header.meta_data_length as usize];
342 | 
343 |         self.receive(read_stream, &mut path[0..header.file_path_length as usize])
344 |             .await?;
345 |         self.receive(
346 |             read_stream,
347 |             &mut meta_data[0..header.meta_data_length as usize],
348 |         )
349 |         .await?;
350 |         self.receive(read_stream, &mut data[0..header.data_length as usize])
351 |             .await?;
352 | 
353 |         Ok((path, data, meta_data))
354 |     }
355 | 
356 |     pub async fn receive(&self, read_stream: &mut R, data: &mut [u8]) -> Result<(), String> {
357 |         match read_stream.read_exact(data).await {
358 |             Ok(_) => Ok(()),
359 |             Err(e) => Err(e.to_string()),
360 |         }
361 |     }
362 | }
363 | 


--------------------------------------------------------------------------------
/src/rpc/mod.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | pub mod callback;
 6 | pub mod client;
 7 | pub mod connection;
 8 | pub mod protocol;
 9 | pub mod rdma;
10 | pub mod server;
11 | 


--------------------------------------------------------------------------------
/src/rpc/protocol.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 labring. All rights reserved.
  2 | //
  3 | // SPDX-License-Identifier: Apache-2.0
  4 | 
  5 | pub const MAX_FILENAME_LENGTH: usize = 4096;
  6 | pub const MAX_DATA_LENGTH: usize = 65536 * 65536;
  7 | pub const MAX_METADATA_LENGTH: usize = 65536;
  8 | pub const MAX_COPY_LENGTH: usize = 1024 * 8;
  9 | 
 10 | pub const CONNECTION_RETRY_TIMES: i32 = 100;
 11 | pub const SEND_RETRY_TIMES: i32 = 5;
 12 | 
 13 | // request
 14 | // | batch | id | type | flags | total_length | file_path_length | meta_data_length | data_length | filename | meta_data | data |
 15 | // | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 1~4kB | 0~ | 0~ |
 16 | pub const REQUEST_HEADER_SIZE: usize = 4 * 8;
 17 | pub const REQUEST_FILENAME_LENGTH_SIZE: usize = 4;
 18 | pub const REQUEST_METADATA_LENGTH_SIZE: usize = 4;
 19 | pub const REQUEST_DATA_LENGTH_SIZE: usize = 4;
 20 | 
 21 | pub const REQUEST_POOL_SIZE: usize = 65536;
 22 | 
 23 | /* receive operation response and wake up the operation thread using condition variable
 24 |     response
 25 |     | batch | id | status | flags | total_length | meta_data_lenght | data_length | meta_data | data |
 26 |     | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 4Byte | 0~ | 0~ |
 27 | */
 28 | pub const RESPONSE_HEADER_SIZE: usize = 4 * 7;
 29 | 
 30 | // pub const CLIENT_RESPONSE_TIMEOUT: time::Duration = time::Duration::from_micros(300); // timeout for client response loop
 31 | 
 32 | #[derive(Debug)]
 33 | pub struct RequestHeader {
 34 |     pub batch: u32,
 35 |     pub id: u32,
 36 |     pub r#type: u32,
 37 |     pub flags: u32,
 38 |     pub total_length: u32, // we use u32 because of the protocol consistency
 39 |     pub file_path_length: u32,
 40 |     pub meta_data_length: u32,
 41 |     pub data_length: u32,
 42 | }
 43 | 
 44 | impl RequestHeader {
 45 |     #[allow(clippy::too_many_arguments)]
 46 |     pub fn new(
 47 |         batch: u32,
 48 |         id: u32,
 49 |         r#type: u32,
 50 |         flags: u32,
 51 |         total_length: u32,
 52 |         file_path_length: u32,
 53 |         meta_data_length: u32,
 54 |         data_length: u32,
 55 |     ) -> Self {
 56 |         Self {
 57 |             batch,
 58 |             id,
 59 |             r#type,
 60 |             flags,
 61 |             total_length,
 62 |             file_path_length,
 63 |             meta_data_length,
 64 |             data_length,
 65 |         }
 66 |     }
 67 | }
 68 | 
 69 | pub struct ResponseHeader {
 70 |     pub batch: u32,
 71 |     pub id: u32,
 72 |     pub status: i32,
 73 |     pub flags: u32,
 74 |     pub total_length: u32,
 75 |     pub meta_data_length: u32,
 76 |     pub data_length: u32,
 77 | }
 78 | 
 79 | impl ResponseHeader {
 80 |     pub fn new(
 81 |         batch: u32,
 82 |         id: u32,
 83 |         status: i32,
 84 |         flags: u32,
 85 |         total_length: u32,
 86 |         meta_data_length: u32,
 87 |         data_length: u32,
 88 |     ) -> Self {
 89 |         Self {
 90 |             batch,
 91 |             id,
 92 |             status,
 93 |             flags,
 94 |             total_length,
 95 |             meta_data_length,
 96 |             data_length,
 97 |         }
 98 |     }
 99 | }
100 | 


--------------------------------------------------------------------------------
/src/rpc/rdma/client.rs:
--------------------------------------------------------------------------------
  1 | use core::result::Result;
  2 | use dashmap::DashMap;
  3 | use ibv::connection::conn::{connect, Conn};
  4 | use log::{debug, error};
  5 | use std::{io::IoSlice, sync::Arc, time::Duration};
  6 | 
  7 | use crate::rpc::{
  8 |     callback::CallbackPool,
  9 |     protocol::{ResponseHeader, RESPONSE_HEADER_SIZE},
 10 | };
 11 | pub struct Client {
 12 |     connections: DashMap<String, Arc<Conn>>,
 13 |     pool: Arc<CallbackPool>,
 14 | }
 15 | 
 16 | impl Client {
 17 |     pub fn new() -> Self {
 18 |         let mut pool = CallbackPool::new();
 19 |         pool.init();
 20 |         let pool = Arc::new(pool);
 21 |         Client {
 22 |             connections: DashMap::new(),
 23 |             pool,
 24 |         }
 25 |     }
 26 | 
 27 |     pub fn close(&self) {
 28 |         self.pool.free();
 29 |     }
 30 | 
 31 |     pub async fn add_connection(&self, addr: &str) {
 32 |         let conn = Arc::new(connect(addr).await.unwrap());
 33 |         debug!("connect to {} success", addr);
 34 |         let conn1 = conn.clone();
 35 |         self.connections.insert(addr.to_string(), conn1);
 36 |         tokio::spawn(parse_response(conn, self.pool.clone()));
 37 |     }
 38 | 
 39 |     pub fn get_connection(&self, addr: &str) -> Option<Arc<Conn>> {
 40 |         self.connections.get(addr).map(|conn| conn.value().clone())
 41 |     }
 42 | 
 43 |     #[allow(clippy::too_many_arguments)]
 44 |     pub async fn call_remote(
 45 |         &self,
 46 |         server_address: &str,
 47 |         operation_type: u32,
 48 |         req_flags: u32,
 49 |         path: &str,
 50 |         send_meta_data: &[u8],
 51 |         send_data: &[u8],
 52 |         status: &mut i32,
 53 |         rsp_flags: &mut u32,
 54 |         recv_meta_data_length: &mut usize,
 55 |         recv_data_length: &mut usize,
 56 |         recv_meta_data: &mut [u8],
 57 |         recv_data: &mut [u8],
 58 |         timeout: Duration,
 59 |     ) -> Result<(), Box<dyn std::error::Error>> {
 60 |         let (batch, id) = self
 61 |             .pool
 62 |             .register_callback(recv_meta_data, recv_data)
 63 |             .await?;
 64 |         debug!(
 65 |             "call_remote on {:?}, batch {}, id: {}",
 66 |             server_address, batch, id
 67 |         );
 68 |         // send request to remote
 69 |         self.send_request(
 70 |             server_address,
 71 |             batch,
 72 |             id,
 73 |             operation_type,
 74 |             req_flags,
 75 |             path,
 76 |             send_meta_data,
 77 |             send_data,
 78 |         )
 79 |         .await?;
 80 | 
 81 |         let (s, f, meta_data_length, data_length) =
 82 |             self.pool.wait_for_callback(id, timeout).await?;
 83 |         debug!(
 84 |             "call_remote success, id: {}, status: {}, flags: {}, meta_data_length: {}, data_length: {}",
 85 |             id, s, f, meta_data_length, data_length
 86 |         );
 87 |         *status = s;
 88 |         *rsp_flags = f;
 89 |         *recv_meta_data_length = meta_data_length;
 90 |         *recv_data_length = data_length;
 91 |         Ok(())
 92 |     }
 93 | 
 94 |     #[allow(clippy::too_many_arguments)]
 95 |     pub async fn send_request(
 96 |         &self,
 97 |         addr: &str,
 98 |         batch: u32,
 99 |         id: u32,
100 |         operation_type: u32,
101 |         req_flags: u32,
102 |         path: &str,
103 |         send_meta_data: &[u8],
104 |         send_data: &[u8],
105 |     ) -> Result<(), Box<dyn std::error::Error>> {
106 |         let conn = self.get_connection(addr).unwrap();
107 |         let mut request = Vec::new();
108 |         let total_length = path.len() + send_meta_data.len() + send_data.len();
109 |         request.extend_from_slice(&batch.to_le_bytes());
110 |         request.extend_from_slice(&id.to_le_bytes());
111 |         request.extend_from_slice(&operation_type.to_le_bytes());
112 |         request.extend_from_slice(&req_flags.to_le_bytes());
113 |         request.extend_from_slice(&(total_length as u32).to_le_bytes());
114 |         request.extend_from_slice(&(path.len() as u32).to_le_bytes());
115 |         request.extend_from_slice(&(send_meta_data.len() as u32).to_le_bytes());
116 |         request.extend_from_slice(&(send_data.len() as u32).to_le_bytes());
117 |         request.extend_from_slice(path.as_bytes());
118 |         let request = &[
119 |             IoSlice::new(&request),
120 |             IoSlice::new(send_meta_data),
121 |             IoSlice::new(send_data),
122 |         ];
123 |         debug!("send_request: {:?}", request);
124 |         conn.send_msg(request).await?;
125 |         Ok(())
126 |     }
127 | }
128 | 
129 | impl Default for Client {
130 |     fn default() -> Self {
131 |         Self::new()
132 |     }
133 | }
134 | 
135 | pub async fn parse_response(conn: Arc<Conn>, pool: Arc<CallbackPool>) {
136 |     loop {
137 |         let response = conn.recv_msg().await.unwrap();
138 |         debug!("parse_response: recv response: {:?}", response);
139 |         // parse response
140 |         let header = parse_response_header(response);
141 | 
142 |         let batch = header.batch;
143 |         let id = header.id;
144 | 
145 |         if pool.lock_if_not_timeout(batch, id).is_err() {
146 |             debug!("parse_response: lock timeout");
147 |             continue;
148 |         }
149 |         debug!("parse_response: lock success");
150 | 
151 |         parse_response_body(
152 |             response,
153 |             pool.get_meta_data_ref(id, header.meta_data_length as usize),
154 |             pool.get_data_ref(id, header.data_length as usize),
155 |         );
156 |         conn.release(response).await;
157 |         if let Err(e) = pool
158 |             .response(
159 |                 id,
160 |                 header.status,
161 |                 header.flags,
162 |                 header.meta_data_length as usize,
163 |                 header.data_length as usize,
164 |             )
165 |             .await
166 |         {
167 |             error!("Error writing response back: {}", e);
168 |             break;
169 |         };
170 |         // todo: realease the buf in response
171 |     }
172 | }
173 | 
174 | pub fn parse_response_header(response: &[u8]) -> ResponseHeader {
175 |     let header = &response[0..RESPONSE_HEADER_SIZE];
176 |     let batch = u32::from_le_bytes(header[0..4].try_into().unwrap());
177 |     let id = u32::from_le_bytes(header[4..8].try_into().unwrap());
178 |     let status = i32::from_le_bytes(header[8..12].try_into().unwrap());
179 |     let flags = u32::from_le_bytes(header[12..16].try_into().unwrap());
180 |     let total_length = u32::from_le_bytes(header[16..20].try_into().unwrap());
181 |     let meta_data_length = u32::from_le_bytes(header[20..24].try_into().unwrap());
182 |     let data_length = u32::from_le_bytes(header[24..28].try_into().unwrap());
183 |     // debug!(
184 |     //     "received response_header batch: {}, id: {}, status: {}, flags: {}, total_length: {}, meta_data_length: {}, data_length: {}",
185 |     //     batch, id, status, flags, total_length, meta_data_length, data_length
186 |     // );
187 |     ResponseHeader {
188 |         batch,
189 |         id,
190 |         status,
191 |         flags,
192 |         total_length,
193 |         meta_data_length,
194 |         data_length,
195 |     }
196 | }
197 | 
198 | pub fn parse_response_body(response: &[u8], meta_data: &mut [u8], data: &mut [u8]) {
199 |     let meta_data_length = meta_data.len();
200 |     let data_length = data.len();
201 |     debug!(
202 |         "waiting for response_meta_data, length: {}",
203 |         meta_data_length
204 |     );
205 |     // copy response to meta_data
206 |     meta_data
207 |         .copy_from_slice(&response[RESPONSE_HEADER_SIZE..RESPONSE_HEADER_SIZE + meta_data_length]);
208 |     debug!("received reponse_meta_data, meta_data: {:?}", meta_data);
209 |     // copy response to data
210 |     data.copy_from_slice(
211 |         &response[RESPONSE_HEADER_SIZE + meta_data_length
212 |             ..RESPONSE_HEADER_SIZE + meta_data_length + data_length],
213 |     );
214 |     debug!("received reponse_data, data: {:?}", data);
215 | }
216 | 


--------------------------------------------------------------------------------
/src/rpc/rdma/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod client;
2 | pub mod server;
3 | 


--------------------------------------------------------------------------------
/src/rpc/rdma/server.rs:
--------------------------------------------------------------------------------
  1 | use std::{io::IoSlice, sync::Arc};
  2 | 
  3 | use ibv::connection::conn::{Conn, MyReceiver};
  4 | use log::debug;
  5 | use tokio::sync::mpsc::channel;
  6 | 
  7 | use ibv::connection::conn::run;
  8 | 
  9 | use crate::rpc::{
 10 |     protocol::{RequestHeader, REQUEST_HEADER_SIZE, RESPONSE_HEADER_SIZE},
 11 |     server::Handler,
 12 | };
 13 | pub struct Server<H: Handler + std::marker::Sync + std::marker::Send + 'static> {
 14 |     pub addr: String,
 15 |     incoming: MyReceiver<Conn>,
 16 |     handler: Arc<H>,
 17 | }
 18 | 
 19 | impl<H: Handler + std::marker::Sync + std::marker::Send> Server<H>
 20 | where
 21 |     H: Handler + std::marker::Sync + std::marker::Send + 'static,
 22 | {
 23 |     pub async fn new(addr: String, handler: Arc<H>) -> Self {
 24 |         let (tx, rx) = channel(1000);
 25 |         let address = addr.clone();
 26 |         tokio::spawn(run(address, tx));
 27 |         let rx = MyReceiver::new(rx);
 28 |         Server {
 29 |             addr,
 30 |             incoming: rx,
 31 |             handler,
 32 |         }
 33 |     }
 34 | 
 35 |     pub async fn accept(&self) -> Conn {
 36 |         self.incoming.recv().await
 37 |     }
 38 | 
 39 |     pub async fn run(&self) -> anyhow::Result<()> {
 40 |         loop {
 41 |             let conn = Arc::new(self.accept().await);
 42 |             println!("accept a connection");
 43 |             let handler = Arc::clone(&self.handler);
 44 |             tokio::spawn(receive(handler, conn));
 45 |         }
 46 |     }
 47 | }
 48 | 
 49 | pub async fn receive<H: Handler + std::marker::Sync + std::marker::Send + 'static>(
 50 |     handler: Arc<H>,
 51 |     conn: Arc<Conn>,
 52 | ) {
 53 |     loop {
 54 |         let request: &[u8] = conn.recv_msg().await.unwrap();
 55 |         debug!("receive a request: {:?}", request);
 56 |         let (header, path, meta_data, data) = parse_request(request);
 57 |         conn.release(request).await;
 58 | 
 59 |         let handler = handler.clone();
 60 |         tokio::spawn(handle(handler, conn.clone(), header, path, meta_data, data));
 61 |     }
 62 | }
 63 | 
 64 | // parse_request_header(): parse the request header
 65 | // 1. parse the header from the request
 66 | // 2. return the header
 67 | pub fn parse_request_header(request: &[u8]) -> RequestHeader {
 68 |     let header = &request[0..REQUEST_HEADER_SIZE];
 69 |     let batch = u32::from_le_bytes(header[0..4].try_into().unwrap());
 70 |     let id = u32::from_le_bytes(header[4..8].try_into().unwrap());
 71 |     let operation_type = u32::from_le_bytes(header[8..12].try_into().unwrap());
 72 |     let flags: u32 = u32::from_le_bytes(header[12..16].try_into().unwrap());
 73 |     let total_length = u32::from_le_bytes(header[16..20].try_into().unwrap());
 74 |     let file_path_length = u32::from_le_bytes(header[20..24].try_into().unwrap());
 75 |     let meta_data_length = u32::from_le_bytes(header[24..28].try_into().unwrap());
 76 |     let data_length = u32::from_le_bytes(header[28..32].try_into().unwrap());
 77 |     RequestHeader {
 78 |         batch,
 79 |         id,
 80 |         r#type: operation_type,
 81 |         flags,
 82 |         total_length,
 83 |         file_path_length,
 84 |         meta_data_length,
 85 |         data_length,
 86 |     }
 87 | }
 88 | 
 89 | pub fn parse_request(request: &[u8]) -> (RequestHeader, Vec<u8>, Vec<u8>, Vec<u8>) {
 90 |     let header = parse_request_header(request);
 91 |     debug!("parse_request, header: {:?}", header);
 92 |     let path =
 93 |         &request[REQUEST_HEADER_SIZE..REQUEST_HEADER_SIZE + header.file_path_length as usize];
 94 |     let metadata = &request[REQUEST_HEADER_SIZE + header.file_path_length as usize
 95 |         ..REQUEST_HEADER_SIZE
 96 |             + header.file_path_length as usize
 97 |             + header.meta_data_length as usize];
 98 |     let data = &request[REQUEST_HEADER_SIZE
 99 |         + header.file_path_length as usize
100 |         + header.meta_data_length as usize
101 |         ..REQUEST_HEADER_SIZE
102 |             + header.file_path_length as usize
103 |             + header.meta_data_length as usize
104 |             + header.data_length as usize];
105 |     (header, path.to_vec(), metadata.to_vec(), data.to_vec())
106 | }
107 | 
108 | // handle(): handle the request
109 | // 1. call the handler to handle the request
110 | // 2. send the response back to the client
111 | async fn handle<H: Handler + std::marker::Sync + std::marker::Send + 'static>(
112 |     handler: Arc<H>,
113 |     conn: Arc<Conn>,
114 |     header: RequestHeader,
115 |     path: Vec<u8>,
116 |     metadata: Vec<u8>,
117 |     data: Vec<u8>,
118 | ) {
119 |     debug!("handle, id: {}", header.id);
120 |     let response = handler
121 |         .dispatch(0, header.r#type, header.flags, path, data, metadata)
122 |         .await;
123 |     debug!("handle, response: {:?}", response);
124 |     match response {
125 |         Ok(response) => {
126 |             let result = send_response(
127 |                 conn.clone(),
128 |                 header.batch,
129 |                 header.id,
130 |                 response.0,
131 |                 response.1,
132 |                 &response.4[0..response.2],
133 |                 &response.5[0..response.3],
134 |             )
135 |             .await;
136 |             match result {
137 |                 Ok(_) => {
138 |                     // debug!("handle, send response success");
139 |                 }
140 |                 Err(e) => {
141 |                     debug!("handle, send response error: {}", e);
142 |                 }
143 |             }
144 |         }
145 |         Err(e) => {
146 |             debug!("handle, dispatch error: {}", e);
147 |         }
148 |     }
149 | }
150 | 
151 | pub async fn send_response(
152 |     conn: Arc<Conn>,
153 |     batch: u32,
154 |     id: u32,
155 |     status: i32,
156 |     flags: u32,
157 |     meta_data: &[u8],
158 |     data: &[u8],
159 | ) -> Result<(), Box<dyn std::error::Error>> {
160 |     let data_length = data.len();
161 |     let meta_data_length = meta_data.len();
162 |     let total_length = data_length + meta_data_length;
163 |     let mut response = Vec::with_capacity(RESPONSE_HEADER_SIZE + total_length);
164 |     response.extend_from_slice(&batch.to_le_bytes());
165 |     response.extend_from_slice(&id.to_le_bytes());
166 |     response.extend_from_slice(&status.to_le_bytes());
167 |     response.extend_from_slice(&flags.to_le_bytes());
168 |     response.extend_from_slice(&(total_length as u32).to_le_bytes());
169 |     response.extend_from_slice(&(meta_data_length as u32).to_le_bytes());
170 |     response.extend_from_slice(&(data_length as u32).to_le_bytes());
171 |     let response = &[
172 |         IoSlice::new(&response),
173 |         IoSlice::new(meta_data),
174 |         IoSlice::new(data),
175 |     ];
176 |     conn.send_msg(response).await?;
177 |     Ok(())
178 | }
179 | 


--------------------------------------------------------------------------------
/src/rpc/server.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 labring. All rights reserved.
  2 | //
  3 | // SPDX-License-Identifier: Apache-2.0
  4 | 
  5 | use std::sync::Arc;
  6 | 
  7 | use async_trait::async_trait;
  8 | use log::{error, info, warn};
  9 | use tokio::{
 10 |     io::{AsyncReadExt, AsyncWriteExt},
 11 |     net::{TcpListener, UnixListener},
 12 | };
 13 | 
 14 | use super::{connection::ServerConnection, protocol::RequestHeader};
 15 | 
/// Application-side request handler invoked once per received request.
#[async_trait]
pub trait Handler {
    /// Processes one request.
    ///
    /// `id` is the id of the connection the request arrived on;
    /// `operation_type` and `flags` come from the request header; `path`,
    /// `data` and `metadata` are the request's body sections.
    ///
    /// The returned tuple is read positionally by `handle` in this module as
    /// `(status, flags, meta_data_length, data_length, meta_data, data)`:
    /// only the first `meta_data_length` bytes of `meta_data` and the first
    /// `data_length` bytes of `data` are sent back, so each length must not
    /// exceed its buffer's length.
    async fn dispatch(
        &self,
        id: u32,
        operation_type: u32,
        flags: u32,
        path: Vec<u8>,
        data: Vec<u8>,
        metadata: Vec<u8>,
    ) -> anyhow::Result<(i32, u32, usize, usize, Vec<u8>, Vec<u8>)>;
}
 28 | 
 29 | pub async fn handle<
 30 |     H: Handler + std::marker::Sync + std::marker::Send + 'static,
 31 |     W: AsyncWriteExt + Unpin,
 32 |     R: AsyncReadExt + Unpin,
 33 | >(
 34 |     handler: Arc<H>,
 35 |     connection: Arc<ServerConnection<W, R>>,
 36 |     header: RequestHeader,
 37 |     path: Vec<u8>,
 38 |     data: Vec<u8>,
 39 |     metadata: Vec<u8>,
 40 | ) {
 41 |     let response = handler
 42 |         .dispatch(
 43 |             connection.id,
 44 |             header.r#type,
 45 |             header.flags,
 46 |             path.clone(),
 47 |             data,
 48 |             metadata,
 49 |         )
 50 |         .await;
 51 |     match response {
 52 |         Ok(response) => {
 53 |             if let Err(e) = connection
 54 |                 .send_response(
 55 |                     header.batch,
 56 |                     header.id,
 57 |                     response.0,
 58 |                     response.1,
 59 |                     &response.4[0..response.2],
 60 |                     &response.5[0..response.3],
 61 |                 )
 62 |                 .await
 63 |             {
 64 |                 error!("handle connection: {} , send response error: {}, batch: {}, id: {}, operation_type: {}, flags: {}, path: {:?}", connection.id, e, header.batch, header.id, header.r#type, header.flags, std::str::from_utf8(&path));
 65 |                 let _ = connection.close().await;
 66 |             }
 67 |         }
 68 |         Err(e) => {
 69 |             error!(
 70 |                 "handle connection: {} , dispatch error: {}",
 71 |                 connection.id, e
 72 |             );
 73 |         }
 74 |     }
 75 | }
 76 | 
 77 | pub async fn receive<
 78 |     H: Handler + std::marker::Sync + std::marker::Send + 'static,
 79 |     W: AsyncWriteExt + Unpin + std::marker::Sync + std::marker::Send + 'static,
 80 |     R: AsyncReadExt + Unpin + std::marker::Sync + std::marker::Send + 'static,
 81 | >(
 82 |     handler: Arc<H>,
 83 |     connection: Arc<ServerConnection<W, R>>,
 84 |     mut read_stream: R,
 85 | ) {
 86 |     loop {
 87 |         {
 88 |             let id = connection.name_id();
 89 |             let header = match connection.receive_request_header(&mut read_stream).await {
 90 |                 Ok(header) => header,
 91 |                 Err(e) => {
 92 |                     if e == "early eof" || e == "Connection reset by peer (os error 104)" {
 93 |                         warn!("{:?} receive, connection closed", id);
 94 |                         break;
 95 |                     }
 96 |                     panic!("{:?} parse_request, header error: {}", id, e);
 97 |                 }
 98 |             };
 99 |             let data_result = connection.receive_request(&mut read_stream, &header).await;
100 |             let (path, data, metadata) = match data_result {
101 |                 Ok(data) => data,
102 |                 Err(e) => {
103 |                     panic!("{:?} parse_request, data error: {}", id, e);
104 |                 }
105 |             };
106 |             let handler = handler.clone();
107 |             let connection = connection.clone();
108 |             tokio::spawn(handle(handler, connection, header, path, data, metadata));
109 |         }
110 |     }
111 | }
112 | 
113 | pub struct RpcServer<H: Handler + std::marker::Sync + std::marker::Send + 'static> {
114 |     // listener: TcpListener,
115 |     bind_address: String,
116 |     handler: Arc<H>,
117 | }
118 | 
119 | impl<H: Handler + std::marker::Sync + std::marker::Send> RpcServer<H> {
120 |     pub fn new(handler: Arc<H>, bind_address: &str) -> Self {
121 |         Self {
122 |             handler,
123 |             bind_address: String::from(bind_address),
124 |         }
125 |     }
126 | 
127 |     pub async fn run(&self) -> anyhow::Result<()> {
128 |         info!("Listening on {:?}", self.bind_address);
129 |         let listener = TcpListener::bind(&self.bind_address).await?;
130 |         let mut id = 1u32;
131 |         loop {
132 |             match listener.accept().await {
133 |                 Ok((stream, _)) => {
134 |                     let (read_stream, write_stream) = stream.into_split();
135 |                     info!("Connection {id} accepted");
136 |                     let handler = Arc::clone(&self.handler);
137 |                     let name_id = format!("{},{}", self.bind_address, id);
138 |                     let connection = Arc::new(ServerConnection::new(write_stream, name_id, id));
139 |                     tokio::spawn(async move {
140 |                         receive(handler, connection, read_stream).await;
141 |                     });
142 |                     id += 1;
143 |                 }
144 |                 Err(e) => {
145 |                     panic!("Failed to create tcp stream, error is {}", e)
146 |                 }
147 |             }
148 |         }
149 |     }
150 | 
151 |     pub async fn run_unix_stream(&self) -> anyhow::Result<()> {
152 |         info!("Listening on {:?}", self.bind_address);
153 |         let listener = match UnixListener::bind(&self.bind_address) {
154 |             Ok(listener) => listener,
155 |             Err(e) => {
156 |                 return Err(anyhow::anyhow!(
157 |                     "Failed to create unix stream at {:?}, error is {}",
158 |                     self.bind_address,
159 |                     e
160 |                 ));
161 |             }
162 |         };
163 |         let mut id = 1u32;
164 |         loop {
165 |             match listener.accept().await {
166 |                 Ok((stream, _)) => {
167 |                     let (read_stream, write_stream) = stream.into_split();
168 |                     info!("Connection {id} accepted");
169 |                     let handler = Arc::clone(&self.handler);
170 |                     let name_id = format!("{},{}", self.bind_address, id);
171 |                     let connection = Arc::new(ServerConnection::new(write_stream, name_id, id));
172 |                     tokio::spawn(async move {
173 |                         receive(handler, connection, read_stream).await;
174 |                     });
175 |                     id += 1;
176 |                 }
177 |                 Err(e) => {
178 |                     panic!("Failed to create tcp stream, error is {}", e)
179 |                 }
180 |             }
181 |         }
182 |     }
183 | }
184 | 


--------------------------------------------------------------------------------
/src/server/storage_engine/block_engine/allocator.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 labring. All rights reserved.
  2 | //
  3 | // SPDX-License-Identifier: Apache-2.0
  4 | 
  5 | use std::sync::Arc;
  6 | 
  7 | use libc::ioctl;
  8 | use nix::fcntl::{open, OFlag};
  9 | use parking_lot::Mutex;
 10 | 
 11 | //#define BLKGETSIZE _IO(0x12,96)	/* return device size /512 (long *arg) */
 12 | const BLOCKGETSIZE: u64 = 0x1260;
 13 | 
 14 | pub const CHUNK: u64 = 512 * 8;
 15 | const SECTOR: u64 = 512;
 16 | 
/// Interface of a space allocator for a block device.
pub(crate) trait Allocator {
    /// Builds an allocator for the device at `path`.
    fn new(path: &str) -> Self;
    /// Reserves space for `lenth` bytes and returns where the reservation
    /// begins. In `BitmapAllocator` the returned value is counted in chunks
    /// of `CHUNK` bytes, not in bytes.
    fn allocator_space(&self, lenth: u64) -> u64;
}
 21 | 
 22 | /*
 23 |  * This Allocator use for memory.
 24 |  */
 25 | #[allow(unused)]
 26 | pub(crate) struct BitmapAllocator {
 27 |     block_space: Arc<Mutex<u64>>,
 28 |     total_aspce: u64,
 29 | }
 30 | 
 31 | impl Allocator for BitmapAllocator {
 32 |     fn new(path: &str) -> Self {
 33 |         let blockdevice = BlockDevice::new(path).unwrap();
 34 |         Self {
 35 |             block_space: Arc::new(Mutex::new(0)),
 36 |             total_aspce: blockdevice.chunk_num,
 37 |         }
 38 |     }
 39 | 
 40 |     fn allocator_space(&self, lenth: u64) -> u64 {
 41 |         // todo reduce allocatorc size.
 42 |         // todo exent space manager.
 43 |         let mut chunk_size = lenth / CHUNK;
 44 |         if lenth - chunk_size * CHUNK > 0 {
 45 |             chunk_size += 1;
 46 |         }
 47 |         let mut mutex = self.block_space.lock();
 48 |         let begin_allocator_pos = *mutex;
 49 |         *mutex += chunk_size;
 50 |         begin_allocator_pos
 51 |     }
 52 | }
 53 | 
 54 | // Block device info.
 55 | struct BlockDevice {
 56 |     chunk_num: u64,
 57 | }
 58 | 
 59 | impl BlockDevice {
 60 |     fn new(path: &str) -> Result<BlockDevice, i32> {
 61 |         let block_num = Self::get_block_info(path)?;
 62 |         let chunk_num = block_num / (CHUNK / SECTOR);
 63 |         Ok(BlockDevice { chunk_num })
 64 |     }
 65 | 
 66 |     fn get_block_info(path: &str) -> Result<u64, i32> {
 67 |         let fd = open(path, OFlag::O_DIRECT, nix::sys::stat::Mode::S_IRWXU).map_err(|_| {
 68 |             println!("open block device error");
 69 |             libc::EIO
 70 |         })?;
 71 |         if fd < 0 {
 72 |             return Err(libc::EEXIST);
 73 |         }
 74 |         let block_num = 0;
 75 |         unsafe {
 76 |             let result = ioctl(fd, BLOCKGETSIZE, &block_num);
 77 |             if result < 0 {
 78 |                 return Err(libc::EIO);
 79 |             }
 80 |         }
 81 |         Ok(block_num)
 82 |     }
 83 | }
 84 | 
 85 | #[cfg(feature = "block_test")]
 86 | #[cfg(test)]
 87 | mod tests {
 88 |     use std::process::Command;
 89 | 
 90 |     use super::{Allocator, BitmapAllocator, BlockDevice};
 91 | 
 92 |     #[test]
 93 |     fn block_info_test() {
 94 |         Command::new("bash")
 95 |             .arg("-c")
 96 |             .arg("dd if=/dev/zero of=node1 bs=4M count=1")
 97 |             .output()
 98 |             .unwrap();
 99 |         Command::new("bash")
100 |             .arg("-c")
101 |             .arg("losetup /dev/loop8 node1")
102 |             .output()
103 |             .unwrap();
104 |         let block_num = BlockDevice::get_block_info("/dev/loop8");
105 |         assert_eq!(8192, block_num.unwrap());
106 |         Command::new("bash")
107 |             .arg("-c")
108 |             .arg("losetup -d /dev/loop8")
109 |             .output()
110 |             .unwrap();
111 |         Command::new("bash")
112 |             .arg("-c")
113 |             .arg("rm node1")
114 |             .output()
115 |             .unwrap();
116 |     }
117 | 
118 |     #[test]
119 |     fn allocator_test() {
120 |         Command::new("bash")
121 |             .arg("-c")
122 |             .arg("dd if=/dev/zero of=node1 bs=4M count=1")
123 |             .output()
124 |             .unwrap();
125 |         Command::new("bash")
126 |             .arg("-c")
127 |             .arg("losetup /dev/loop8 node1")
128 |             .output()
129 |             .unwrap();
130 |         let allocator = BitmapAllocator::new("/dev/loop8");
131 |         let length = allocator.allocator_space(512 * 8 * 8);
132 |         assert_eq!(length + 8, 8);
133 |         Command::new("bash")
134 |             .arg("-c")
135 |             .arg("losetup -d /dev/loop8")
136 |             .output()
137 |             .unwrap();
138 |         Command::new("bash")
139 |             .arg("-c")
140 |             .arg("rm node1")
141 |             .output()
142 |             .unwrap();
143 |     }
144 | }
145 | 


--------------------------------------------------------------------------------
/src/server/storage_engine/block_engine/index.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | use dashmap::DashMap;
 6 | 
 7 | pub(crate) struct FileIndex {
 8 |     index: DashMap<String, Vec<u64>>,
 9 | }
10 | 
11 | impl FileIndex {
12 |     pub(crate) fn new() -> Self {
13 |         let index = DashMap::new();
14 |         Self { index }
15 |     }
16 | 
17 |     pub(crate) fn search(&self, file_name: &str) -> Vec<u64> {
18 |         let value = self.index.get(file_name);
19 |         match value {
20 |             Some(entry) => entry.value().to_vec(),
21 |             None => Vec::new(),
22 |         }
23 |     }
24 | 
25 |     pub(crate) fn update_index(&self, path: &str, mut vec: Vec<u64>) {
26 |         let mut index_value_vec = self.search(path);
27 |         index_value_vec.append(vec.as_mut());
28 |         self.index.insert(path.to_string(), index_value_vec);
29 |     }
30 | }
31 | 
/// Plain record of three `u64` values.
///
/// NOTE(review): nothing in this file constructs or reads it (hence
/// `#[allow(unused)]`); presumably it is meant to describe a byte range
/// inside a chunk — confirm before relying on it.
#[derive(Clone, Copy)]
#[allow(unused)]
pub(crate) struct IndexEntry {
    chunk: u64,
    begin: u64,
    length: u64,
}
39 | 
#[cfg(test)]
mod tests {
    use super::FileIndex;

    // An unknown path yields nothing; after an update the value is found.
    #[test]
    fn search_and_update_index_test() {
        let index = FileIndex::new();
        assert!(index.search("test").is_empty());

        index.update_index("test", vec![1]);

        let mut found = index.search("test");
        assert_eq!(found.pop(), Some(1));
    }
}
56 | 


--------------------------------------------------------------------------------
/src/server/storage_engine/block_engine/io.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | use nix::{
 6 |     fcntl::{self, OFlag},
 7 |     sys::{
 8 |         stat::Mode,
 9 |         uio::{pread, pwrite},
10 |     },
11 | };
12 | 
13 | pub(crate) struct Storage {
14 |     _fd: i32,
15 | }
16 | 
17 | impl Storage {
18 |     pub(crate) fn new(path: &str) -> Storage {
19 |         let oflags = OFlag::O_RDWR;
20 |         let mode = Mode::S_IRUSR
21 |             | Mode::S_IWUSR
22 |             | Mode::S_IRGRP
23 |             | Mode::S_IWGRP
24 |             | Mode::S_IROTH
25 |             | Mode::S_IWOTH;
26 |         let fd = fcntl::open(path, oflags, mode);
27 |         match fd {
28 |             Ok(fd) => Self { _fd: fd },
29 |             Err(_) => panic!("No Raw blockdevice"),
30 |         }
31 |     }
32 | 
33 |     pub(crate) fn _write(&self, data: &[u8], offset: i64) -> Result<usize, i32> {
34 |         match pwrite(self._fd, data, offset) {
35 |             Ok(size) => Ok(size),
36 |             Err(_) => Err(libc::EIO),
37 |         }
38 |     }
39 | 
40 |     pub(crate) fn _read(&self, size: u32, offset: i64) -> Result<Vec<u8>, i32> {
41 |         let mut data = vec![0; size as usize];
42 |         let length = pread(self._fd, data.as_mut_slice(), offset).map_err(|_| libc::EIO)?;
43 |         Ok(data[..length].to_vec())
44 |     }
45 | }
46 | 
#[cfg(feature = "block_test")]
#[cfg(test)]
mod tests {
    use std::process::Command;

    // `Storage` lives in this module; the previous import named a
    // `block_device` module that does not exist.
    use super::Storage;

    /// Runs `script` through `bash -c`; panics only if bash cannot be started.
    fn bash(script: &str) {
        Command::new("bash")
            .arg("-c")
            .arg(script)
            .output()
            .unwrap();
    }

    // Round-trips ten bytes through a loop device backed by a 4 MiB file.
    #[test]
    fn write_and_read_test() {
        bash("dd if=/dev/zero of=node1 bs=4M count=1");
        bash("losetup /dev/loop8 node1");
        let storage = Storage::new("/dev/loop8");
        // The methods are named `_write` / `_read` on `Storage`.
        let write_result = storage._write(&b"some bytes"[..], 0).unwrap();
        assert_eq!(write_result, 10);
        let read_result = storage._read(10, 0).unwrap();
        assert_eq!(read_result, &b"some bytes"[..]);
        bash("losetup -d /dev/loop8");
        bash("rm node1");
    }
}
82 | 


--------------------------------------------------------------------------------
/src/server/storage_engine/block_engine/mod.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2022 labring. All rights reserved.
  2 | //
  3 | // SPDX-License-Identifier: Apache-2.0
  4 | 
  5 | pub mod allocator;
/**
* The block device is used to bypass the filesystem in order to attain higher performance.
*/
  9 | pub mod index;
 10 | pub mod io;
 11 | 
 12 | use std::sync::Arc;
 13 | 
 14 | use crate::server::storage_engine::StorageEngine;
 15 | 
 16 | use allocator::{Allocator, BitmapAllocator, CHUNK};
 17 | use index::FileIndex;
 18 | use io::Storage;
 19 | 
 20 | use super::meta_engine::MetaEngine;
 21 | 
 22 | #[allow(unused)]
 23 | pub struct BlockEngine {
 24 |     allocator: BitmapAllocator,
 25 |     index: FileIndex,
 26 |     storage: Storage,
 27 | }
 28 | 
 29 | impl StorageEngine for BlockEngine {
 30 |     fn new(root: &str, _meta: Arc<MetaEngine>) -> Self {
 31 |         let index = FileIndex::new();
 32 |         let storage = Storage::new(root);
 33 |         let allocator = BitmapAllocator::new(root);
 34 |         Self {
 35 |             allocator,
 36 |             index,
 37 |             storage,
 38 |         }
 39 |     }
 40 | 
 41 |     fn init(&self) {}
 42 | 
 43 |     fn read_file(&self, path: &str, _size: u32, offset: i64) -> Result<Vec<u8>, i32> {
 44 |         let index_vec = self.index.search(path);
 45 |         let real_offset_index = offset as u64 / CHUNK;
 46 |         let real_offset = index_vec.get(real_offset_index as usize);
 47 |         match real_offset {
 48 |             Some(_real_offset) => todo!(), // self.storage.read(size, *real_offset as i64),
 49 |             None => todo!(),               // Err(libc::EIO),
 50 |         }
 51 |     }
 52 | 
 53 |     fn open_file(&self, _path: &str, _flag: i32, _mode: u32) -> Result<(), i32> {
 54 |         todo!()
 55 |     }
 56 | 
 57 |     fn write_file(&self, path: &str, data: &[u8], _offset: i64) -> Result<usize, i32> {
 58 |         let pos = self.allocator.allocator_space(data.len() as u64);
 59 |         let index_value_vec = self.index.search(path);
 60 |         let mut vec = Vec::new();
 61 |         let mut length = (data.len() as u64) / CHUNK;
 62 |         if data.len() as u64 - length * CHUNK > 0 {
 63 |             length += 1;
 64 |         }
 65 |         for n in 0..length {
 66 |             vec.push(pos + n * CHUNK);
 67 |         }
 68 |         self.index.update_index(path, vec);
 69 |         match index_value_vec.last() {
 70 |             Some(_last) => todo!(), // self.storage.write(data, (last + pos) as i64),
 71 |             None => todo!(),        // self.storage.write(data, pos as i64),
 72 |         }
 73 |     }
 74 | 
 75 |     fn create_file(
 76 |         &self,
 77 |         _path: &str,
 78 |         _oflag: i32,
 79 |         _umask: u32,
 80 |         _mode: u32,
 81 |     ) -> Result<Vec<u8>, i32> {
 82 |         todo!()
 83 |     }
 84 | 
 85 |     fn delete_file(&self, _path: &str) -> Result<(), i32> {
 86 |         todo!()
 87 |     }
 88 | 
 89 |     fn truncate_file(&self, _path: &str, _length: i64) -> Result<(), i32> {
 90 |         todo!()
 91 |     }
 92 | }
 93 | 
// Round-trip test against a loop device; only built with the `block_test`
// feature.
//
// NOTE(review): this test does not match the current `StorageEngine`
// signatures. `new` takes `(&str, Arc<MetaEngine>)`, and `write_file` /
// `read_file` take `&str` paths rather than `String`. Both operations also
// still end in `todo!()`, so the test cannot pass as written.
#[cfg(feature = "block_test")]
#[cfg(test)]
mod tests {
    use crate::server::storage_engine::StorageEngine;

    use super::BlockEngine;
    use std::process::Command;
    #[test]
    fn write_and_read_test() {
        // Create a 4 MiB backing file and attach it as /dev/loop8.
        Command::new("bash")
            .arg("-c")
            .arg("dd if=/dev/zero of=node1 bs=4M count=1")
            .output()
            .unwrap();
        Command::new("bash")
            .arg("-c")
            .arg("losetup /dev/loop8 node1")
            .output()
            .unwrap();
        let engine = BlockEngine::new("", "/dev/loop8");
        let write_size = engine
            .write_file("test".to_string(), &b"some bytes"[..], 0)
            .unwrap();
        assert_eq!(write_size, 10);
        let read = engine.read_file("test".to_string(), 10, 0).unwrap();
        assert_eq!(read, &b"some bytes"[..]);
        // Detach the loop device and remove the backing file.
        Command::new("bash")
            .arg("-c")
            .arg("losetup -d /dev/loop8")
            .output()
            .unwrap();
        Command::new("bash")
            .arg("-c")
            .arg("rm node1")
            .output()
            .unwrap();
    }
}
132 | 


--------------------------------------------------------------------------------
/src/server/storage_engine/mod.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | use std::sync::Arc;
 6 | 
 7 | use self::meta_engine::MetaEngine;
 8 | 
 9 | pub mod block_engine;
10 | pub mod file_engine;
11 | pub mod meta_engine;
12 | 
/// Interface every storage backend implements.
///
/// Failures are reported as `i32` codes (presumably errno values such as
/// `libc::EIO` — confirm against the implementations).
pub trait StorageEngine {
    /// Builds an engine rooted at `root`, with access to the shared metadata
    /// engine.
    fn new(root: &str, meta_engine: Arc<MetaEngine>) -> Self;

    /// Initialization hook; takes no arguments and returns nothing.
    fn init(&self);

    /// Reads from `path` given a `size` and an `offset`, returning the bytes
    /// read.
    fn read_file(&self, path: &str, size: u32, offset: i64) -> Result<Vec<u8>, i32>;

    /// Opens `path` with the given `flag` and `mode`.
    fn open_file(&self, path: &str, flag: i32, mode: u32) -> Result<(), i32>;

    /// Writes `data` to `path` given an `offset`, returning a byte count.
    fn write_file(&self, path: &str, data: &[u8], offset: i64) -> Result<usize, i32>;

    /// Creates `path`; returns a byte buffer on success.
    fn create_file(&self, path: &str, oflag: i32, umask: u32, mode: u32) -> Result<Vec<u8>, i32>;

    /// Deletes `path`.
    fn delete_file(&self, path: &str) -> Result<(), i32>;

    /// Truncates `path` given a `length`.
    fn truncate_file(&self, path: &str, length: i64) -> Result<(), i32>;
}
30 | 


--------------------------------------------------------------------------------
/src/server/transfer_manager.rs:
--------------------------------------------------------------------------------
 1 | // Copyright 2022 labring. All rights reserved.
 2 | //
 3 | // SPDX-License-Identifier: Apache-2.0
 4 | 
 5 | use std::collections::HashMap;
 6 | 
 7 | use dashmap::DashMap;
 8 | use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
 9 | 
10 | pub struct LockPool {
11 |     locks: HashMap<String, RwLock<()>>,
12 | }
13 | 
14 | pub struct TransferManager {
15 |     transferring_locks: *const LockPool,
16 |     transferring_status: DashMap<String, bool>,
17 | }
18 | 
19 | unsafe impl std::marker::Sync for TransferManager {}
20 | unsafe impl std::marker::Send for TransferManager {}
21 | 
22 | impl Default for TransferManager {
23 |     fn default() -> Self {
24 |         Self::new()
25 |     }
26 | }
27 | 
28 | impl TransferManager {
29 |     pub fn new() -> Self {
30 |         TransferManager {
31 |             transferring_locks: Box::into_raw(Box::new(LockPool {
32 |                 locks: HashMap::new(),
33 |             })),
34 |             transferring_status: DashMap::new(),
35 |         }
36 |     }
37 | 
38 |     pub fn get_lock(&self, path: &str) -> &RwLock<()> {
39 |         unsafe { (*self.transferring_locks).locks.get(path).unwrap() }
40 |     }
41 | 
42 |     pub fn make_up_files(&self, paths: &Vec<String>) {
43 |         self.transferring_status.clear();
44 |         let transferring_locks = unsafe { &mut *(self.transferring_locks as *mut LockPool) };
45 |         transferring_locks.locks.clear();
46 |         for path in paths {
47 |             transferring_locks
48 |                 .locks
49 |                 .insert(path.clone(), RwLock::new(()));
50 |             self.transferring_status.insert(path.clone(), false);
51 |         }
52 |     }
53 | 
54 |     pub async fn get_rlock(&self, path: &str) -> RwLockReadGuard<'_, ()> {
55 |         let lock = self.get_lock(path);
56 |         lock.read().await
57 |     }
58 | 
59 |     pub async fn get_wlock(&self, path: &str) -> RwLockWriteGuard<'_, ()> {
60 |         let lock = self.get_lock(path);
61 |         lock.write().await
62 |     }
63 | 
64 |     pub fn status(&self, path: &str) -> Option<bool> {
65 |         self.transferring_status.get(path).map(|status| *status)
66 |     }
67 | 
68 |     pub fn set_status(&self, path: &str, status: bool) {
69 |         self.transferring_status.insert(path.to_string(), status);
70 |     }
71 | }
72 | 


--------------------------------------------------------------------------------
/test_io500.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | function finish() {
  4 |     trap 'kill $(jobs -p)' EXIT
  5 |     exit $1
  6 | }
  7 | 
  8 | function green_font() {
  9 |     echo -e "\033[32m$1\033[0m\c"
 10 | }
 11 | 
 12 | function fuse_test() {
 13 |     ./target/debug/client --log-level info daemon&
 14 |     sleep 3
 15 |     ./target/debug/client --log-level info mount ~/fs test1
 16 | 
 17 |     start_time=$[$(date +%s%N)/1000000]
 18 |     cd io500
 19 |     timeout -s SIGKILL 200 mpirun -np 5 ./io500 config-minimal.ini
 20 |     result=$?
 21 |     cd ..
 22 |     end_time=$[$(date +%s%N)/1000000]
 23 |     result_time=$[ $end_time - $start_time ]
 24 |     echo -e "fuse tests finish, cost: $(green_font ${result_time}ms)"
 25 |     return $result
 26 | }
 27 | 
 28 | function intercept_test() {
 29 |     start_time=$[$(date +%s%N)/1000000]
 30 |     cd io500
 31 |     SEALFS_LOG_LEVEL=warn SEALFS_VOLUME_NAME=test1 SEALFS_MOUNT_POINT=~/fs LD_PRELOAD=../target/debug/libintercept.so timeout -s SIGKILL 200 mpirun -np 5 ./io500 config-minimal.ini
 32 |     result=$?
 33 |     cd ..
 34 |     end_time=$[$(date +%s%N)/1000000]
 35 |     result_time=$[ $end_time - $start_time ]
 36 |     echo -e "intercept tests finish, cost: $(green_font ${result_time}ms)"
 37 |     return $result
 38 | }
 39 | 
 40 | echo "start fuse_client_run"
 41 | 
 42 | # exit with 1 if no argument
 43 | if [ $# -eq 0 ]
 44 | then
 45 |     echo "no argument"
 46 |     exit 1
 47 | fi
 48 | 
 49 | set +e
 50 | 
 51 | sudo umount ~/fs
 52 | rm /tmp/sealfs.sock
 53 | rm /tmp/sealfs.index
 54 | mkdir -p ~/fs
 55 | 
 56 | sudo rm -rf io500
 57 | sudo rm -rf $1/database*
 58 | sudo rm -rf $1/storage*
 59 | 
 60 | set -e
 61 | 
 62 | SEALFS_CONFIG_PATH=./examples ./target/debug/manager --log-level info &
 63 | 
 64 | sleep 1
 65 | 
 66 | for ((i=0; i<5; i++))
 67 | do
 68 |     port=$[8085+$i]
 69 |     ./target/debug/server --server-address 127.0.0.1:${port} --database-path $1/database${i}/ --storage-path $1/storage${i}/ --log-level info &
 70 | done
 71 | 
 72 | sleep 3
 73 | 
 74 | 
 75 | SELF_HOSTED=1
 76 | 
 77 | if [ $SELF_HOSTED -eq 1 ]
 78 | then
 79 |     cp -r ~/io500/io500 .
 80 |     cd io500
 81 | else
 82 |     git clone https://github.com/IO500/io500.git
 83 |     cd io500
 84 |     ./prepare.sh
 85 | fi
 86 | 
 87 | echo "[global]" > config-minimal.ini
 88 | echo "datadir = $HOME/fs" >> config-minimal.ini
 89 | echo "" >> config-minimal.ini
 90 | echo "[debug]" >> config-minimal.ini
 91 | echo "stonewall-time = 2" >> config-minimal.ini
 92 | 
 93 | cd ..
 94 | 
 95 | set +e
 96 | 
 97 | ./target/debug/client --log-level info create-volume test1 100000
 98 | 
 99 | fuse_test
100 | fuse_result=$?
101 | echo "fuse result: $fuse_result"
102 | 
103 | intercept_test
104 | intercept_result=$?
105 | echo "intercept result: $intercept_result"
106 | result=$(($fuse_result||$intercept_result))
107 | 
108 | set -e
109 | finish $result


--------------------------------------------------------------------------------