├── .github └── workflows │ └── build.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── docs ├── api-specification.md ├── figs │ └── io-arch.png ├── members.json └── proposal.md ├── rust-toolchain ├── scripts ├── decode_parquet.py ├── generate_parquet.py └── generate_traces.py ├── storage-client ├── Cargo.toml ├── LICENSE ├── README.md └── src │ ├── bin │ └── driver.rs │ ├── client.rs │ └── lib.rs ├── storage-node ├── Cargo.toml ├── src │ ├── bin │ │ └── storage_node.rs │ ├── cache │ │ ├── data_store_cache │ │ │ ├── memdisk │ │ │ │ ├── data_store │ │ │ │ │ ├── disk.rs │ │ │ │ │ ├── memory.rs │ │ │ │ │ └── mod.rs │ │ │ │ └── mod.rs │ │ │ ├── mod.rs │ │ │ └── sqlite │ │ │ │ ├── blob.rs │ │ │ │ └── mod.rs │ │ ├── mod.rs │ │ └── replacer │ │ │ ├── lru.rs │ │ │ ├── lru_k.rs │ │ │ └── mod.rs │ ├── common │ │ ├── config.rs │ │ ├── hash.rs │ │ └── mod.rs │ ├── disk │ │ ├── disk_manager.rs │ │ ├── disk_manager_sync.rs │ │ ├── mod.rs │ │ └── stream.rs │ ├── error.rs │ ├── lib.rs │ ├── server.rs │ ├── storage_manager.rs │ └── storage_reader │ │ ├── mod.rs │ │ ├── s3.rs │ │ └── s3_diskmock.rs └── tests │ ├── parquet │ ├── small_random_data.parquet │ ├── userdata1.parquet │ └── userdata2.parquet │ └── text │ └── what-can-i-hold-you-with └── tests ├── Cargo.toml └── src ├── client_server_test.rs └── lib.rs /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | workflow_dispatch: 9 | 10 | env: 11 | CARGO_TERM_COLOR: always 12 | RUSTFLAGS: "-Dwarnings" 13 | 14 | jobs: 15 | build: 16 | 17 | runs-on: self-hosted 18 | continue-on-error: false 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Install Toolchain 23 | run: | 24 | rustup update stable 25 | rustup default stable 26 | rustup component add rustfmt 27 | rustup component add clippy 28 | - name: Install cargo-llvm-cov 29 | uses: taiki-e/install-action@cargo-llvm-cov 30 | - name: Install Sqlite 31 | run: | 32 | apt update 33 | apt install libsqlite3-dev 34 | - name: Format check 35 | run: cargo fmt --all -- --check 36 | - name: Run Clippy 37 | run: cargo clippy --all-targets --all-features 38 | - name: Compile check 39 | run: cargo check --all-targets --all-features 40 | - name: Run tests and Generate code coverage 41 | env: 42 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 43 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 44 | run: cargo llvm-cov --all-features --workspace --codecov --output-path lcov.info 45 | - name: Archive code coverage results 46 | uses: actions/upload-artifact@v4 47 | with: 48 | name: code-coverage-report 49 | path: lcov.info 50 | retention-days: 3 51 | - name: Upload to codecov 52 | uses: codecov/codecov-action@v3 53 | with: 54 | token: be8874e2-10d6-434f-9d52-db6094de31d6 55 | files: lcov.info 56 | name: codecov-umbrella # optional 57 | fail_ci_if_error: true 58 | verbose: true 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 
12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | .vscode/ 17 | 18 | **/.DS_Store 19 | 20 | data/ 21 | 22 | **/*.pem 23 | 24 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["storage-node", "storage-client", "tests"] 3 | 4 | resolver = "2" 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 CMU Database Group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 15721-s24-cache1 2 | 15-721 Spring 2024 - Cache #1 3 | -------------------------------------------------------------------------------- /docs/api-specification.md: -------------------------------------------------------------------------------- 1 | # I/O Service API Specification 2 | 3 | ## Overview 4 | 5 | > What commands will the API expose. 6 | 7 | The I/O service will provide the execution engine with a client library, to which they can issue requests for data. We allow the execution engine to query data on different granularities, including table, column, and tuple. We will provide both synchronous and asynchronous methods for the execution engine to get storage data. 8 | 9 | See [this PR](https://github.com/cmu-db/15721-s24-cache1/pull/2) for more details. 10 | 11 | ## Encoding 12 | 13 | > What encoding scheme will the API use for inputs / outputs 14 | 15 | The I/O service will encode the data as [Arrow's `RecordBatch` type](https://docs.rs/arrow/latest/arrow/record_batch/struct.RecordBatch.html) when we transfer the storage data to the execution engine. 16 | 17 | ## Error Handling 18 | 19 | > What errors can the service encounter and how will API handle them (e.g., status codes). 20 | 21 | On error, the I/O service will return `anyhow::Error` to the execution engine with a customized message, which simply denotes that the I/O service is not able to retrieve data from the underlying storage. The execution engine should forward the error to the upper layer. 
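To make the calling convention concrete, here is a minimal, non-normative sketch modeled on the `StorageClientImpl` and the `driver.rs` example elsewhere in this repository. The endpoints and the table id are placeholders, and the snippet assumes `tokio`, `anyhow`, `istziio-client`, and `parpulse-client` as dependencies:

```rust
use istziio_client::client_api::{DataRequest, StorageClient, StorageRequest};
use parpulse_client::client::StorageClientImpl;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Placeholder endpoints: storage server first, catalog server second.
    let client = StorageClientImpl::new("http://127.0.0.1:3030", "http://127.0.0.1:3031")?;
    // Ask for a whole table; the client maps the table id to an S3 bucket and keys.
    let mut receiver = client
        .request_data(StorageRequest::new(0, DataRequest::Table(0)))
        .await?;
    // Decoded Arrow `RecordBatch`es arrive asynchronously over a channel.
    while let Some(batch) = receiver.recv().await {
        println!("received a batch with {} columns", batch.num_columns());
    }
    // Any `anyhow::Error` from the client simply propagates upward via `?`.
    Ok(())
}
```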
22 | 23 | See the [discussion here](https://github.com/cmu-db/15721-s24-cache1/pull/2#issuecomment-1942780360) for more details. -------------------------------------------------------------------------------- /docs/figs/io-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/15721-s24-cache1/b4e2bc8f2c2fc3ab7a9b9fa3f8e864e25e9c8c40/docs/figs/io-arch.png -------------------------------------------------------------------------------- /docs/members.json: -------------------------------------------------------------------------------- 1 | { 2 | "info": { 3 | "title": "Parpulse: I/O Service for Modern OLAP Database System", 4 | "github": "https://github.com/cmu-db/15721-s24-cache1", 5 | "description": "The goal of this project is to develop an I/O service for an Online Analytical Processing (OLAP) database system. This service will facilitate communication between the execution engine and remote storage solutions such as Amazon S3. Additionally, a local cache will be incorporated to store recently accessed data on the local disk, thereby accelerating future data retrievals.\nThe I/O service is designed to manage requests from the execution engine and fetch pertinent data (e.g., Parquet files) from either the local cache or remote storage. It will process the data and return a stream of the decoded data to the execution engine.\nThe initial phase of this project aims to construct a fully functional I/O service following the specifications outlined above. Further enhancements, such as kernel bypass and integration of io_uring, may be considered in the future.", 6 | "students": [ 7 | { 8 | "name": "Yuanxin Cao", 9 | "url": "https://github.com/xx01cyx" 10 | }, 11 | { 12 | "name": "Kunle Li", 13 | "url": "https://github.com/unw9527" 14 | }, 15 | { 16 | "name": "Lan Lou", 17 | "url": "https://github.com/lanlou1554" 18 | } 19 | ] 20 | } 21 | } -------------------------------------------------------------------------------- /docs/proposal.md: -------------------------------------------------------------------------------- 1 | # I/O Service Project Proposal 2 | 3 | * Yuanxin Cao (yuanxinc) 4 | * Lan Lou (lanlou) 5 | * Kunle Li (kunlel) 6 | 7 | ## Overview 8 | 9 | > What is the goal of this project? What will this component achieve? 10 | 11 | The objective of this project is to develop an Input/Output (I/O) service for an Online Analytical Processing (OLAP) database system. This service will facilitate communication between the execution engine and remote storage solutions such as Amazon S3. Additionally, a local cache will be incorporated to store recently accessed data on the local disk, thereby accelerating future data retrievals. 12 | 13 | The I/O service is designed to manage requests from the execution engine, fetching pertinent data (e.g., Parquet files) from either the local cache or remote storage. It will process the data and return a stream of decoded information as a record batch to the execution engine. 14 | 15 | The initial phase aims to construct a fully functional I/O service following the specifications outlined above. Further enhancements, such as kernel bypass and integration of io_uring, may be considered based on project timeline and requirements. 16 | 17 | 18 | ## Architectural Design 19 | 20 | > Explain the input and output of the component, describe interactions and breakdown the smaller components if any. Include diagrams if appropriate. 
21 | 22 | The I/O service receives input in the form of requested columns (i.e. logical location) from the execution engine and produces an output stream (e.g. [`tokio::Stream`](https://docs.rs/tokio/latest/tokio/stream/index.html)) of Apache Arrow [`RecordBatch`](https://docs.rs/arrow-array/50.0.0/arrow_array/struct.RecordBatch.html). 23 | 24 | ![](./figs/io-arch.png) 25 | 26 | 27 | Our design comprises several key components: 28 | 29 | - Storage Client 30 | - Storage Node 31 | - Storage Manager 32 | - DataStore Cache 33 | - Replacer (LRU, LRU-K) 34 | - DataStore 35 | - MemDiskStore --> File system 36 | - SqliteStore --> SQLite 37 | - Storage Reader 38 | - S3 Reader (Read from S3) 39 | - Mock S3 Reader (Read from file system) 40 | 41 | The Storage Client resides in the compute node, where it establishes connections with the executors from the execution engine. The Storage Manager orchestrates requests from the compute node and then directs them to either the cache or the Storage Reader. The cache works by recording the access timestamp and making evictions of the cached elements, and we plan to use embedded databases such as RocksDB or Redis as our cache. For the cache policy, we plan to incorporate common policies such as LRU-K. The Storage Reader includes several APIs for reading from different storage systems such as Amazon S3 and the local file system. 42 | 43 | The workflow of the I/O service is as follows. Initially, the execution engine invokes the API exposed by the I/O service. The Storage Client will then contact the catalog to retrieve the corresponding physical location based on the logical columns (update: After discussing with the other I/O service team, we used a hashmap to mimic the behavior of catalog for sake of time). Next, the Storage Client transmits the requests via HTTP to the Storage Node. The Storage Manager then verifies whether the data is already present on the local disk by consulting the cache. 44 | 45 | We design two levels of cache. One sits in the memory for small files fast retrieval, and the other uses disk as storage for caching large files. The latter includes a mapping where the key represents the file's physical location in S3, and the value denotes the physical location on the local disk. If the data is found, it is directly returned to the Storage Client. Otherwise, the Storage Reader reads the data from the underlying storage and updates the cache. Finally, the Parquet file is decoded in the Storage Client, and the resulting record batch stream is returned to the execution engine. 46 | 47 | 48 | ## Design Rationale 49 | 50 | > Explain the goals of this design and how the design achieves these goals. Present alternatives considered and document why they are not chosen. 51 | 52 | The design goal of the I/O service is to provide the execution engine with a simple interface to interact with the storage while achieving high performance. The storage client resides in the compute node, which makes it possible to let the execution engine get storage data just by a function call, instead of sending a request over the network. This leaves request processing to the I/O service itself and thus makes the conveying of data or error more straightforward. Moreover, having a storage client residing on the compute node promises more possibilities, including providing a `write_data` interface for the execution engine to store its own persistent states (if there would be any) in the future. 
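Concretely, the traffic between the Storage Client and the Storage Node is a plain HTTP GET on a `/file` route, with the physical location resolved from the catalog carried in the query string. The shape below mirrors the storage-client implementation later in this repository; the host is a placeholder and the bucket/key values are only examples. Multiple keys are joined with commas in the `keys` parameter.

```
GET http://<storage-node-host>:3030/file?bucket=parpulse-test&keys=1m/random_data_1m_1.parquet
```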
53 | 54 | We use HTTP rather than TCP for the interaction between the storage client and the storage node because HTTP provides richer application-level semantics, such as transmitting data at the granularity of Parquet row groups, without our having to frame a raw TCP byte stream ourselves. We opt out of gRPC because the storage client already communicates with the catalog via HTTP, making it simpler to use HTTP for all communication and data transmission within the storage client. 55 | 56 | The storage node, on the other hand, is designed for high performance while remaining easy to extend. We adopt the LRU cache algorithm because it is one of the most widely used cache strategies in industry: it maintains a good hit rate in real-world scenarios while requiring only moderate computation. Besides LRU, we also plan to adopt more cache algorithms to compare the performance of different strategies. 57 | 58 | In addition to disk-based caching, we intend to incorporate SQLite as our primary cache storage solution. We choose SQLite because it works out of the box and is stable to use. 59 | 60 | The storage reader is designed to retrieve data from different storage services. Currently, we plan to support the local file system and Amazon S3, but we can easily add more storage services in the future via the abstraction of the storage reader. 61 | 62 | ## Testing Plan 63 | 64 | > How should the component be tested? 65 | 66 | 1. Correctness Tests 67 | 68 | 1. Unit Tests 69 | 70 | 1. Cache Algorithm Test: focusing on the correctness of cache algorithms, like LRU. 71 | 2. Storage Reader Test: focusing on the correctness of getting data from the underlying storage. 72 | 3. Storage Manager Test: focusing on the correctness of coordinating the cache and the storage reader. 73 | 4. Storage Client Test: focusing on the correctness of getting physical location information from the catalog and forwarding the request to the I/O server. 74 | 75 | (Note: all the above tests should also focus on error handling.) 76 | 77 | 2. Integration Tests 78 | 79 | The integration tests will use the public API of the I/O service. We will call the API of the storage client the way the execution engine does. We will test different request types (table, column, etc.), different storage types (file system, S3), and different request data volumes. We will focus on the availability of the data and the correctness of the contents. The I/O service should also report errors appropriately when an exception occurs. 80 | 81 | 2. Performance Tests (Benchmark) 82 | 83 | We write a Python script to generate random Parquet files of certain sizes for benchmarking. Since the data type of the Parquet files does not affect performance, we generate floating-point numbers for each file. 84 | 85 | The dataset we create is two sets of 10 Parquet files, with one set containing 1 MB files and the other containing 100 MB files. We adopt a Zipfian distribution for the access pattern. 86 | 87 | For benchmarking, we measure the elapsed time of each phase, from the moment the Storage Client receives the request to the moment it returns the data. The machine we use is an AWS EC2 `c5.xlarge` instance, with 4 vCPUs, 8 GB of memory, and 32 GB of disk. We set up one instance for the Storage Client and one for the Storage Server.
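For reference, the access pattern replayed in the benchmark is a small CSV trace produced by `scripts/generate_traces.py` (included later in this repository): each row holds a millisecond timestamp and the index of the file to fetch, with indices drawn from the Zipfian distribution. The values below are illustrative only:

```
timestamp,file_index
0,10
233,10
457,12
601,15
```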
88 | 89 | The way we trigger the benchmarking is through GitHub Actions, which enables benchmarking to be triggered automatically on certain PR or push without manual operations. 90 | 91 | ## Trade-offs and Potential Problems 92 | 93 | > Write down any conscious trade-off you made that can be problematic in the future, or any problems discovered during the design process that remain unaddressed (technical debts). 94 | 95 | The whole design is based on the fact that the database is a static one (i.e. no data manipulation) and we only have read requests on the storage. This assumption makes everything easier, since there will be few concurrency issues for a read-only database. However, if we are going to enable updates, then we should correctly handle the read-write and write-write conflicts, which requires a more complicated design than the current one. 96 | 97 | Moreover, even if all data is ETLed into the database system (this is our assumption), there can still be updates if the user replaces some of the underlying Parquet files. In this case, we might need another service to perform data discovery on the storage to deal with these situations. Also, we have to ensure the consistency of caches in different compute nodes (if we are going to build the cache) and ensure that the data we read is not stale. 98 | 99 | 100 | 102 | 103 | -------------------------------------------------------------------------------- /rust-toolchain: -------------------------------------------------------------------------------- 1 | stable 2 | -------------------------------------------------------------------------------- /scripts/decode_parquet.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is used to decode a parquet file and display the first n rows of the 3 | file, mainly for visualization purposes. 4 | 5 | Usage: 6 | python scripts/decode_parquet.py --n 7 | 8 | """ 9 | 10 | from IPython.display import display 11 | import pandas as pd 12 | import argparse 13 | 14 | 15 | def decode_parquet(file_path, n=5): 16 | df = pd.read_parquet(file_path) 17 | print('---------- Statistics ----------') 18 | print(df.describe(), '\n') 19 | print(f'---------- First {n} rows of the parquet file ----------') 20 | display(df.head(n), '\n') 21 | 22 | 23 | if __name__ == "__main__": 24 | parser = argparse.ArgumentParser(description='Decode parquet file') 25 | parser.add_argument('file_path', type=str, help='path to parquet file') 26 | parser.add_argument( 27 | '--n', 28 | type=int, 29 | default=5, 30 | help='number of rows to display', 31 | required=False) 32 | args = parser.parse_args() 33 | decode_parquet(args.file_path, args.n) 34 | -------------------------------------------------------------------------------- /scripts/generate_parquet.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script generates random parquet files given the number of rows, columns, 3 | and files. 
4 | 5 | Usage: 6 | python scripts/generate_parquet.py -d data -r 5500 -c 20 -n 5 7 | """ 8 | import pandas as pd 9 | import numpy as np 10 | import pyarrow as pa 11 | import pyarrow.parquet as pq 12 | import os 13 | import argparse 14 | from tqdm import tqdm 15 | 16 | 17 | def generate_random_parquet_files(output_dir, num_rows, num_cols, num_files): 18 | for i in tqdm(range(num_files)): 19 | data = pd.DataFrame(np.random.rand(num_rows, num_cols)) 20 | data.columns = [f'col {i + 1}' for i in range(num_cols)] 21 | table = pa.Table.from_pandas(data) 22 | file_name = os.path.join(output_dir, f"random_data_{i}.parquet") 23 | pq.write_table(table, file_name) 24 | 25 | 26 | if __name__ == "__main__": 27 | parser = argparse.ArgumentParser( 28 | description='Generate random parquet files') 29 | parser.add_argument( 30 | '-d', 31 | '--dir', 32 | type=str, 33 | help='output directory for parquet files') 34 | parser.add_argument( 35 | '-r', 36 | '--row', 37 | type=int, 38 | help='number of rows in each file', 39 | default=5500, # Roughly 1MB 40 | required=False) 41 | parser.add_argument( 42 | '-c', 43 | '--col', 44 | type=int, 45 | help='number of columns in each file', 46 | default=20, 47 | required=False) 48 | parser.add_argument( 49 | '-n', 50 | type=int, 51 | help='number of files to generate', 52 | default=5, 53 | required=False) 54 | args = parser.parse_args() 55 | 56 | generate_random_parquet_files(args.dir, args.row, args.col, args.n) 57 | -------------------------------------------------------------------------------- /scripts/generate_traces.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script generates access pattern for a set of files with a Zipfian distribution. 3 | 4 | Usage: 5 | python scripts/generate_traces.py --num_files 10 --skew_param 1.2 --num_accesses 100 -o 6 | """ 7 | 8 | import numpy as np 9 | import csv 10 | import argparse 11 | import random 12 | 13 | mp = {1: [1, 2, 3, 4, 5, 6, 7, 8, 9], 100: [ 14 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]} 15 | 16 | 17 | def generate_access_counts(num_files, s, num_accesses, file_size): 18 | access_counts = [] 19 | # Generate Zipfian distribution probabilities 20 | probabilities = 1 / np.arange(1, num_files + 1) ** s 21 | # Normalize 22 | probabilities /= np.sum(probabilities) 23 | 24 | # Simulate file accesses 25 | for _ in range(num_accesses): 26 | file_index = np.random.choice(mp[file_size], p=probabilities) 27 | access_counts.append(file_index) 28 | 29 | return access_counts 30 | 31 | 32 | def write_to_csv(access_counts, output_file): 33 | with open(output_file, "w") as f: 34 | writer = csv.writer(f) 35 | writer.writerow(["timestamp", "file_index"]) 36 | timestamp = 0 # timestamp is in milliseconds 37 | for file_index in access_counts: 38 | writer.writerow([timestamp, file_index]) 39 | timestamp += random.randint(1, 500) 40 | 41 | 42 | if __name__ == "__main__": 43 | parser = argparse.ArgumentParser( 44 | description="Generate access counts with a Zipfian distribution") 45 | parser.add_argument( 46 | "-s", 47 | "--skew_param", 48 | type=float, 49 | default=1.5, 50 | help="Skew parameter (default: 1.5)") 51 | parser.add_argument( 52 | "--num_accesses", 53 | type=int, 54 | default=20, 55 | help="Number of accesses (default: 20)") 56 | parser.add_argument( 57 | "-o", 58 | "--output_file", 59 | type=str, 60 | default="data/traces/trace_1m.csv", 61 | help="Output CSV file (default: data/trace.csv)") 62 | parser.add_argument( 63 | "--size", 64 | type=int, 65 | default=1, 66 | help="Size 
of the parquet file in MB" 67 | ) 68 | args = parser.parse_args() 69 | 70 | if args.size not in mp: 71 | raise ValueError("Size should be either 1 or 100") 72 | if args.size == 100: 73 | num_files = 10 74 | else: 75 | num_files = 9 76 | 77 | access_counts = generate_access_counts( 78 | num_files, args.skew_param, args.num_accesses, args.size) 79 | write_to_csv(access_counts, args.output_file) 80 | 81 | print("Access counts generated and written to", args.output_file) 82 | -------------------------------------------------------------------------------- /storage-client/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "parpulse-client" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = [ 6 | "Yuanxin Cao ", 7 | "Kunle <1041593558@qq.com>", 8 | "Lan Lou ", 9 | ] 10 | description = "Client application for Parpulse OLAP database I/O cache service" 11 | license-file = "LICENSE" 12 | homepage = "https://github.com/cmu-db/15721-s24-cache1" 13 | repository = "https://github.com/cmu-db/15721-s24-cache1" 14 | documentation = "https://github.com/cmu-db/15721-s24-cache1/blob/main/README.md" 15 | readme = "README.md" 16 | include = ["src/client.rs", "src/lib.rs"] 17 | 18 | [dependencies] 19 | anyhow = "1" 20 | hyper = "1" 21 | async-trait = "0.1" 22 | tokio = { version = "1", features = ["full", "rt-multi-thread"] } 23 | futures = "0.3" 24 | reqwest = { version = "0.12", features = ["stream"] } 25 | tempfile = "3.2" 26 | parquet = { version = "50.0.0", features = ["async"] } 27 | arrow = "50.0.0" 28 | log = "0.4" 29 | istziio-client = "0.1.9" 30 | lazy_static = "1.4" 31 | enum-as-inner = "0.6" 32 | serde = { version = "1", features = ["derive"] } 33 | env_logger = "0.11" 34 | 35 | [dev-dependencies] 36 | mockito = "1.4.0" 37 | -------------------------------------------------------------------------------- /storage-client/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 CMU Database Group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /storage-client/README.md: -------------------------------------------------------------------------------- 1 | # Parpulse Client 2 | 3 | This is the storage client implementation for 15721-s24-cache1. 
The `StorageClientImpl` should implement the `StorageClient` trait agreed between the two teams. 4 | -------------------------------------------------------------------------------- /storage-client/src/bin/driver.rs: -------------------------------------------------------------------------------- 1 | use arrow::array::Float64Array; 2 | use istziio_client::client_api::{DataRequest, StorageClient, StorageRequest}; 3 | use log::info; 4 | use parpulse_client::client::StorageClientImpl; 5 | use std::time::Instant; 6 | 7 | /// This test is for benchmarking. 8 | 9 | #[tokio::main] 10 | async fn main() { 11 | let _ = env_logger::builder() 12 | .filter_level(log::LevelFilter::Info) 13 | .is_test(true) 14 | .try_init(); 15 | 16 | let server_endpoint = 17 | std::env::var("SERVER_URL").unwrap_or(String::from("http://127.0.0.1:3030")); 18 | 19 | let storage_client = StorageClientImpl::new(&server_endpoint, "http://127.0.0.1:3031") 20 | .expect("Failed to create storage client."); 21 | let start_time = Instant::now(); 22 | // Requesting random_data_100m_0.parquet 23 | let request = StorageRequest::new(0, DataRequest::Table(10)); 24 | let mut receiver = storage_client 25 | .request_data(request) 26 | .await 27 | .expect("Failed to get data from the server."); 28 | let mut record_batches = vec![]; 29 | while let Some(record_batch) = receiver.recv().await { 30 | record_batches.push(record_batch); 31 | } 32 | info!("Time taken for 100m file: {:?}", start_time.elapsed()); 33 | 34 | assert!(!record_batches.is_empty()); 35 | 36 | let first_batch = &record_batches[0]; 37 | assert_eq!(first_batch.num_columns(), 20); 38 | 39 | // Check the first 5 columns of the first row. 40 | let real_first_row = [ 41 | 0.869278151694903, 42 | 0.5698583744743971, 43 | 0.5731127546817466, 44 | 0.9509491985107434, 45 | 0.3949108352357301, 46 | ]; 47 | for (i, &real_value) in real_first_row.iter().enumerate() { 48 | let column = first_batch 49 | .column(i) 50 | .as_any() 51 | .downcast_ref::() 52 | .unwrap(); 53 | assert_eq!(column.value(0), real_value); 54 | } 55 | info!("Succeed!") 56 | } 57 | -------------------------------------------------------------------------------- /storage-client/src/client.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{anyhow, Ok, Result}; 2 | use arrow::array::RecordBatch; 3 | use futures::stream::StreamExt; 4 | 5 | use crate::RequestParams; 6 | use hyper::Uri; 7 | use lazy_static::lazy_static; 8 | use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; 9 | use parquet::arrow::ProjectionMask; 10 | use reqwest::{Client, Response, Url}; 11 | use std::collections::HashMap; 12 | use std::fs::File; 13 | use std::io::Write; 14 | use tempfile::tempdir; 15 | 16 | use tokio::sync::mpsc::{channel, Receiver}; 17 | 18 | use istziio_client::client_api::{DataRequest, StorageClient, StorageRequest, TableId}; 19 | 20 | /// The batch size for the record batch. 21 | const BATCH_SIZE: usize = 1024; 22 | const CHANNEL_CAPACITY: usize = 32; 23 | const PARAM_BUCKET_KEY: &str = "bucket"; 24 | const PARAM_KEYS_KEY: &str = "keys"; 25 | 26 | lazy_static! 
{ 27 | static ref TABLE_FILE_MAP: HashMap = { 28 | let mut m = HashMap::new(); 29 | // For mock s3 30 | m.insert(0, "userdata1.parquet".to_string()); 31 | // All the remainings are for real s3 32 | for i in 1..=9 { 33 | m.insert(i, format!("1m/random_data_1m_{}.parquet", i)); 34 | } 35 | for i in 10..=19 { 36 | m.insert(i, format!("100m/random_data_100m_{}.parquet", i - 10)); 37 | } 38 | m 39 | }; 40 | } 41 | 42 | pub struct StorageClientImpl { 43 | storage_server_endpoint: Uri, 44 | _catalog_server_endpoint: Uri, 45 | } 46 | 47 | impl StorageClientImpl { 48 | pub fn new( 49 | storage_server_endpoint_str: &str, 50 | catalog_server_endpoint_str: &str, 51 | ) -> Result { 52 | let storage_server_endpoint = storage_server_endpoint_str.parse::().map_err(|_| { 53 | anyhow!( 54 | "cannot resolve storage server endpoint: {}", 55 | storage_server_endpoint_str 56 | ) 57 | })?; 58 | let catalog_server_endpoint = catalog_server_endpoint_str.parse::().map_err(|_| { 59 | anyhow!( 60 | "cannot resolve catalog server endpoint: {}", 61 | catalog_server_endpoint_str 62 | ) 63 | })?; 64 | Ok(Self { 65 | storage_server_endpoint, 66 | _catalog_server_endpoint: catalog_server_endpoint, 67 | }) 68 | } 69 | 70 | /// Returns the physical location of the requested data in RequestParams. 71 | async fn get_info_from_catalog(&self, request: StorageRequest) -> Result { 72 | let bucket = "parpulse-test".to_string(); 73 | let table_id = match request.data_request() { 74 | DataRequest::Table(id) => *id, 75 | _ => { 76 | return Err(anyhow!("Only table request is supported.")); 77 | } 78 | }; 79 | let keys = vec![TABLE_FILE_MAP.get(&table_id).unwrap().to_string()]; 80 | Ok(RequestParams::S3((bucket, keys))) 81 | } 82 | 83 | async fn get_data_from_response(response: Response) -> Result> { 84 | if response.status().is_success() { 85 | // Store the streamed Parquet file in a temporary file. 86 | // FIXME: 1. Do we really need streaming here? 87 | // 2. Do we need to store the file in a temporary file? 88 | let temp_dir = tempdir()?; 89 | let file_path = temp_dir.path().join("tmp.parquet"); 90 | let mut file = File::create(&file_path)?; 91 | let mut stream = response.bytes_stream(); 92 | while let Some(chunk) = stream.next().await { 93 | let chunk = chunk?; 94 | file.write_all(&chunk)?; 95 | } 96 | 97 | // Convert the Parquet file to a record batch. 98 | let file = File::open(file_path)?; 99 | let builder = 100 | ParquetRecordBatchReaderBuilder::try_new(file)?.with_batch_size(BATCH_SIZE); 101 | let mask = ProjectionMask::all(); 102 | let mut reader = builder.with_projection(mask).build()?; 103 | 104 | let (tx, rx) = channel(CHANNEL_CAPACITY); 105 | 106 | // Return the record batch as a stream. 107 | tokio::spawn(async move { 108 | while let Some(core::result::Result::Ok(rb)) = reader.next() { 109 | tx.send(rb).await.unwrap(); 110 | } 111 | }); 112 | Ok(rx) 113 | } else { 114 | Err(anyhow::anyhow!( 115 | "Failed to download file. 
Response: {:?}, Body: {}", 116 | response.status(), 117 | response 118 | .text() 119 | .await 120 | .unwrap_or_else(|_| String::from("Failed to read response body")) 121 | )) 122 | } 123 | } 124 | 125 | async fn get_info_from_catalog_test(&self, request: StorageRequest) -> Result { 126 | let bucket = "tests-parquet".to_string(); 127 | let table_id = match request.data_request() { 128 | DataRequest::Table(id) => id, 129 | _ => { 130 | return Err(anyhow!("Only table request is supported.")); 131 | } 132 | }; 133 | let keys = vec![TABLE_FILE_MAP.get(table_id).unwrap().to_string()]; 134 | Ok(RequestParams::MockS3((bucket, keys))) 135 | } 136 | 137 | fn get_request_url_and_params( 138 | &self, 139 | location: (String, Vec), 140 | ) -> Result<(String, Vec<(&str, String)>)> { 141 | let scheme = self 142 | .storage_server_endpoint 143 | .scheme() 144 | .ok_or_else(|| anyhow!("Failed to get the scheme of the storage server endpoint."))? 145 | .to_owned(); 146 | let authority = self 147 | .storage_server_endpoint 148 | .authority() 149 | .ok_or_else(|| anyhow!("Failed to get the authority of the storage server endpoint."))? 150 | .to_owned(); 151 | let path = "/file"; 152 | let url = Uri::builder() 153 | .scheme(scheme) 154 | .authority(authority) 155 | .path_and_query(path) 156 | .build() 157 | .unwrap(); 158 | let params = vec![ 159 | (PARAM_BUCKET_KEY, location.0), 160 | (PARAM_KEYS_KEY, location.1.join(",")), 161 | ]; 162 | Ok((url.to_string(), params)) 163 | } 164 | 165 | pub async fn request_data_test( 166 | &self, 167 | request: StorageRequest, 168 | ) -> Result> { 169 | // First we need to get the location of the parquet file from the catalog server. 170 | let location = match self.get_info_from_catalog_test(request).await? { 171 | RequestParams::MockS3(location) => location, 172 | _ => { 173 | return Err(anyhow!( 174 | "Failed to get location of the file from the catalog server." 175 | )); 176 | } 177 | }; 178 | 179 | // Then we need to send the request to the storage server. 180 | let client = Client::new(); 181 | let (url, mut params) = self.get_request_url_and_params(location)?; 182 | params.push(("is_test", "true".to_owned())); 183 | 184 | let url = Url::parse_with_params(&url, params)?; 185 | let response = client.get(url).send().await?; 186 | 187 | Self::get_data_from_response(response).await 188 | } 189 | } 190 | 191 | #[async_trait::async_trait] 192 | impl StorageClient for StorageClientImpl { 193 | async fn request_data(&self, request: StorageRequest) -> Result> { 194 | // First we need to get the location of the parquet file from the catalog server. 195 | let location = match self.get_info_from_catalog(request).await? { 196 | RequestParams::S3(location) => location, 197 | _ => { 198 | return Err(anyhow!( 199 | "Failed to get location of the file from the catalog server." 200 | )); 201 | } 202 | }; 203 | 204 | // Then we need to send the request to the storage server. 205 | let client = Client::new(); 206 | let (url, params) = self.get_request_url_and_params(location)?; 207 | let url = Url::parse_with_params(&url, params)?; 208 | let response = client.get(url).send().await?; 209 | Self::get_data_from_response(response).await 210 | } 211 | 212 | // TODO (kunle): I don't think this function is necessary. 
213 | async fn request_data_sync(&self, _request: StorageRequest) -> Result> { 214 | todo!() 215 | } 216 | } 217 | 218 | #[cfg(test)] 219 | mod tests { 220 | use super::*; 221 | use arrow::array::StringArray; 222 | use mockito::Server; 223 | 224 | /// WARNING: Put userdata1.parquet in the storage-node/tests/parquet directory before running this test. 225 | #[tokio::test] 226 | async fn test_storage_client_disk() { 227 | // Create a mock server to serve the parquet file. 228 | let mut server = Server::new_async().await; 229 | println!("server host: {}", server.host_with_port()); 230 | server 231 | .mock( 232 | "GET", 233 | "/file?bucket=tests-parquet&keys=userdata1.parquet&is_test=true", 234 | ) 235 | .with_body_from_file("../storage-node/tests/parquet/userdata1.parquet") 236 | .create_async() 237 | .await; 238 | 239 | let server_endpoint = server.url() + "/"; 240 | let storage_client = StorageClientImpl::new(&server_endpoint, "localhost:3031") 241 | .expect("Failed to create storage client."); 242 | // 0 is the table id for userdata1.parquet on local disk. 243 | let request = StorageRequest::new(0, DataRequest::Table(0)); 244 | let mut receiver = storage_client 245 | .request_data_test(request) 246 | .await 247 | .expect("Failed to get data from the server."); 248 | let mut record_batches = vec![]; 249 | while let Some(record_batch) = receiver.recv().await { 250 | record_batches.push(record_batch); 251 | } 252 | assert!(!record_batches.is_empty()); 253 | 254 | let first_batch = &record_batches[0]; 255 | assert_eq!(first_batch.num_columns(), 13); 256 | 257 | let real_first_names = StringArray::from(vec!["Amanda", "Albert", "Evelyn"]); 258 | let read_last_names = StringArray::from(vec!["Jordan", "Freeman", "Morgan"]); 259 | let first_names = first_batch 260 | .column(2) 261 | .as_any() 262 | .downcast_ref::() 263 | .unwrap(); 264 | let last_names = first_batch 265 | .column(3) 266 | .as_any() 267 | .downcast_ref::() 268 | .unwrap(); 269 | // Check the first three entries in the first and last name columns. 270 | for i in 0..3 { 271 | assert_eq!(first_names.value(i), real_first_names.value(i)); 272 | assert_eq!(last_names.value(i), read_last_names.value(i)); 273 | } 274 | } 275 | } 276 | -------------------------------------------------------------------------------- /storage-client/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod client; 2 | 3 | use enum_as_inner::EnumAsInner; 4 | use serde::Deserialize; 5 | 6 | #[derive(Clone, EnumAsInner, Debug)] 7 | pub enum RequestParams { 8 | /// S3 bucket and keys. 9 | S3((String, Vec)), 10 | /// Mock S3 bucket and keys. 11 | /// This is used for testing purposes. 12 | MockS3((String, Vec)), 13 | } 14 | 15 | #[derive(Deserialize)] 16 | pub struct S3Request { 17 | pub bucket: String, 18 | /// Cannot deserialize a vector of strings, might need to customize a deserializer later. 
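    /// The storage client joins multiple object keys with commas before sending the request,
    /// so this field holds something like `"a.parquet,b.parquet"` (illustrative key names).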
19 | pub keys: String, 20 | #[serde(default)] 21 | pub is_test: bool, 22 | } 23 | -------------------------------------------------------------------------------- /storage-node/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "storage-node" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | bytes = "1" 8 | hyper = "1" 9 | tokio = { version = "1", features = ["rt", "rt-multi-thread", "macros"] } 10 | hashlink = "0.8" 11 | enum-as-inner = "0.6" 12 | futures = { version = "0.3", features = ["alloc"] } 13 | thiserror = "1" 14 | aws-sdk-s3 = "1" 15 | aws-config = "1" 16 | aws-smithy-runtime-api = "1" 17 | async-trait = "0.1" 18 | parpulse-client = { path = "../storage-client" } 19 | warp = "0.3" 20 | tokio-util = "0.7" 21 | reqwest = "0.12" 22 | tempfile = "3.10.1" 23 | rand = "0.8" 24 | tokio-stream = "0.1" 25 | rusqlite = { version = "0.31", features = ["blob"] } 26 | log = "0.4" 27 | env_logger = "0.11" 28 | crc32fast = "1.4.0" 29 | clap = { version = "4.5", features = ["derive"] } 30 | serde = { version = "1", features = ["derive"] } 31 | 32 | [dev-dependencies] 33 | serial_test = "3.1" 34 | -------------------------------------------------------------------------------- /storage-node/src/bin/storage_node.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use log::info; 3 | use storage_node::{common::config::ParpulseConfig, server::storage_node_serve}; 4 | 5 | #[tokio::main] 6 | async fn main() { 7 | // Init log. 8 | if let Err(e) = env_logger::builder() 9 | .filter_level(log::LevelFilter::Info) 10 | .try_init() 11 | { 12 | println!("Failed to init logger: {:?}", e); 13 | } 14 | info!("starting storage node server..."); 15 | let config = ParpulseConfig::parse(); 16 | storage_node_serve("0.0.0.0", 3030, config).await.unwrap(); 17 | } 18 | -------------------------------------------------------------------------------- /storage-node/src/cache/data_store_cache/memdisk/data_store/disk.rs: -------------------------------------------------------------------------------- 1 | use std::{fs, sync::Arc}; 2 | 3 | use bytes::Bytes; 4 | use futures::StreamExt; 5 | use log::info; 6 | use tokio::sync::{mpsc::Receiver, Mutex}; 7 | 8 | use crate::{ 9 | cache::{ 10 | data_store_cache::memdisk::{MemDiskStoreReplacerKey, MemDiskStoreReplacerValue}, 11 | replacer::DataStoreReplacer, 12 | }, 13 | disk::disk_manager::DiskManager, 14 | error::ParpulseResult, 15 | storage_reader::StorageReaderStream, 16 | }; 17 | 18 | const DEFAULT_DISK_CHANNEL_BUFFER_SIZE: usize = 512; 19 | 20 | /// [`DiskStore`] stores the contents of remote objects on the local disk. 21 | pub struct DiskStore { 22 | disk_manager: DiskManager, 23 | /// The path to the directory where the data is stored on the disk. 
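    /// `DiskStore::new` appends a trailing `/` if the configured path lacks one, so cache file
    /// paths can be formed by simple concatenation (see `data_store_key` below).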
24 | base_path: String, 25 | max_disk_reader_buffer_size: usize, 26 | } 27 | 28 | impl Drop for DiskStore { 29 | fn drop(&mut self) { 30 | if fs::metadata(&self.base_path).is_ok() { 31 | fs::remove_dir_all(self.base_path.clone()).expect("remove cache files failed"); 32 | info!("cache files removed: {}", self.base_path); 33 | } 34 | } 35 | } 36 | 37 | impl DiskStore { 38 | pub fn new( 39 | disk_manager: DiskManager, 40 | base_path: String, 41 | max_disk_reader_buffer_size: usize, 42 | ) -> Self { 43 | let mut final_base_path = base_path; 44 | if !final_base_path.ends_with('/') { 45 | final_base_path += "/"; 46 | } 47 | Self { 48 | disk_manager, 49 | base_path: final_base_path, 50 | max_disk_reader_buffer_size, 51 | } 52 | } 53 | } 54 | 55 | impl DiskStore { 56 | /// Reads data from the disk store. The method returns a stream of data read from the disk 57 | /// store. 58 | pub async fn read_data( 59 | &self, 60 | key: &str, 61 | disk_replacer: Arc>, 62 | key_replacer: String, 63 | ) -> ParpulseResult>>> 64 | where 65 | R: DataStoreReplacer + 'static, 66 | { 67 | // TODO(lanlou): we later may consider the remaining space to decide the buffer size 68 | let mut buffer_size = self.disk_manager.file_size(key).await? as usize; 69 | if buffer_size > self.max_disk_reader_buffer_size { 70 | buffer_size = self.max_disk_reader_buffer_size; 71 | } 72 | // FIXME: Shall we consider the situation where the data is not found? 73 | let mut disk_stream = self.disk_manager.disk_read_stream(key, buffer_size).await?; 74 | let (tx, rx) = tokio::sync::mpsc::channel(DEFAULT_DISK_CHANNEL_BUFFER_SIZE); 75 | tokio::spawn(async move { 76 | loop { 77 | match disk_stream.next().await { 78 | Some(Ok(bytes_read)) => { 79 | tx.send(Ok(Bytes::from(disk_stream.buffer()[..bytes_read].to_vec()))) 80 | .await 81 | .unwrap(); 82 | } 83 | Some(Err(e)) => tx.send(Err(e)).await.unwrap(), 84 | None => { 85 | // TODO(lanlou): when second read, so there is no need to unpin, how to improve? 86 | disk_replacer.lock().await.unpin(&key_replacer); 87 | break; 88 | } 89 | } 90 | } 91 | }); 92 | Ok(Some(rx)) 93 | } 94 | 95 | /// Writes data to the disk store. The method accepts a stream of data to write to the disk 96 | /// store. 97 | /// TODO: We may need to push the response writer down to the disk store as well. 98 | pub async fn write_data( 99 | &self, 100 | key: String, 101 | bytes_vec: Option>, 102 | stream: Option, 103 | ) -> ParpulseResult { 104 | // NOTE(Yuanxin): Shall we spawn a task to write the data to disk? 105 | let bytes_written = self 106 | .disk_manager 107 | .write_bytes_and_stream_to_disk(bytes_vec, stream, &key) 108 | .await?; 109 | Ok(bytes_written) 110 | } 111 | 112 | /// Cleans the data from the disk store. 113 | pub async fn clean_data(&self, key: &str) -> ParpulseResult<()> { 114 | self.disk_manager.remove_file(key).await 115 | } 116 | 117 | /// Returns the key for the disk store. The key should be cached in the disk store cache. 
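    /// For example, with `base_path = "cache/"` and `remote_location = "bucket-data.parquet"`
    /// (illustrative values), the returned key is `"cache/bucket-data.parquet"`.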
118 | pub fn data_store_key(&self, remote_location: &str) -> String { 119 | format!("{}{}", self.base_path, remote_location) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /storage-node/src/cache/data_store_cache/memdisk/data_store/memory.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, sync::Arc}; 2 | 3 | use bytes::Bytes; 4 | use tokio::sync::{mpsc::Receiver, Mutex}; 5 | 6 | use crate::{ 7 | cache::{ 8 | data_store_cache::memdisk::{MemDiskStoreReplacerKey, MemDiskStoreReplacerValue}, 9 | replacer::DataStoreReplacer, 10 | }, 11 | error::ParpulseResult, 12 | }; 13 | 14 | const DEFAULT_MEM_CHANNEL_BUFFER_SIZE: usize = 1024; 15 | 16 | pub struct MemStore { 17 | /// data: remote_location -> (data, size) 18 | data: HashMap, usize)>, 19 | max_file_size: usize, 20 | } 21 | 22 | impl MemStore { 23 | pub fn new(max_file_size: usize) -> Self { 24 | Self { 25 | data: HashMap::new(), 26 | max_file_size, 27 | } 28 | } 29 | 30 | pub fn read_data( 31 | &self, 32 | key: &str, 33 | mem_replacer: Arc>, 34 | ) -> ParpulseResult>>> 35 | where 36 | R: DataStoreReplacer + 'static, 37 | { 38 | let key_value = self.data.get(key); 39 | if key_value.is_none() { 40 | return Ok(None); 41 | } 42 | let data_vec = key_value.unwrap().0.clone(); 43 | let (tx, rx) = tokio::sync::mpsc::channel(DEFAULT_MEM_CHANNEL_BUFFER_SIZE); 44 | let key_str = key.to_string().clone(); 45 | tokio::spawn(async move { 46 | for data in data_vec.iter() { 47 | tx.send(Ok(data.clone())).await.unwrap(); 48 | } 49 | // TODO(lanlou): when second read, so there is no need to unpin, how to improve? 50 | mem_replacer.lock().await.unpin(&key_str); 51 | }); 52 | Ok(Some(rx)) 53 | } 54 | 55 | /// Writes data to the memory store, also tracks the size. If the size for one key is too large, 56 | /// we will delete the data from the memory store and return all the data to the caller. 57 | /// If return value is None, it means successful write. Otherwise, it means unsuccessful write. 58 | /// TODO(lanlou): the key type should be &str maybe? 59 | pub fn write_data(&mut self, key: String, bytes: Bytes) -> Option<(Vec, usize)> { 60 | let (bytes_vec, size) = self.data.entry(key.clone()).or_insert((Vec::new(), 0)); 61 | *size += bytes.len(); 62 | bytes_vec.push(bytes); 63 | if *size > self.max_file_size { 64 | let size_copy = *size; 65 | let bytes_vec_copy = bytes_vec.clone(); 66 | self.data.remove(&key); 67 | Some((bytes_vec_copy, size_copy)) 68 | } else { 69 | None 70 | } 71 | } 72 | 73 | pub fn clean_data(&mut self, key: &str) -> Option<(Vec, usize)> { 74 | self.data.remove(key) 75 | } 76 | } 77 | 78 | #[cfg(test)] 79 | mod tests { 80 | use crate::cache::replacer::lru::LruReplacer; 81 | 82 | use super::*; 83 | 84 | #[test] 85 | fn test_large_write() { 86 | // max_file_size is 10 bytes per file. 
87 | let max_file_size = 10; 88 | let mut mem_store = MemStore::new(max_file_size); 89 | let key = "large_write_key".to_string(); 90 | 91 | let bytes1 = Bytes::from(vec![1, 2, 3, 4]); 92 | let bytes2 = Bytes::from(vec![5, 6, 7, 8]); 93 | let bytes3 = Bytes::from(vec![9, 10, 11, 12]); 94 | 95 | let bytes1_cp = bytes1.clone(); 96 | let bytes2_cp = bytes2.clone(); 97 | let bytes3_cp = bytes3.clone(); 98 | 99 | let res1 = mem_store.write_data(key.clone(), bytes1); 100 | assert!(res1.is_none()); 101 | let res2 = mem_store.write_data(key.clone(), bytes2); 102 | assert!(res2.is_none()); 103 | let res3 = mem_store.write_data(key.clone(), bytes3); 104 | assert!(res3.is_some()); 105 | assert_eq!(res3.as_ref().unwrap().0.len(), 3); 106 | assert_eq!(res3.as_ref().unwrap().1, 12); 107 | assert_eq!(res3.as_ref().unwrap().0[0], bytes1_cp); 108 | assert_eq!(res3.as_ref().unwrap().0[1], bytes2_cp); 109 | assert_eq!(res3.as_ref().unwrap().0[2], bytes3_cp); 110 | 111 | let dummy_replacer = Arc::new(Mutex::new(LruReplacer::new(0))); 112 | let read_res = mem_store.read_data(key.as_str(), dummy_replacer); 113 | assert!(read_res.is_ok()); 114 | assert!(read_res.unwrap().is_none()); 115 | } 116 | 117 | #[tokio::test] 118 | async fn test_write_read() { 119 | let max_file_size = 10; 120 | let mut mem_store = MemStore::new(max_file_size); 121 | let key = "write_read_key".to_string(); 122 | let bytes = Bytes::from(vec![1, 2, 3, 4]); 123 | let bytes_cp = bytes.clone(); 124 | let res = mem_store.write_data(key.clone(), bytes); 125 | assert!(res.is_none()); 126 | let dummy_replacer = Arc::new(Mutex::new(LruReplacer::new(0))); 127 | let read_res = mem_store.read_data(key.as_str(), dummy_replacer); 128 | assert!(read_res.is_ok()); 129 | let mut rx = read_res.unwrap().unwrap(); 130 | let mut bytes_vec = Vec::new(); 131 | let mut data_size: usize = 0; 132 | while let Some(data) = rx.recv().await { 133 | let data = data.unwrap(); 134 | data_size += data.len(); 135 | bytes_vec.push(data); 136 | } 137 | assert_eq!(bytes_vec.len(), 1); 138 | assert_eq!(bytes_vec[0], bytes_cp); 139 | assert_eq!(data_size, bytes_cp.len()); 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /storage-node/src/cache/data_store_cache/memdisk/data_store/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod disk; 2 | pub mod memory; 3 | -------------------------------------------------------------------------------- /storage-node/src/cache/data_store_cache/mod.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use bytes::Bytes; 3 | use parpulse_client::RequestParams; 4 | use tokio::sync::mpsc::Receiver; 5 | 6 | use crate::error::ParpulseResult; 7 | 8 | pub mod memdisk; 9 | pub mod sqlite; 10 | 11 | #[async_trait] 12 | pub trait DataStoreCache { 13 | async fn get_data_from_cache( 14 | &self, 15 | request_param: &RequestParams, 16 | ) -> ParpulseResult>>>; 17 | 18 | /// Put data to cache. Accepts a stream of bytes and returns the number of bytes written. 19 | /// The data_size parameter is optional and can be used to hint the cache about the size of the data. 20 | /// If the data_size is not provided, the cache implementation should try to determine the size of 21 | /// the data. 
22 | async fn put_data_to_cache(&self, request_param: &RequestParams) -> ParpulseResult; 23 | } 24 | 25 | pub fn cache_key_from_request(request_param: &RequestParams) -> String { 26 | match request_param { 27 | RequestParams::S3((bucket, keys)) => { 28 | format!("{}-{}", bucket, keys.join(",")) 29 | } 30 | RequestParams::MockS3((bucket, keys)) => { 31 | format!("{}-{}", bucket, keys.join(",")) 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /storage-node/src/cache/data_store_cache/sqlite/blob.rs: -------------------------------------------------------------------------------- 1 | use std::io::Read; 2 | 3 | use bytes::BytesMut; 4 | use rusqlite::{blob::Blob, Connection, DatabaseName}; 5 | 6 | use crate::error::ParpulseResult; 7 | 8 | use super::{SQLITE_CACHE_COLUMN_NAME, SQLITE_CACHE_TABLE_NAME}; 9 | 10 | pub type SqliteBlobKey = i64; 11 | 12 | pub struct SqliteBlob<'a> { 13 | blob: Blob<'a>, 14 | } 15 | 16 | impl<'a> SqliteBlob<'a> { 17 | pub fn new(blob: Blob<'a>) -> Self { 18 | Self { blob } 19 | } 20 | 21 | pub fn read(&mut self, buffer: &mut [u8]) -> ParpulseResult { 22 | self.blob.read(buffer).map_err(Into::into) 23 | } 24 | 25 | pub fn write_at(&mut self, data: &[u8], offset: usize) -> ParpulseResult<()> { 26 | self.blob.write_at(data, offset).map_err(Into::into) 27 | } 28 | } 29 | 30 | unsafe impl<'a> Send for SqliteBlob<'a> {} 31 | 32 | pub struct SqliteBlobReader<'a> { 33 | blob: SqliteBlob<'a>, 34 | buffer: BytesMut, 35 | } 36 | 37 | impl<'a> SqliteBlobReader<'a> { 38 | pub fn new( 39 | db: &'a Connection, 40 | blob_key: SqliteBlobKey, 41 | buffer_size: usize, 42 | ) -> ParpulseResult { 43 | let blob = db.blob_open( 44 | DatabaseName::Main, 45 | SQLITE_CACHE_TABLE_NAME, 46 | SQLITE_CACHE_COLUMN_NAME, 47 | blob_key, 48 | true, 49 | )?; 50 | Ok(Self { 51 | blob: SqliteBlob::new(blob), 52 | buffer: BytesMut::zeroed(buffer_size), 53 | }) 54 | } 55 | 56 | pub fn buffer(&self) -> &[u8] { 57 | &self.buffer 58 | } 59 | } 60 | 61 | impl Iterator for SqliteBlobReader<'_> { 62 | type Item = ParpulseResult; 63 | 64 | fn next(&mut self) -> Option { 65 | match self.blob.read(self.buffer.as_mut()) { 66 | Ok(bytes_read) => { 67 | if bytes_read > 0 { 68 | Some(Ok(bytes_read)) 69 | } else { 70 | None 71 | } 72 | } 73 | Err(e) => Some(Err(e)), 74 | } 75 | } 76 | } 77 | 78 | #[cfg(test)] 79 | mod tests { 80 | use super::*; 81 | 82 | use bytes::Bytes; 83 | use tempfile::tempdir; 84 | 85 | #[test] 86 | fn test_sqlite_blob_reader() { 87 | let poem = Bytes::from_static( 88 | b"What can I hold you with? 89 | I offer you lean streets, desperate sunsets, the 90 | moon of the jagged suburbs. 91 | I offer you the bitterness of a man who has looked 92 | long and long at the lonely moon. 93 | I offer you my ancestors, my dead men, the ghosts 94 | that living men have honoured in bronze. 95 | I offer you whatever insight my books may hold, 96 | whatever manliness or humour my life. 97 | I offer you the loyalty of a man who has never 98 | been loyal. 99 | I offer you that kernel of myself that I have saved, 100 | somehow-the central heart that deals not 101 | in words, traffics not with dreams, and is 102 | untouched by time, by joy, by adversities. 103 | I offer you the memory of a yellow rose seen at 104 | sunset, years before you were born. 105 | I offer you explanations of yourself, theories about 106 | yourself, authentic and surprising news of 107 | yourself. 
108 | I can give you my loneliness, my darkness, the 109 | hunger of my heart; I am trying to bribe you 110 | with uncertainty, with danger, with defeat. 111 | ", 112 | ); 113 | let temp_dir = tempdir().unwrap(); 114 | let db_path = temp_dir.path().join("tmp.db"); 115 | 116 | let db = Connection::open(&db_path).unwrap(); 117 | db.execute( 118 | &format!( 119 | "CREATE TABLE {} ({})", 120 | SQLITE_CACHE_TABLE_NAME, SQLITE_CACHE_COLUMN_NAME 121 | ), 122 | [], 123 | ) 124 | .unwrap(); 125 | db.execute( 126 | &format!( 127 | "INSERT INTO {} ({}) VALUES (ZEROBLOB({}))", 128 | SQLITE_CACHE_TABLE_NAME, 129 | SQLITE_CACHE_COLUMN_NAME, 130 | poem.len() 131 | ), 132 | [], 133 | ) 134 | .unwrap(); 135 | let blob_key = db.last_insert_rowid(); 136 | 137 | { 138 | let mut writer = db 139 | .blob_open( 140 | DatabaseName::Main, 141 | SQLITE_CACHE_TABLE_NAME, 142 | SQLITE_CACHE_COLUMN_NAME, 143 | blob_key, 144 | false, 145 | ) 146 | .unwrap(); 147 | writer.write_at(&poem, 0).unwrap(); 148 | } 149 | // FLush the result so that the blob is visible to another connnection. 150 | db.cache_flush().unwrap(); 151 | 152 | let db2 = Connection::open(&db_path).unwrap(); 153 | let buffer_size = 100; 154 | let mut reader = SqliteBlobReader::new(&db2, blob_key, buffer_size).unwrap(); 155 | 156 | let mut total_bytes_read = 0; 157 | let mut read_count = 0; 158 | let mut result = String::new(); 159 | while let Some(bytes_read) = reader.next() { 160 | let bytes_read = bytes_read.unwrap(); 161 | println!("bytes_read: {}", bytes_read); 162 | println!("buffer: {:?}", reader.buffer()[..bytes_read].to_vec()); 163 | result += &String::from_utf8(reader.buffer()[..bytes_read].to_vec()).unwrap(); 164 | total_bytes_read += bytes_read; 165 | read_count += 1; 166 | } 167 | 168 | assert_eq!(result, poem); 169 | assert_eq!(total_bytes_read, 930); 170 | assert_eq!(read_count, 10); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /storage-node/src/cache/data_store_cache/sqlite/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod blob; 2 | 3 | use std::fs; 4 | 5 | use async_trait::async_trait; 6 | use bytes::Bytes; 7 | use futures::StreamExt; 8 | use log::warn; 9 | use parpulse_client::RequestParams; 10 | use rusqlite::{Connection, DatabaseName, OpenFlags}; 11 | use tokio::sync::{ 12 | mpsc::{channel, Receiver}, 13 | Mutex, 14 | }; 15 | 16 | use crate::{ 17 | cache::replacer::{DataStoreReplacer, ReplacerValue}, 18 | error::ParpulseResult, 19 | storage_reader::{s3::S3Reader, s3_diskmock::MockS3Reader, AsyncStorageReader}, 20 | }; 21 | 22 | use self::blob::{SqliteBlob, SqliteBlobReader}; 23 | 24 | use super::{cache_key_from_request, DataStoreCache}; 25 | 26 | const SQLITE_CACHE_TABLE_NAME: &str = "parpulse_cache"; 27 | const SQLITE_CACHE_COLUMN_NAME: &str = "content"; 28 | const SQLITE_MAX_BLOB_SIZE: usize = 512 * 1024 * 1024; // 512 MB 29 | const SQLITE_BLOB_CHANNEL_CAPACITY: usize = 5; 30 | 31 | pub type SqliteStoreReplacerKey = String; 32 | pub struct SqliteStoreReplacerValue { 33 | pub(crate) row_id: i64, 34 | pub(crate) size: usize, 35 | } 36 | 37 | impl SqliteStoreReplacerValue { 38 | pub fn new(row_id: i64, size: usize) -> Self { 39 | Self { row_id, size } 40 | } 41 | } 42 | 43 | impl ReplacerValue for SqliteStoreReplacerValue { 44 | type Value = i64; 45 | 46 | fn into_value(self) -> Self::Value { 47 | self.row_id 48 | } 49 | 50 | fn as_value(&self) -> &Self::Value { 51 | &self.row_id 52 | } 53 | 54 | fn size(&self) -> usize { 55 | 
self.size 56 | } 57 | } 58 | 59 | pub struct SqliteStoreCache> 60 | { 61 | replacer: Mutex, 62 | sqlite_base_path: String, 63 | reader_buffer_size: usize, 64 | } 65 | 66 | impl> SqliteStoreCache { 67 | pub fn new( 68 | replacer: R, 69 | sqlite_base_path: String, 70 | reader_buffer_size: usize, 71 | ) -> ParpulseResult { 72 | let db = Connection::open(&sqlite_base_path)?; 73 | let create_table_stmt = format!( 74 | "CREATE TABLE IF NOT EXISTS {} ({} BLOB);", 75 | SQLITE_CACHE_TABLE_NAME, SQLITE_CACHE_COLUMN_NAME 76 | ); 77 | db.execute_batch(&create_table_stmt)?; 78 | 79 | Ok(Self { 80 | replacer: Mutex::new(replacer), 81 | sqlite_base_path, 82 | reader_buffer_size, 83 | }) 84 | } 85 | } 86 | 87 | impl> Drop 88 | for SqliteStoreCache 89 | { 90 | fn drop(&mut self) { 91 | if fs::metadata(&self.sqlite_base_path).is_ok() { 92 | fs::remove_file(self.sqlite_base_path.clone()).expect("remove sqlite db files failed"); 93 | } else { 94 | warn!("sqlite db file not found: {}", self.sqlite_base_path); 95 | } 96 | } 97 | } 98 | 99 | #[async_trait] 100 | impl> DataStoreCache 101 | for SqliteStoreCache 102 | { 103 | async fn get_data_from_cache( 104 | &self, 105 | request: &RequestParams, 106 | ) -> ParpulseResult>>> { 107 | let remote_location = cache_key_from_request(request); 108 | let mut replacer = self.replacer.lock().await; 109 | if let Some(replacer_value) = replacer.get(&remote_location) { 110 | let (tx, rx) = channel(SQLITE_BLOB_CHANNEL_CAPACITY); 111 | let row_id = *replacer_value.as_value(); 112 | let sqlite_base_path = self.sqlite_base_path.clone(); 113 | let buffer_size = self.reader_buffer_size; 114 | 115 | tokio::spawn(async move { 116 | let db = 117 | Connection::open_with_flags(sqlite_base_path, OpenFlags::SQLITE_OPEN_READ_ONLY) 118 | .unwrap(); 119 | let mut blob_reader = SqliteBlobReader::new(&db, row_id, buffer_size).unwrap(); 120 | while let Some(result) = blob_reader.next() { 121 | match result { 122 | Ok(bytes_read) => { 123 | let buffer = blob_reader.buffer(); 124 | let bytes = Bytes::copy_from_slice(&buffer[..bytes_read]); 125 | tx.send(Ok(bytes)).await.unwrap() 126 | } 127 | Err(err) => tx.send(Err(err)).await.unwrap(), 128 | } 129 | } 130 | }); 131 | Ok(Some(rx)) 132 | } else { 133 | Ok(None) 134 | } 135 | } 136 | 137 | async fn put_data_to_cache(&self, request: &RequestParams) -> ParpulseResult { 138 | let remote_location = cache_key_from_request(request); 139 | let (mut data_stream, blob_size) = { 140 | match request { 141 | RequestParams::S3((bucket, keys)) => { 142 | let reader = S3Reader::new(bucket.clone(), keys.clone().to_vec()).await; 143 | let data_size = reader.get_object_size().await; 144 | (reader.into_stream().await?, data_size) 145 | } 146 | RequestParams::MockS3((bucket, keys)) => { 147 | let reader = MockS3Reader::new(bucket.clone(), keys.clone().to_vec()).await; 148 | let data_size = reader.get_object_size().await; 149 | (reader.into_stream().await?, data_size) 150 | } 151 | } 152 | }; 153 | let blob_size = blob_size.unwrap_or(SQLITE_MAX_BLOB_SIZE); 154 | let mut replacer = self.replacer.lock().await; 155 | let sqlite_base_path = self.sqlite_base_path.clone(); 156 | let db = Connection::open(sqlite_base_path)?; 157 | let insert_blob_stmt = format!( 158 | "INSERT INTO {} ({}) VALUES (ZEROBLOB({}))", 159 | SQLITE_CACHE_TABLE_NAME, SQLITE_CACHE_COLUMN_NAME, blob_size 160 | ); 161 | db.execute(&insert_blob_stmt, [])?; 162 | let blob_key = db.last_insert_rowid(); 163 | let mut blob = SqliteBlob::new(db.blob_open( 164 | DatabaseName::Main, 165 | 
SQLITE_CACHE_TABLE_NAME, 166 | SQLITE_CACHE_COLUMN_NAME, 167 | blob_key, 168 | false, 169 | )?); 170 | 171 | let mut size = 0; 172 | while let Some(data) = data_stream.next().await { 173 | let data = data?; 174 | blob.write_at(&data, size)?; 175 | size += data.len(); 176 | } 177 | replacer.put( 178 | remote_location, 179 | SqliteStoreReplacerValue::new(blob_key, size), 180 | ); 181 | Ok(size) 182 | } 183 | } 184 | 185 | #[cfg(test)] 186 | mod tests { 187 | use std::path::Path; 188 | 189 | use crate::cache::replacer::lru::LruReplacer; 190 | 191 | use super::*; 192 | 193 | #[tokio::test] 194 | async fn test_sqlite_store_cache() { 195 | let tmp = tempfile::tempdir().unwrap(); 196 | let sqlite_base_path = tmp.path().to_owned().join(Path::new("sqlite_test.db")); 197 | let replacer = LruReplacer::new(1024); 198 | let buffer_size = 100; 199 | let cache = SqliteStoreCache::new( 200 | replacer, 201 | sqlite_base_path.to_str().unwrap().to_string(), 202 | buffer_size, 203 | ) 204 | .expect("create sqlite store cache failed"); 205 | 206 | let bucket = "tests-text".to_string(); 207 | let keys = vec!["what-can-i-hold-you-with".to_string()]; 208 | let request = RequestParams::MockS3((bucket, keys)); 209 | let bytes_written = cache 210 | .put_data_to_cache(&request) 211 | .await 212 | .expect("put data to cache failed"); 213 | assert_eq!(bytes_written, 930); 214 | 215 | let mut rx = cache 216 | .get_data_from_cache(&request) 217 | .await 218 | .expect("get data from cache failed") 219 | .expect("data not found in cache"); 220 | 221 | let mut result = String::new(); 222 | let mut total_bytes_read = 0; 223 | while let Some(bytes) = rx.recv().await { 224 | let bytes = bytes.expect("read data from cache failed"); 225 | total_bytes_read += bytes.len(); 226 | result += &String::from_utf8(bytes.to_vec()).expect("convert bytes to string failed"); 227 | } 228 | assert_eq!(total_bytes_read, 930); 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /storage-node/src/cache/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod data_store_cache; 2 | pub mod replacer; 3 | -------------------------------------------------------------------------------- /storage-node/src/cache/replacer/lru.rs: -------------------------------------------------------------------------------- 1 | use hashlink::linked_hash_map; 2 | use hashlink::LinkedHashMap; 3 | use log::{debug, warn}; 4 | 5 | use super::DataStoreReplacer; 6 | use super::ReplacerKey; 7 | use super::ReplacerValue; 8 | 9 | /// [`LruReplacer`] adopts the least-recently-used algorithm to cache sized 10 | /// objects. The replacer will start evicting if a new object comes that makes 11 | /// the replacer's size exceeds its max capacity, from the oldest to the newest. 
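///
/// A minimal usage sketch (hypothetical keys; the `(String, usize)` values mirror
/// the unit tests below, with `DataStoreReplacer` in scope and the `usize` being
/// the object size):
/// ```ignore
/// let mut replacer = LruReplacer::new(10);
/// replacer.put("key1".to_string(), ("value1".to_string(), 4));
/// assert!(replacer.get(&"key1".to_string()).is_some());
/// ```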
12 | pub struct LruReplacer { 13 | // usize is pin count 14 | cache_map: LinkedHashMap, 15 | max_capacity: usize, 16 | size: usize, 17 | } 18 | 19 | impl LruReplacer { 20 | pub fn new(max_capacity: usize) -> LruReplacer { 21 | LruReplacer { 22 | cache_map: LinkedHashMap::new(), 23 | max_capacity, 24 | size: 0, 25 | } 26 | } 27 | 28 | fn get_value(&mut self, key: &K) -> Option<&V> { 29 | match self.cache_map.raw_entry_mut().from_key(key) { 30 | linked_hash_map::RawEntryMut::Occupied(mut entry) => { 31 | entry.to_back(); 32 | Some(&entry.into_mut().0) 33 | } 34 | linked_hash_map::RawEntryMut::Vacant(_) => None, 35 | } 36 | } 37 | 38 | fn put_value(&mut self, key: K, value: V) -> Option> { 39 | if value.size() > self.max_capacity { 40 | // If the object size is greater than the max capacity, we do not insert the 41 | // object into the replacer. 42 | warn!("The size of the value is greater than the max capacity",); 43 | warn!( 44 | "Key: {:?}, Value: {:?}, Value size: {:?}, Max capacity: {:?}", 45 | key, 46 | value.as_value(), 47 | value.size(), 48 | self.max_capacity 49 | ); 50 | return None; 51 | } 52 | if let Some(cache_value) = self.cache_map.get(&key) { 53 | // If the key already exists, update the replacer size. 54 | self.size -= cache_value.0.size(); 55 | } 56 | let mut evicted_keys = Vec::new(); 57 | let mut iter = self.cache_map.iter(); 58 | let mut current_size = self.size; 59 | while (current_size + value.size()) > self.max_capacity { 60 | match iter.next() { 61 | Some((key, (value, pin_count))) => { 62 | if *pin_count > 0 { 63 | // If the key is pinned, we do not evict the key. 64 | continue; 65 | } 66 | evicted_keys.push(key.clone()); 67 | current_size -= value.size(); 68 | } 69 | None => { 70 | return None; 71 | } 72 | } 73 | } 74 | 75 | for key in &evicted_keys { 76 | if let Some(cache_value) = self.cache_map.remove(key) { 77 | debug!("-------- Evicting Key: {:?} --------", key); 78 | self.size -= cache_value.0.size(); 79 | } else { 80 | return None; 81 | } 82 | } 83 | 84 | self.size += value.size(); 85 | self.cache_map.insert(key.clone(), (value, 0)); 86 | Some(evicted_keys) 87 | } 88 | 89 | fn pin_key(&mut self, key: &K, count: usize) -> bool { 90 | match self.cache_map.get_mut(key) { 91 | Some((_, pin_count)) => { 92 | *pin_count += count; 93 | true 94 | } 95 | None => false, 96 | } 97 | } 98 | 99 | fn unpin_key(&mut self, key: &K) -> bool { 100 | match self.cache_map.get_mut(key) { 101 | Some((_, pin_count)) => { 102 | if *pin_count == 0 { 103 | return false; 104 | } 105 | *pin_count -= 1; 106 | true 107 | } 108 | None => false, 109 | } 110 | } 111 | 112 | fn peek_value(&self, key: &K) -> Option<&V> { 113 | match self.cache_map.get(key) { 114 | Some((value, _)) => Some(value), 115 | None => None, 116 | } 117 | } 118 | } 119 | 120 | impl DataStoreReplacer for LruReplacer { 121 | fn get(&mut self, key: &K) -> Option<&V> { 122 | self.get_value(key) 123 | } 124 | 125 | fn put(&mut self, key: K, value: V) -> Option> { 126 | self.put_value(key, value) 127 | } 128 | 129 | fn pin(&mut self, key: &K, count: usize) -> bool { 130 | self.pin_key(key, count) 131 | } 132 | 133 | fn unpin(&mut self, key: &K) -> bool { 134 | self.unpin_key(key) 135 | } 136 | 137 | fn peek(&self, key: &K) -> Option<&V> { 138 | self.peek_value(key) 139 | } 140 | 141 | fn len(&self) -> usize { 142 | self.cache_map.len() 143 | } 144 | 145 | fn is_empty(&self) -> bool { 146 | self.cache_map.is_empty() 147 | } 148 | 149 | fn size(&self) -> usize { 150 | self.size 151 | } 152 | 153 | fn max_capacity(&self) -> 
usize { 154 | self.max_capacity 155 | } 156 | 157 | fn set_max_capacity(&mut self, capacity: usize) { 158 | self.max_capacity = capacity; 159 | } 160 | 161 | fn clear(&mut self) { 162 | self.cache_map.clear(); 163 | self.size = 0; 164 | } 165 | } 166 | 167 | #[cfg(test)] 168 | mod tests { 169 | use crate::cache::replacer::{ 170 | tests::{ParpulseTestReplacerKey, ParpulseTestReplacerValue}, 171 | DataStoreReplacer, 172 | }; 173 | 174 | use super::LruReplacer; 175 | 176 | #[test] 177 | fn test_new() { 178 | let replacer = LruReplacer::::new(10); 179 | assert_eq!(replacer.max_capacity(), 10); 180 | assert_eq!(replacer.size(), 0); 181 | } 182 | 183 | #[test] 184 | fn test_peek_and_set() { 185 | let mut replacer = 186 | LruReplacer::::new(10); 187 | replacer.put("key1".to_string(), ("value1".to_string(), 1)); 188 | replacer.put("key2".to_string(), ("value2".to_string(), 2)); 189 | replacer.put("key3".to_string(), ("value3".to_string(), 3)); 190 | replacer.put("key4".to_string(), ("value4".to_string(), 4)); 191 | replacer.set_max_capacity(14); 192 | replacer.put("key5".to_string(), ("value5".to_string(), 5)); 193 | assert_eq!(replacer.peek(&"key1".to_string()), None); 194 | assert_eq!( 195 | replacer.peek(&"key2".to_string()), 196 | Some(&("value2".to_string(), 2)) 197 | ); 198 | assert_eq!( 199 | replacer.peek(&"key3".to_string()), 200 | Some(&("value3".to_string(), 3)) 201 | ); 202 | assert_eq!( 203 | replacer.peek(&"key4".to_string()), 204 | Some(&("value4".to_string(), 4)) 205 | ); 206 | assert_eq!( 207 | replacer.peek(&"key5".to_string()), 208 | Some(&("value5".to_string(), 5)) 209 | ); 210 | } 211 | 212 | #[test] 213 | fn test_put_different_keys() { 214 | let mut replacer = 215 | LruReplacer::::new(10); 216 | replacer.put("key1".to_string(), ("value1".to_string(), 1)); 217 | assert_eq!(replacer.size(), 1); 218 | replacer.put("key2".to_string(), ("value2".to_string(), 2)); 219 | assert_eq!(replacer.size(), 3); 220 | replacer.put("key3".to_string(), ("value3".to_string(), 3)); 221 | assert_eq!(replacer.size(), 6); 222 | replacer.put("key4".to_string(), ("value4".to_string(), 4)); 223 | assert_eq!(replacer.size(), 10); 224 | replacer.put("key5".to_string(), ("value5".to_string(), 5)); 225 | assert_eq!(replacer.size(), 9); // Only key4 and key5 are in the replacer 226 | assert_eq!(replacer.len(), 2); 227 | assert!(!replacer.is_empty()); 228 | replacer.clear(); 229 | assert!(replacer.is_empty()); 230 | assert_eq!(replacer.size(), 0); 231 | assert_eq!(replacer.len(), 0); 232 | } 233 | 234 | #[test] 235 | fn test_put_same_key() { 236 | let mut replacer = 237 | LruReplacer::::new(10); 238 | replacer.put("key1".to_string(), ("value1".to_string(), 1)); 239 | replacer.put("key1".to_string(), ("value2".to_string(), 2)); 240 | replacer.put("key1".to_string(), ("value3".to_string(), 3)); 241 | assert_eq!(replacer.len(), 1); 242 | assert_eq!(replacer.size(), 3); 243 | replacer.put("key1".to_string(), ("value4".to_string(), 100)); // Should not be inserted 244 | assert_eq!( 245 | replacer.get(&"key1".to_string()), 246 | Some(&("value3".to_string(), 3)) 247 | ); 248 | assert_eq!(replacer.get(&("key2".to_string())), None); 249 | } 250 | 251 | #[test] 252 | fn test_evict_pinned_key() { 253 | let mut replacer = 254 | LruReplacer::::new(10); 255 | replacer.put("key1".to_string(), ("value1".to_string(), 9)); 256 | assert!(replacer.pin(&"key1".to_string(), 1)); 257 | assert!(replacer 258 | .put("key2".to_string(), ("value2".to_string(), 2)) 259 | .is_none()); 260 | assert_eq!(replacer.size(), 9); 261 | 
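// "key1" is pinned a second time below, so both pins must be released (two
// unpin() calls) before the entry becomes evictable again.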
assert!(replacer.pin(&"key1".to_string(), 1)); 262 | assert!(replacer.unpin(&"key1".to_string())); 263 | assert!(replacer 264 | .put("key2".to_string(), ("value2".to_string(), 2)) 265 | .is_none()); 266 | assert!(replacer.unpin(&"key1".to_string())); 267 | assert!(replacer 268 | .put("key2".to_string(), ("value2".to_string(), 2)) 269 | .is_some()); 270 | assert_eq!(replacer.size(), 2); 271 | assert!(replacer.pin(&"key2".to_string(), 1)); 272 | replacer.put("key3".to_string(), ("value3".to_string(), 8)); 273 | assert_eq!(replacer.size(), 10); 274 | replacer.put("key4".to_string(), ("value4".to_string(), 7)); 275 | assert_eq!(replacer.size(), 9); 276 | assert!(replacer.get(&"key2".to_string()).is_some()); 277 | assert!(replacer.get(&"key4".to_string()).is_some()); 278 | assert!(replacer.get(&"key3".to_string()).is_none()); 279 | } 280 | } 281 | -------------------------------------------------------------------------------- /storage-node/src/cache/replacer/lru_k.rs: -------------------------------------------------------------------------------- 1 | /// LRU-K replacer implementation. 2 | /// Credit: https://doi.org/10.1145/170036.170081 3 | use log::{debug, warn}; 4 | use std::collections::HashMap; 5 | use std::collections::VecDeque; 6 | 7 | use super::DataStoreReplacer; 8 | use super::ReplacerKey; 9 | use super::ReplacerValue; 10 | 11 | type Timestamp = i32; 12 | 13 | /// Represents a node in the LRU-K replacer. 14 | /// 15 | /// Each node contains a value of type `V` and a history of timestamps. 16 | /// The history is stored as a `VecDeque`, where the most recent 17 | /// timestamps are at the front of the deque. 18 | struct LruKNode { 19 | value: V, 20 | history: VecDeque, 21 | pin_count: usize, 22 | } 23 | 24 | /// Represents an LRU-K replacer. 25 | /// 26 | /// The LRU-K algorithm evicts a node whose backward k-distance is maximum of all 27 | /// nodes. Backward k-distance is computed as the difference in time between current 28 | /// timestamp and the timestamp of kth previous access. A node with fewer than k 29 | /// historical accesses is given +inf as its backward k-distance. When multiple nodes 30 | /// have +inf backward k-distance, the replacer evicts the node with the earliest 31 | /// overall timestamp (i.e., the frame whose least-recent recorded access is the 32 | /// overall least recent access, overall, out of all nodes). 
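///
/// A minimal sketch of the eviction order with `k = 2` (hypothetical keys; the
/// `(String, usize)` values mirror the unit tests below, with `DataStoreReplacer`
/// in scope):
/// ```ignore
/// let mut replacer = LruKReplacer::new(2, 2); // capacity 2, k = 2
/// replacer.put("a".to_string(), ("va".to_string(), 1)); // "a" history: [0]
/// replacer.put("b".to_string(), ("vb".to_string(), 1)); // "b" history: [1]
/// replacer.get(&"a".to_string());                       // "a" history: [0, 2]
/// // "b" has fewer than k accesses (+inf backward k-distance), so it is evicted
/// // when the next insert would exceed the capacity.
/// replacer.put("c".to_string(), ("vc".to_string(), 1));
/// assert!(replacer.peek(&"b".to_string()).is_none());
/// ```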
33 | pub struct LruKReplacer { 34 | cache_map: HashMap>, 35 | max_capacity: usize, 36 | size: usize, 37 | curr_timestamp: Timestamp, 38 | k: usize, // The k value for LRU-K 39 | } 40 | 41 | impl LruKReplacer { 42 | pub fn new(max_capacity: usize, k: usize) -> LruKReplacer { 43 | LruKReplacer { 44 | cache_map: HashMap::new(), 45 | max_capacity, 46 | size: 0, 47 | curr_timestamp: 0, 48 | k, 49 | } 50 | } 51 | 52 | fn evict(&mut self, new_key: &K) -> Option { 53 | let mut found = false; 54 | let mut max_k_dist = 0; 55 | let mut k_dist; 56 | let mut earliest_timestamp = 0; 57 | let mut key_to_evict: Option = None; 58 | for (key, node) in self.cache_map.iter() { 59 | if key == new_key { 60 | continue; 61 | } 62 | let history = &node.history; 63 | if let Some(kth_timestamp) = history.front() { 64 | k_dist = if history.len() < self.k { 65 | std::i32::MAX 66 | } else { 67 | self.curr_timestamp - kth_timestamp 68 | }; 69 | if ((k_dist > max_k_dist) 70 | || (k_dist == max_k_dist && kth_timestamp < &earliest_timestamp)) 71 | && node.pin_count == 0 72 | { 73 | found = true; 74 | max_k_dist = k_dist; 75 | earliest_timestamp = *kth_timestamp; 76 | key_to_evict = Some(key.clone()); 77 | } 78 | } 79 | } 80 | if found { 81 | if let Some(key) = key_to_evict { 82 | // TODO: Should have better logging 83 | debug!("-------- Evicting Key: {:?} --------", key); 84 | if let Some(node) = self.cache_map.remove(&key) { 85 | self.size -= node.value.size(); 86 | } 87 | return Some(key); 88 | } 89 | } 90 | None 91 | } 92 | 93 | fn record_access(&mut self, node: &mut LruKNode) { 94 | node.history.push_back(self.curr_timestamp); 95 | if node.history.len() > self.k { 96 | node.history.pop_front(); 97 | } 98 | self.curr_timestamp += 1; 99 | } 100 | 101 | fn get_value(&mut self, key: &K) -> Option<&V> { 102 | if let Some(mut node) = self.cache_map.remove(key) { 103 | self.record_access(&mut node); 104 | self.cache_map.insert(key.clone(), node); 105 | return self.cache_map.get(key).map(|node| &node.value); 106 | } 107 | None 108 | } 109 | 110 | fn put_value(&mut self, key: K, value: V) -> Option> { 111 | if value.size() > self.max_capacity { 112 | // If the object size is greater than the max capacity, we do not insert the 113 | // object into the replacer. 
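// Returning `None` here (and below, when eviction cannot free enough space)
// signals the caller that nothing was inserted.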
114 | warn!("The size of the value is greater than the max capacity",); 115 | warn!( 116 | "Key: {:?}, Value: {:?}, Value size: {:?}, Max capacity: {:?}", 117 | key, 118 | value.as_value(), 119 | value.size(), 120 | self.max_capacity 121 | ); 122 | return None; 123 | } 124 | let updated_size = value.size(); 125 | let mut new_history: VecDeque = VecDeque::new(); 126 | if let Some(mut node) = self.cache_map.remove(&key) { 127 | self.record_access(&mut node); 128 | self.size -= node.value.size(); 129 | new_history = node.history; 130 | } else { 131 | new_history.push_back(self.curr_timestamp); 132 | self.curr_timestamp += 1; 133 | } 134 | let mut evicted_keys = Vec::new(); 135 | while (self.size + updated_size) > self.max_capacity { 136 | let key_to_evict = self.evict(&key); 137 | // If key_to_evict is none, return none 138 | key_to_evict.as_ref()?; 139 | if let Some(evicted_key) = key_to_evict { 140 | evicted_keys.push(evicted_key); 141 | } 142 | } 143 | self.cache_map.insert( 144 | key.clone(), 145 | LruKNode { 146 | value, 147 | history: new_history, 148 | pin_count: 0, 149 | }, 150 | ); 151 | self.size += updated_size; 152 | Some(evicted_keys) 153 | } 154 | 155 | fn pin_value(&mut self, key: &K, count: usize) -> bool { 156 | match self.cache_map.get_mut(key) { 157 | Some(node) => { 158 | node.pin_count += count; 159 | true 160 | } 161 | None => false, 162 | } 163 | } 164 | 165 | fn unpin_value(&mut self, key: &K) -> bool { 166 | match self.cache_map.get_mut(key) { 167 | Some(node) => { 168 | if node.pin_count == 0 { 169 | return false; 170 | } 171 | node.pin_count -= 1; 172 | true 173 | } 174 | None => false, 175 | } 176 | } 177 | 178 | fn peek_value(&self, key: &K) -> Option<&V> { 179 | if let Some(node) = self.cache_map.get(key) { 180 | let cache_value = &node.value; 181 | Some(cache_value) 182 | } else { 183 | None 184 | } 185 | } 186 | 187 | #[allow(dead_code)] 188 | fn current_timestamp(&self) -> Timestamp { 189 | self.curr_timestamp 190 | } 191 | } 192 | 193 | impl DataStoreReplacer for LruKReplacer { 194 | fn get(&mut self, key: &K) -> Option<&V> { 195 | self.get_value(key) 196 | } 197 | 198 | fn put(&mut self, key: K, value: V) -> Option> { 199 | self.put_value(key, value) 200 | } 201 | 202 | fn pin(&mut self, key: &K, count: usize) -> bool { 203 | self.pin_value(key, count) 204 | } 205 | 206 | fn unpin(&mut self, key: &K) -> bool { 207 | self.unpin_value(key) 208 | } 209 | 210 | fn peek(&self, key: &K) -> Option<&V> { 211 | self.peek_value(key) 212 | } 213 | 214 | fn len(&self) -> usize { 215 | self.cache_map.len() 216 | } 217 | 218 | fn is_empty(&self) -> bool { 219 | self.cache_map.is_empty() 220 | } 221 | 222 | fn size(&self) -> usize { 223 | self.size 224 | } 225 | 226 | fn max_capacity(&self) -> usize { 227 | self.max_capacity 228 | } 229 | 230 | fn set_max_capacity(&mut self, capacity: usize) { 231 | self.max_capacity = capacity; 232 | } 233 | 234 | fn clear(&mut self) { 235 | self.cache_map.clear(); 236 | self.size = 0; 237 | } 238 | } 239 | 240 | #[cfg(test)] 241 | mod tests { 242 | use crate::cache::replacer::{ 243 | tests::{ParpulseTestReplacerKey, ParpulseTestReplacerValue}, 244 | DataStoreReplacer, 245 | }; 246 | 247 | use super::LruKReplacer; 248 | 249 | #[test] 250 | fn test_new() { 251 | let mut replacer = 252 | LruKReplacer::::new(10, 2); 253 | assert_eq!(replacer.max_capacity(), 10); 254 | assert_eq!(replacer.size(), 0); 255 | replacer.set_max_capacity(20); 256 | assert_eq!(replacer.max_capacity(), 20); 257 | } 258 | 259 | #[test] 260 | fn test_peek_and_set() { 
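// peek() should return the cached value without recording an access
// (unlike get(), which updates the LRU-K history).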
261 | let mut replacer = 262 | LruKReplacer::::new(10, 2); 263 | let key = "key1".to_string(); 264 | let value = "value1".to_string(); 265 | assert_eq!(replacer.peek(&key), None); 266 | assert!(replacer.put(key.clone(), (value.clone(), 1)).is_some()); 267 | assert_eq!(replacer.peek(&key), Some(&(value.clone(), 1))); 268 | assert_eq!(replacer.len(), 1); 269 | assert_eq!(replacer.size(), 1); 270 | assert!(!replacer.is_empty()); 271 | replacer.clear(); 272 | assert!(replacer.is_empty()); 273 | } 274 | 275 | #[test] 276 | fn test_evict() { 277 | let mut replacer = 278 | LruKReplacer::::new(13, 2); 279 | let key1 = "key1".to_string(); 280 | let key2 = "key2".to_string(); 281 | let key3 = "key3".to_string(); 282 | let key4 = "key4".to_string(); 283 | let key5 = "key5".to_string(); 284 | let value1 = "value1".to_string(); 285 | let value2 = "value2".to_string(); 286 | let value3 = "value3".to_string(); 287 | let value4 = "value4".to_string(); 288 | let value5 = "value5".to_string(); 289 | replacer.put(key1.clone(), (value1.clone(), 1)); 290 | replacer.put(key2.clone(), (value2.clone(), 2)); 291 | replacer.put(key3.clone(), (value3.clone(), 3)); 292 | replacer.put(key4.clone(), (value4.clone(), 4)); 293 | assert_eq!(replacer.current_timestamp(), 4); 294 | assert_eq!(replacer.get(&key3), Some(&(value3.clone(), 3))); 295 | assert_eq!(replacer.get(&key4), Some(&(value4.clone(), 4))); 296 | assert_eq!(replacer.get(&key1), Some(&(value1.clone(), 1))); 297 | assert_eq!(replacer.get(&key2), Some(&(value2.clone(), 2))); 298 | assert_eq!(replacer.current_timestamp(), 8); 299 | // Now the kth (i.e. 2nd) order from old to new is [1, 2, 3, 4] 300 | replacer.put(key5.clone(), (value5.clone(), 4)); 301 | assert_eq!(replacer.get(&key1), None); // key1 should be evicted 302 | 303 | assert_eq!(replacer.get(&key2), Some(&(value2.clone(), 2))); 304 | assert_eq!(replacer.get(&key4), Some(&(value4.clone(), 4))); 305 | assert_eq!(replacer.get(&key3), Some(&(value3.clone(), 3))); 306 | assert_eq!(replacer.get(&key5), Some(&(value5.clone(), 4))); 307 | // Now the kth (i.e. 
2nd) order from old to new is [3, 4, 2, 5] 308 | replacer.put(key1.clone(), (value1.clone(), 1)); 309 | assert_eq!(replacer.get(&key3), None); // key3 should be evicted 310 | assert_eq!(replacer.current_timestamp(), 14); // When get fails, the timestamp should not be updated 311 | } 312 | 313 | #[test] 314 | fn test_infinite() { 315 | let mut replacer = 316 | LruKReplacer::::new(6, 2); 317 | let key1 = "key1".to_string(); 318 | let key2 = "key2".to_string(); 319 | let key3 = "key3".to_string(); 320 | let key4 = "key4".to_string(); 321 | let value1 = "value1".to_string(); 322 | let value2 = "value2".to_string(); 323 | let value3 = "value3".to_string(); 324 | let value4 = "value4".to_string(); 325 | replacer.put(key1.clone(), (value1.clone(), 1)); 326 | replacer.put(key2.clone(), (value2.clone(), 2)); 327 | replacer.put(key3.clone(), (value3.clone(), 3)); 328 | replacer.put(key4.clone(), (value4.clone(), 4)); 329 | assert_eq!(replacer.current_timestamp(), 4); 330 | assert_eq!(replacer.get(&key1), None); // Key1 should be evicted as it has infinite k distance and the earliest overall timestamp, same for key2 and key3 331 | assert_eq!(replacer.get(&key2), None); 332 | assert_eq!(replacer.get(&key3), None); 333 | assert_eq!(replacer.size(), 4); // Only key4 should be in the replacer 334 | } 335 | 336 | #[test] 337 | fn test_put_same_key() { 338 | let mut replacer = 339 | LruKReplacer::::new(10, 2); 340 | replacer.put("key1".to_string(), ("value1".to_string(), 1)); 341 | replacer.put("key1".to_string(), ("value2".to_string(), 2)); 342 | replacer.put("key1".to_string(), ("value3".to_string(), 3)); 343 | replacer.put("key1".to_string(), ("value3".to_string(), 4)); 344 | assert_eq!(replacer.len(), 1); 345 | assert_eq!(replacer.size(), 4); 346 | replacer.put("key1".to_string(), ("value4".to_string(), 100)); // Should not be inserted 347 | assert_eq!( 348 | replacer.get(&"key1".to_string()), 349 | Some(&("value3".to_string(), 4)) 350 | ); 351 | assert_eq!(replacer.get(&("key2".to_string())), None); 352 | } 353 | 354 | #[test] 355 | fn test_evict_pinned_key() { 356 | let mut replacer = 357 | LruKReplacer::::new(10, 2); 358 | replacer.put("key1".to_string(), ("value1".to_string(), 9)); 359 | assert!(replacer.pin(&"key1".to_string(), 1)); 360 | assert!(replacer 361 | .put("key2".to_string(), ("value2".to_string(), 2)) 362 | .is_none()); 363 | assert_eq!(replacer.size(), 9); 364 | assert!(replacer.pin(&"key1".to_string(), 1)); 365 | assert!(replacer.unpin(&"key1".to_string())); 366 | assert!(replacer 367 | .put("key2".to_string(), ("value2".to_string(), 2)) 368 | .is_none()); 369 | assert!(replacer.unpin(&"key1".to_string())); 370 | assert!(replacer 371 | .put("key2".to_string(), ("value2".to_string(), 2)) 372 | .is_some()); 373 | assert_eq!(replacer.size(), 2); 374 | assert!(replacer.pin(&"key2".to_string(), 1)); 375 | replacer.put("key3".to_string(), ("value3".to_string(), 8)); 376 | assert_eq!(replacer.size(), 10); 377 | replacer.put("key4".to_string(), ("value4".to_string(), 7)); 378 | assert_eq!(replacer.size(), 9); 379 | assert!(replacer.get(&"key2".to_string()).is_some()); 380 | assert!(replacer.get(&"key4".to_string()).is_some()); 381 | assert!(replacer.get(&"key3".to_string()).is_none()); 382 | } 383 | } 384 | -------------------------------------------------------------------------------- /storage-node/src/cache/replacer/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod lru; 2 | pub mod lru_k; 3 | use std::fmt::Debug; 4 | use std::hash::Hash; 5 
| 6 | /// [`ReplacerKey`] is the key type for data store replacers using different 7 | /// policies in the system. 8 | pub trait ReplacerKey: Hash + Eq + Clone + Debug + Send + Sync {} 9 | impl ReplacerKey for T {} 10 | /// [`ReplacerValue`] is the value type for data store caches using different 11 | /// policies in the system. 12 | /// It might represent a logical object and we can get the actual size for this 13 | /// logical object by calling `size()`. 14 | pub trait ReplacerValue: Send + Sync { 15 | type Value: Debug; 16 | 17 | fn into_value(self) -> Self::Value; 18 | fn as_value(&self) -> &Self::Value; 19 | fn size(&self) -> usize; 20 | } 21 | 22 | /// [`DataStoreReplacer`] records objects' locations in the data store. For example, we cache 23 | /// the contents of s3's remote object `userdata.parquet` in the local disk. Then we may 24 | /// store the local file system path of `userdata.parquet` in `DataStoreCache`. By querying 25 | /// `DataStoreCache`, we can get the local file system path of `userdata.parquet` and read the 26 | /// contents from the local disk. 27 | /// 28 | /// There are different policies for the data store replacer, such as LRU, LRU-K, etc. See 29 | /// other files in this module for more details. 30 | pub trait DataStoreReplacer: Send + Sync { 31 | /// Gets a value from the replacer. Might has side effect on the replacer (e.g. 32 | /// modifying some bookkeeping fields in the replacer). 33 | fn get(&mut self, key: &K) -> Option<&V>; 34 | 35 | /// Puts a value into the replacer. 36 | /// Returns `None`: insertion failed. 37 | /// Returns `Some`: insertion successful with a list of keys that are evicted from the cache. 38 | fn put(&mut self, key: K, value: V) -> Option>; 39 | 40 | fn pin(&mut self, key: &K, count: usize) -> bool; 41 | 42 | fn unpin(&mut self, key: &K) -> bool; 43 | 44 | /// Returns a reference to the value in the replacer with no side effect on the 45 | /// replacer. 46 | fn peek(&self, key: &K) -> Option<&V>; 47 | 48 | /// Returns the number of the objects in the replacer. 49 | fn len(&self) -> usize; 50 | 51 | /// Returns the total size of the objects in the replacer. 
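/// This is the sum of `ReplacerValue::size()` over all entries (a byte count for
/// the replacers in this crate), as opposed to `len()`, which counts entries.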
52 | fn size(&self) -> usize; 53 | 54 | fn is_empty(&self) -> bool; 55 | 56 | fn max_capacity(&self) -> usize; 57 | 58 | fn set_max_capacity(&mut self, capacity: usize); 59 | 60 | fn clear(&mut self); 61 | } 62 | 63 | #[cfg(test)] 64 | mod tests { 65 | use super::ReplacerValue; 66 | 67 | pub type ParpulseTestReplacerKey = String; 68 | pub type ParpulseTestReplacerValue = (String, usize); 69 | 70 | impl ReplacerValue for ParpulseTestReplacerValue { 71 | type Value = String; 72 | 73 | fn into_value(self) -> Self::Value { 74 | self.0 75 | } 76 | 77 | fn as_value(&self) -> &Self::Value { 78 | &self.0 79 | } 80 | 81 | fn size(&self) -> usize { 82 | self.1 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /storage-node/src/common/config.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser; 2 | use serde::Serialize; 3 | 4 | #[derive(clap::ValueEnum, Clone, Default, Debug, Serialize)] 5 | pub enum ParpulseConfigDataStore { 6 | #[default] 7 | Memdisk, 8 | Disk, 9 | Sqlite, 10 | } 11 | 12 | #[derive(clap::ValueEnum, Clone, Default, Debug, Serialize)] 13 | pub enum ParpulseConfigCachePolicy { 14 | #[default] 15 | Lru, 16 | Lruk, 17 | } 18 | 19 | #[derive(Parser, Default)] 20 | pub struct ParpulseConfig { 21 | #[clap(long, default_value_t, value_enum)] 22 | pub cache_policy: ParpulseConfigCachePolicy, 23 | 24 | #[clap(long, default_value = None)] 25 | pub cache_lru_k: Option, 26 | 27 | #[clap(long, default_value_t, value_enum)] 28 | pub data_store: ParpulseConfigDataStore, 29 | 30 | #[clap( long, default_value = None)] 31 | pub data_store_cache_num: Option, 32 | 33 | #[clap(long, default_value = None)] 34 | pub mem_cache_size: Option, 35 | 36 | #[clap(long, default_value = None)] 37 | pub mem_cache_file_size: Option, 38 | 39 | #[clap(long, default_value = None)] 40 | pub disk_cache_size: Option, 41 | 42 | #[clap(long, default_value = None)] 43 | pub sqlite_cache_size: Option, 44 | 45 | #[clap(long, default_value = None)] 46 | pub cache_path: Option, 47 | 48 | #[clap(long, default_value = None)] 49 | pub max_disk_reader_buffer_size: Option, 50 | 51 | #[clap(long, default_value = None)] 52 | pub sqlite_blob_reader_buffer_size: Option, 53 | } 54 | -------------------------------------------------------------------------------- /storage-node/src/common/hash.rs: -------------------------------------------------------------------------------- 1 | use std::hash::Hasher; 2 | 3 | pub fn calculate_hash_default(data: &[u8]) -> usize { 4 | let mut hasher = std::collections::hash_map::DefaultHasher::new(); 5 | hasher.write(data); 6 | hasher.finish() as usize 7 | } 8 | 9 | pub fn calculate_hash_crc32fast(data: &[u8]) -> usize { 10 | let mut hasher = crc32fast::Hasher::new(); 11 | hasher.update(data); 12 | hasher.finalize() as usize 13 | } 14 | -------------------------------------------------------------------------------- /storage-node/src/common/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod config; 2 | pub mod hash; 3 | -------------------------------------------------------------------------------- /storage-node/src/disk/disk_manager.rs: -------------------------------------------------------------------------------- 1 | use bytes::Bytes; 2 | use futures::stream::StreamExt; 3 | use futures::{future::TryFutureExt, join}; 4 | 5 | use std::future::IntoFuture; 6 | use std::io::SeekFrom; 7 | 8 | use std::path::{Path, PathBuf}; 9 | use std::pin::Pin; 10 | 
11 | use tokio::fs::{self, File, OpenOptions}; 12 | use tokio::io::AsyncSeekExt; 13 | use tokio::io::{self, AsyncReadExt, AsyncWriteExt}; 14 | 15 | use crate::error::{ParpulseError, ParpulseResult}; 16 | use crate::storage_reader::StorageReaderStream; 17 | 18 | use super::stream::DiskReadStream; 19 | 20 | /// [`DiskManager`] is responsible for reading and writing data to disk. The default 21 | /// version is async. We keep this struct to add lock. 22 | /// 23 | /// TODO: Do we need to put disk_root_path into DiskManager? 24 | #[derive(Default)] 25 | pub struct DiskManager {} 26 | 27 | impl DiskManager { 28 | pub async fn open_or_create(&self, path: &str, append: bool) -> ParpulseResult { 29 | let path_buf: PathBuf = PathBuf::from(path); 30 | if let Some(parent) = path_buf.parent() { 31 | if !parent.exists() { 32 | fs::create_dir_all(parent).await?; 33 | } 34 | } 35 | let mut options = OpenOptions::new(); 36 | options.write(true); 37 | if !path_buf.exists() { 38 | options.create(true); 39 | } 40 | options.append(append); 41 | Ok(options.open(&path_buf).await?) 42 | } 43 | 44 | pub async fn write_disk_all(&self, path: &str, content: &[u8]) -> ParpulseResult<()> { 45 | let mut file = self.open_or_create(path, false).await?; 46 | file.write_all(content).await?; 47 | Ok(file.flush().await?) 48 | } 49 | 50 | pub async fn read_disk_all(&self, path: &str) -> ParpulseResult<(usize, Bytes)> { 51 | let mut file = File::open(path).await?; 52 | let mut buffer = Vec::with_capacity(file.metadata().await?.len() as usize); 53 | 54 | let bytes_read = file.read_to_end(&mut buffer).await?; 55 | Ok((bytes_read, Bytes::from(buffer))) 56 | } 57 | 58 | pub async fn read_disk( 59 | &self, 60 | path: &str, 61 | start_pos: u64, 62 | bytes_to_read: usize, 63 | ) -> ParpulseResult<(usize, Bytes)> { 64 | let mut file = File::open(path).await?; 65 | file.seek(SeekFrom::Start(start_pos)).await?; 66 | 67 | let mut buffer = vec![0; bytes_to_read]; 68 | let bytes_read = file.read(&mut buffer).await?; 69 | buffer.truncate(bytes_read); 70 | Ok((bytes_read, Bytes::from(buffer))) 71 | } 72 | 73 | // If needs to record statistics, use disk_read_stream, if not, please directly new DiskReadStream. 74 | pub async fn disk_read_stream( 75 | &self, 76 | path: &str, 77 | buffer_size: usize, 78 | ) -> ParpulseResult>> { 79 | let disk_read_stream = DiskReadStream::new(path, buffer_size).await?; 80 | Ok(Box::pin(disk_read_stream)) 81 | } 82 | 83 | /// This function will try to **first** write `bytes_vec` to disk if applicable, and write all the (remaining) 84 | /// data polled from the `stream` to disk. The function will return the total bytes written to disk. 85 | /// 86 | /// Note these in current implementation: 87 | /// 1. When writing evicted data from memory cache, bytes_vec should be Some and stream should be None. 88 | /// 2. When memory cache is disabled, bytes_vec should be None and stream should be Some. 89 | /// 3. When writing data which cannot be written to memory cache, both bytes_vec and stream should be Some. 90 | /// 91 | /// FIXME: disk_path should not exist, otherwise throw an error 92 | /// TODO(lanlou): we must handle write-write conflict correctly in the future. 93 | /// One way is using `write commit` to handle read-write conflict, then there is no w-w conflict. 94 | /// TODO(lanlou): We need to write data to disk & send data to network at the same time. 
95 | /// TODO(lanlou): S3 stream now returns 10^5 bytes one time, and do we need to group all the bytes for 96 | /// one file and write all of them to disk at once? 97 | pub async fn write_bytes_and_stream_to_disk( 98 | &self, 99 | bytes_vec: Option>, 100 | stream: Option, 101 | disk_path: &str, 102 | ) -> ParpulseResult { 103 | if Path::new(disk_path).exists() { 104 | return Err(io::Error::new( 105 | io::ErrorKind::AlreadyExists, 106 | "disk file to write already exists", 107 | ) 108 | .into()); 109 | } 110 | let mut file = self.open_or_create(disk_path, true).await?; 111 | let mut bytes_written = 0; 112 | 113 | if let Some(bytes_vec) = bytes_vec { 114 | for bytes in bytes_vec { 115 | file.write_all(&bytes).await?; 116 | bytes_written += bytes.len(); 117 | } 118 | } 119 | 120 | if let Some(mut stream) = stream { 121 | let bytes_cur = stream.next().await; 122 | if bytes_cur.is_none() { 123 | file.flush().await?; 124 | return Ok(bytes_written); 125 | } 126 | let mut bytes_cur = bytes_cur.unwrap()?; 127 | loop { 128 | let disk_write_fut = TryFutureExt::into_future(file.write_all(&bytes_cur)); 129 | let bytes_next_fut = stream.next().into_future(); 130 | match join!(disk_write_fut, bytes_next_fut) { 131 | (Ok(_), Some(Ok(bytes_next))) => { 132 | bytes_written += bytes_cur.len(); 133 | bytes_cur = bytes_next; 134 | } 135 | (Ok(_), None) => { 136 | bytes_written += bytes_cur.len(); 137 | break; 138 | } 139 | (Err(e), _) => return Err(ParpulseError::Disk(e)), 140 | (Ok(_), Some(Err(e))) => return Err(e), 141 | } 142 | } 143 | } 144 | // FIXME: do we need a flush here? 145 | file.flush().await?; 146 | Ok(bytes_written) 147 | } 148 | 149 | pub async fn file_size(&self, path: &str) -> ParpulseResult { 150 | let metadata = fs::metadata(path).await?; 151 | Ok(metadata.len()) 152 | } 153 | 154 | pub fn file_size_sync(&self, path: &str) -> ParpulseResult { 155 | let metadata = std::fs::metadata(path)?; 156 | Ok(metadata.len()) 157 | } 158 | 159 | pub async fn remove_file(&self, path: &str) -> ParpulseResult<()> { 160 | Ok(fs::remove_file(path).await?) 161 | } 162 | } 163 | 164 | #[cfg(test)] 165 | mod tests { 166 | use crate::disk::stream::RandomDiskReadStream; 167 | 168 | use super::*; 169 | #[tokio::test] 170 | async fn test_simple_write_read() { 171 | let disk_manager = DiskManager {}; 172 | let tmp = tempfile::tempdir().unwrap(); 173 | let dir = tmp.path().to_owned(); 174 | let path = &dir.join("test_disk_manager1.txt").display().to_string(); 175 | let content = "Hello, world!"; 176 | disk_manager 177 | .write_disk_all(path, content.as_bytes()) 178 | .await 179 | .expect("write_disk_all failed"); 180 | let mut file = disk_manager 181 | .open_or_create(path, true) 182 | .await 183 | .expect("open_or_create failed"); 184 | file.write_all(content.as_bytes()).await.unwrap(); 185 | // Without this code, this test will fail sometimes. 186 | // But even if we add this code, this test is not likely to fail in the sync version. 
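// (tokio's `File` performs the write on a background blocking task, so
// `flush().await` is what guarantees the bytes are in the file before we read
// it back.)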
187 | file.flush().await.unwrap(); 188 | 189 | let file_size = disk_manager 190 | .file_size(path) 191 | .await 192 | .expect("file_size failed"); 193 | assert_eq!(file_size, 2 * content.len() as u64); 194 | 195 | let (bytes_read, bytes) = disk_manager 196 | .read_disk_all(path) 197 | .await 198 | .expect("read_disk_all failed"); 199 | assert_eq!(bytes_read, 2 * content.len()); 200 | assert_eq!(bytes, Bytes::from(content.to_owned() + content)); 201 | 202 | let (bytes_read, bytes) = disk_manager 203 | .read_disk(path, content.len() as u64, content.len()) 204 | .await 205 | .expect("read_disk_all failed"); 206 | assert_eq!(bytes_read, content.len()); 207 | assert_eq!(bytes, Bytes::from(content)); 208 | } 209 | 210 | #[tokio::test] 211 | async fn test_iterator_read() { 212 | let disk_manager = DiskManager {}; 213 | let tmp = tempfile::tempdir().unwrap(); 214 | let dir = tmp.path().to_owned(); 215 | let path = &dir.join("test_disk_manager2.txt").display().to_string(); 216 | let content = "bhjoilkmnkbhaoijsdklmnjkbhiauosdjikbhjoilkmnkbhaoijsdklmnjkbhiauosdjik"; 217 | disk_manager 218 | .write_disk_all(path, content.as_bytes()) 219 | .await 220 | .expect("write_disk_all failed"); 221 | let mut stream = disk_manager 222 | .disk_read_stream(path, 2) 223 | .await 224 | .expect("disk_read_iterator failed"); 225 | let mut start_pos = 0; 226 | loop { 227 | if start_pos >= content.len() { 228 | break; 229 | } 230 | let bytes_read = stream 231 | .next() 232 | .await 233 | .expect("iterator early ended") 234 | .expect("iterator read failed"); 235 | let buffer = stream.buffer(); 236 | assert_eq!( 237 | &content.as_bytes()[start_pos..start_pos + bytes_read], 238 | &buffer[..bytes_read] 239 | ); 240 | start_pos += bytes_read; 241 | } 242 | assert_eq!(start_pos, content.len()); 243 | } 244 | 245 | #[tokio::test] 246 | async fn test_write_reader_to_disk() { 247 | let disk_manager = DiskManager {}; 248 | let tmp = tempfile::tempdir().unwrap(); 249 | let dir = tmp.path().to_owned(); 250 | let path = &dir.join("test_disk_manager3.txt").display().to_string(); 251 | let content = "bhjoilkmnkbhaoijsdklmnjkbhiauosdjikbhjoilkmnkbhaoijsdklmnjkbhiauosdjik"; 252 | disk_manager 253 | .write_disk_all(path, content.as_bytes()) 254 | .await 255 | .expect("write_disk_all failed"); 256 | let stream = RandomDiskReadStream::new(path, 2, 4).unwrap().boxed(); 257 | let output_path = &dir 258 | .join("test_disk_manager3_output.txt") 259 | .display() 260 | .to_string(); 261 | let bytes_written = disk_manager 262 | .write_bytes_and_stream_to_disk(None, Some(stream), output_path) 263 | .await 264 | .expect("write_reader_to_disk failed"); 265 | assert_eq!(bytes_written, content.len()); 266 | 267 | let (bytes_read, bytes) = disk_manager 268 | .read_disk_all(output_path) 269 | .await 270 | .expect("read_disk_all failed"); 271 | assert_eq!(bytes_read, content.len()); 272 | assert_eq!(bytes, Bytes::from(content)); 273 | let file_size = disk_manager 274 | .file_size(output_path) 275 | .await 276 | .expect("file_size failed"); 277 | assert_eq!(file_size, content.len() as u64); 278 | } 279 | 280 | #[tokio::test] 281 | async fn test_write_bytes_to_disk() { 282 | let disk_manager = DiskManager {}; 283 | let tmp = tempfile::tempdir().unwrap(); 284 | let dir = tmp.path().to_owned(); 285 | let path = &dir.join("test_disk_manager4.txt").display().to_string(); 286 | let content1 = "Hello, world!"; 287 | let content2 = "Bye, CMU!"; 288 | let bytes_written = disk_manager 289 | .write_bytes_and_stream_to_disk( 290 | Some(vec![Bytes::from(content1), 
Bytes::from(content2)]), 291 | None, 292 | path, 293 | ) 294 | .await 295 | .expect("write_bytes_to_disk failed"); 296 | assert_eq!(bytes_written, content1.len() + content2.len()); 297 | let (bytes_read, bytes) = disk_manager 298 | .read_disk_all(path) 299 | .await 300 | .expect("read_disk_all failed"); 301 | assert_eq!(bytes_read, content1.len() + content2.len()); 302 | assert_eq!(bytes, Bytes::from(content1.to_owned() + content2)); 303 | } 304 | 305 | #[tokio::test] 306 | async fn test_write_bytes_and_stream_to_disk() { 307 | let disk_manager = DiskManager {}; 308 | let tmp = tempfile::tempdir().unwrap(); 309 | let dir = tmp.path().to_owned(); 310 | let path = &dir.join("test_disk_manager5.txt").display().to_string(); 311 | let content = "bhjoilkmnkbhaoijsdklmnjkbhiauosdjikbhjoilkmnkbhaoijsdklmnjkbhiauosdjik"; 312 | disk_manager 313 | .write_disk_all(path, content.as_bytes()) 314 | .await 315 | .expect("write_disk_all failed"); 316 | let mut stream = RandomDiskReadStream::new(path, 2, 4).unwrap().boxed(); 317 | 318 | let mut bytes_vec: Vec = Vec::new(); 319 | for _ in 0..3 { 320 | let stream_data = stream.next().await.unwrap().unwrap(); 321 | bytes_vec.push(stream_data); 322 | } 323 | 324 | let output_path = &dir 325 | .join("test_disk_manager5_output.txt") 326 | .display() 327 | .to_string(); 328 | let bytes_written = disk_manager 329 | .write_bytes_and_stream_to_disk(Some(bytes_vec), Some(stream), output_path) 330 | .await 331 | .expect("write_reader_to_disk failed"); 332 | assert_eq!(bytes_written, content.len()); 333 | 334 | let (bytes_read, bytes) = disk_manager 335 | .read_disk_all(output_path) 336 | .await 337 | .expect("read_disk_all failed"); 338 | assert_eq!(bytes_read, content.len()); 339 | assert_eq!(bytes, Bytes::from(content)); 340 | let file_size = disk_manager 341 | .file_size(output_path) 342 | .await 343 | .expect("file_size failed"); 344 | assert_eq!(file_size, content.len() as u64); 345 | } 346 | 347 | #[tokio::test] 348 | async fn test_remove_file() { 349 | let disk_manager = DiskManager {}; 350 | let tmp = tempfile::tempdir().unwrap(); 351 | let dir = tmp.path().to_owned(); 352 | let path = &dir.join("test_disk_manager6.txt").display().to_string(); 353 | let content = "Hello, world!"; 354 | disk_manager 355 | .write_disk_all(path, content.as_bytes()) 356 | .await 357 | .expect("write_disk_all failed"); 358 | disk_manager 359 | .remove_file(path) 360 | .await 361 | .expect("remove_file failed"); 362 | assert!(!Path::new(path).exists()); 363 | } 364 | } 365 | -------------------------------------------------------------------------------- /storage-node/src/disk/disk_manager_sync.rs: -------------------------------------------------------------------------------- 1 | use bytes::{Bytes, BytesMut}; 2 | use std::fs::{self, File, OpenOptions}; 3 | use std::io::{self, Read, Seek, SeekFrom, Write}; 4 | use std::path::{Path, PathBuf}; 5 | 6 | use crate::error::ParpulseResult; 7 | use crate::storage_manager::ParpulseReaderIterator; 8 | 9 | /// [`DiskManagerSync`] contains the common logic to read from or write to a disk. 10 | /// 11 | /// TODO: Record statistics (maybe in statistics manager). 12 | #[derive(Default)] 13 | pub struct DiskManagerSync {} 14 | 15 | // TODO: Make each method accepting `&self` instead of `&mut self`. 
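//
// A minimal usage sketch (hypothetical path, error handling elided; assumes the
// caller returns a `ParpulseResult`):
//
//     let mut dm = DiskManagerSync::default();
//     dm.write_disk_all("/tmp/parpulse/example.bin", b"hello")?;
//     let (n, bytes) = dm.read_disk_all("/tmp/parpulse/example.bin")?;
//     assert_eq!(n, bytes.len());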
16 | impl DiskManagerSync { 17 | pub fn open_or_create(&self, path: &str, append: bool) -> ParpulseResult { 18 | let path_buf: PathBuf = PathBuf::from(path); 19 | if let Some(parent) = path_buf.parent() { 20 | if !parent.exists() { 21 | fs::create_dir_all(parent)?; 22 | } 23 | } 24 | let mut options = OpenOptions::new(); 25 | options.write(true); 26 | if !path_buf.exists() { 27 | options.create(true); 28 | } 29 | options.append(append); 30 | Ok(options.open(&path_buf)?) 31 | } 32 | 33 | // FIXME: `mut` allows future statistics computation 34 | pub fn write_disk_all(&mut self, path: &str, content: &[u8]) -> ParpulseResult<()> { 35 | // TODO: when path exists, we directly overwrite it, should we notify cache? 36 | let mut file = self.open_or_create(path, false)?; 37 | file.write_all(content)?; 38 | Ok(file.flush()?) 39 | } 40 | 41 | // FIXME: do we need to record statistics for read? 42 | pub fn read_disk_all(&self, path: &str) -> ParpulseResult<(usize, Bytes)> { 43 | let mut file = File::open(path)?; 44 | let mut buffer = Vec::with_capacity(file.metadata()?.len() as usize); 45 | let bytes_read = file.read_to_end(&mut buffer)?; 46 | Ok((bytes_read, Bytes::from(buffer))) 47 | } 48 | 49 | pub fn read_disk( 50 | &self, 51 | path: &str, 52 | start_pos: u64, 53 | bytes_to_read: usize, 54 | ) -> ParpulseResult<(usize, Bytes)> { 55 | let mut file = File::open(path)?; 56 | file.seek(SeekFrom::Start(start_pos))?; 57 | 58 | let mut buffer = vec![0; bytes_to_read]; 59 | let bytes_read = file.read(&mut buffer)?; 60 | buffer.truncate(bytes_read); 61 | Ok((bytes_read, Bytes::from(buffer))) 62 | } 63 | 64 | // If needs to record statistics, use disk_read_iterator, if not, please directly new DiskReadIterator 65 | pub fn disk_read_iterator( 66 | &self, 67 | path: &str, 68 | buffer_size: usize, 69 | ) -> ParpulseResult { 70 | DiskReadIterator::new(path, buffer_size) 71 | } 72 | 73 | // FIXME: disk_path should not exist, otherwise throw an error 74 | pub fn write_iterator_reader_to_disk( 75 | &mut self, 76 | mut iterator: T, 77 | disk_path: &str, 78 | ) -> ParpulseResult 79 | where 80 | T: ParpulseReaderIterator, 81 | { 82 | if Path::new(disk_path).exists() { 83 | return Err(io::Error::new( 84 | io::ErrorKind::AlreadyExists, 85 | "disk file to write already exists", 86 | ) 87 | .into()); 88 | } 89 | let mut file = self.open_or_create(disk_path, true)?; 90 | let mut bytes_written = 0; 91 | loop { 92 | match iterator.next() { 93 | Some(Ok(bytes_read)) => { 94 | let buffer = iterator.buffer(); 95 | file.write_all(&buffer[..bytes_read])?; 96 | bytes_written += bytes_read; 97 | } 98 | Some(Err(e)) => return Err(e), 99 | None => break, 100 | } 101 | } 102 | // FIXME: do we need to flush? 103 | file.flush()?; 104 | Ok(bytes_written) 105 | } 106 | 107 | pub fn file_size(&self, path: &str) -> ParpulseResult { 108 | let metadata = fs::metadata(path)?; 109 | Ok(metadata.len()) 110 | } 111 | 112 | pub fn remove_file(&mut self, path: &str) -> ParpulseResult<()> { 113 | Ok(fs::remove_file(path)?) 
114 | } 115 | } 116 | 117 | /// FIXME: iterator for sync, stream for async 118 | pub struct DiskReadIterator { 119 | f: File, 120 | pub buffer: BytesMut, 121 | } 122 | 123 | impl DiskReadIterator { 124 | pub fn new(file_path: &str, buffer_size: usize) -> ParpulseResult { 125 | let f = File::open(file_path)?; 126 | 127 | Ok(DiskReadIterator { 128 | f, 129 | buffer: BytesMut::zeroed(buffer_size), 130 | }) 131 | } 132 | } 133 | 134 | impl Iterator for DiskReadIterator { 135 | type Item = ParpulseResult; 136 | 137 | fn next(&mut self) -> Option { 138 | match self.f.read(self.buffer.as_mut()) { 139 | Ok(bytes_read) => { 140 | if bytes_read > 0 { 141 | Some(Ok(bytes_read)) 142 | } else { 143 | None 144 | } 145 | } 146 | Err(e) => Some(Err(e.into())), 147 | } 148 | } 149 | } 150 | 151 | impl ParpulseReaderIterator for DiskReadIterator { 152 | fn buffer(&self) -> &[u8] { 153 | &self.buffer 154 | } 155 | } 156 | 157 | #[cfg(test)] 158 | mod tests { 159 | use super::*; 160 | #[test] 161 | fn test_simple_write_read() { 162 | let mut disk_manager = DiskManagerSync {}; 163 | let tmp = tempfile::tempdir().unwrap(); 164 | let dir = tmp.path().to_owned(); 165 | let path = &dir 166 | .join("test_disk_manager_sync1.txt") 167 | .display() 168 | .to_string(); 169 | let content = "Hello, world!"; 170 | disk_manager 171 | .write_disk_all(path, content.as_bytes()) 172 | .expect("write_disk_all failed"); 173 | let mut file = disk_manager 174 | .open_or_create(path, true) 175 | .expect("open_or_create failed"); 176 | file.write_all(content.as_bytes()).unwrap(); 177 | file.flush().unwrap(); 178 | 179 | let file_size = disk_manager.file_size(path).expect("file_size failed"); 180 | assert_eq!(file_size, 2 * content.len() as u64); 181 | 182 | let (bytes_read, bytes) = disk_manager 183 | .read_disk_all(path) 184 | .expect("read_disk_all failed"); 185 | assert_eq!(bytes_read, 2 * content.len()); 186 | assert_eq!(bytes, Bytes::from(content.to_owned() + content)); 187 | 188 | let (bytes_read, bytes) = disk_manager 189 | .read_disk(path, content.len() as u64, content.len()) 190 | .expect("read_disk_all failed"); 191 | assert_eq!(bytes_read, content.len()); 192 | assert_eq!(bytes, Bytes::from(content)); 193 | } 194 | 195 | #[test] 196 | fn test_iterator_read() { 197 | let mut disk_manager = DiskManagerSync {}; 198 | let tmp = tempfile::tempdir().unwrap(); 199 | let dir = tmp.path().to_owned(); 200 | let path = &dir 201 | .join("test_disk_manager_sync2.txt") 202 | .display() 203 | .to_string(); 204 | let content = "bhjoilkmnkbhaoijsdklmnjkbhiauosdjikbhjoilkmnkbhaoijsdklmnjkbhiauosdjik"; 205 | disk_manager 206 | .write_disk_all(path, content.as_bytes()) 207 | .expect("write_disk_all failed"); 208 | let mut iterator = disk_manager 209 | .disk_read_iterator(path, 2) 210 | .expect("disk_read_iterator failed"); 211 | let mut start_pos = 0; 212 | loop { 213 | if start_pos >= content.len() { 214 | break; 215 | } 216 | let bytes_read = iterator 217 | .next() 218 | .expect("iterator early ended") 219 | .expect("iterator read failed"); 220 | let buffer = iterator.buffer(); 221 | assert_eq!( 222 | &content.as_bytes()[start_pos..start_pos + bytes_read], 223 | &buffer[..bytes_read] 224 | ); 225 | start_pos += bytes_read; 226 | } 227 | assert_eq!(start_pos, content.len()); 228 | } 229 | 230 | #[test] 231 | fn test_write_iterator_reader_to_disk() { 232 | let mut disk_manager = DiskManagerSync {}; 233 | let tmp = tempfile::tempdir().unwrap(); 234 | let dir = tmp.path().to_owned(); 235 | let path = &dir 236 | 
.join("test_disk_manager_sync3.txt") 237 | .display() 238 | .to_string(); 239 | let content = "bhjoilkmnkbhaoijsdklmnjkbhiauosdjikbhjoilkmnkbhaoijsdklmnjkbhiauosdjik"; 240 | disk_manager 241 | .write_disk_all(path, content.as_bytes()) 242 | .expect("write_disk_all failed"); 243 | let iterator = disk_manager 244 | .disk_read_iterator(path, 1) 245 | .expect("disk_read_iterator failed"); 246 | let output_path = &dir 247 | .join("test_disk_manager3_output.txt") 248 | .display() 249 | .to_string(); 250 | let bytes_written = disk_manager 251 | .write_iterator_reader_to_disk::(iterator, output_path) 252 | .expect("write_reader_to_disk failed"); 253 | assert_eq!(bytes_written, content.len()); 254 | 255 | let (bytes_read, bytes) = disk_manager 256 | .read_disk_all(output_path) 257 | .expect("read_disk_all failed"); 258 | assert_eq!(bytes_read, content.len()); 259 | assert_eq!(bytes, Bytes::from(content)); 260 | let file_size = disk_manager 261 | .file_size(output_path) 262 | .expect("file_size failed"); 263 | assert_eq!(file_size, content.len() as u64); 264 | } 265 | 266 | #[test] 267 | fn test_remove_file() { 268 | let mut disk_manager = DiskManagerSync {}; 269 | let tmp = tempfile::tempdir().unwrap(); 270 | let dir = tmp.path().to_owned(); 271 | let path = &dir 272 | .join("test_disk_manager_sync5.txt") 273 | .display() 274 | .to_string(); 275 | let content = "Hello, world!"; 276 | disk_manager 277 | .write_disk_all(path, content.as_bytes()) 278 | .expect("write_disk_all failed"); 279 | disk_manager.remove_file(path).expect("remove_file failed"); 280 | assert!(!Path::new(path).exists()); 281 | } 282 | } 283 | -------------------------------------------------------------------------------- /storage-node/src/disk/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod disk_manager; 2 | pub mod disk_manager_sync; 3 | pub mod stream; 4 | -------------------------------------------------------------------------------- /storage-node/src/disk/stream.rs: -------------------------------------------------------------------------------- 1 | use bytes::{Bytes, BytesMut}; 2 | use futures::{FutureExt, Stream}; 3 | 4 | use rand::Rng; 5 | use std::ops::DerefMut; 6 | use std::pin::Pin; 7 | use std::task::{Context, Poll}; 8 | use tokio::fs::File; 9 | use tokio::io::AsyncReadExt; 10 | 11 | use crate::error::{ParpulseError, ParpulseResult}; 12 | 13 | /// [`DiskReadStream`] reads data from disk as a stream. 14 | pub struct DiskReadStream { 15 | /// The file to read from. 16 | f: File, 17 | /// Contains the data read from the file. 18 | /// Note that the buffer may not be fully filled with data read from the file. 
19 | buffer: BytesMut, 20 | } 21 | 22 | impl DiskReadStream { 23 | pub fn new_sync(file_path: &str, buffer_size: usize) -> ParpulseResult { 24 | let f: std::fs::File = std::fs::File::open(file_path)?; 25 | 26 | Ok(DiskReadStream { 27 | f: File::from_std(f), 28 | buffer: BytesMut::zeroed(buffer_size), 29 | }) 30 | } 31 | 32 | pub async fn new(file_path: &str, buffer_size: usize) -> ParpulseResult { 33 | let f = File::open(file_path).await?; 34 | 35 | Ok(DiskReadStream { 36 | f, 37 | buffer: BytesMut::zeroed(buffer_size), 38 | }) 39 | } 40 | 41 | pub fn buffer(&self) -> &[u8] { 42 | &self.buffer 43 | } 44 | } 45 | 46 | impl Stream for DiskReadStream { 47 | type Item = ParpulseResult; 48 | 49 | fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 50 | let deref_self = self.deref_mut(); 51 | match deref_self 52 | .f 53 | .read(deref_self.buffer.as_mut()) 54 | .boxed() 55 | .poll_unpin(cx) 56 | { 57 | Poll::Ready(Ok(bytes_read)) => { 58 | if bytes_read > 0 { 59 | Poll::Ready(Some(Ok(bytes_read))) 60 | } else { 61 | Poll::Ready(None) 62 | } 63 | } 64 | Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e.into()))), 65 | Poll::Pending => Poll::Pending, 66 | } 67 | } 68 | } 69 | 70 | /// [`RandomDiskReadStream`] is used by `MockS3Reader` to simulate the read from S3. 71 | /// Since every time we pull data from S3, the number of bytes read is random, we 72 | /// need to simulate this behavior. 73 | /// 74 | /// NOTE: The byte range here is only a hint. Due to the implementation of tokio's 75 | /// `AsyncReadExt` trait, the actual number of bytes read may be less than `min_read_bytes`. 76 | /// It is acceptable here because we just use this `RandomDiskReadStream` to simulate 77 | /// the read from S3. 78 | /// 79 | /// `RandomDiskReadStream` should only be used for testing purposes. 80 | pub struct RandomDiskReadStream { 81 | f: File, 82 | min_read_bytes: usize, 83 | max_read_bytes: usize, 84 | buffer: BytesMut, 85 | } 86 | 87 | impl RandomDiskReadStream { 88 | pub fn new( 89 | file_path: &str, 90 | min_read_bytes: usize, 91 | max_read_bytes: usize, 92 | ) -> ParpulseResult { 93 | let f: std::fs::File = std::fs::File::open(file_path)?; 94 | if min_read_bytes >= max_read_bytes { 95 | return Err(ParpulseError::Internal( 96 | "`min_read_bytes` must be less than `max_read_bytes` in `RandomDiskReadStream`" 97 | .to_string(), 98 | )); 99 | } 100 | 101 | Ok(RandomDiskReadStream { 102 | f: File::from_std(f), 103 | min_read_bytes, 104 | max_read_bytes, 105 | buffer: BytesMut::new(), 106 | }) 107 | } 108 | } 109 | 110 | impl Stream for RandomDiskReadStream { 111 | type Item = ParpulseResult; 112 | 113 | fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 114 | let bytes_to_read = rand::thread_rng().gen_range(self.min_read_bytes..self.max_read_bytes); 115 | self.buffer.resize(bytes_to_read, 0); 116 | let deref_self = self.deref_mut(); 117 | 118 | let read_result = deref_self 119 | .f 120 | .read(deref_self.buffer.as_mut()) 121 | .boxed() 122 | .poll_unpin(cx); 123 | match read_result { 124 | Poll::Ready(Ok(bytes_read)) => { 125 | if bytes_read > 0 { 126 | // Though we have resized the buffer to `bytes_to_read` before, tokio's 127 | // implementation doesn't ensure that `bytes_to_read` bytes have been read 128 | // into the buffer. It's likely that fewer bytes have been read. So we 129 | // truncate the buffer to the actual number of bytes read here. 
130 | deref_self.buffer.truncate(bytes_read); 131 | Poll::Ready(Some(Ok(deref_self.buffer.clone().freeze()))) 132 | } else { 133 | Poll::Ready(None) 134 | } 135 | } 136 | Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e.into()))), 137 | Poll::Pending => Poll::Pending, 138 | } 139 | } 140 | } 141 | 142 | #[cfg(test)] 143 | mod tests { 144 | use super::*; 145 | use futures::stream::StreamExt; 146 | 147 | #[tokio::test] 148 | async fn test_disk_read_stream() { 149 | let poem = "What can I hold you with? 150 | I offer you lean streets, desperate sunsets, the 151 | moon of the jagged suburbs. 152 | I offer you the bitterness of a man who has looked 153 | long and long at the lonely moon. 154 | I offer you my ancestors, my dead men, the ghosts 155 | that living men have honoured in bronze. 156 | I offer you whatever insight my books may hold, 157 | whatever manliness or humour my life. 158 | I offer you the loyalty of a man who has never 159 | been loyal. 160 | I offer you that kernel of myself that I have saved, 161 | somehow-the central heart that deals not 162 | in words, traffics not with dreams, and is 163 | untouched by time, by joy, by adversities. 164 | I offer you the memory of a yellow rose seen at 165 | sunset, years before you were born. 166 | I offer you explanations of yourself, theories about 167 | yourself, authentic and surprising news of 168 | yourself. 169 | I can give you my loneliness, my darkness, the 170 | hunger of my heart; I am trying to bribe you 171 | with uncertainty, with danger, with defeat. 172 | "; 173 | 174 | let buffer_size = 102; 175 | let mut disk_read_stream = 176 | DiskReadStream::new("tests/text/what-can-i-hold-you-with", buffer_size) 177 | .await 178 | .unwrap(); 179 | 180 | let mut total_bytes_read = 0; 181 | let mut read_count = 0; 182 | let mut result = String::new(); 183 | while let Some(bytes_read) = disk_read_stream.next().await { 184 | let bytes_read = bytes_read.unwrap(); 185 | result += &String::from_utf8(disk_read_stream.buffer()[..bytes_read].to_vec()).unwrap(); 186 | total_bytes_read += bytes_read; 187 | read_count += 1; 188 | } 189 | 190 | assert_eq!(result, poem); 191 | assert_eq!(total_bytes_read, 930); 192 | assert_eq!(read_count, 10); 193 | } 194 | 195 | #[tokio::test] 196 | async fn test_random_disk_read_stream() { 197 | let poem = "What can I hold you with? 198 | I offer you lean streets, desperate sunsets, the 199 | moon of the jagged suburbs. 200 | I offer you the bitterness of a man who has looked 201 | long and long at the lonely moon. 202 | I offer you my ancestors, my dead men, the ghosts 203 | that living men have honoured in bronze. 204 | I offer you whatever insight my books may hold, 205 | whatever manliness or humour my life. 206 | I offer you the loyalty of a man who has never 207 | been loyal. 208 | I offer you that kernel of myself that I have saved, 209 | somehow-the central heart that deals not 210 | in words, traffics not with dreams, and is 211 | untouched by time, by joy, by adversities. 212 | I offer you the memory of a yellow rose seen at 213 | sunset, years before you were born. 214 | I offer you explanations of yourself, theories about 215 | yourself, authentic and surprising news of 216 | yourself. 217 | I can give you my loneliness, my darkness, the 218 | hunger of my heart; I am trying to bribe you 219 | with uncertainty, with danger, with defeat. 
220 | "; 221 | 222 | let mut random_disk_read_stream = 223 | RandomDiskReadStream::new("tests/text/what-can-i-hold-you-with", 150, 250).unwrap(); 224 | 225 | let mut total_bytes_read = 0; 226 | let mut result = String::new(); 227 | while let Some(bytes) = random_disk_read_stream.next().await { 228 | let bytes = bytes.unwrap(); 229 | total_bytes_read += bytes.len(); 230 | result += &String::from_utf8(bytes.to_vec()).unwrap(); 231 | } 232 | 233 | assert_eq!(result, poem); 234 | assert_eq!(total_bytes_read, 930); 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /storage-node/src/error.rs: -------------------------------------------------------------------------------- 1 | use aws_sdk_s3::primitives::ByteStreamError; 2 | use thiserror::Error; 3 | 4 | #[derive(Debug, Error)] 5 | pub enum ParpulseError { 6 | #[error("Disk error: {0}")] 7 | Disk(#[source] std::io::Error), 8 | #[error("S3 error: {0}")] 9 | S3(#[source] Box), 10 | #[error("Internal error: {0}")] 11 | Internal(String), 12 | #[error("SQLite error: {0}")] 13 | Sqlite(#[source] rusqlite::Error), 14 | } 15 | 16 | impl From for ParpulseError { 17 | fn from(e: std::io::Error) -> Self { 18 | ParpulseError::Disk(e) 19 | } 20 | } 21 | 22 | impl From> for ParpulseError 23 | where 24 | E: std::error::Error + Send + Sync + 'static, 25 | R: std::fmt::Debug + Send + Sync + 'static, 26 | { 27 | fn from(e: aws_smithy_runtime_api::client::result::SdkError) -> Self { 28 | ParpulseError::S3(Box::new(e)) 29 | } 30 | } 31 | 32 | impl From for ParpulseError { 33 | fn from(e: ByteStreamError) -> Self { 34 | ParpulseError::Internal(e.to_string()) 35 | } 36 | } 37 | 38 | impl From for ParpulseError { 39 | fn from(e: rusqlite::Error) -> Self { 40 | ParpulseError::Sqlite(e) 41 | } 42 | } 43 | 44 | pub type ParpulseResult = std::result::Result; 45 | 46 | unsafe impl Send for ParpulseError {} 47 | unsafe impl Sync for ParpulseError {} 48 | -------------------------------------------------------------------------------- /storage-node/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::new_without_default)] 2 | 3 | pub mod cache; 4 | pub mod common; 5 | pub mod disk; 6 | pub mod error; 7 | pub mod server; 8 | pub mod storage_manager; 9 | pub mod storage_reader; 10 | -------------------------------------------------------------------------------- /storage-node/src/server.rs: -------------------------------------------------------------------------------- 1 | use log::{info, warn}; 2 | use parpulse_client::{RequestParams, S3Request}; 3 | use std::net::IpAddr; 4 | use std::sync::Arc; 5 | use tokio_stream::wrappers::ReceiverStream; 6 | use warp::{Filter, Rejection}; 7 | 8 | use crate::{ 9 | cache::{ 10 | data_store_cache::{memdisk::MemDiskStoreCache, sqlite::SqliteStoreCache}, 11 | replacer::{lru::LruReplacer, lru_k::LruKReplacer}, 12 | }, 13 | common::config::{ParpulseConfig, ParpulseConfigCachePolicy, ParpulseConfigDataStore}, 14 | error::ParpulseResult, 15 | storage_manager::{StorageManager, StorageManagerImpl}, 16 | }; 17 | 18 | const CACHE_BASE_PATH: &str = "parpulse-cache"; 19 | const DEFAULT_DATA_STORE_CACHE_NUM: usize = 3; 20 | const DEFAULT_MEM_CACHE_SIZE: usize = 100 * 1024; 21 | const DEFAULT_DISK_CACHE_SIZE: usize = 1024 * 1024 * 1024; 22 | const DEFAULT_SQLITE_CACHE_SIZE: usize = 200 * 1024 * 1024; 23 | const DEFAULT_MEM_CACHE_MAX_FILE_SIZE: usize = 10 * 1024 * 1024 + 1; 24 | const DEFAULT_LRU_K_VALUE: usize = 2; 25 | const 
DEFAULT_MAX_DISK_READER_BUFFER_SIZE: usize = 100 * 1024 * 1024; 26 | const DEFAULT_SQLITE_BLOB_READER_BUFFER_SIZE: usize = 1024; 27 | 28 | async fn route(storage_manager: Arc, ip_addr: &str, port: u16) { 29 | let route = warp::path!("file") 30 | .and(warp::path::end()) 31 | .and(warp::query::()) 32 | .and_then(move |params: S3Request| { 33 | let storage_manager = storage_manager.clone(); 34 | if params.is_test { 35 | info!( 36 | "Received test request for bucket: {}, keys: {:?}", 37 | params.bucket, params.keys 38 | ); 39 | } else { 40 | info!( 41 | "Received request for bucket: {}, keys: {:?}", 42 | params.bucket, params.keys 43 | ); 44 | } 45 | async move { 46 | let bucket = params.bucket; 47 | let keys = params.keys; 48 | let request = if params.is_test { 49 | RequestParams::MockS3((bucket, vec![keys])) 50 | } else { 51 | RequestParams::S3((bucket, vec![keys])) 52 | }; 53 | 54 | let result = storage_manager.get_data(request).await; 55 | match result { 56 | Ok(data_rx) => { 57 | let stream = ReceiverStream::new(data_rx); 58 | let body = warp::hyper::Body::wrap_stream(stream); 59 | let response = warp::http::Response::builder() 60 | .header("Content-Type", "text/plain") 61 | .body(body) 62 | .unwrap(); 63 | Ok::<_, Rejection>(warp::reply::with_status( 64 | response, 65 | warp::http::StatusCode::OK, 66 | )) 67 | } 68 | Err(e) => { 69 | let error_message = format!("Failed to get data: {}", e); 70 | let response = warp::http::Response::builder() 71 | .status(warp::http::StatusCode::INTERNAL_SERVER_ERROR) 72 | .body(error_message.into()) 73 | .unwrap(); 74 | Ok::<_, Rejection>(warp::reply::with_status( 75 | response, 76 | warp::http::StatusCode::INTERNAL_SERVER_ERROR, 77 | )) 78 | } 79 | } 80 | } 81 | }); 82 | 83 | let heartbeat = warp::path!("heartbeat").map(|| warp::http::StatusCode::OK); 84 | 85 | // Catch a request that does not match any of the routes above. 86 | let catch_all = warp::any() 87 | .and(warp::path::full()) 88 | .map(|path: warp::path::FullPath| { 89 | warn!("Catch all route hit. 
Path: {}", path.as_str()); 90 | warp::http::StatusCode::NOT_FOUND 91 | }); 92 | 93 | let routes = route.or(heartbeat).or(catch_all); 94 | let ip_addr: IpAddr = ip_addr.parse().unwrap(); 95 | warp::serve(routes).run((ip_addr, port)).await; 96 | } 97 | 98 | pub async fn storage_node_serve( 99 | ip_addr: &str, 100 | port: u16, 101 | config: ParpulseConfig, 102 | ) -> ParpulseResult<()> { 103 | let data_store_cache_num = config 104 | .data_store_cache_num 105 | .unwrap_or(DEFAULT_DATA_STORE_CACHE_NUM); 106 | match config.data_store { 107 | ParpulseConfigDataStore::Memdisk => { 108 | let disk_cache_size = config.disk_cache_size.unwrap_or(DEFAULT_DISK_CACHE_SIZE); 109 | let mem_cache_size = config.mem_cache_size.unwrap_or(DEFAULT_MEM_CACHE_SIZE); 110 | let mem_cache_file_size = config 111 | .mem_cache_file_size 112 | .unwrap_or(DEFAULT_MEM_CACHE_MAX_FILE_SIZE); 113 | let max_disk_reader_buffer_size = config 114 | .max_disk_reader_buffer_size 115 | .unwrap_or(DEFAULT_MAX_DISK_READER_BUFFER_SIZE); 116 | let cache_base_path = config.cache_path.unwrap_or(CACHE_BASE_PATH.to_string()); 117 | match config.cache_policy { 118 | ParpulseConfigCachePolicy::Lru => { 119 | info!("starting storage node with {} mem-disk cache(s) and LRU cache policy, disk cache size: {}, mem cache size: {}, mem cache file size: {}, max disk reader buffer size: {}", data_store_cache_num, disk_cache_size, mem_cache_size, mem_cache_file_size, max_disk_reader_buffer_size); 120 | let mut data_store_caches = Vec::new(); 121 | for i in 0..data_store_cache_num { 122 | let disk_replacer = LruReplacer::new(disk_cache_size); 123 | let mem_replacer = LruReplacer::new(mem_cache_size); 124 | let data_store_cache = MemDiskStoreCache::new( 125 | disk_replacer, 126 | i.to_string() + &cache_base_path, 127 | Some(mem_replacer), 128 | Some(mem_cache_file_size), 129 | max_disk_reader_buffer_size, 130 | ); 131 | data_store_caches.push(data_store_cache); 132 | } 133 | let storage_manager = Arc::new(StorageManagerImpl::new(data_store_caches)); 134 | route(storage_manager, ip_addr, port).await; 135 | } 136 | ParpulseConfigCachePolicy::Lruk => { 137 | info!("starting storage node with {} mem-disk cache(s) and LRU-K cache policy, disk cache size: {}, mem cache size: {}, mem cache file size: {}, max disk reader buffer size: {}", data_store_cache_num, disk_cache_size, mem_cache_size, mem_cache_file_size, max_disk_reader_buffer_size); 138 | let mut data_store_caches = Vec::new(); 139 | let k = config.cache_lru_k.unwrap_or(DEFAULT_LRU_K_VALUE); 140 | for i in 0..data_store_cache_num { 141 | let disk_replacer = LruKReplacer::new(disk_cache_size, k); 142 | let mem_replacer = LruKReplacer::new(mem_cache_size, k); 143 | let data_store_cache = MemDiskStoreCache::new( 144 | disk_replacer, 145 | i.to_string() + &cache_base_path, 146 | Some(mem_replacer), 147 | Some(mem_cache_file_size), 148 | max_disk_reader_buffer_size, 149 | ); 150 | data_store_caches.push(data_store_cache); 151 | } 152 | let storage_manager = Arc::new(StorageManagerImpl::new(data_store_caches)); 153 | route(storage_manager, ip_addr, port).await; 154 | } 155 | }; 156 | } 157 | ParpulseConfigDataStore::Disk => { 158 | let disk_cache_size = config.disk_cache_size.unwrap_or(DEFAULT_DISK_CACHE_SIZE); 159 | let cache_base_path = config.cache_path.unwrap_or(CACHE_BASE_PATH.to_string()); 160 | let max_disk_reader_buffer_size = config 161 | .max_disk_reader_buffer_size 162 | .unwrap_or(DEFAULT_MAX_DISK_READER_BUFFER_SIZE); 163 | match config.cache_policy { 164 | ParpulseConfigCachePolicy::Lru => { 165 | 
info!("starting storage node with {} disk-only cache(s) and LRU cache policy, disk cache size: {}, max disk reader buffer size: {}", data_store_cache_num, disk_cache_size, max_disk_reader_buffer_size); 166 | let mut data_store_caches = Vec::new(); 167 | for i in 0..data_store_cache_num { 168 | let disk_replacer = LruReplacer::new(disk_cache_size); 169 | let data_store_cache = MemDiskStoreCache::new( 170 | disk_replacer, 171 | i.to_string() + &cache_base_path, 172 | None, 173 | None, 174 | max_disk_reader_buffer_size, 175 | ); 176 | data_store_caches.push(data_store_cache); 177 | } 178 | let storage_manager = Arc::new(StorageManagerImpl::new(data_store_caches)); 179 | route(storage_manager, ip_addr, port).await; 180 | } 181 | ParpulseConfigCachePolicy::Lruk => { 182 | info!("starting storage node with {} disk-only cache(s) and LRU-K cache policy, disk cache size: {}, max disk reader buffer size: {}", data_store_cache_num, disk_cache_size, max_disk_reader_buffer_size); 183 | let mut data_store_caches = Vec::new(); 184 | let k = config.cache_lru_k.unwrap_or(DEFAULT_LRU_K_VALUE); 185 | for i in 0..data_store_cache_num { 186 | let disk_replacer = LruKReplacer::new(disk_cache_size, k); 187 | let data_store_cache = MemDiskStoreCache::new( 188 | disk_replacer, 189 | i.to_string() + &cache_base_path, 190 | None, 191 | None, 192 | max_disk_reader_buffer_size, 193 | ); 194 | data_store_caches.push(data_store_cache); 195 | } 196 | let storage_manager = Arc::new(StorageManagerImpl::new(data_store_caches)); 197 | route(storage_manager, ip_addr, port).await; 198 | } 199 | } 200 | } 201 | ParpulseConfigDataStore::Sqlite => { 202 | let sqlite_base_path = 203 | config.cache_path.unwrap_or(CACHE_BASE_PATH.to_string()) + "sqlite.db3"; 204 | let sqlite_cache_size = config.mem_cache_size.unwrap_or(DEFAULT_SQLITE_CACHE_SIZE); 205 | let sqlite_blob_reader_buffer_size = config 206 | .sqlite_blob_reader_buffer_size 207 | .unwrap_or(DEFAULT_SQLITE_BLOB_READER_BUFFER_SIZE); 208 | match config.cache_policy { 209 | ParpulseConfigCachePolicy::Lru => { 210 | info!("starting storage node with {} sqlite cache(s) and LRU cache policy, cache size: {}, blob reader buffer size: {}", data_store_cache_num, sqlite_cache_size, sqlite_blob_reader_buffer_size); 211 | let mut data_store_caches = Vec::new(); 212 | for i in 0..data_store_cache_num { 213 | let replacer = LruReplacer::new(sqlite_cache_size); 214 | let sqlite_data_cache = SqliteStoreCache::new( 215 | replacer, 216 | i.to_string() + &sqlite_base_path, 217 | sqlite_blob_reader_buffer_size, 218 | )?; 219 | data_store_caches.push(sqlite_data_cache); 220 | } 221 | let storage_manager = Arc::new(StorageManagerImpl::new(data_store_caches)); 222 | route(storage_manager, ip_addr, port).await; 223 | } 224 | ParpulseConfigCachePolicy::Lruk => { 225 | info!("starting storage node with {} sqlite cache(s) and LRU-K cache policy, cache size: {}, blob reader buffer size: {}", data_store_cache_num, sqlite_cache_size, sqlite_blob_reader_buffer_size); 226 | let k = config.cache_lru_k.unwrap_or(DEFAULT_LRU_K_VALUE); 227 | let mut data_store_caches = Vec::new(); 228 | for i in 0..data_store_cache_num { 229 | let replacer = LruKReplacer::new(sqlite_cache_size, k); 230 | let sqlite_data_cache = SqliteStoreCache::new( 231 | replacer, 232 | i.to_string() + &sqlite_base_path, 233 | sqlite_blob_reader_buffer_size, 234 | )?; 235 | data_store_caches.push(sqlite_data_cache); 236 | } 237 | let storage_manager = Arc::new(StorageManagerImpl::new(data_store_caches)); 238 | route(storage_manager, ip_addr, 
port).await; 239 | } 240 | } 241 | } 242 | }; 243 | 244 | Ok(()) 245 | } 246 | 247 | #[cfg(test)] 248 | mod tests { 249 | 250 | use super::*; 251 | use reqwest::Client; 252 | use std::fs; 253 | use std::io::Write; 254 | use tempfile::tempdir; 255 | 256 | /// WARNING: Put userdata1.parquet in the storage-node/tests/parquet directory before running this test. 257 | #[tokio::test] 258 | #[allow(clippy::field_reassign_with_default)] 259 | async fn test_server() { 260 | let original_file_path = "tests/parquet/userdata1.parquet"; 261 | let mut config = ParpulseConfig::default(); 262 | config.data_store_cache_num = Some(6); 263 | // Start the server 264 | let server_handle = tokio::spawn(async move { 265 | storage_node_serve("127.0.0.1", 3030, config).await.unwrap(); 266 | }); 267 | 268 | // Give the server some time to start 269 | tokio::time::sleep(std::time::Duration::from_secs(1)).await; 270 | 271 | // Test1: test_download_file 272 | let url = 273 | "http://localhost:3030/file?bucket=tests-parquet&keys=userdata1.parquet&is_test=true"; 274 | let client = Client::new(); 275 | let mut response = client 276 | .get(url) 277 | .send() 278 | .await 279 | .expect("Failed to get response from the server."); 280 | assert!( 281 | response.status().is_success(), 282 | "Failed to download file. Status code: {}", 283 | response.status() 284 | ); 285 | 286 | let temp_dir = tempdir().unwrap(); 287 | let file_path = temp_dir.path().join("userdata1.parquet"); 288 | let mut file = fs::File::create(&file_path).unwrap(); 289 | 290 | // Stream the response body and write to the file 291 | while let Some(chunk) = response.chunk().await.unwrap() { 292 | file.write_all(&chunk).unwrap(); 293 | } 294 | assert!(file_path.exists(), "File not found after download"); 295 | 296 | // Check if file sizes are equal 297 | assert_eq!( 298 | fs::metadata(original_file_path).unwrap().len(), 299 | fs::metadata(file_path.clone()).unwrap().len() 300 | ); 301 | 302 | assert_eq!(fs::metadata(file_path).unwrap().len(), 113629); 303 | 304 | // Test2: test_file_not_exist 305 | let url = 306 | "http://localhost:3030/file?bucket=tests-parquet&keys=not_exist.parquet&is_test=true"; 307 | let client = Client::new(); 308 | let response = client 309 | .get(url) 310 | .send() 311 | .await 312 | .expect("Failed to get response from the server."); 313 | 314 | assert!( 315 | response.status().is_server_error(), 316 | "Expected 500 status code" 317 | ); 318 | 319 | server_handle.abort(); 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /storage-node/src/storage_manager.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | cache::data_store_cache::{cache_key_from_request, DataStoreCache}, 3 | common::hash::calculate_hash_crc32fast, 4 | error::ParpulseResult, 5 | }; 6 | 7 | use async_trait::async_trait; 8 | use bytes::Bytes; 9 | use log::debug; 10 | use parpulse_client::RequestParams; 11 | use tokio::sync::mpsc::Receiver; 12 | 13 | #[async_trait] 14 | pub trait StorageManager: Send + Sync { 15 | async fn get_data( 16 | &self, 17 | request: RequestParams, 18 | ) -> ParpulseResult>>; 19 | } 20 | 21 | /// [`StorageManager`] handles the request from the storage client. 22 | /// 23 | /// We should allow concurrent requests fed into the storage manager, 24 | /// which should be responsible for handling multiple requests at the 25 | /// same time. 
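///
/// A minimal usage sketch (illustrative only; the cache type, sizes, and the request
/// below are placeholders that mirror the tests at the bottom of this file):
///
/// ```ignore
/// let cache = MemDiskStoreCache::new(
///     LruReplacer::new(1024 * 1024),
///     "cache-dir".to_string(),
///     None,
///     None,
///     1024 * 1024,
/// );
/// let storage_manager = StorageManagerImpl::new(vec![cache]);
/// let request = RequestParams::MockS3((
///     "tests-parquet".to_string(),
///     vec!["userdata1.parquet".to_string()],
/// ));
/// let mut rx = storage_manager.get_data(request).await?;
/// while let Some(chunk) = rx.recv().await {
///     let bytes = chunk?; // each item is a `ParpulseResult<Bytes>` chunk of the object
/// }
/// ```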
26 | pub struct StorageManagerImpl { 27 | /// We don't use lock here because `data_store_cache` itself should handle the concurrency. 28 | data_store_caches: Vec, 29 | } 30 | 31 | impl StorageManagerImpl { 32 | pub fn new(data_store_caches: Vec) -> Self { 33 | Self { data_store_caches } 34 | } 35 | 36 | pub async fn get_data( 37 | &self, 38 | request: RequestParams, 39 | ) -> ParpulseResult>> { 40 | // 1. Try to get data from the cache first. 41 | // 2. If cache miss, then go to storage reader to fetch the data from 42 | // the underlying storage. 43 | // 3. If needed, update the cache with the data fetched from the storage reader. 44 | 45 | // TODO: Support more request types. 46 | 47 | // FIXME: Cache key should be . Might refactor the underlying S3 48 | // reader as one S3 key for one reader. 49 | let cache_key = cache_key_from_request(&request); 50 | let hash = calculate_hash_crc32fast(cache_key.as_bytes()); 51 | let cache_index = hash % self.data_store_caches.len(); 52 | let data_store_cache = self.data_store_caches.get(cache_index).unwrap(); 53 | 54 | debug!( 55 | "For cache key: {}, the corresponding data_store_cache index {}", 56 | cache_key, cache_index 57 | ); 58 | 59 | let data_rx = data_store_cache.get_data_from_cache(&request).await?; 60 | if let Some(data_rx) = data_rx { 61 | Ok(data_rx) 62 | } else { 63 | data_store_cache.put_data_to_cache(&request).await?; 64 | // TODO (kunle): Push down the response writer rather than calling get_data_from_cache again. 65 | let data_rx = data_store_cache.get_data_from_cache(&request).await?; 66 | if data_rx.is_none() { 67 | panic!("Data should be in the cache now. {}", cache_key.clone()); 68 | } 69 | Ok(data_rx.unwrap()) 70 | } 71 | } 72 | } 73 | 74 | #[async_trait] 75 | impl StorageManager for StorageManagerImpl { 76 | async fn get_data( 77 | &self, 78 | request: RequestParams, 79 | ) -> ParpulseResult>> { 80 | self.get_data(request).await 81 | } 82 | } 83 | 84 | /// fn buffer(&self) -> &[u8]; ensures Iterator has a buffer 85 | /// This buffer function returns the starting point of the result. 86 | /// **NOTE**: The result buffer must be **CONTINUOUS** in bytes with the size in Item as its length. 
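///
/// A consumption sketch (illustrative only; `iterator` stands for any implementor of
/// this trait): each `next()` yields the number of valid bytes, and `buffer()[..n]`
/// is the contiguous region holding them.
///
/// ```ignore
/// while let Some(bytes_read) = iterator.next() {
///     let bytes_read = bytes_read?;
///     let chunk = &iterator.buffer()[..bytes_read];
///     // consume `chunk` before the next call, as the buffer may be overwritten
/// }
/// ```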
87 | pub trait ParpulseReaderIterator: Iterator> { 88 | fn buffer(&self) -> &[u8]; 89 | } 90 | 91 | #[cfg(test)] 92 | mod tests { 93 | use futures::join; 94 | use std::{sync::Arc, time::Instant}; 95 | 96 | use crate::cache::{data_store_cache::memdisk::MemDiskStoreCache, replacer::lru::LruReplacer}; 97 | 98 | use super::*; 99 | 100 | async fn consume_receiver(mut rx: Receiver>) -> usize { 101 | let mut total_bytes = 0; 102 | while let Some(data) = rx.recv().await { 103 | match data { 104 | Ok(bytes) => { 105 | total_bytes += bytes.len(); 106 | } 107 | Err(e) => panic!("Error receiving data: {:?}", e), 108 | } 109 | } 110 | total_bytes 111 | } 112 | 113 | #[tokio::test] 114 | async fn test_storage_manager_disk_only() { 115 | let dummy_size = 1000000; 116 | let cache = LruReplacer::new(dummy_size); 117 | 118 | let tmp = tempfile::tempdir().unwrap(); 119 | let dir = tmp.path().to_owned(); 120 | let cache_base_path = dir.join("test-storage-manager"); 121 | 122 | let data_store_cache = MemDiskStoreCache::new( 123 | cache, 124 | cache_base_path.display().to_string(), 125 | None, 126 | None, 127 | 100 * 1024 * 1024, 128 | ); 129 | let storage_manager = StorageManagerImpl::new(vec![data_store_cache]); 130 | 131 | let bucket = "tests-parquet".to_string(); 132 | let keys = vec!["userdata1.parquet".to_string()]; 133 | let request = RequestParams::MockS3((bucket, keys)); 134 | 135 | let mut start_time = Instant::now(); 136 | let result = storage_manager.get_data(request.clone()).await; 137 | assert!(result.is_ok()); 138 | let mut data_rx = result.unwrap(); 139 | let mut total_bytes = 0; 140 | while let Some(data) = data_rx.recv().await { 141 | match data { 142 | Ok(bytes) => { 143 | total_bytes += bytes.len(); 144 | } 145 | Err(e) => panic!("Error receiving data: {:?}", e), 146 | } 147 | } 148 | assert_eq!(total_bytes, 113629); 149 | let delta_time_miss = Instant::now() - start_time; 150 | 151 | start_time = Instant::now(); 152 | let result = storage_manager.get_data(request).await; 153 | assert!(result.is_ok()); 154 | let data_rx = result.unwrap(); 155 | assert_eq!(consume_receiver(data_rx).await, 113629); 156 | let delta_time_hit = Instant::now() - start_time; 157 | 158 | println!( 159 | "Delta time miss: {:?}, delta time hit: {:?}", 160 | delta_time_miss, delta_time_hit 161 | ); 162 | assert!(delta_time_miss > delta_time_hit); 163 | } 164 | 165 | #[tokio::test] 166 | async fn test_storage_manager_mem_disk_1() { 167 | // 1. get small data (-> memory) 168 | // 2. get large data (-> disk) 169 | // 3. get small data again 170 | // 4. get large data again 171 | // 5. 
compare time 172 | let dummy_size = 1000000; 173 | let disk_cache = LruReplacer::new(dummy_size); 174 | let mem_cache = LruReplacer::new(dummy_size); 175 | 176 | let tmp = tempfile::tempdir().unwrap(); 177 | let disk_cache_base_path = tmp.path().to_owned(); 178 | 179 | let data_store_cache = MemDiskStoreCache::new( 180 | disk_cache, 181 | disk_cache_base_path.display().to_string(), 182 | Some(mem_cache), 183 | Some(950), 184 | 100 * 1024 * 1024, 185 | ); 186 | let storage_manager = StorageManagerImpl::new(vec![data_store_cache]); 187 | 188 | let request_path_small_bucket = "tests-text".to_string(); 189 | let request_path_small_keys = vec!["what-can-i-hold-you-with".to_string()]; 190 | let request_small = 191 | RequestParams::MockS3((request_path_small_bucket, request_path_small_keys)); 192 | 193 | let result = storage_manager.get_data(request_small.clone()).await; 194 | assert!(result.is_ok()); 195 | assert_eq!(consume_receiver(result.unwrap()).await, 930); 196 | 197 | let request_path_large_bucket = "tests-parquet".to_string(); 198 | let request_path_large_keys = vec!["userdata2.parquet".to_string()]; 199 | let request_large = 200 | RequestParams::MockS3((request_path_large_bucket, request_path_large_keys)); 201 | 202 | let result = storage_manager.get_data(request_large.clone()).await; 203 | assert!(result.is_ok()); 204 | assert_eq!(consume_receiver(result.unwrap()).await, 112193); 205 | 206 | // Get data again. 207 | let mut start_time = Instant::now(); 208 | let result = storage_manager.get_data(request_large).await; 209 | assert!(result.is_ok()); 210 | assert_eq!(consume_receiver(result.unwrap()).await, 112193); 211 | let delta_time_hit_disk = Instant::now() - start_time; 212 | 213 | start_time = Instant::now(); 214 | let result = storage_manager.get_data(request_small).await; 215 | assert!(result.is_ok()); 216 | assert_eq!(consume_receiver(result.unwrap()).await, 930); 217 | let delta_time_hit_mem = Instant::now() - start_time; 218 | 219 | println!( 220 | "For small and large files, Delta time hit mem: {:?}, delta time hit disk: {:?}", 221 | delta_time_hit_mem, delta_time_hit_disk 222 | ); 223 | assert!(delta_time_hit_disk > delta_time_hit_mem); 224 | } 225 | 226 | #[tokio::test] 227 | async fn test_storage_manager_mem_disk_2() { 228 | // 1. get large data1 (-> memory) 229 | // 2. get large data2 (-> memory, and evict data1 to disk) 230 | // 3. get data1 again 231 | // 4. get data2 again 232 | // 5. 
compare time 233 | let disk_cache = LruReplacer::new(1000000); 234 | let mem_cache = LruReplacer::new(120000); 235 | 236 | let tmp = tempfile::tempdir().unwrap(); 237 | let disk_cache_base_path = tmp.path().to_owned(); 238 | 239 | let data_store_cache = MemDiskStoreCache::new( 240 | disk_cache, 241 | disk_cache_base_path.display().to_string(), 242 | Some(mem_cache), 243 | Some(120000), 244 | 100 * 1024 * 1024, 245 | ); 246 | let storage_manager = StorageManagerImpl::new(vec![data_store_cache]); 247 | 248 | let request_path_bucket1 = "tests-parquet".to_string(); 249 | let request_path_keys1 = vec!["userdata1.parquet".to_string()]; 250 | let request_data1 = RequestParams::MockS3((request_path_bucket1, request_path_keys1)); 251 | 252 | let result = storage_manager.get_data(request_data1.clone()).await; 253 | assert!(result.is_ok()); 254 | assert_eq!(consume_receiver(result.unwrap()).await, 113629); 255 | 256 | let request_path_bucket2 = "tests-parquet".to_string(); 257 | let request_path_keys2 = vec!["userdata2.parquet".to_string()]; 258 | let request_data2 = RequestParams::MockS3((request_path_bucket2, request_path_keys2)); 259 | 260 | let result = storage_manager.get_data(request_data2.clone()).await; 261 | assert!(result.is_ok()); 262 | assert_eq!(consume_receiver(result.unwrap()).await, 112193); 263 | 264 | // Get data again. Now data2 in memory and data1 in disk. 265 | let mut start_time = Instant::now(); 266 | let result = storage_manager.get_data(request_data1).await; 267 | assert!(result.is_ok()); 268 | assert_eq!(consume_receiver(result.unwrap()).await, 113629); 269 | let delta_time_hit_disk = Instant::now() - start_time; 270 | 271 | start_time = Instant::now(); 272 | let result = storage_manager.get_data(request_data2).await; 273 | assert!(result.is_ok()); 274 | assert_eq!(consume_receiver(result.unwrap()).await, 112193); 275 | let delta_time_hit_mem = Instant::now() - start_time; 276 | 277 | println!( 278 | "For almost same files, delta time hit mem: {:?}, delta time hit disk: {:?}", 279 | delta_time_hit_mem, delta_time_hit_disk 280 | ); 281 | assert!(delta_time_hit_disk > delta_time_hit_mem); 282 | } 283 | 284 | #[tokio::test] 285 | async fn test_storage_manager_parallel_1() { 286 | let disk_cache = LruReplacer::new(1000000); 287 | 288 | let tmp = tempfile::tempdir().unwrap(); 289 | let disk_cache_base_path = tmp.path().to_owned(); 290 | 291 | let data_store_cache = MemDiskStoreCache::new( 292 | disk_cache, 293 | disk_cache_base_path.display().to_string(), 294 | None, 295 | None, 296 | 100 * 1024 * 1024, 297 | ); 298 | let storage_manager = Arc::new(StorageManagerImpl::new(vec![data_store_cache])); 299 | 300 | let request_path_bucket1 = "tests-parquet".to_string(); 301 | let request_path_keys1 = vec!["userdata1.parquet".to_string()]; 302 | let request_data1 = RequestParams::MockS3((request_path_bucket1, request_path_keys1)); 303 | 304 | let request_path_bucket2 = "tests-parquet".to_string(); 305 | let request_path_keys2 = vec!["userdata2.parquet".to_string()]; 306 | let request_data2 = RequestParams::MockS3((request_path_bucket2, request_path_keys2)); 307 | 308 | let storage_manager_1 = storage_manager.clone(); 309 | let request_data1_1 = request_data1.clone(); 310 | let get_data_fut_1 = 311 | tokio::spawn(async move { storage_manager_1.get_data(request_data1_1).await }); 312 | 313 | let storage_manager_2 = storage_manager.clone(); 314 | let request_data1_2 = request_data1.clone(); 315 | let get_data_fut_2 = 316 | tokio::spawn(async move { 
storage_manager_2.get_data(request_data1_2).await }); 317 | 318 | let storage_manager_3 = storage_manager.clone(); 319 | let request_data2_3 = request_data2.clone(); 320 | let get_data_fut_3 = 321 | tokio::spawn(async move { storage_manager_3.get_data(request_data2_3).await }); 322 | 323 | let storage_manager_4 = storage_manager.clone(); 324 | let request_data1_4 = request_data1.clone(); 325 | let get_data_fut_4 = 326 | tokio::spawn(async move { storage_manager_4.get_data(request_data1_4).await }); 327 | 328 | let result = join!( 329 | get_data_fut_1, 330 | get_data_fut_2, 331 | get_data_fut_3, 332 | get_data_fut_4 333 | ); 334 | assert!(result.0.is_ok()); 335 | assert_eq!(consume_receiver(result.0.unwrap().unwrap()).await, 113629); 336 | assert!(result.1.is_ok()); 337 | assert_eq!(consume_receiver(result.1.unwrap().unwrap()).await, 113629); 338 | assert!(result.2.is_ok()); 339 | assert_eq!(consume_receiver(result.2.unwrap().unwrap()).await, 112193); 340 | assert!(result.3.is_ok()); 341 | assert_eq!(consume_receiver(result.3.unwrap().unwrap()).await, 113629); 342 | } 343 | 344 | #[tokio::test] 345 | async fn test_storage_manager_parallel_2() { 346 | let disk_cache = LruReplacer::new(1000000); 347 | let mem_cache = LruReplacer::new(120000); 348 | 349 | let tmp = tempfile::tempdir().unwrap(); 350 | let disk_cache_base_path = tmp.path().to_owned(); 351 | 352 | let data_store_cache = MemDiskStoreCache::new( 353 | disk_cache, 354 | disk_cache_base_path.display().to_string(), 355 | Some(mem_cache), 356 | Some(120000), 357 | 100 * 1024 * 1024, 358 | ); 359 | let storage_manager = Arc::new(StorageManagerImpl::new(vec![data_store_cache])); 360 | 361 | let request_path_bucket1 = "tests-parquet".to_string(); 362 | let request_path_keys1 = vec!["userdata2.parquet".to_string()]; 363 | let request_data1 = RequestParams::MockS3((request_path_bucket1, request_path_keys1)); 364 | 365 | let request_path_bucket2 = "tests-parquet".to_string(); 366 | let request_path_keys2 = vec!["userdata1.parquet".to_string()]; 367 | let request_data2 = RequestParams::MockS3((request_path_bucket2, request_path_keys2)); 368 | 369 | let mut start_time = Instant::now(); 370 | 371 | let storage_manager_1 = storage_manager.clone(); 372 | let request_data1_1 = request_data1.clone(); 373 | let get_data_fut_1 = 374 | tokio::spawn(async move { storage_manager_1.get_data(request_data1_1).await }); 375 | 376 | let storage_manager_2 = storage_manager.clone(); 377 | let request_data1_2 = request_data1.clone(); 378 | let get_data_fut_2 = 379 | tokio::spawn(async move { storage_manager_2.get_data(request_data1_2).await }); 380 | 381 | let storage_manager_3 = storage_manager.clone(); 382 | let request_data2_3 = request_data2.clone(); 383 | let get_data_fut_3 = 384 | tokio::spawn(async move { storage_manager_3.get_data(request_data2_3).await }); 385 | 386 | let storage_manager_4 = storage_manager.clone(); 387 | let request_data2_4 = request_data2.clone(); 388 | let get_data_fut_4 = 389 | tokio::spawn(async move { storage_manager_4.get_data(request_data2_4).await }); 390 | 391 | let storage_manager_5 = storage_manager.clone(); 392 | let request_data1_5 = request_data1.clone(); 393 | let get_data_fut_5 = 394 | tokio::spawn(async move { storage_manager_5.get_data(request_data1_5).await }); 395 | 396 | let result = join!( 397 | get_data_fut_1, 398 | get_data_fut_2, 399 | get_data_fut_3, 400 | get_data_fut_4, 401 | get_data_fut_5 402 | ); 403 | assert!(result.0.is_ok()); 404 | assert_eq!(consume_receiver(result.0.unwrap().unwrap()).await, 
112193); 405 | assert!(result.1.is_ok()); 406 | assert_eq!(consume_receiver(result.1.unwrap().unwrap()).await, 112193); 407 | assert!(result.2.is_ok()); 408 | assert_eq!(consume_receiver(result.2.unwrap().unwrap()).await, 113629); 409 | assert!(result.3.is_ok()); 410 | assert_eq!(consume_receiver(result.3.unwrap().unwrap()).await, 113629); 411 | assert!(result.4.is_ok()); 412 | assert_eq!(consume_receiver(result.4.unwrap().unwrap()).await, 112193); 413 | 414 | let delta_time_miss = Instant::now() - start_time; 415 | 416 | start_time = Instant::now(); 417 | 418 | let storage_manager_1 = storage_manager.clone(); 419 | let request_data2_1 = request_data2.clone(); 420 | let get_data_fut_1 = 421 | tokio::spawn(async move { storage_manager_1.get_data(request_data2_1).await }); 422 | 423 | let storage_manager_2 = storage_manager.clone(); 424 | let request_data1_2 = request_data1.clone(); 425 | let get_data_fut_2 = 426 | tokio::spawn(async move { storage_manager_2.get_data(request_data1_2).await }); 427 | 428 | let storage_manager_3 = storage_manager.clone(); 429 | let request_data2_3 = request_data2.clone(); 430 | let get_data_fut_3 = 431 | tokio::spawn(async move { storage_manager_3.get_data(request_data2_3).await }); 432 | 433 | let storage_manager_4 = storage_manager.clone(); 434 | let request_data1_4 = request_data1.clone(); 435 | let get_data_fut_4 = 436 | tokio::spawn(async move { storage_manager_4.get_data(request_data1_4).await }); 437 | 438 | let storage_manager_5 = storage_manager.clone(); 439 | let request_data1_5 = request_data1.clone(); 440 | let get_data_fut_5 = 441 | tokio::spawn(async move { storage_manager_5.get_data(request_data1_5).await }); 442 | 443 | let result = join!( 444 | get_data_fut_1, 445 | get_data_fut_2, 446 | get_data_fut_3, 447 | get_data_fut_4, 448 | get_data_fut_5 449 | ); 450 | assert!(result.0.is_ok()); 451 | assert_eq!(consume_receiver(result.0.unwrap().unwrap()).await, 113629); 452 | assert!(result.1.is_ok()); 453 | assert_eq!(consume_receiver(result.1.unwrap().unwrap()).await, 112193); 454 | assert!(result.2.is_ok()); 455 | assert_eq!(consume_receiver(result.2.unwrap().unwrap()).await, 113629); 456 | assert!(result.3.is_ok()); 457 | assert_eq!(consume_receiver(result.3.unwrap().unwrap()).await, 112193); 458 | assert!(result.4.is_ok()); 459 | assert_eq!(consume_receiver(result.4.unwrap().unwrap()).await, 112193); 460 | 461 | let delta_time_hit = Instant::now() - start_time; 462 | 463 | println!( 464 | "For parallel test 2, delta time miss: {:?}, delta time miss: {:?}", 465 | delta_time_miss, delta_time_hit 466 | ); 467 | assert!(delta_time_miss > delta_time_hit); 468 | } 469 | 470 | #[tokio::test] 471 | async fn test_fanout_cache() { 472 | let data_store_cache_num = 6; 473 | let mut data_store_caches = Vec::new(); 474 | for _ in 0..data_store_cache_num { 475 | let disk_cache = LruReplacer::new(1000000); 476 | let mem_cache = LruReplacer::new(120000); 477 | 478 | let tmp = tempfile::tempdir().unwrap(); 479 | let disk_cache_base_path = tmp.path().to_owned(); 480 | 481 | let data_store_cache = MemDiskStoreCache::new( 482 | disk_cache, 483 | disk_cache_base_path.display().to_string(), 484 | Some(mem_cache), 485 | Some(120000), 486 | 100 * 1024 * 1024, 487 | ); 488 | data_store_caches.push(data_store_cache); 489 | } 490 | let storage_manager = Arc::new(StorageManagerImpl::new(data_store_caches)); 491 | 492 | let request_path_bucket1 = "tests-parquet".to_string(); 493 | let request_path_keys1 = vec!["userdata1.parquet".to_string()]; 494 | let request_data1 = 
RequestParams::MockS3((request_path_bucket1, request_path_keys1)); 495 | 496 | let result = storage_manager.get_data(request_data1.clone()).await; 497 | assert!(result.is_ok()); 498 | assert_eq!(consume_receiver(result.unwrap()).await, 113629); 499 | let request_path_bucket2 = "tests-parquet".to_string(); 500 | let request_path_keys2 = vec!["userdata2.parquet".to_string()]; 501 | let request_data2 = RequestParams::MockS3((request_path_bucket2, request_path_keys2)); 502 | let result = storage_manager.get_data(request_data2.clone()).await; 503 | assert!(result.is_ok()); 504 | assert_eq!(consume_receiver(result.unwrap()).await, 112193); 505 | 506 | let request_path_bucket3 = "tests-text".to_string(); 507 | let request_path_keys3: Vec = vec!["what-can-i-hold-you-with".to_string()]; 508 | let request_data3 = RequestParams::MockS3((request_path_bucket3, request_path_keys3)); 509 | let result = storage_manager.get_data(request_data3.clone()).await; 510 | assert!(result.is_ok()); 511 | assert_eq!(consume_receiver(result.unwrap()).await, 930); 512 | 513 | let request_path_bucket4 = "tests-parquet".to_string(); 514 | let request_path_keys4: Vec = vec!["small_random_data.parquet".to_string()]; 515 | let request_data4 = RequestParams::MockS3((request_path_bucket4, request_path_keys4)); 516 | let result = storage_manager.get_data(request_data4.clone()).await; 517 | assert!(result.is_ok()); 518 | assert_eq!(consume_receiver(result.unwrap()).await, 2013); 519 | } 520 | 521 | #[tokio::test] 522 | async fn test_fanout_cach_parallel() { 523 | let data_store_cache_num = 6; 524 | let mut data_store_caches = Vec::new(); 525 | for _ in 0..data_store_cache_num { 526 | let disk_cache = LruReplacer::new(1000000); 527 | let mem_cache = LruReplacer::new(120000); 528 | 529 | let tmp = tempfile::tempdir().unwrap(); 530 | let disk_cache_base_path = tmp.path().to_owned(); 531 | 532 | let data_store_cache = MemDiskStoreCache::new( 533 | disk_cache, 534 | disk_cache_base_path.display().to_string(), 535 | Some(mem_cache), 536 | Some(120000), 537 | 100 * 1024 * 1024, 538 | ); 539 | data_store_caches.push(data_store_cache); 540 | } 541 | let storage_manager = Arc::new(StorageManagerImpl::new(data_store_caches)); 542 | 543 | let request_path_bucket1 = "tests-parquet".to_string(); 544 | let request_path_keys1 = vec!["userdata2.parquet".to_string()]; 545 | let request_data1 = RequestParams::MockS3((request_path_bucket1, request_path_keys1)); 546 | 547 | let request_path_bucket2 = "tests-parquet".to_string(); 548 | let request_path_keys2 = vec!["userdata1.parquet".to_string()]; 549 | let request_data2 = RequestParams::MockS3((request_path_bucket2, request_path_keys2)); 550 | 551 | let request_path_bucket3 = "tests-text".to_string(); 552 | let request_path_keys3 = vec!["what-can-i-hold-you-with".to_string()]; 553 | let request_data3 = RequestParams::MockS3((request_path_bucket3, request_path_keys3)); 554 | 555 | let storage_manager_1 = storage_manager.clone(); 556 | let request_data1_1 = request_data1.clone(); 557 | let get_data_fut_1 = 558 | tokio::spawn(async move { storage_manager_1.get_data(request_data1_1).await }); 559 | 560 | let storage_manager_2 = storage_manager.clone(); 561 | let request_data1_2 = request_data1.clone(); 562 | let get_data_fut_2 = 563 | tokio::spawn(async move { storage_manager_2.get_data(request_data1_2).await }); 564 | 565 | let storage_manager_3 = storage_manager.clone(); 566 | let request_data3_3 = request_data3.clone(); 567 | let get_data_fut_3 = 568 | tokio::spawn(async move { 
storage_manager_3.get_data(request_data3_3).await }); 569 | 570 | let storage_manager_4 = storage_manager.clone(); 571 | let request_data2_4 = request_data2.clone(); 572 | let get_data_fut_4 = 573 | tokio::spawn(async move { storage_manager_4.get_data(request_data2_4).await }); 574 | 575 | let storage_manager_5 = storage_manager.clone(); 576 | let request_data2_5 = request_data2.clone(); 577 | let get_data_fut_5 = 578 | tokio::spawn(async move { storage_manager_5.get_data(request_data2_5).await }); 579 | 580 | let storage_manager_6 = storage_manager.clone(); 581 | let request_data1_6 = request_data1.clone(); 582 | let get_data_fut_6 = 583 | tokio::spawn(async move { storage_manager_6.get_data(request_data1_6).await }); 584 | 585 | let storage_manager_7 = storage_manager.clone(); 586 | let request_data3_7 = request_data3.clone(); 587 | let get_data_fut_7 = 588 | tokio::spawn(async move { storage_manager_7.get_data(request_data3_7).await }); 589 | 590 | let result = join!( 591 | get_data_fut_1, 592 | get_data_fut_2, 593 | get_data_fut_3, 594 | get_data_fut_4, 595 | get_data_fut_5, 596 | get_data_fut_6, 597 | get_data_fut_7 598 | ); 599 | assert!(result.0.is_ok()); 600 | assert_eq!(consume_receiver(result.0.unwrap().unwrap()).await, 112193); 601 | assert!(result.1.is_ok()); 602 | assert_eq!(consume_receiver(result.1.unwrap().unwrap()).await, 112193); 603 | assert!(result.2.is_ok()); 604 | assert_eq!(consume_receiver(result.2.unwrap().unwrap()).await, 930); 605 | assert!(result.3.is_ok()); 606 | assert_eq!(consume_receiver(result.3.unwrap().unwrap()).await, 113629); 607 | assert!(result.4.is_ok()); 608 | assert_eq!(consume_receiver(result.4.unwrap().unwrap()).await, 113629); 609 | assert!(result.5.is_ok()); 610 | assert_eq!(consume_receiver(result.5.unwrap().unwrap()).await, 112193); 611 | assert!(result.6.is_ok()); 612 | assert_eq!(consume_receiver(result.6.unwrap().unwrap()).await, 930); 613 | } 614 | 615 | #[tokio::test] 616 | async fn test_evict_disk() { 617 | let disk_cache = LruReplacer::new(120000); 618 | 619 | let tmp = tempfile::tempdir().unwrap(); 620 | let disk_cache_base_path = tmp.path().to_owned(); 621 | 622 | let data_store_cache = MemDiskStoreCache::new( 623 | disk_cache, 624 | disk_cache_base_path.display().to_string(), 625 | None, 626 | None, 627 | 100 * 1024 * 1024, 628 | ); 629 | let storage_manager = Arc::new(StorageManagerImpl::new(vec![data_store_cache])); 630 | 631 | let request_path_bucket1 = "tests-parquet".to_string(); 632 | let request_path_keys1 = vec!["userdata2.parquet".to_string()]; 633 | let request_data1 = RequestParams::MockS3((request_path_bucket1, request_path_keys1)); 634 | 635 | let request_path_bucket2 = "tests-parquet".to_string(); 636 | let request_path_keys2 = vec!["userdata1.parquet".to_string()]; 637 | let request_data2 = RequestParams::MockS3((request_path_bucket2, request_path_keys2)); 638 | 639 | let res1 = storage_manager.get_data(request_data1.clone()).await; 640 | assert!(res1.is_ok()); 641 | assert_eq!(consume_receiver(res1.unwrap()).await, 112193); 642 | let res2 = storage_manager.get_data(request_data2.clone()).await; 643 | assert!(res2.is_ok()); 644 | assert_eq!(consume_receiver(res2.unwrap()).await, 113629); 645 | let res3 = storage_manager.get_data(request_data1.clone()).await; 646 | assert!(res3.is_ok()); 647 | assert_eq!(consume_receiver(res3.unwrap()).await, 112193); 648 | } 649 | 650 | #[tokio::test] 651 | async fn test_evict_mem() { 652 | let disk_cache = LruReplacer::new(10); 653 | let mem_cache = LruReplacer::new(120000); 654 | 
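        // Each userdata parquet file is roughly 110 KB, so the 120 KB memory cache can
        // hold only one of them at a time; fetching the second file forces the first
        // one out of memory.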
655 | let tmp = tempfile::tempdir().unwrap(); 656 | let disk_cache_base_path = tmp.path().to_owned(); 657 | 658 | let data_store_cache = MemDiskStoreCache::new( 659 | disk_cache, 660 | disk_cache_base_path.display().to_string(), 661 | Some(mem_cache), 662 | Some(120000), 663 | 100 * 1024 * 1024, 664 | ); 665 | let storage_manager = Arc::new(StorageManagerImpl::new(vec![data_store_cache])); 666 | 667 | let request_path_bucket1 = "tests-parquet".to_string(); 668 | let request_path_keys1 = vec!["userdata2.parquet".to_string()]; 669 | let request_data1 = RequestParams::MockS3((request_path_bucket1, request_path_keys1)); 670 | 671 | let request_path_bucket2 = "tests-parquet".to_string(); 672 | let request_path_keys2 = vec!["userdata1.parquet".to_string()]; 673 | let request_data2 = RequestParams::MockS3((request_path_bucket2, request_path_keys2)); 674 | 675 | let res1 = storage_manager.get_data(request_data1.clone()).await; 676 | assert!(res1.is_ok()); 677 | assert_eq!(consume_receiver(res1.unwrap()).await, 112193); 678 | let res2 = storage_manager.get_data(request_data2.clone()).await; 679 | assert!(res2.is_ok()); 680 | assert_eq!(consume_receiver(res2.unwrap()).await, 113629); 681 | let res3 = storage_manager.get_data(request_data1.clone()).await; 682 | assert!(res3.is_ok()); 683 | assert_eq!(consume_receiver(res3.unwrap()).await, 112193); 684 | } 685 | } 686 | -------------------------------------------------------------------------------- /storage-node/src/storage_reader/mod.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use bytes::Bytes; 3 | use futures::stream::BoxStream; 4 | 5 | use crate::{error::ParpulseResult, storage_manager::ParpulseReaderIterator}; 6 | 7 | pub mod s3; 8 | // TODO: We can use `use mockall::automock;` to mock s3. 9 | // (https://docs.aws.amazon.com/sdk-for-rust/latest/dg/testing.html) 10 | // pub mod s3_automock; 11 | pub mod s3_diskmock; 12 | 13 | pub trait SyncStorageReader { 14 | type ReaderIterator: ParpulseReaderIterator; 15 | fn read_all(&self) -> ParpulseResult; 16 | fn into_iterator(self) -> ParpulseResult; 17 | } 18 | 19 | /// [`StorageReaderStream`] is a stream of data read from the underlying storage. 20 | /// Each storage reader should implement `Stream` trait to provide a stream of data. 21 | pub type StorageReaderStream = BoxStream<'static, ParpulseResult>; 22 | 23 | // TODO: Merge `StorageReader` with `AsyncStorageReader`. 24 | #[async_trait] 25 | pub trait AsyncStorageReader { 26 | /// Read all data at once from the underlying storage. 27 | /// 28 | /// NEVER call this method if you do not know the size of the data -- collecting 29 | /// all data into one buffer might lead to OOM. 30 | async fn read_all(&self) -> ParpulseResult>; 31 | 32 | /// Read data from the underlying storage as a stream. 
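    ///
    /// A consumption sketch (illustrative only; `reader` stands for any implementor of
    /// this trait, and `futures::StreamExt` is assumed to be in scope):
    ///
    /// ```ignore
    /// let mut stream = reader.into_stream().await?;
    /// while let Some(chunk) = stream.next().await {
    ///     let bytes = chunk?; // `Bytes` read from the underlying storage
    /// }
    /// ```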
33 | async fn into_stream(self) -> ParpulseResult; 34 | } 35 | -------------------------------------------------------------------------------- /storage-node/src/storage_reader/s3.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | pin::Pin, 3 | task::{Context, Poll}, 4 | }; 5 | 6 | use async_trait::async_trait; 7 | use aws_config::{meta::region::RegionProviderChain, BehaviorVersion}; 8 | use aws_sdk_s3::{ 9 | operation::get_object::{GetObjectError, GetObjectOutput}, 10 | primitives::ByteStream, 11 | Client, 12 | }; 13 | use aws_smithy_runtime_api::{client::result::SdkError, http::Response}; 14 | use bytes::Bytes; 15 | use futures::{future::BoxFuture, ready, FutureExt, Stream}; 16 | 17 | use crate::error::{ParpulseError, ParpulseResult}; 18 | 19 | use super::{AsyncStorageReader, StorageReaderStream}; 20 | 21 | /// [`S3Reader`] is a reader for retrieving data from S3. It can either read the 22 | /// data once at all or read the data in an asynchronous stream. 23 | pub struct S3Reader { 24 | client: Client, 25 | bucket: String, 26 | keys: Vec, 27 | } 28 | 29 | impl S3Reader { 30 | pub async fn new(bucket: String, keys: Vec) -> Self { 31 | let region_provider = RegionProviderChain::default_provider().or_else("us-east-1"); 32 | let config = aws_config::defaults(BehaviorVersion::latest()) 33 | .region(region_provider) 34 | .load() 35 | .await; 36 | let client = Client::new(&config); 37 | Self { 38 | client, 39 | bucket, 40 | keys, 41 | } 42 | } 43 | 44 | pub async fn get_object_size(&self) -> ParpulseResult { 45 | let mut size = 0; 46 | for key in &self.keys { 47 | let obj = self 48 | .client 49 | .head_object() 50 | .bucket(&self.bucket) 51 | .key(key) 52 | .send() 53 | .await?; 54 | size += obj 55 | .content_length 56 | .map(|l| l as usize) 57 | .ok_or_else(|| ParpulseError::S3("fail to get object size".into()))?; 58 | } 59 | Ok(size) 60 | } 61 | } 62 | 63 | /// [`S3DataStream`] is a stream for reading data from S3. It reads the data in 64 | /// chunks and returns the data in a stream. Currently it uses non-fixed buffer, 65 | /// which means it will be consumed and extended. 66 | /// 67 | /// If we want to use fixed buffer for benchmark, we can add self.last_read_size and 68 | /// self.current_buffer_pos. 
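///
/// Typical usage goes through [`S3Reader::into_stream`]; the sketch below is
/// illustrative only (the bucket and key names are placeholders):
///
/// ```ignore
/// let reader = S3Reader::new("some-bucket".to_string(), vec!["some-key".to_string()]).await;
/// let mut stream = reader.into_stream().await?;
/// while let Some(chunk) = stream.next().await {
///     let bytes = chunk?;
///     // consume `bytes`
/// }
/// ```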
69 | pub struct S3ReaderStream { 70 | client: Client, 71 | bucket: String, 72 | keys: Vec, 73 | current_key: usize, 74 | 75 | object_fut: 76 | Option>>>, 77 | object_body: Option, 78 | } 79 | 80 | impl S3ReaderStream { 81 | pub fn new(client: Client, bucket: String, keys: Vec) -> Self { 82 | assert!(!keys.is_empty(), "keys should not be empty"); 83 | Self { 84 | client, 85 | bucket, 86 | keys, 87 | current_key: 0, 88 | object_fut: None, 89 | object_body: None, 90 | } 91 | } 92 | } 93 | 94 | impl Stream for S3ReaderStream { 95 | type Item = ParpulseResult; 96 | 97 | fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 98 | if let Some(object_fut) = self.object_fut.as_mut() { 99 | match ready!(object_fut.poll_unpin(cx)) { 100 | Ok(object) => { 101 | self.object_fut.take(); 102 | self.object_body = Some(object.body); 103 | self.poll_next(cx) 104 | } 105 | Err(e) => Poll::Ready(Some(Err(ParpulseError::from(e)))), 106 | } 107 | } else if let Some(object_body) = self.object_body.as_mut() { 108 | let poll_result = object_body.try_next().boxed().poll_unpin(cx); 109 | match poll_result { 110 | Poll::Ready(ready_result) => match ready_result { 111 | Ok(Some(bytes)) => Poll::Ready(Some(Ok(bytes))), 112 | Ok(None) => { 113 | self.object_body = None; 114 | self.poll_next(cx) 115 | } 116 | Err(e) => Poll::Ready(Some(Err(ParpulseError::from(e)))), 117 | }, 118 | Poll::Pending => Poll::Pending, 119 | } 120 | } else if self.current_key >= self.keys.len() { 121 | // No more data to read in S3. 122 | Poll::Ready(None) 123 | } else { 124 | // There are more files to read in S3. Fetch the next object. 125 | let fut = self 126 | .client 127 | .get_object() 128 | .bucket(&self.bucket) 129 | .key(&self.keys[self.current_key]) 130 | .send() 131 | .boxed(); 132 | self.object_fut = Some(fut); 133 | self.current_key += 1; 134 | self.poll_next(cx) 135 | } 136 | } 137 | } 138 | 139 | #[async_trait] 140 | impl AsyncStorageReader for S3Reader { 141 | /// NEVER call this method if you do not know the size of the data -- collecting 142 | /// all data into one buffer might lead to OOM. 143 | async fn read_all(&self) -> ParpulseResult> { 144 | let mut bytes_vec = Vec::with_capacity(self.keys.len()); 145 | for key in &self.keys { 146 | let object = self 147 | .client 148 | .get_object() 149 | .bucket(&self.bucket) 150 | .key(key) 151 | .send() 152 | .await 153 | .map_err(ParpulseError::from)?; 154 | bytes_vec.push( 155 | object 156 | .body 157 | .collect() 158 | .await 159 | .map_err(ParpulseError::from)? 
160 | .into_bytes(), 161 | ); 162 | } 163 | Ok(bytes_vec) 164 | } 165 | 166 | async fn into_stream(self) -> ParpulseResult { 167 | let s3_stream = S3ReaderStream::new(self.client, self.bucket, self.keys); 168 | Ok(Box::pin(s3_stream)) 169 | } 170 | } 171 | 172 | #[cfg(test)] 173 | mod tests { 174 | use futures::StreamExt; 175 | 176 | use super::*; 177 | 178 | #[tokio::test] 179 | async fn test_s3_read_all() { 180 | let bucket = "parpulse-test".to_string(); 181 | let keys = vec!["userdata/userdata1.parquet".to_string()]; 182 | let reader = S3Reader::new(bucket, keys).await; 183 | let bytes = reader.read_all().await.unwrap(); 184 | assert_eq!(bytes[0].len(), 113629); 185 | } 186 | 187 | #[tokio::test] 188 | async fn test_s3_read_streaming() { 189 | let bucket = "parpulse-test".to_string(); 190 | let keys = vec![ 191 | "userdata/userdata1.parquet".to_string(), 192 | "userdata/userdata2.parquet".to_string(), 193 | "userdata/userdata3.parquet".to_string(), 194 | "userdata/userdata4.parquet".to_string(), 195 | "userdata/userdata5.parquet".to_string(), 196 | ]; 197 | 198 | let reader = S3Reader::new(bucket, keys).await; 199 | let mut s3_stream = reader.into_stream().await.unwrap(); 200 | 201 | let mut streaming_total_bytes = 0; 202 | while let Some(data) = s3_stream.next().await { 203 | let data = data.unwrap(); 204 | streaming_total_bytes += data.len(); 205 | } 206 | assert_eq!(streaming_total_bytes, 565545); 207 | } 208 | 209 | #[tokio::test] 210 | async fn test_s3_get_object_size() { 211 | let bucket = "parpulse-test".to_string(); 212 | let keys = vec![ 213 | "userdata/userdata1.parquet".to_string(), 214 | "userdata/userdata2.parquet".to_string(), 215 | "userdata/userdata3.parquet".to_string(), 216 | "userdata/userdata4.parquet".to_string(), 217 | "userdata/userdata5.parquet".to_string(), 218 | ]; 219 | 220 | let reader = S3Reader::new(bucket, keys).await; 221 | let size = reader.get_object_size().await.unwrap(); 222 | assert_eq!(size, 565545); 223 | } 224 | } 225 | -------------------------------------------------------------------------------- /storage-node/src/storage_reader/s3_diskmock.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | env, 3 | pin::Pin, 4 | task::{Context, Poll}, 5 | thread, 6 | time::Duration, 7 | }; 8 | 9 | use async_trait::async_trait; 10 | use bytes::Bytes; 11 | use futures::{ready, Stream, StreamExt}; 12 | 13 | use crate::{ 14 | disk::{disk_manager::DiskManager, stream::RandomDiskReadStream}, 15 | error::ParpulseResult, 16 | }; 17 | 18 | use super::{AsyncStorageReader, StorageReaderStream}; 19 | 20 | const DELAY: Option = Some(Duration::from_millis(1)); 21 | const MIN_DISK_READ_SIZE: usize = 1024 * 512; 22 | const MAX_DISK_READ_SIZE: usize = 1024 * 1024; 23 | 24 | /// Please DON'T use `MockS3Reader` to test performance, only use it to 25 | /// test the correctness!!! 26 | /// There is no chunksize in `MockS3Reader`. 27 | /// If we want to make big change to s3.rs, please also change s3_diskmock.rs 28 | /// TODO: We can also use automock to mock s3. (so there is no need to manually sync changes) 29 | pub struct MockS3Reader { 30 | file_paths: Vec, 31 | disk_manager: DiskManager, 32 | } 33 | 34 | impl MockS3Reader { 35 | // Async here is to be consistent with S3Reader. 36 | pub async fn new(bucket: String, keys: Vec) -> Self { 37 | // Get the absolute path instead of relative path. 
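        // For example, when the tests run from a direct child of the repository root
        // (such as the `tests` crate directory), a request for bucket "tests-parquet"
        // and key "userdata1.parquet" resolves to
        // `<repo>/storage-node/tests/parquet/userdata1.parquet`.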
38 | let base_path = env::current_dir() 39 | .ok() 40 | .and_then(|current_path| { 41 | current_path 42 | .parent() 43 | .map(|root_path| root_path.join("storage-node")) 44 | }) 45 | .and_then(|joined_path| joined_path.to_str().map(|s| s.to_string())) 46 | .unwrap_or_default(); 47 | 48 | let file_paths: Vec = keys 49 | .iter() 50 | .map(|key| format!("{}/{}/{}", base_path, bucket.replace('-', "/"), key)) 51 | .collect(); 52 | MockS3Reader { 53 | file_paths, 54 | disk_manager: DiskManager::default(), 55 | } 56 | } 57 | 58 | pub async fn get_object_size(&self) -> ParpulseResult { 59 | let mut size = 0; 60 | for file_path in &self.file_paths { 61 | size += self.disk_manager.file_size(file_path).await? as usize; 62 | } 63 | Ok(size) 64 | } 65 | } 66 | 67 | pub struct MockS3ReaderStream { 68 | current_disk_stream: Option>>, 69 | file_paths: Vec, 70 | current_key: usize, 71 | } 72 | 73 | impl MockS3ReaderStream { 74 | pub fn new(file_paths: Vec) -> Self { 75 | MockS3ReaderStream { 76 | current_disk_stream: None, 77 | file_paths, 78 | current_key: 0, 79 | } 80 | } 81 | } 82 | 83 | impl Stream for MockS3ReaderStream { 84 | type Item = ParpulseResult; 85 | 86 | fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 87 | if let Some(current_disk_stream) = self.current_disk_stream.as_mut() { 88 | match ready!(current_disk_stream.poll_next_unpin(cx)) { 89 | Some(Ok(bytes)) => { 90 | if let Some(delay) = DELAY { 91 | thread::sleep(delay); 92 | } 93 | Poll::Ready(Some(Ok(bytes))) 94 | } 95 | Some(Err(e)) => Poll::Ready(Some(Err(e))), 96 | None => { 97 | self.current_key += 1; 98 | self.current_disk_stream.take(); 99 | self.poll_next(cx) 100 | } 101 | } 102 | } else { 103 | // We need to create a new disk_stream since there is no last disk_stream, or it has 104 | // been consumed. 
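            // Once every file path has been streamed we end the stream; otherwise we
            // open a `RandomDiskReadStream` for the next file and immediately poll it
            // again through the recursive `poll_next` call below.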
105 |             if self.current_key >= self.file_paths.len() {
106 |                 return Poll::Ready(None);
107 |             }
108 |             let file_path = self.file_paths[self.current_key].clone();
109 |             match RandomDiskReadStream::new(&file_path, MIN_DISK_READ_SIZE, MAX_DISK_READ_SIZE) {
110 |                 Ok(disk_stream) => {
111 |                     self.current_disk_stream = Some(Box::pin(disk_stream));
112 |                 }
113 |                 Err(e) => return Poll::Ready(Some(Err(e))),
114 |             }
115 |             self.poll_next(cx)
116 |         }
117 |     }
118 | }
119 |
120 | #[async_trait]
121 | impl AsyncStorageReader for MockS3Reader {
122 |     async fn read_all(&self) -> ParpulseResult<Vec<Bytes>> {
123 |         let mut bytes_vec = Vec::with_capacity(self.file_paths.len());
124 |         for file_path in &self.file_paths {
125 |             let (_, data) = self.disk_manager.read_disk_all(file_path).await?;
126 |             bytes_vec.push(data);
127 |         }
128 |         Ok(bytes_vec)
129 |     }
130 |
131 |     async fn into_stream(self) -> ParpulseResult<StorageReaderStream> {
132 |         Ok(Box::pin(MockS3ReaderStream::new(self.file_paths)))
133 |     }
134 | }
135 |
136 | #[cfg(test)]
137 | mod tests {
138 |     use super::*;
139 |     #[tokio::test]
140 |     async fn test_simple_write_read() {
141 |         let bucket = "tests-parquet".to_string();
142 |         let keys = vec![
143 |             "userdata1.parquet".to_string(),
144 |             "userdata2.parquet".to_string(),
145 |         ];
146 |         let reader = MockS3Reader::new(bucket, keys).await;
147 |         let bytes = reader.read_all().await.unwrap();
148 |         assert_eq!(bytes[0].len() + bytes[1].len(), 113629 + 112193);
149 |     }
150 |
151 |     #[tokio::test]
152 |     async fn test_mock_s3_read_streaming() {
153 |         let bucket = "tests-parquet".to_string();
154 |         let keys = vec![
155 |             "userdata1.parquet".to_string(),
156 |             "userdata2.parquet".to_string(),
157 |         ];
158 |
159 |         let reader = MockS3Reader::new(bucket, keys).await;
160 |         let mut s3_stream = reader.into_stream().await.unwrap();
161 |
162 |         let mut streaming_total_bytes = 0;
163 |         while let Some(data) = s3_stream.next().await {
164 |             let data = data.unwrap();
165 |             streaming_total_bytes += data.len();
166 |         }
167 |         assert_eq!(streaming_total_bytes, 113629 + 112193);
168 |     }
169 | }
170 |
--------------------------------------------------------------------------------
/storage-node/tests/parquet/small_random_data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-db/15721-s24-cache1/b4e2bc8f2c2fc3ab7a9b9fa3f8e864e25e9c8c40/storage-node/tests/parquet/small_random_data.parquet
--------------------------------------------------------------------------------
/storage-node/tests/parquet/userdata1.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-db/15721-s24-cache1/b4e2bc8f2c2fc3ab7a9b9fa3f8e864e25e9c8c40/storage-node/tests/parquet/userdata1.parquet
--------------------------------------------------------------------------------
/storage-node/tests/parquet/userdata2.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-db/15721-s24-cache1/b4e2bc8f2c2fc3ab7a9b9fa3f8e864e25e9c8c40/storage-node/tests/parquet/userdata2.parquet
--------------------------------------------------------------------------------
/storage-node/tests/text/what-can-i-hold-you-with:
--------------------------------------------------------------------------------
1 | What can I hold you with?
2 | I offer you lean streets, desperate sunsets, the
3 | moon of the jagged suburbs.
4 | I offer you the bitterness of a man who has looked
5 | long and long at the lonely moon.
6 | I offer you my ancestors, my dead men, the ghosts
7 | that living men have honoured in bronze.
8 | I offer you whatever insight my books may hold,
9 | whatever manliness or humour my life.
10 | I offer you the loyalty of a man who has never
11 | been loyal.
12 | I offer you that kernel of myself that I have saved,
13 | somehow-the central heart that deals not
14 | in words, traffics not with dreams, and is
15 | untouched by time, by joy, by adversities.
16 | I offer you the memory of a yellow rose seen at
17 | sunset, years before you were born.
18 | I offer you explanations of yourself, theories about
19 | yourself, authentic and surprising news of
20 | yourself.
21 | I can give you my loneliness, my darkness, the
22 | hunger of my heart; I am trying to bribe you
23 | with uncertainty, with danger, with defeat.
24 |
--------------------------------------------------------------------------------
/tests/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "tests"
3 | version = "0.1.0"
4 | edition = "2021"
5 |
6 | [dependencies]
7 | storage-node = { path = "../storage-node" }
8 | parpulse-client = { path = "../storage-client" }
9 | tokio = { version = "1", features = ["rt", "rt-multi-thread", "macros"] }
10 | arrow = "50.0.0"
11 | log = "0.4"
12 | istziio-client = "0.1"
13 |
14 | [dev-dependencies]
15 | serial_test = "3.1"
16 | env_logger = "0.11"
17 |
--------------------------------------------------------------------------------
/tests/src/client_server_test.rs:
--------------------------------------------------------------------------------
1 | /// This file serves as an integration test for the client and server.
2 | /// WARNING: This test assumes that the data returned from the server is userdata1.parquet.
3 | extern crate parpulse_client;
4 | extern crate storage_node;
5 |
6 | #[cfg(test)]
7 | mod tests {
8 |     use arrow::array::{Float64Array, StringArray};
9 |     use istziio_client::client_api::{DataRequest, StorageClient, StorageRequest};
10 |     use parpulse_client::client::StorageClientImpl;
11 |     use serial_test::serial;
12 |     use std::time::Instant;
13 |     use storage_node::{common::config::ParpulseConfig, server::storage_node_serve};
14 |
15 |     #[test]
16 |     fn setup() {
17 |         let _ = env_logger::builder()
18 |             .filter_level(log::LevelFilter::Info)
19 |             .is_test(true)
20 |             .try_init();
21 |     }
22 |
23 |     #[tokio::test]
24 |     #[serial]
25 |     async fn test_client_server_disk() {
26 |         // The file dir should start from storage-node.
27 |         // Start the server
28 |         let server_handle = tokio::spawn(async move {
29 |             storage_node_serve("127.0.0.1", 3030, ParpulseConfig::default())
30 |                 .await
31 |                 .unwrap();
32 |         });
33 |
34 |         // Give the server some time to start
35 |         tokio::time::sleep(std::time::Duration::from_secs(1)).await;
36 |
37 |         let storage_client =
38 |             StorageClientImpl::new("http://127.0.0.1:3030", "http://127.0.0.1:3031")
39 |                 .expect("Failed to create storage client.");
40 |         let start_time = Instant::now();
41 |         let request = StorageRequest::new(0, DataRequest::Table(0));
42 |         let mut receiver = storage_client
43 |             .request_data_test(request)
44 |             .await
45 |             .expect("Failed to get data from the server.");
46 |         let mut record_batches = vec![];
47 |         while let Some(record_batch) = receiver.recv().await {
48 |             record_batches.push(record_batch);
49 |         }
50 |         println!(
51 |             "Time taken for userdata file on disk: {:?}",
52 |             start_time.elapsed()
53 |         );
54 |         assert!(!record_batches.is_empty());
55 |
56 |         let first_batch = &record_batches[0];
57 |         assert_eq!(first_batch.num_columns(), 13);
58 |
59 |         let real_first_names = StringArray::from(vec!["Amanda", "Albert", "Evelyn"]);
60 |         let real_last_names = StringArray::from(vec!["Jordan", "Freeman", "Morgan"]);
61 |         let first_names = first_batch
62 |             .column(2)
63 |             .as_any()
64 |             .downcast_ref::<StringArray>()
65 |             .unwrap();
66 |         let last_names = first_batch
67 |             .column(3)
68 |             .as_any()
69 |             .downcast_ref::<StringArray>()
70 |             .unwrap();
71 |         // Check the first three entries in the first and last name columns.
72 |         for i in 0..3 {
73 |             assert_eq!(first_names.value(i), real_first_names.value(i));
74 |             assert_eq!(last_names.value(i), real_last_names.value(i));
75 |         }
76 |
77 |         server_handle.abort();
78 |     }
79 |
80 |     #[tokio::test]
81 |     #[serial]
82 |     async fn test_client_server_s3() {
83 |         // Start the server
84 |         let server_handle = tokio::spawn(async move {
85 |             storage_node_serve("127.0.0.1", 3030, ParpulseConfig::default())
86 |                 .await
87 |                 .unwrap();
88 |         });
89 |
90 |         // Give the server some time to start
91 |         tokio::time::sleep(std::time::Duration::from_secs(1)).await;
92 |
93 |         let storage_client =
94 |             StorageClientImpl::new("http://127.0.0.1:3030", "http://127.0.0.1:3031")
95 |                 .expect("Failed to create storage client.");
96 |         let start_time = Instant::now();
97 |         // Requesting random_data_1m_1.parquet
98 |         let request = StorageRequest::new(0, DataRequest::Table(1));
99 |         let mut receiver = storage_client
100 |             .request_data(request)
101 |             .await
102 |             .expect("Failed to get data from the server.");
103 |         let mut record_batches = vec![];
104 |         while let Some(record_batch) = receiver.recv().await {
105 |             record_batches.push(record_batch);
106 |         }
107 |
108 |         println!("Time taken for 1m file: {:?}", start_time.elapsed());
109 |         assert!(!record_batches.is_empty());
110 |
111 |         let first_batch = &record_batches[0];
112 |         assert_eq!(first_batch.num_columns(), 20);
113 |
114 |         // Check the first 5 columns of the first row.
115 |         let real_first_row = [
116 |             0.19195386139992177,
117 |             0.4815442611405789,
118 |             0.47078682326631927,
119 |             0.7793912218913533,
120 |             0.21877220521846885,
121 |         ];
122 |         for (i, &real_value) in real_first_row.iter().enumerate() {
123 |             let column = first_batch
124 |                 .column(i)
125 |                 .as_any()
126 |                 .downcast_ref::<Float64Array>()
127 |                 .unwrap();
128 |             assert_eq!(column.value(0), real_value);
129 |         }
130 |
131 |         server_handle.abort();
132 |     }
133 | }
134 |
--------------------------------------------------------------------------------
/tests/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub mod client_server_test;
2 |
--------------------------------------------------------------------------------
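Aside (not part of the repository files above): the TODO in s3_diskmock.rs suggests using automock so the mock never has to be kept in sync with s3.rs by hand. Below is a minimal sketch of that idea. The trait shape is inferred from the implementations above, only the read path is mocked, and `ParpulseResult` here is a simplified stand-in for the crate's real error alias; treat it as an illustration, not the project's actual code.

// Sketch only: generating a mock reader with mockall's automock, assuming the
// `mockall`, `async-trait`, `bytes`, and `tokio` crates are available.
use async_trait::async_trait;
use bytes::Bytes;
use mockall::automock;

// Hypothetical stand-in for the crate's real `ParpulseResult` alias.
type ParpulseResult<T> = Result<T, Box<dyn std::error::Error + Send + Sync>>;

// `#[automock]` must sit above `#[async_trait]` so mockall sees the async method.
#[automock]
#[async_trait]
pub trait AsyncStorageReader {
    async fn read_all(&self) -> ParpulseResult<Vec<Bytes>>;
}

#[tokio::test]
async fn read_all_returns_canned_bytes() {
    // No S3 or disk access: the expectation supplies the data directly.
    let mut reader = MockAsyncStorageReader::new();
    reader
        .expect_read_all()
        .returning(|| Ok(vec![Bytes::from_static(b"hello")]));
    assert_eq!(reader.read_all().await.unwrap()[0].len(), 5);
}

Because the generated `MockAsyncStorageReader` implements the same trait as the real reader, tests written against the trait cannot silently drift from the S3 implementation the way a hand-maintained mock can.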