├── LICENSE ├── README.md ├── prototype ├── CMakeLists.txt ├── README.md ├── app │ ├── CMakeLists.txt │ └── main.cc ├── scripts │ ├── howto_emulated_storage.md │ ├── run.sh │ └── volume_list.txt └── src │ ├── CMakeLists.txt │ ├── buse │ ├── CMakeLists.txt │ ├── buse.cpp │ ├── buse.h │ ├── buseOperations.cpp │ ├── buseOperations.h │ ├── commonIncludes.h │ ├── diskStats.cpp │ └── diskStats.h │ ├── indexmap │ ├── CMakeLists.txt │ ├── array.cc │ ├── array.h │ ├── factory.h │ ├── hashmap.cc │ ├── hashmap.h │ └── indexmap.h │ ├── logstore │ ├── config.h │ ├── logstore.cc │ ├── logstore.h │ ├── manager.cc │ ├── manager.h │ ├── scheduler.cc │ ├── scheduler.h │ ├── segment.cc │ └── segment.h │ ├── placement │ ├── CMakeLists.txt │ ├── dac.cc │ ├── dac.h │ ├── factory.h │ ├── fifo.h │ ├── metadata.h │ ├── no_placement.cc │ ├── no_placement.h │ ├── placement.h │ ├── sepbit.cc │ ├── sepbit.h │ ├── sepgc.cc │ ├── sepgc.h │ ├── warcip.cc │ └── warcip.h │ ├── selection │ ├── CMakeLists.txt │ ├── costbenefit.cc │ ├── costbenefit.h │ ├── factory.h │ ├── greedy.cc │ ├── greedy.h │ └── selection.h │ └── storage_adapter │ ├── CMakeLists.txt │ ├── factory.h │ ├── local_adapter.cc │ ├── local_adapter.h │ ├── storage_adapter.h │ ├── zenfs_adapter.cc │ └── zenfs_adapter.h └── trace_replay ├── README.md ├── analyze_script ├── .gitignore ├── README.md ├── etc │ ├── ali_property.txt │ ├── ali_selected_186.txt │ ├── common.sh │ ├── tc_property.txt │ └── tc_selected_271.txt ├── r │ ├── common.r │ ├── common_graph.r │ ├── design_calculation.r │ ├── obsv1.r │ ├── obsv2.r │ ├── obsv3.r │ ├── plot_design_boxplot.r │ ├── plot_design_lines.r │ ├── plot_design_traces.r │ └── synthetic.r ├── run_annotate.sh ├── run_design.sh ├── run_exp5_hot20.sh ├── run_obsv1.sh ├── run_obsv2.sh ├── run_obsv3.sh ├── split.sh ├── src │ ├── annotate_future_knowledge.cc │ ├── design_gw.cc │ ├── design_uw.cc │ ├── exp5_hot20.cc │ ├── large_array.h │ ├── obsv1.cc │ ├── obsv2.cc │ ├── obsv3.cc │ ├── split.cc │ ├── trace.h │ └── transform_timestamp_tencentCloud.cc └── synthetic_gen.sh ├── etc ├── ali_groups │ ├── Note │ ├── ali_selected.txt │ ├── divide.py │ ├── group1 │ ├── group10 │ ├── group11 │ ├── group12 │ ├── group13 │ ├── group14 │ ├── group15 │ ├── group16 │ ├── group17 │ ├── group18 │ ├── group19 │ ├── group2 │ ├── group20 │ ├── group21 │ ├── group22 │ ├── group23 │ ├── group24 │ ├── group25 │ ├── group26 │ ├── group27 │ ├── group28 │ ├── group29 │ ├── group3 │ ├── group30 │ ├── group4 │ ├── group5 │ ├── group6 │ ├── group7 │ ├── group8 │ └── group9 ├── ali_property.txt ├── synthetic_groups │ └── all ├── synthetic_property.txt ├── tencent_groups │ ├── divide.py │ ├── group1 │ ├── group10 │ ├── group11 │ ├── group12 │ ├── group13 │ ├── group14 │ ├── group15 │ ├── group16 │ ├── group17 │ ├── group18 │ ├── group19 │ ├── group2 │ ├── group20 │ ├── group3 │ ├── group4 │ ├── group5 │ ├── group6 │ ├── group7 │ ├── group8 │ ├── group9 │ └── selected_volumes.txt └── tencent_property.txt ├── pom.xml ├── scripts ├── base.sh ├── obtain_removed_seg.py ├── process_fifo_len.py ├── run_exp1_selection.sh ├── run_exp2_segsize.sh ├── run_exp3_gp.sh ├── run_exp4_predictability.sh ├── run_exp5_micro.sh ├── run_exp6_tencent.sh ├── run_exp7_skewness.sh └── run_exp8_memory.sh └── src └── main └── java └── gcsimulator ├── BlockContainer.java ├── Configs.java ├── GCScheduler.java ├── GCWorker.java ├── Log.java ├── Metadata.java ├── Segment.java ├── Simulator.java ├── Statistics.java ├── fifo └── OnDiskFIFO.java ├── indexmap ├── IndexMap.java ├── 
IndexMapFactory.java ├── IndexMapWithGlobalPageCache.java ├── LargeArray.java ├── LargeNativeArray.java ├── Page.java ├── PageTableIndexMap.java ├── PersistentIndexMap.java └── PureInMemIndexMap.java ├── iorequest ├── AliIORequest.java └── IORequest.java ├── placement ├── BITDouble.java ├── BITGW.java ├── BITHalf.java ├── DAC.java ├── ETI.java ├── FADaC.java ├── FK.java ├── GW.java ├── Method1.java ├── Method2.java ├── MultiLog.java ├── MultiQueue.java ├── NoSep.java ├── SFR.java ├── SFS.java ├── SepBIT.java ├── SepGC.java ├── Separator.java ├── SeparatorFactory.java ├── UW.java └── Warcip.java ├── segment └── SegmentMeta.java ├── selection ├── Basic.java ├── CostBenefit.java ├── CostHotness.java ├── Greedy.java ├── Lru.java ├── MultiLog.java ├── Random.java ├── RandomGreedy.java ├── SelectionAlgorithm.java ├── SelectionAlgorithmFactory.java └── WindowGreedy.java └── tracereplay └── TraceReplay.java /README.md: -------------------------------------------------------------------------------- 1 | ## SepBIT 2 | SepBIT is a lightweight data placement scheme for log-structured storage systems that achieves low write amplification. This project contains the prototype and trace analysis code for our publication below. 3 | 4 | ### Publication 5 | * Qiuping Wang, Jinhong Li, Patrick P. C. Lee, Tao Ouyang, Chao Shi, and Lilong Huang. 6 | [Separating Data via Block Invalidation Time Inference for Write Amplification Reduction in Log-Structured Storage.](https://www.cse.cuhk.edu.hk/~pclee/www/pubs/fast22_sepbit.pdf) 7 | USENIX FAST 2022. 8 | -------------------------------------------------------------------------------- /prototype/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.15) 2 | project(sepbit) 3 | 4 | add_subdirectory(src) 5 | add_subdirectory(app) 6 | -------------------------------------------------------------------------------- /prototype/README.md: -------------------------------------------------------------------------------- 1 | ### SepBIT prototype 2 | This project includes a prototype of a log-structured storage system. 3 | 4 | ### Dependencies 5 | * Snappy, bz2, tcmalloc, zlib (installed via apt-get) 6 | * RocksDB with ZenFS compiled (see the [link](https://github.com/westerndigitalcorporation/zenfs)) 7 | 8 | ### Build 9 | * The project is managed using CMake (version >= 3.15) 10 | * Step 1: mkdir build 11 | * Step 2: cd build && cmake .. 12 | * Step 3: make 13 | 14 | ### Run 15 | * Build the emulated zoned storage environment 16 | * Use tcmu-runner to emulate an SMR disk ([tutorial](https://zonedstorage.io/getting-started/smr-emulation/)) 17 | * Configure the parameters, e.g., the devices and the folders for temporary on-disk data structures, in logstore/config.h and rebuild 18 | * Attention: root privilege is required to operate on a raw zoned storage device. 
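* Optional: the store can also be driven programmatically. A minimal sketch (mirroring app/main.cc, and assuming the build links against the logstore and buse libraries and that the storage backend selected in src/logstore/config.h is reachable):

```cpp
#include "src/logstore/logstore.h"
#include "src/logstore/config.h"

int main() {
  // Pick a placement scheme: SepBIT, DAC, SepGC, WARCIP, or NoSep.
  Config::GetInstance().placement = "SepBIT";

  // 300 GiB logical address space, as in app/main.cc.
  LogStore store(300 * 1024 * 1024 * 1024ull);

  // Issue a 4 KiB aligned write and read it back.
  alignas(512) char block[4096] = {0};
  store.write(block, sizeof(block), 0);
  store.read(block, sizeof(block), 0);
  return 0;
}
```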
19 | -------------------------------------------------------------------------------- /prototype/app/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(app main.cc) 2 | target_compile_features(app PRIVATE cxx_std_17) 3 | target_link_libraries(app PRIVATE logstore buse rt pthread) 4 | -------------------------------------------------------------------------------- /prototype/app/main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "src/buse/buse.h" 9 | #include "src/logstore/logstore.h" 10 | #include "src/logstore/config.h" 11 | 12 | using namespace buse; 13 | 14 | int main(int argc, char *argv[]) { 15 | // Config::getInstance().selection = std::string(argv[1]); 16 | Config::GetInstance().placement = std::string(argv[1]); 17 | 18 | int opt; 19 | buseOperations *bop = NULL; // Initialize to something 20 | bop = new LogStore(300 * 1024 * 1024 * 1024ull); 21 | 22 | alignas(512) char buffer[4096 * 512]; 23 | for (int i = 0; i < 4096 * 512; ++i) 24 | { 25 | buffer[i] = 0; 26 | } 27 | 28 | char input[100]; 29 | memset(input, 0, 100); 30 | sprintf(input, "%s/%s.csv", argv[2], argv[3]); 31 | FILE *trace = fopen(input, "r"); 32 | char line[100]; 33 | 34 | { 35 | struct timeval current_time; 36 | gettimeofday(¤t_time, NULL); 37 | printf("seconds: %ld micro seconds: %ld\n", 38 | current_time.tv_sec, current_time.tv_usec); 39 | 40 | } 41 | 42 | while (fscanf(trace, "%s", line) != EOF) 43 | { 44 | std::string str(line); 45 | std::vector result; 46 | { 47 | stringstream ss(str); //create string stream from the string 48 | while (ss.good()) { 49 | string substr; 50 | getline(ss, substr, ','); //get first string delimited by comma 51 | result.push_back(substr); 52 | } 53 | } 54 | 55 | if (result[1] != "W") continue; 56 | 57 | uint64_t start = atoll(result[2].c_str()); 58 | uint32_t length = atoi(result[3].c_str()); 59 | uint64_t end = start + length; 60 | start = start & (~4095ull); 61 | length = ((end + 4095) & (~4095ull)) - start; 62 | bop->write(buffer, length, start); 63 | } 64 | 65 | { 66 | struct timeval current_time; 67 | gettimeofday(¤t_time, NULL); 68 | printf("seconds: %ld micro seconds: %ld\n", 69 | current_time.tv_sec, current_time.tv_usec); 70 | 71 | } 72 | 73 | delete bop; 74 | } 75 | -------------------------------------------------------------------------------- /prototype/scripts/howto_emulated_storage.md: -------------------------------------------------------------------------------- 1 | # Guidance for preparing the emulated zoned device 2 | 3 | * We introduce how we prepare the emulated zoned device in this document 4 | 5 | ## Optane PMM 6 | * Create a block device and format it using ext4 7 | * ``ndctl create-namespace --mode=fsdax`` 8 | * ``mkfs.ext4 /dev/pmem0`` 9 | * Mount the PMM-based block device 10 | * ``mount -o dax /dev/pmem0 /mnt/pmem0`` 11 | 12 | ## Emulated zoned device 13 | * Tool: [tcmu-runner](https://github.com/open-iscsi/tcmu-runner) 14 | * Tool: [targetcli](https://github.com/open-iscsi/targetcli-fb) 15 | * Make sure to run a tcmu-runner daemon before using targetcli to configure user:zbc backstores 16 | * Create a Host-Managed SMR user:zbc target 17 | * Using targetcli under /backstores/user:zbc 18 | * ``create name=zbc0 size=400g cfgstring=model-HM/zsize-256/conv-10@/mnt/pmem0/zbc1.raw`` 19 | * Create a loopback device based on a file (on PMM) using targetcli 20 | * Using 
targetcli under /loopback; create a device, under its luns/ folder 21 | * ``create /backstores/user:zbc/zbc0 0`` 22 | * Use ``lsscsi -g`` to check and a new HM-SMR device (e.g., /dev/sdd) is prepared 23 | -------------------------------------------------------------------------------- /prototype/scripts/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Note: set the traceDir variable to the one you put the trace in 3 | # Run this script under your build directory 4 | traceDir="/mnt/data/alibaba_trace" 5 | mkdir results/ 6 | for volume in `cat ./volume_list.txt` 7 | do 8 | echo $volume 9 | for placement in "SepBIT" "DAC" "SepGC" "WARCIP" "NoSep" 10 | do 11 | sudo ../build/app/app $placement $traceDir $volume > results/${volume}_${placement}.result 12 | done 13 | done 14 | -------------------------------------------------------------------------------- /prototype/scripts/volume_list.txt: -------------------------------------------------------------------------------- 1 | 745 2 | 759 3 | 275 4 | 725 5 | 7 6 | 40 7 | 202 8 | 132 9 | 223 10 | 391 11 | 126 12 | 293 13 | 256 14 | 728 15 | 466 16 | 727 17 | 0 18 | 261 19 | 228 20 | 724 21 | -------------------------------------------------------------------------------- /prototype/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(logstore logstore/logstore.cc logstore/manager.cc logstore/segment.cc logstore/scheduler.cc) 2 | 3 | add_subdirectory(indexmap) 4 | add_subdirectory(placement) 5 | add_subdirectory(selection) 6 | add_subdirectory(storage_adapter) 7 | add_subdirectory(buse) 8 | 9 | target_link_libraries(logstore indexmap placement selection storage_adapter) 10 | target_compile_features(logstore PRIVATE cxx_std_17) 11 | -------------------------------------------------------------------------------- /prototype/src/buse/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(buse buse.cpp buseOperations.cpp diskStats.cpp) 2 | 3 | target_include_directories(buse PUBLIC ${PROJECT_SOURCE_DIR}) 4 | target_compile_features(buse PRIVATE cxx_std_17) 5 | -------------------------------------------------------------------------------- /prototype/src/buse/buse.h: -------------------------------------------------------------------------------- 1 | #ifndef BUSE_H 2 | #define BUSE_H 3 | 4 | #include "buseOperations.h" 5 | using namespace buse; 6 | 7 | int buse_main(const char* dev_file, buseOperations *bop); 8 | 9 | #endif /* BUSE_H_INCLUDED */ 10 | -------------------------------------------------------------------------------- /prototype/src/buse/buseOperations.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * buseOperations.cpp 3 | * 4 | * Created on: Jan 25, 2015 5 | * Author: andras 6 | */ 7 | #include 8 | #include 9 | #include "src/buse/buseOperations.h" 10 | 11 | using namespace std; 12 | 13 | namespace buse { 14 | buseOperations::buseOperations() { this->size = 0; } 15 | buseOperations::buseOperations(uint64_t size) { this->size = size; } 16 | 17 | buseOperations::~buseOperations() { 18 | DEBUGPRINTLN("Destroying buse object."); 19 | while(!disks.empty()) { close(disks.back().getFD()); disks.pop_back(); } 20 | } 21 | 22 | int buseOperations::read(void *buf, size_t len, off64_t offset) { 23 | UNUSED(buf); UNUSED(len); UNUSED(offset); 24 | DEBUGPRINTLN("R - " << offset << ", " << len); 25 | return handleTX(buf,len, 
offset,::commonIncludesRead); 26 | } 27 | 28 | int buseOperations::write(const void *buf, size_t len, off64_t offset) { 29 | UNUSED(buf); UNUSED(len); UNUSED(offset); 30 | DEBUGPRINTLN("W - " << offset << ", " << len); 31 | return handleTX((void*)buf,len, offset,::commonIncludesWrite); 32 | } 33 | 34 | int buseOperations::handleTX(void *buf, size_t len, off64_t offset, ssize_t (*func)(int, void *, size_t)) { 35 | UNUSED(buf); UNUSED(len); UNUSED(offset); UNUSED(func); 36 | DEBUGPRINTLN("H - " << offset << ", " << len); 37 | return 0; 38 | } 39 | 40 | void buseOperations::disc() { DEBUGPRINTLN("Received a disconnect request."); } 41 | 42 | int buseOperations::flush() { 43 | DEBUGPRINTLN("Received a flush request."); 44 | for(uint i = 0; i < this->disks.size(); i++) ::syncfs(disks[i].getFD()); 45 | return 0; 46 | } 47 | 48 | int buseOperations::trim(uint64_t from, uint32_t len) { 49 | UNUSED(from); UNUSED(len); 50 | DEBUGPRINTLN("T - " << from << ", " << len); 51 | return 0; 52 | } 53 | 54 | uint8_t buseOperations::getNumAsyncIdle() { 55 | uint8_t numDisks = 0; 56 | for(uint i = 0; i < disks.size(); i++) { numDisks += (uint8_t)(disks[i].aio_error()!=EINPROGRESS); } 57 | return numDisks; 58 | } 59 | 60 | uint8_t buseOperations::getFastestIdleReadDisk() { 61 | uint8_t cdID = 0; 62 | for(uint8_t i = 0; i < disks.size(); i++) { if((disks[i].aio_error()!=EINPROGRESS) && (disks[cdID].getReadSpeed() > disks[i].getReadSpeed())) cdID = i; } 63 | return cdID; 64 | } 65 | 66 | uint8_t buseOperations::getFastestIdleWriteDisk() { 67 | uint8_t cdID = 0; 68 | for(uint8_t i = 0; i < disks.size(); i++) { if((disks[i].aio_error()!=EINPROGRESS) && (disks[cdID].getWriteSpeed() > disks[i].getWriteSpeed())) cdID = i; } 69 | return cdID; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /prototype/src/buse/buseOperations.h: -------------------------------------------------------------------------------- 1 | /* 2 | * buseOperations.h 3 | * 4 | * Created on: Jan 25, 2015 5 | * Author: andras 6 | */ 7 | 8 | #ifndef BUSEOPERATIONS_H_ 9 | #define BUSEOPERATIONS_H_ 10 | 11 | #include "commonIncludes.h" 12 | #include "diskStats.h" 13 | #include 14 | #include 15 | #include 16 | 17 | namespace buse { 18 | class buseOperations { 19 | public: 20 | buseOperations(); 21 | buseOperations(uint64_t size); 22 | virtual ~buseOperations(); 23 | 24 | inline uint64_t getSize() { return size; } 25 | 26 | virtual int read(void *buf, size_t len, off64_t offset); 27 | virtual int write(const void *buf, size_t len, off64_t offset); 28 | 29 | virtual void disc(); 30 | virtual int flush(); 31 | virtual int trim(uint64_t from, uint32_t len); 32 | inline uint8_t getNumDrives() { return (uint8_t)disks.size(); } 33 | uint8_t getNumAsyncIdle(); 34 | uint8_t getFastestIdleReadDisk(); 35 | uint8_t getFastestIdleWriteDisk(); 36 | 37 | protected: 38 | // helper function for read/write operations that are similar in content 39 | virtual int handleTX(void *buf, size_t len, off64_t offset, ssize_t (*func)(int, void *, size_t)); 40 | 41 | std::vector disks; 42 | uint64_t size; // size of the entire array 43 | }; 44 | } 45 | 46 | #endif /* BUSEOPERATIONS_H_ */ 47 | -------------------------------------------------------------------------------- /prototype/src/buse/commonIncludes.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMONINCLUDES_H 2 | #define COMMONINCLUDES_H 3 | 4 | //#define _GNU_SOURCE 5 | //#define _LARGEFILE64_SOURCE 6 | 7 | //#define DEBUG 8 
| #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | 18 | // stole this part from: http://stackoverflow.com/questions/16692400/c11-adding-a-stream-output-operator-for-stdchronotime-point 19 | namespace prettyPrint { 20 | template 21 | struct printWrapper { // boost::noopy optional -- if so, use it with && as an argument 22 | T const &data; 23 | 24 | printWrapper(T const &t) : data(t) { } 25 | }; 26 | 27 | template 28 | printWrapper format(T const &t) { return {t}; } 29 | 30 | template 31 | std::ostream &operator<<(std::ostream &stream, const printWrapper> &&duration) { 32 | return stream << std::chrono::duration_cast(duration.data).count() << "us"; 33 | } 34 | 35 | template 36 | std::ostream &operator<<(std::ostream &stream, const printWrapper> &&time_point) { 37 | const time_t time = Clock::to_time_t(time_point.data); 38 | #if __GNUC__ > 4 || ((__GNUC__ == 4) && __GNUC_MINOR__ > 8 && __GNUC_REVISION__ > 1) 39 | // Maybe the put_time will be implemented later? 40 | struct tm tm; 41 | localtime_r(&time, &tm); 42 | return stream << std::put_time(&tm, "c"); 43 | #else 44 | char buffer[26]; 45 | ctime_r(&time, buffer); 46 | buffer[24] = '\0'; // Removes the newline that is added 47 | return stream << buffer; 48 | #endif 49 | } 50 | } 51 | 52 | inline ssize_t commonIncludesRead(int fd, void *buf, size_t count) { return ::read(fd,buf,count); } 53 | inline ssize_t commonIncludesWrite(int fd, void *buf, size_t count) { return ::write(fd,buf,count); } 54 | 55 | #define likely(x) __builtin_expect(!!(x), 1) 56 | #define unlikely(x) __builtin_expect(!!(x), 0) 57 | 58 | #ifdef DEBUG 59 | 60 | #include 61 | #include 62 | #include 63 | 64 | #define DEBUGCODE(X) do {\ 65 | boost::lock_guard guard(boost::serialization::singleton::get_mutable_instance());\ 66 | cerr << prettyPrint::format(std::chrono::system_clock::now()) << ':' << __FILE__ << ':' << __LINE__ << ": " << endl;\ 67 | { X; };\ 68 | } while(0); 69 | #define DEBUGPRINTLN(X) do {\ 70 | boost::lock_guard guard(boost::serialization::singleton::get_mutable_instance());\ 71 | cerr << prettyPrint::format(std::chrono::system_clock::now()) << ':' << __FILE__ << ':' << __LINE__ << ": " << X << endl;\ 72 | } while(0); 73 | #else 74 | #define DEBUGCODE(X) 75 | #define DEBUGPRINTLN(X) 76 | //#define DEBUGCODE(X) std::cout << X << std::endl; 77 | //#define DEBUGPRINTLN(X) std::cout << X << std::endl; 78 | #endif 79 | 80 | #define UNUSED(expr) (void)(expr) 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /prototype/src/buse/diskStats.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by andras on 5/6/15. 3 | // 4 | 5 | #ifndef DISKSTATS_H 6 | #define DISKSTATS_H 7 | 8 | #include "commonIncludes.h" 9 | #include 10 | #include 11 | 12 | class diskStats { 13 | public: 14 | diskStats(int fd, string name); 15 | diskStats(int fd, string name, std::chrono::duration writeSpeed, std::chrono::duration readSpeed); 16 | 17 | int getFD() { return aiocb.aio_fildes; } 18 | uint64_t getSize() { return diskSize; } 19 | string getFileName() { return name; } 20 | 21 | void testSpeed(); 22 | // The buffers all have to be of size bufSiz. 
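  // The measured durations are what buseOperations uses for disk selection:
  // getFastestIdleReadDisk() and getFastestIdleWriteDisk() skip disks whose
  // aio_error() still reports EINPROGRESS and pick the idle disk with the
  // smallest (i.e. fastest) recorded read/write duration.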
23 | void testSpeed(char *buf0, char *buf1, char *buf2, const char *buf3, const uint32_t bufSiz); 24 | std::chrono::duration getWriteSpeed() { return writeSpeed; } 25 | std::chrono::duration getReadSpeed() { return readSpeed; } 26 | 27 | int aio_read(void *buf, size_t len, off64_t offset); 28 | int aio_write(const void *buf, size_t len, off64_t offset); 29 | ssize_t aio_return(); 30 | int aio_error(); 31 | ssize_t read(void *buf, size_t len, off64_t offset); 32 | ssize_t write(const void *buf, size_t len, off64_t offset); 33 | 34 | private: 35 | const string name; 36 | uint64_t diskSize; 37 | std::chrono::duration writeSpeed; 38 | std::chrono::duration readSpeed; 39 | ::aiocb64 aiocb; 40 | }; 41 | 42 | #endif //DISKSTATS_H 43 | -------------------------------------------------------------------------------- /prototype/src/indexmap/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(indexmap hashmap.cc array.cc) 2 | 3 | target_include_directories(indexmap PUBLIC ${PROJECT_SOURCE_DIR}) 4 | 5 | target_compile_features(indexmap PRIVATE cxx_std_17) 6 | -------------------------------------------------------------------------------- /prototype/src/indexmap/array.cc: -------------------------------------------------------------------------------- 1 | #include "src/indexmap/array.h" 2 | #include 3 | 4 | Array::Array(int capacity) { 5 | mCapacity = capacity; 6 | mValues = new uint64_t[mCapacity]; 7 | for (int i = 0; i < mCapacity; ++i) { 8 | mValues[i] = ~0ull; 9 | } 10 | } 11 | 12 | Array::Array() { 13 | mValues = new uint64_t[mCapacity]; 14 | for (int i = 0; i < mCapacity; ++i) { 15 | mValues[i] = ~0ull; 16 | } 17 | } 18 | 19 | void Array::Update(uint32_t blockAddr, uint64_t phyAddr) { 20 | mValues[blockAddr] = phyAddr; 21 | } 22 | 23 | uint64_t Array::Query(uint32_t blockAddr) { 24 | return mValues[blockAddr]; 25 | } 26 | 27 | Array::~Array() { 28 | delete[] mValues; 29 | } 30 | -------------------------------------------------------------------------------- /prototype/src/indexmap/array.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_ARRAY_H 2 | #define LOGSTORE_ARRAY_H 3 | 4 | #include "indexmap.h" 5 | 6 | class Array : public IndexMap { 7 | public: 8 | Array(int capacity); 9 | Array(); 10 | ~Array(); 11 | uint64_t Query(uint32_t blockAddr) override; 12 | void Update(uint32_t blockAddr, uint64_t phyAddr) override; 13 | 14 | private: 15 | int mCapacity = 128 * 1024 * 1024; // 512 GiB 16 | uint64_t *mValues; 17 | }; 18 | 19 | #endif //LOGSTORE_ARRAY_H 20 | -------------------------------------------------------------------------------- /prototype/src/indexmap/factory.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_INDEXMAPFACTORY_H 2 | #define LOGSTORE_INDEXMAPFACTORY_H 3 | 4 | #include "src/indexmap/indexmap.h" 5 | #include "src/indexmap/hashmap.h" 6 | #include "src/indexmap/array.h" 7 | #include 8 | #include 9 | 10 | class IndexMapFactory { 11 | public: 12 | static IndexMap *GetInstance(std::string type) { 13 | if (type == "HashMap") { 14 | return new HashMap(); 15 | } else if (type == "Array") { 16 | return new Array(); 17 | } else { 18 | std::cerr << "No IndexMap, type: " << type << std::endl; 19 | } 20 | return new HashMap(); 21 | } 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /prototype/src/indexmap/hashmap.cc: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include "src/indexmap/hashmap.h" 3 | 4 | HashMap::HashMap() { 5 | mMap.clear(); 6 | } 7 | 8 | void HashMap::Update(uint32_t blockAddr, uint64_t phyAddr) { 9 | mMap[blockAddr] = phyAddr; 10 | } 11 | 12 | uint64_t HashMap::Query(uint32_t blockAddr) { 13 | if (mMap.find(blockAddr) == mMap.end()) { 14 | return ~0ull; 15 | } else { 16 | return mMap[blockAddr]; 17 | } 18 | } 19 | 20 | -------------------------------------------------------------------------------- /prototype/src/indexmap/hashmap.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_HASHMAP_H 2 | #define LOGSTORE_HASHMAP_H 3 | 4 | #include 5 | #include 6 | #include "src/indexmap/indexmap.h" 7 | 8 | class HashMap : public IndexMap { 9 | public: 10 | HashMap(); 11 | uint64_t Query(uint32_t blockAddr) override; 12 | void Update(uint32_t blockAddr, uint64_t phyAddr) override; 13 | 14 | private: 15 | std::unordered_map mMap; 16 | }; 17 | 18 | #endif //LOGSTORE_HASHMAP_H 19 | -------------------------------------------------------------------------------- /prototype/src/indexmap/indexmap.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_INDEXMAP_H 2 | #define LOGSTORE_INDEXMAP_H 3 | 4 | #include 5 | 6 | class IndexMap { 7 | 8 | public: 9 | virtual uint64_t Query(uint32_t blockAddr) = 0; 10 | virtual void Update(uint32_t blockAddr, uint64_t phyAddr) = 0; 11 | 12 | IndexMap() = default; 13 | }; 14 | 15 | 16 | #endif //LOGSTORE_INDEXMAP_H 17 | -------------------------------------------------------------------------------- /prototype/src/logstore/config.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_CONFIG_H 2 | #define LOGSTORE_CONFIG_H 3 | 4 | #include 5 | 6 | class Config { 7 | public: 8 | static Config& GetInstance() { 9 | static Config instance; 10 | return instance; 11 | } 12 | std::string selection = "CostBenefit"; 13 | std::string indexMap = "Array"; 14 | std::string storageAdapter = "ZenFS"; 15 | std::string placement = "SepBIT"; 16 | int maxNumOpenSegments = 6; 17 | uint64_t numValidBlocks = 0; 18 | 19 | // For zenfs and zoned storage backend 20 | std::string zbdName = "sdd"; 21 | std::string zenFsAuxPath = "/tmp/aux_path"; 22 | 23 | // For local file system backend 24 | std::string localAdapterDir = "/tmp/local"; 25 | 26 | // For SepBIT 27 | std::string fifoDir = "/tmp/fifo"; 28 | std::string metadataDir = "/tmp/metadata"; 29 | }; 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /prototype/src/logstore/logstore.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "src/logstore/logstore.h" 6 | #include "src/logstore/segment.h" 7 | 8 | int LogStore::write(const void *buf, size_t len, off64_t offset) { 9 | // check against the invalidation 10 | alignas(512) char tmp[4096]; 11 | 12 | buseOperations::write(buf, len, offset); 13 | if(unlikely(len + (uint64_t)offset > this->getSize())) return EFBIG; 14 | 15 | size_t beg = offset / 4096 * 4096; 16 | size_t end = (offset + len + 4095) / 4096 * 4096; 17 | 18 | // |-----|--------|------| (4KiB) 19 | // beg off off+len end 20 | // | tmp | 21 | // | buf | 22 | if ( (beg != offset || end != offset + len) && end == beg + 4096) { 23 | memset(tmp, 0, sizeof(tmp)); 24 | mManager->Read(tmp, beg); 25 
| memcpy(tmp + offset - beg, (const char*)buf, len); 26 | mManager->Append(tmp, beg); 27 | return 0; 28 | } 29 | 30 | uint64_t i = 0; 31 | // deal with the beginning part 32 | if (beg < offset) { 33 | memset(tmp, 0, sizeof(tmp)); 34 | mManager->Read(tmp, beg); 35 | memcpy(tmp + offset - beg, (const char*)buf, beg + 4096 - offset); 36 | mManager->Append(tmp, beg); 37 | 38 | i = beg + offset - offset; 39 | } 40 | 41 | for ( ;; i += 4096) { 42 | off64_t addr = offset + i; 43 | if (addr == end - 4096) break; 44 | mManager->Append(((const char *)buf) + i, addr); 45 | } 46 | 47 | if (offset + len < end) { 48 | // deal with the beginning part 49 | memset(tmp, 0, sizeof(tmp)); 50 | mManager->Read(tmp, end - 4096); 51 | memcpy(tmp, (const char*)buf + (end - offset) - 4096, offset + len - (end - 4096)); 52 | mManager->Append(tmp, end - 4096); 53 | } else { 54 | mManager->Append(((const char *)buf) + i, offset + i); 55 | } 56 | 57 | return 0; 58 | } 59 | 60 | int LogStore::read(void *buf, size_t len, off64_t offset) { 61 | alignas(512) char tmp[4096]; 62 | 63 | buseOperations::read(buf, len, offset); 64 | if(unlikely(len + (uint64_t)offset > this->getSize())) return EFBIG; 65 | size_t beg = offset / 4096 * 4096; 66 | size_t end = (offset + len + 4095) / 4096 * 4096; 67 | 68 | if ( (beg != offset || end != offset + len) && end == beg + 4096) { 69 | mManager->Read(tmp, beg); 70 | memcpy((char*)buf, tmp + offset - beg, len); 71 | return 0; 72 | } 73 | 74 | if (beg < offset) { 75 | memset(tmp, 0, sizeof(tmp)); 76 | // deal with the beginning part 77 | mManager->Read(tmp, beg); 78 | memcpy((char*)buf, tmp + offset - beg, beg + 4096 - offset); 79 | } 80 | 81 | uint64_t i = 0; 82 | if (beg < offset) i = 4096 - (offset - beg); 83 | else i = 0; 84 | for (; i < len; i += 4096) { 85 | off64_t addr = offset + i; 86 | if (addr + 4096 == end) break; 87 | mManager->Read(((char*)buf) + i, addr); 88 | } 89 | 90 | if (offset + len < end) { 91 | memset(tmp, 0, sizeof(tmp)); 92 | mManager->Read(tmp, end - 4096); 93 | memcpy((char*)buf + (end - offset) - 4096, tmp, offset + len - (end - 4096)); 94 | } else { 95 | mManager->Read(((char*)buf) + i, offset + i); 96 | } 97 | return 0; 98 | } 99 | 100 | LogStore::LogStore(uint64_t size) : 101 | buse::buseOperations(size) 102 | { 103 | mManager = std::make_unique(6); 104 | mScheduler = std::make_unique(mManager.get()); 105 | } 106 | 107 | void LogStore::Shutdown() { 108 | mScheduler->Shutdown(); 109 | } 110 | 111 | -------------------------------------------------------------------------------- /prototype/src/logstore/logstore.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_BLOCKDEVICE_H 2 | #define LOGSTORE_BLOCKDEVICE_H 3 | 4 | 5 | #include 6 | #include "src/logstore/manager.h" 7 | #include "src/logstore/scheduler.h" 8 | #include "src/buse/buseOperations.h" 9 | 10 | class LogStore : public buse::buseOperations { 11 | 12 | public: 13 | LogStore(uint64_t ); 14 | ~LogStore() { Shutdown(); } 15 | 16 | int read(void *buf, size_t len, off64_t offset); 17 | int write(const void *buf, size_t len, off64_t offset); 18 | 19 | void Shutdown(); 20 | 21 | private: 22 | std::unique_ptr mManager; 23 | std::unique_ptr mScheduler; 24 | }; 25 | 26 | #endif //LOGSTORE_BLOCKDEVICE_H 27 | -------------------------------------------------------------------------------- /prototype/src/logstore/manager.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_SEGMENT_MANAGER_H 2 | #define 
LOGSTORE_SEGMENT_MANAGER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "src/logstore/segment.h" 13 | #include "src/indexmap/indexmap.h" 14 | #include "src/placement/placement.h" 15 | #include "src/storage_adapter/storage_adapter.h" 16 | 17 | class Manager { 18 | 19 | public: 20 | static uint64_t globalTimestamp; 21 | Manager(int numOpenSegments); 22 | ~Manager(); 23 | 24 | void Append(const void *buf, off64_t addr); 25 | bool GcAppend(const void *buf, uint32_t blockAddr, off64_t oldPhyAddr); 26 | void Read(void *buf, off64_t addr); 27 | 28 | double GetGp() const; 29 | uint64_t GetnBlocks() const; 30 | uint64_t GetnValidBlocks() const; 31 | uint64_t GetnInvalidBlocks() const; 32 | uint64_t PrintRealStats(); 33 | 34 | void OpenNewSegment(int id); 35 | void RemoveSegment(int id, uint64_t nRewriteBlocks); 36 | void CollectSegment(int id); 37 | 38 | void GetSegments(std::vector &segs); 39 | Segment ReadSegment(int id); 40 | 41 | private: 42 | std::shared_ptr findSegment(off64_t addr); 43 | 44 | std::unique_ptr mIndexMap; 45 | std::unique_ptr mPlacement; 46 | std::unordered_map> mSegments; 47 | std::vector> mOpenSegments; 48 | 49 | uint64_t mCurrentSegmentId{}; 50 | 51 | uint64_t mTotalBlocks; 52 | uint64_t mTotalInvalidBlocks; 53 | uint64_t mTotalUserWrites; 54 | uint64_t mTotalGcWrites; 55 | 56 | std::mutex mGlobalMutex; 57 | std::mutex mStopTheWorldMutex; 58 | std::mutex mSegmentMutex; 59 | std::condition_variable mStopTheWorldCv; 60 | 61 | std::unique_ptr mStorageAdapter; 62 | }; 63 | 64 | 65 | #endif //LOGSTORE_SEGMENT_MANAGER_H 66 | -------------------------------------------------------------------------------- /prototype/src/logstore/scheduler.cc: -------------------------------------------------------------------------------- 1 | #include "src/logstore/scheduler.h" 2 | 3 | Scheduler::Scheduler(Manager *manager) 4 | { 5 | mSelection = std::unique_ptr(SelectionFactory::GetInstance(Config::GetInstance().selection)); 6 | mWorker = std::thread(&Scheduler::scheduling, this, manager); 7 | mWorker.detach(); 8 | } 9 | 10 | void Scheduler::scheduling(Manager *manager) { 11 | using namespace std::chrono_literals; 12 | struct timeval current_time; 13 | while (true) { 14 | std::this_thread::sleep_for(0.05s); 15 | if (mShutdown) { 16 | break; 17 | } 18 | 19 | 20 | while (manager->GetGp() >= 0.15) { 21 | printf("GP: %.2f\n", manager->GetGp()); 22 | // select a segment 23 | int segmentId = select(manager); 24 | { 25 | gettimeofday(¤t_time, NULL); 26 | printf("GC start: %ld.%ld\n", 27 | current_time.tv_sec, current_time.tv_usec); 28 | } 29 | Segment segment = manager->ReadSegment(segmentId); 30 | { 31 | gettimeofday(¤t_time, NULL); 32 | printf("GC finish read: %ld.%ld\n", 33 | current_time.tv_sec, current_time.tv_usec); 34 | } 35 | // collect the segment 36 | collect(manager, segment); 37 | { 38 | gettimeofday(¤t_time, NULL); 39 | printf("GC finish rewrite: %ld.%ld\n", 40 | current_time.tv_sec, current_time.tv_usec); 41 | } 42 | } 43 | } 44 | } 45 | 46 | int Scheduler::select(Manager *manager) { 47 | // prepare the segments_ 48 | std::vector segments; 49 | manager->GetSegments(segments); 50 | 51 | auto res = mSelection->Select(segments); 52 | 53 | return res[0].second; 54 | } 55 | 56 | void Scheduler::collect(Manager *manager, Segment &segment) { 57 | uint64_t nRewriteBlocks = 0; 58 | manager->CollectSegment(segment.GetSegmentId()); 59 | for (int i = 0; i < 131072; ++i) { 60 | off64_t blockAddr = segment.GetBlockAddr(i); 61 | if 
(blockAddr == UINT32_MAX) continue; 62 | off64_t oldPhyAddr = segment.GetPhyAddr(i); 63 | char* data = segment.GetBlockData(i); 64 | if (!manager->GcAppend(data, blockAddr, oldPhyAddr)) { 65 | nRewriteBlocks += 1; 66 | } 67 | } 68 | manager->RemoveSegment(segment.GetSegmentId(), segment.GetTotalInvalidBlocks() + nRewriteBlocks); 69 | } 70 | -------------------------------------------------------------------------------- /prototype/src/logstore/scheduler.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_SCHEDULER_H 2 | #define LOGSTORE_SCHEDULER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "src/logstore/manager.h" 10 | #include "src/selection/factory.h" 11 | #include "src/logstore/config.h" 12 | 13 | class Scheduler { 14 | 15 | public: 16 | Scheduler(Manager *manager); 17 | void Shutdown() { mShutdown = true; } 18 | 19 | private: 20 | void scheduling(Manager *manager); 21 | int select(Manager *manager); 22 | void collect(Manager* manager, Segment& segment); 23 | 24 | std::unique_ptr mSelection; 25 | std::thread mWorker; 26 | bool mShutdown = false; 27 | }; 28 | #endif //LOGSTORE_SCHEDULER_H 29 | -------------------------------------------------------------------------------- /prototype/src/logstore/segment.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "src/logstore/manager.h" 4 | 5 | Segment::Segment(uint64_t id, int temperature, uint64_t timestamp) { 6 | mSegmentId = id; 7 | mCreationTimestamp = timestamp; 8 | mClassNum = temperature; 9 | 10 | mMutex = new std::mutex(); 11 | mBlocks = new uint32_t[131072]; 12 | } 13 | 14 | uint64_t Segment::Append(uint32_t blockAddr) { 15 | uint64_t phyAddr = mSegmentId * 131072ull + mNextOffset; 16 | 17 | mBlocks[mNextOffset] = blockAddr; 18 | mNextOffset += 1; 19 | 20 | return phyAddr; 21 | } 22 | 23 | void Segment::Invalidate(int i) { 24 | mTotalInvalidBlocks += 1; 25 | mBlocks[i] = UINT32_MAX; 26 | } 27 | 28 | uint64_t Segment::GetSegmentId() const { 29 | return mSegmentId; 30 | } 31 | 32 | bool Segment::IsFull() { 33 | return mNextOffset == 131072; 34 | } 35 | 36 | void Segment::Seal() { 37 | mSealed = true; 38 | } 39 | 40 | bool Segment::IsSealed() { 41 | return mSealed; 42 | } 43 | 44 | double Segment::GetGp() const { 45 | return 1.0 * mTotalInvalidBlocks / mNextOffset; 46 | } 47 | 48 | off64_t Segment::GetBlockAddr(int i) { 49 | return this->mBlocks[i]; 50 | } 51 | 52 | char* Segment::GetBlockData(int i) { 53 | return mData + i * 4096; 54 | } 55 | 56 | // only obtain the information of the given segment 57 | // used for selection 58 | Segment::Segment(Segment *o) { 59 | this->mSegmentId = o->mSegmentId; 60 | this->mTotalInvalidBlocks = o->mTotalInvalidBlocks; 61 | this->mNextOffset = o->mNextOffset; 62 | this->mCreationTimestamp = o->mCreationTimestamp; 63 | this->mBlocks = nullptr; 64 | this->mClassNum = o->mClassNum; 65 | } 66 | 67 | // obtain the metadata of the block information 68 | Segment::Segment(std::shared_ptr o) { 69 | this->mSegmentId = o->mSegmentId; 70 | this->mTotalInvalidBlocks = o->mTotalInvalidBlocks; 71 | this->mNextOffset = o->mNextOffset; 72 | this->mCreationTimestamp = o->mCreationTimestamp; 73 | this->mBlocks = new uint32_t[131072]; 74 | memcpy(this->mBlocks, o->mBlocks, sizeof(uint32_t) * 131072); 75 | if (this->mTotalInvalidBlocks == 131072) { 76 | this->mData = nullptr; 77 | } else { 78 | this->mData = (char*)aligned_alloc(512, 131072 * 4096); 79 | } 80 | 
this->mClassNum = o->mClassNum; 81 | } 82 | 83 | off64_t Segment::GetPhyAddr(int i) { 84 | return mSegmentId * 131072 + i; 85 | } 86 | 87 | char *Segment::GetData() { 88 | return mData; 89 | } 90 | 91 | uint64_t Segment::GetTotalValidBlocks() const { 92 | return mNextOffset - mTotalInvalidBlocks; 93 | } 94 | 95 | uint64_t Segment::GetTotalInvalidBlocks() const { 96 | return mTotalInvalidBlocks; 97 | } 98 | 99 | uint64_t Segment::GetTotalBlocks() const { 100 | return mNextOffset; 101 | } 102 | 103 | int Segment::GetClassNum() const { 104 | return mClassNum; 105 | } 106 | 107 | uint64_t Segment::GetAge() const { 108 | return Manager::globalTimestamp - mCreationTimestamp; 109 | } 110 | 111 | uint64_t Segment::GetCreationTimestamp() const { 112 | return mCreationTimestamp; 113 | } 114 | 115 | void Segment::Lock() { 116 | mMutex->lock(); 117 | } 118 | 119 | void Segment::Unlock() { 120 | mMutex->unlock(); 121 | } 122 | -------------------------------------------------------------------------------- /prototype/src/logstore/segment.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_SEGMENT_H 2 | #define LOGSTORE_SEGMENT_H 3 | 4 | 5 | #include 6 | #include 7 | 8 | class Segment { 9 | public: 10 | Segment(uint64_t id, int temperature, uint64_t timestamp); 11 | Segment(Segment *); 12 | Segment(std::shared_ptr); 13 | uint64_t Append(uint32_t blockAddr); 14 | void Invalidate(int i); 15 | void Seal(); 16 | 17 | bool IsFull(); 18 | bool IsSealed(); 19 | char* GetData(); 20 | char* GetBlockData(int i); 21 | 22 | off64_t GetBlockAddr(int i); 23 | off64_t GetPhyAddr(int i); 24 | 25 | uint64_t GetSegmentId() const; 26 | int GetClassNum() const; 27 | 28 | uint64_t GetTotalInvalidBlocks() const; 29 | uint64_t GetTotalValidBlocks() const; 30 | uint64_t GetTotalBlocks() const; 31 | 32 | double GetGp() const; 33 | uint64_t GetAge() const; 34 | uint64_t GetCreationTimestamp() const; 35 | 36 | void Lock(); 37 | void Unlock(); 38 | 39 | ~Segment() { 40 | if (mBlocks != nullptr) { 41 | delete[] mBlocks; 42 | } 43 | if (mData != nullptr) { 44 | free(mData); 45 | } 46 | if (mMutex != nullptr) { 47 | delete mMutex; 48 | } 49 | } 50 | 51 | private: 52 | uint64_t mSegmentId = 0; 53 | bool mSealed = false; 54 | uint64_t mCreationTimestamp = 0; 55 | int mClassNum = 0; 56 | 57 | uint64_t mTotalInvalidBlocks = 0; 58 | 59 | uint32_t* mBlocks = nullptr; // storing the logical block addresses of the blocks 60 | uint64_t mNextOffset = 0; // the offset of next appended block 61 | 62 | char *mData = nullptr; // storing the data of the blocks, only used when GC 63 | 64 | std::mutex* mMutex = nullptr; 65 | }; 66 | 67 | 68 | #endif //LOGSTORE_SEGMENT_H 69 | -------------------------------------------------------------------------------- /prototype/src/placement/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(placement no_placement.cc sepbit.cc sepgc.cc dac.cc warcip.cc) 2 | 3 | target_include_directories(placement PUBLIC ${PROJECT_SOURCE_DIR}) 4 | target_compile_features(placement PRIVATE cxx_std_17) 5 | -------------------------------------------------------------------------------- /prototype/src/placement/dac.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "src/placement/dac.h" 3 | 4 | DAC::DAC() 5 | { 6 | mTemperatureMap = std::unique_ptr(IndexMapFactory::GetInstance("HashMap")); 7 | } 8 | 9 | int DAC::Classify(uint32_t blockAddr, bool isGcAppend) 10 
| { 11 | uint64_t classNum = mTemperatureMap->Query(blockAddr); 12 | if (classNum == (~0ull)) { 13 | classNum = 0; 14 | } 15 | return classNum; 16 | } 17 | 18 | 19 | void DAC::CollectSegment(Segment *segment) { 20 | } 21 | 22 | void DAC::Append(uint32_t blockAddr, uint64_t timestamp) { 23 | uint64_t classNum = mTemperatureMap->Query(blockAddr); 24 | if (classNum == (~0ull)) { 25 | classNum = 0; 26 | mTemperatureMap->Update(blockAddr, 0); 27 | } else { 28 | if (classNum < 5) { 29 | mTemperatureMap->Update(blockAddr, classNum + 1); 30 | } 31 | } 32 | } 33 | 34 | void DAC::GcAppend(uint32_t blockAddr) { 35 | uint64_t classNum = mTemperatureMap->Query(blockAddr); 36 | assert(classNum != (~0ull)); 37 | 38 | if (classNum > 0) { 39 | mTemperatureMap->Update(blockAddr, classNum - 1); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /prototype/src/placement/dac.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_DAC_H 2 | #define LOGSTORE_DAC_H 3 | 4 | #include "src/logstore/segment.h" 5 | #include "src/indexmap/indexmap.h" 6 | #include "src/indexmap/factory.h" 7 | #include "src/placement/placement.h" 8 | 9 | class DAC : public Placement { 10 | public: 11 | DAC(); 12 | int Classify(uint32_t blockAddr, bool isGcAppend) override; 13 | void Append(uint32_t blockAddr, uint64_t timestamp) override; 14 | void GcAppend(uint32_t blockAddr) override; 15 | void CollectSegment(Segment *segment) override; 16 | 17 | std::unique_ptr mTemperatureMap; 18 | }; 19 | 20 | #endif //LOGSTORE_DAC_H 21 | -------------------------------------------------------------------------------- /prototype/src/placement/factory.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef LOGSTORE_PLACEMENT_FACTORY_H 3 | #define LOGSTORE_PLACEMENT_FACTORY_H 4 | 5 | #include "placement.h" 6 | #include "no_placement.h" 7 | #include "sepbit.h" 8 | #include "sepgc.h" 9 | #include "dac.h" 10 | #include "warcip.h" 11 | #include 12 | #include 13 | 14 | class PlacementFactory { 15 | public: 16 | static Placement *GetInstance(std::string type) { 17 | std::cout << "Placement algorithm: " << type << std::endl; 18 | if (type == "NoSep") { 19 | return new NoPlacement(); 20 | } else if (type == "SepGC") { 21 | return new SepGC(); 22 | } else if (type == "DAC") { 23 | return new DAC(); 24 | } else if (type == "SepBIT") { 25 | return new SepBIT(); 26 | } else if (type == "WARCIP") { 27 | return new WARCIP(); 28 | } else { 29 | std::cout << "No Placement, type: " << type << std::endl; 30 | } 31 | return new NoPlacement(); 32 | } 33 | }; 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /prototype/src/placement/fifo.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_FIFO_H 2 | #define LOGSTORE_FIFO_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "src/logstore/config.h" 12 | 13 | class FIFO 14 | { 15 | public: 16 | FIFO() 17 | { 18 | mFd = open(Config::GetInstance().fifoDir.c_str(), O_RDWR | O_CREAT, 0644); 19 | 20 | if (mFd == -1) 21 | { 22 | printf("Error open fifo, error: %s\n", strerror(errno)); 23 | exit(-1); 24 | } 25 | 26 | 27 | if (ftruncate(mFd, kFileSize * 8) == -1) 28 | { 29 | printf("Truncate failed, error: %s\n", strerror(errno)); 30 | exit(-1); 31 | } 32 | 33 | mArray = (uint32_t*)mmap(mArray, kFileSize * 8, PROT_READ | 
PROT_WRITE, MAP_SHARED, mFd, 0); 34 | if (mArray == MAP_FAILED) 35 | { 36 | printf("Mmap failed, error: %s\n", strerror(errno)); 37 | exit(-1); 38 | } 39 | } 40 | 41 | void Update(uint32_t blockAddr, double threshold) 42 | { 43 | double nValidBlocks = Config::GetInstance().numValidBlocks; 44 | int res = 0; 45 | 46 | mArray[mTail] = blockAddr; 47 | mMap[blockAddr] = mTail; 48 | mTail += 1; 49 | if (mTail == kFileSize) mTail = 0; 50 | 51 | if ((mTail + kFileSize - mHead) % kFileSize > std::min(threshold, nValidBlocks)) 52 | { 53 | uint32_t oldBlockAddr = mArray[mHead]; 54 | if (mMap[oldBlockAddr] == mHead) 55 | { 56 | mMap.erase(oldBlockAddr); 57 | } 58 | mHead += 1; 59 | if (mHead == kFileSize) mHead = 0; 60 | 61 | if ((mTail + kFileSize - mHead) % kFileSize > threshold) 62 | { 63 | oldBlockAddr = mArray[mHead]; 64 | 65 | if (mMap[oldBlockAddr] == mHead) 66 | { 67 | mMap.erase(oldBlockAddr); 68 | } 69 | mHead += 1; 70 | if (mHead == kFileSize) mHead = 0; 71 | } 72 | } 73 | } 74 | 75 | uint32_t Query(uint32_t blockAddr) 76 | { 77 | auto it = mMap.find(blockAddr); 78 | if (it == mMap.end()) 79 | { 80 | return UINT32_MAX; 81 | } 82 | uint32_t position = it->second; 83 | uint32_t lifespan = (mTail < position) ? 84 | mTail + kFileSize - position : mTail - position; 85 | return lifespan; 86 | } 87 | 88 | uint32_t mTail = 0; 89 | uint32_t mHead = 0; 90 | std::map mMap; 91 | uint32_t* mArray = NULL; 92 | int mFd; 93 | const uint32_t kFileSize = 128 * 1024 * 1024; 94 | }; 95 | #endif // LOGSTORE_FIFO_H 96 | -------------------------------------------------------------------------------- /prototype/src/placement/metadata.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_METADATA_H 2 | #define LOGSTORE_METADATA_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | class Metadata 12 | { 13 | public: 14 | Metadata() 15 | { 16 | mFd = open(Config::GetInstance().metadataDir.c_str(), O_RDWR | O_CREAT, 0644); 17 | 18 | if (mFd == -1) 19 | { 20 | printf("Error open metadata, error: %s\n", strerror(errno)); 21 | exit(-1); 22 | } 23 | if (ftruncate(mFd, kSize * 8) == -1) 24 | { 25 | printf("Truncate failed, error: %s\n", strerror(errno)); 26 | exit(-1); 27 | } 28 | mArray = (uint64_t*)mmap(mArray, kSize * 8, PROT_READ | PROT_WRITE, MAP_SHARED, mFd, 0); 29 | if (mArray == MAP_FAILED) 30 | { 31 | printf("Mmap failed, error: %s\n", strerror(errno)); 32 | exit(-1); 33 | } 34 | } 35 | 36 | void Update(uint64_t offset, uint64_t meta) 37 | { 38 | offset /= 4096; 39 | mArray[offset] = meta; 40 | } 41 | 42 | uint64_t Query(uint64_t offset) 43 | { 44 | uint64_t meta; 45 | offset /= 4096; 46 | meta = mArray[offset]; 47 | return meta; 48 | } 49 | 50 | int mFd; 51 | uint64_t* mArray; 52 | const uint64_t kSize = 512ull * 1024 * 1024 * 1024 / 4096; 53 | }; 54 | #endif // LOGSTORE_METADATA_H 55 | -------------------------------------------------------------------------------- /prototype/src/placement/no_placement.cc: -------------------------------------------------------------------------------- 1 | #include "src/placement/no_placement.h" 2 | 3 | int NoPlacement::Classify(uint32_t blockAddr, bool isGcAppend) { 4 | return 0; 5 | } 6 | 7 | void NoPlacement::Append(uint32_t blockAddr, uint64_t timestamp) { 8 | } 9 | 10 | void NoPlacement::GcAppend(uint32_t blockAddr) { 11 | 12 | } 13 | 14 | void NoPlacement::CollectSegment(Segment *segment) { 15 | 16 | } 17 | 18 | 
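// NoPlacement backs the "NoSep" option in placement/factory.h and scripts/run.sh:
// Classify() always returns class 0, so user writes and GC rewrites share a single
// open segment class and no hot/cold separation is performed. It serves as the
// no-separation baseline run alongside SepBIT, DAC, SepGC, and WARCIP.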
-------------------------------------------------------------------------------- /prototype/src/placement/no_placement.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_NO_PLACEMENT_H 2 | #define LOGSTORE_NO_PLACEMENT_H 3 | 4 | #include "src/logstore/segment.h" 5 | #include "sys/types.h" 6 | #include "src/placement/placement.h" 7 | 8 | class NoPlacement : public Placement { 9 | public: 10 | int Classify(uint32_t blockAddr, bool isGcAppend) override; 11 | void Append(uint32_t blockAddr, uint64_t timestamp) override; 12 | void GcAppend(uint32_t blockAddr) override; 13 | void CollectSegment(Segment *segment) override; 14 | }; 15 | 16 | #endif //LOGSTORE_NO_PLACEMENT_H 17 | -------------------------------------------------------------------------------- /prototype/src/placement/placement.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_PLACEMENT_H 2 | #define LOGSTORE_PLACEMENT_H 3 | 4 | #include "src/logstore/segment.h" 5 | class Placement { 6 | public: 7 | virtual int Classify(uint32_t, bool) = 0; 8 | virtual void Append(uint32_t addr, uint64_t timestamp) = 0; 9 | virtual void GcAppend(uint32_t addr) = 0; 10 | virtual void CollectSegment(Segment *segment) = 0; 11 | }; 12 | 13 | #endif //LOGSTORE_PLACEMENT_H 14 | -------------------------------------------------------------------------------- /prototype/src/placement/sepbit.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "src/placement/sepbit.h" 4 | #include "src/logstore/segment.h" 5 | #include "src/logstore/manager.h" 6 | 7 | SepBIT::SepBIT() { 8 | mMetadata.reset(new Metadata()); 9 | mLba2Fifo.reset(new FIFO()); 10 | mAvgLifespan = DBL_MAX; 11 | } 12 | 13 | int SepBIT::Classify(uint32_t blockAddr, bool isGcAppend) { 14 | if (!isGcAppend) { 15 | uint64_t lifespan = mLba2Fifo->Query(blockAddr); 16 | if (lifespan != UINT32_MAX && lifespan < mAvgLifespan) { 17 | return 0; 18 | } else { 19 | return 1; 20 | } 21 | } else { 22 | if (mClassNumOfLastCollectedSegment == 0) { 23 | return 2; 24 | } else { 25 | uint64_t age = Manager::globalTimestamp - mMetadata->Query(blockAddr); 26 | if (age < 4 * mAvgLifespan) { 27 | return 3; 28 | } else if (age < 16 * mAvgLifespan) { 29 | return 4; 30 | } else { 31 | return 5; 32 | } 33 | } 34 | } 35 | } 36 | 37 | void SepBIT::CollectSegment(Segment *segment) { 38 | static int totLifespan = 0; 39 | static int nCollects = 0; 40 | if (segment->GetClassNum() == 0) { 41 | totLifespan += segment->GetAge(); 42 | nCollects += 1; 43 | } 44 | if (nCollects == 16) { 45 | mAvgLifespan = 1.0 * totLifespan / nCollects; 46 | nCollects = 0; 47 | totLifespan = 0; 48 | std::cout << "AvgLifespan: " << mAvgLifespan << std::endl; 49 | } 50 | 51 | mClassNumOfLastCollectedSegment = segment->GetClassNum(); 52 | } 53 | 54 | void SepBIT::Append(uint32_t blockAddr, uint64_t timestamp) { 55 | mLba2Fifo->Update(blockAddr, mAvgLifespan); 56 | mMetadata->Update(blockAddr, Manager::globalTimestamp); 57 | } 58 | 59 | void SepBIT::GcAppend(uint32_t blockAddr) { 60 | } 61 | -------------------------------------------------------------------------------- /prototype/src/placement/sepbit.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_SepBIT_H 2 | #define LOGSTORE_SepBIT_H 3 | 4 | #include "src/logstore/segment.h" 5 | #include "src/indexmap/indexmap.h" 6 | #include "src/indexmap/factory.h" 7 | #include 
"src/placement/placement.h" 8 | #include "src/placement/fifo.h" 9 | #include "src/placement/metadata.h" 10 | 11 | class SepBIT : public Placement { 12 | public: 13 | SepBIT(); 14 | int Classify(uint32_t blockAddr, bool isGcAppend) override; 15 | void Append(uint32_t blockAddr, uint64_t timestamp) override; 16 | void GcAppend(uint32_t blockAddr) override; 17 | void CollectSegment(Segment *segment) override; 18 | 19 | private: 20 | std::unique_ptr mMetadata; 21 | std::unique_ptr mLba2Fifo; 22 | 23 | double mAvgLifespan; 24 | uint64_t mClassNumOfLastCollectedSegment; 25 | }; 26 | 27 | #endif //LOGSTORE_SepBIT_H 28 | -------------------------------------------------------------------------------- /prototype/src/placement/sepgc.cc: -------------------------------------------------------------------------------- 1 | #include "src/placement/sepgc.h" 2 | 3 | int SepGC::Classify(uint32_t blockAddr, bool isGcAppend) { 4 | if (!isGcAppend) return 0; 5 | else return 1; 6 | } 7 | 8 | void SepGC::Append(uint32_t blockAddr, uint64_t timestamp) { 9 | 10 | } 11 | 12 | void SepGC::GcAppend(uint32_t blockAddr) { 13 | 14 | } 15 | 16 | void SepGC::CollectSegment(Segment *segment) { 17 | 18 | } 19 | 20 | -------------------------------------------------------------------------------- /prototype/src/placement/sepgc.h: -------------------------------------------------------------------------------- 1 | #include "src/logstore/segment.h" 2 | #include "sys/types.h" 3 | #include "src/placement/placement.h" 4 | 5 | class SepGC : public Placement { 6 | public: 7 | int Classify(uint32_t addr, bool isGcAppend) override; 8 | void Append(uint32_t addr, uint64_t timestamp) override; 9 | void GcAppend(uint32_t addr) override; 10 | void CollectSegment(Segment *segment) override; 11 | }; 12 | 13 | -------------------------------------------------------------------------------- /prototype/src/placement/warcip.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_WARCIP_H 2 | #define LOGSTORE_WARCIP_H 3 | 4 | #include 5 | #include 6 | 7 | #include "src/logstore/segment.h" 8 | #include "src/indexmap/indexmap.h" 9 | #include "src/indexmap/factory.h" 10 | #include "src/placement/placement.h" 11 | 12 | 13 | class WARCIP : public Placement { 14 | public: 15 | WARCIP(); 16 | int Classify(uint32_t blockAddr, bool isGcAppend) override; 17 | void Append(uint32_t blockAddr, uint64_t timestamp) override; 18 | void GcAppend(uint32_t blockAddr) override; 19 | void CollectSegment(Segment *segment) override; 20 | 21 | private: 22 | void closeAndReassignCenter(int classNum); 23 | void dynamicSplitAndMerge(); 24 | void split(int classNum); 25 | void merge(int classNum); 26 | 27 | 28 | std::vector mCenterOfClusters; 29 | std::vector mTotalWeightOfClusters; 30 | std::vector mTotalBlocksOfClusters; 31 | std::vector mTotalWritesOfClusters; 32 | std::unique_ptr mLastWriteTimestamps; 33 | std::unique_ptr mPenalty; 34 | 35 | std::set mNextMerge; 36 | 37 | uint64_t mRwi = 0; 38 | int mTotalSealedUserSegments = 0; 39 | uint64_t mTotalWritesAllClusters = 0; 40 | }; 41 | 42 | #endif //LOGSTORE_WARCIP_H 43 | -------------------------------------------------------------------------------- /prototype/src/selection/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(selection greedy.cc costbenefit.cc) 2 | 3 | target_include_directories(selection PUBLIC ${PROJECT_SOURCE_DIR}) 4 | target_compile_features(selection PRIVATE cxx_std_17) 5 | 
-------------------------------------------------------------------------------- /prototype/src/selection/costbenefit.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "src/selection/costbenefit.h" 6 | #include "src/logstore/manager.h" 7 | 8 | std::vector> CostBenefit::Select(std::vector segments) { 9 | uint64_t globalTimestamp = Manager::globalTimestamp; 10 | std::vector> vec; 11 | for (int i = 0; i < segments.size(); ++i) { 12 | Segment &o = segments[i]; 13 | 14 | double gp = o.GetGp(); 15 | double age = globalTimestamp - o.GetCreationTimestamp(); 16 | double score = (gp == 1.0) ? DBL_MAX : gp / (1 - gp) * sqrt(age); 17 | 18 | vec.emplace_back(score, o.GetSegmentId()); 19 | } 20 | 21 | std::sort(vec.begin(), vec.end(), 22 | [] (const std::pair &a, 23 | const std::pair &b) 24 | { 25 | return (a.first > b.first); 26 | }); 27 | 28 | return vec; 29 | } 30 | 31 | -------------------------------------------------------------------------------- /prototype/src/selection/costbenefit.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_COSTBENEFIT_H 2 | #define LOGSTORE_COSTBENEFIT_H 3 | 4 | #include "src/selection/selection.h" 5 | 6 | class CostBenefit : public Selection { 7 | public: 8 | CostBenefit() = default; 9 | std::vector> Select(std::vector segment) override; 10 | }; 11 | 12 | #endif //LOGSTORE_COSTBENEFIT_H 13 | -------------------------------------------------------------------------------- /prototype/src/selection/factory.h: -------------------------------------------------------------------------------- 1 | #include "src/selection/selection.h" 2 | #include "src/selection/greedy.h" 3 | #include "src/selection/costbenefit.h" 4 | #include 5 | 6 | class SelectionFactory { 7 | public: 8 | static Selection *GetInstance(std::string type) { 9 | std::cout << "Selection algorithm: " << type << std::endl; 10 | if (type == "Greedy") { 11 | return new Greedy(); 12 | } else if (type == "CostBenefit") { 13 | return new CostBenefit(); 14 | } else { 15 | std::cerr << "No Selection, type: " << type << std::endl; 16 | } 17 | return new Greedy(); 18 | } 19 | }; 20 | -------------------------------------------------------------------------------- /prototype/src/selection/greedy.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "src/selection/greedy.h" 3 | 4 | std::vector> Greedy::Select(std::vector segments) { 5 | std::vector> vec; 6 | for (int i = 0; i < segments.size(); ++i) { 7 | Segment &o = segments[i]; 8 | 9 | double gp = o.GetGp(); 10 | 11 | vec.emplace_back(gp, o.GetSegmentId()); 12 | } 13 | 14 | std::sort(vec.begin(), vec.end(), 15 | [] (const std::pair &a, 16 | const std::pair &b) 17 | { 18 | return (a.first > b.first); 19 | }); 20 | 21 | return vec; 22 | } 23 | 24 | -------------------------------------------------------------------------------- /prototype/src/selection/greedy.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_GREEDY_H 2 | #define LOGSTORE_GREEDY_H 3 | 4 | #include "src/selection/selection.h" 5 | 6 | class Greedy : public Selection { 7 | public: 8 | Greedy() = default; 9 | std::vector> Select(std::vector segment) override; 10 | }; 11 | 12 | #endif //LOGSTORE_GREEDY_H 13 | -------------------------------------------------------------------------------- /prototype/src/selection/selection.h: 
-------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_SELECTION_H 2 | #define LOGSTORE_SELECTION_H 3 | 4 | #include 5 | #include 6 | #include "src/logstore/segment.h" 7 | 8 | class Selection { 9 | 10 | public: 11 | Selection() = default; 12 | virtual std::vector> Select(std::vector segment) = 0; 13 | }; 14 | 15 | #endif //LOGSTORE_SELECTION_H 16 | -------------------------------------------------------------------------------- /prototype/src/storage_adapter/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(PkgConfig REQUIRED) 2 | pkg_check_modules(ROCKSDB REQUIRED rocksdb) 3 | 4 | add_library(storage_adapter local_adapter.cc zenfs_adapter.cc) 5 | 6 | target_include_directories(storage_adapter PUBLIC ${PROJECT_SOURCE_DIR}) 7 | target_compile_features(storage_adapter PRIVATE cxx_std_17) 8 | target_link_libraries(storage_adapter rocksdb dl pthread rt snappy bz2 tcmalloc zbd z) 9 | -------------------------------------------------------------------------------- /prototype/src/storage_adapter/factory.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "src/storage_adapter/storage_adapter.h" 4 | #include "src/storage_adapter/local_adapter.h" 5 | #include "src/storage_adapter/zenfs_adapter.h" 6 | //#include "src/storage_adapter/hdfs_adapter.h" 7 | //#include "src/storage_adapter/zoned_adapter.h" 8 | 9 | class StorageAdapterFactory { 10 | public: 11 | static StorageAdapter *GetInstance(std::string type) { 12 | if (type == "Local") { 13 | return new LocalAdapter(); 14 | } else if (type == "ZenFS") { 15 | return new ZenFSAdapter(); 16 | } else { 17 | std::cerr << "No StorageAdapter, type: " << type << std::endl; 18 | } 19 | return new LocalAdapter(); 20 | } 21 | }; 22 | -------------------------------------------------------------------------------- /prototype/src/storage_adapter/local_adapter.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "src/storage_adapter/local_adapter.h" 5 | 6 | void LocalAdapter::CreateSegment(int id) { 7 | mSegments.emplace(id, std::fstream{}); 8 | mSegments[id].open(mDirPrefix + "/" + "segment_" + std::to_string(id) + ".data", 9 | std::ios::in | std::ios::out | std::ios::binary | std::ios::trunc); 10 | std::cout << "Open the " << id << "-th segment." 
<< std::endl; 11 | assert(mSegments[id].is_open()); 12 | } 13 | 14 | void LocalAdapter::Write(const void *buf, int id, off64_t offset) { 15 | std::fstream& fs = mSegments[id]; 16 | fs.seekg(offset * 4096); 17 | fs.write(static_cast(buf), 4096); 18 | } 19 | 20 | void LocalAdapter::Read(void *buf, int id, off64_t offset) { 21 | std::fstream& fs = mSegments[id]; 22 | fs.seekg(offset * 4096); 23 | fs.read(static_cast(buf), 4096); 24 | } 25 | 26 | void LocalAdapter::ReadWholeSegment(void *buf, int id) { 27 | std::fstream& fs = mSegments[id]; 28 | fs.seekg(0); 29 | fs.read(static_cast(buf), 4096 * 130172); 30 | } 31 | 32 | void LocalAdapter::DestroySegment(int id) { 33 | mSegments[id].close(); 34 | mSegments.erase(id); 35 | std::remove((mDirPrefix + "/segment_" + std::to_string(id) + ".data").c_str()); 36 | } 37 | 38 | -------------------------------------------------------------------------------- /prototype/src/storage_adapter/local_adapter.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_LOCAL_CONNECTOR_H 2 | #define LOGSTORE_LOCAL_CONNECTOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "src/logstore/config.h" 8 | #include "src/storage_adapter/storage_adapter.h" 9 | 10 | class LocalAdapter : public StorageAdapter { 11 | void CreateSegment(int) override; 12 | void Write(const void *, int, off64_t) override; 13 | void Read(void *, int, off64_t) override; 14 | void ReadWholeSegment(void *, int) override; 15 | void DestroySegment(int) override; 16 | 17 | private: 18 | std::unordered_map mSegments; 19 | const std::string mDirPrefix = Config::GetInstance().localAdapterDir.c_str(); 20 | }; 21 | 22 | #endif //LOGSTORE_LOCAL_CONNECTOR_H 23 | -------------------------------------------------------------------------------- /prototype/src/storage_adapter/storage_adapter.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_STORAGE_ADAPTER_H 2 | #define LOGSTORE_STORAGE_ADAPTER_H 3 | 4 | class StorageAdapter { 5 | 6 | public: 7 | virtual void Write(const void *, int, off64_t) = 0; 8 | virtual void CreateSegment(int) = 0; 9 | virtual void Read(void *, int, off64_t) = 0; 10 | virtual void ReadWholeSegment(void *, int) = 0; 11 | virtual void DestroySegment(int) = 0; 12 | }; 13 | 14 | #endif //LOGSTORE_STORAGE_ADAPTER_H 15 | -------------------------------------------------------------------------------- /prototype/src/storage_adapter/zenfs_adapter.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "src/logstore/config.h" 3 | #include "src/storage_adapter/zenfs_adapter.h" 4 | 5 | ZenFSAdapter::ZenFSAdapter() 6 | { 7 | mZbd = new ZonedBlockDevice(Config::GetInstance().zbdName.c_str(), nullptr); 8 | IOStatus status = mZbd->Open(false); 9 | 10 | if (!status.ok()) { 11 | std::cout << "Open zoned block device failed" << std::endl; 12 | delete mZbd; 13 | } 14 | 15 | Status s; 16 | mZenFS = new ZenFS(mZbd, FileSystem::Default(), nullptr); 17 | s = mZenFS->MkFS(Config::GetInstance().zenFsAuxPath.c_str(), 0); 18 | if (!s.ok()) { 19 | std::cout << "Open zenfs failed" << std::endl; 20 | delete mZenFS; 21 | } 22 | s = mZenFS->Mount(false); 23 | if (!s.ok()) { 24 | std::cout << "mount zenfs failed" << std::endl; 25 | delete mZenFS; 26 | } 27 | 28 | printf("ZenFS file system created. 
Free space: %lu MB\n", 29 | mZbd->GetFreeSpace() / (1024 * 1024)); 30 | } 31 | 32 | void ZenFSAdapter::CreateSegment(int id) 33 | { 34 | std::cout << "Open the " << id << "-th segment." << std::endl; 35 | std::string fileName = std::to_string(id); 36 | FileOptions fopts; 37 | IOOptions iopts; 38 | IODebugContext dbg; 39 | std::unique_ptr readHandle; 40 | std::unique_ptr writeHandle; 41 | 42 | Status s = mZenFS->NewWritableFile(fileName, fopts, &writeHandle, &dbg); 43 | 44 | if (!s.ok()) { 45 | std::cout << "Cannot create new file!" << std::endl; 46 | exit(-1); 47 | return; 48 | } 49 | s = mZenFS->NewSequentialFile(fileName, fopts, &readHandle, &dbg); 50 | if (!s.ok()) 51 | { 52 | std::cout << "Cannot open new file!" << std::endl; 53 | exit(-1); 54 | return; 55 | } 56 | 57 | mWriteSegments[id] = std::move(writeHandle); 58 | mReadSegments[id] = std::move(readHandle); 59 | } 60 | 61 | void ZenFSAdapter::Write(const void *data, int id, off64_t offset) 62 | { 63 | Slice slice(static_cast(data), 4096); 64 | IOOptions iopts; 65 | IODebugContext dbg; 66 | 67 | Status s = mWriteSegments[id]->Append(slice, iopts, &dbg); 68 | if (!s.ok()) { 69 | exit(-1); 70 | } 71 | // Ensure that the buffer of the segment is flushed and the zone 72 | // is closed when the segment is full. 73 | if (offset + 1 == 131072) { 74 | mWriteSegments[id]->Sync(iopts, &dbg); 75 | } 76 | assert(s.ok()); 77 | } 78 | 79 | void ZenFSAdapter::Read(void *data, int id, off64_t offset) 80 | { 81 | Slice slice; 82 | IOOptions iopts; 83 | IODebugContext dbg; 84 | 85 | Status s = mReadSegments[id]->PositionedRead(offset * 4096, 4096, iopts, &slice, static_cast(data), &dbg); 86 | if (!s.ok()) 87 | { 88 | exit(-1); 89 | } 90 | assert(s.ok()); 91 | } 92 | 93 | void ZenFSAdapter::ReadWholeSegment(void *data, int id) 94 | { 95 | Slice slice; 96 | IOOptions iopts; 97 | IODebugContext dbg; 98 | 99 | Status s = mReadSegments[id]->Read(4096 * 131072, iopts, &slice, static_cast(data), &dbg); 100 | assert(s.ok()); 101 | } 102 | 103 | void ZenFSAdapter::DestroySegment(int id) 104 | { 105 | std::cout << "Destroy the " << id << "-th segment." 
<< std::endl; 106 | IOOptions iopts; 107 | IODebugContext dbg; 108 | 109 | std::string fileName = std::to_string(id); 110 | mWriteSegments.erase(id); 111 | mReadSegments.erase(id); 112 | 113 | mZenFS->DeleteFile(fileName, iopts, &dbg); 114 | } 115 | -------------------------------------------------------------------------------- /prototype/src/storage_adapter/zenfs_adapter.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGSTORE_ZENFS_ADAPTER_H 2 | #define LOGSTORE_ZENFS_ADAPTER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "src/storage_adapter/storage_adapter.h" 12 | 13 | using namespace rocksdb; 14 | 15 | class ZenFSAdapter : public StorageAdapter { 16 | public: 17 | ZenFSAdapter(); 18 | void CreateSegment(int) override; 19 | void Write(const void *, int, off64_t) override; 20 | void Read(void *, int, off64_t) override; 21 | void ReadWholeSegment(void *, int) override; 22 | void DestroySegment(int) override; 23 | 24 | private: 25 | std::unordered_map> mReadSegments; 26 | std::unordered_map> mWriteSegments; 27 | ZonedBlockDevice* mZbd; 28 | ZenFS* mZenFS; 29 | }; 30 | 31 | #endif //LOGSTORE_ZENFS_ADAPTER_H 32 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/.gitignore: -------------------------------------------------------------------------------- 1 | result/ 2 | bin/ 3 | *.swp 4 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/README.md: -------------------------------------------------------------------------------- 1 | ## Scripts for Trace Analysis 2 | 3 | These C++ programs and R scripts are used for trace analysis. After you download the traces, you need to use the script `split.sh` to split the trace file (with hundreds of GiBs) into different volumes. Then you can run the scripts `run*.sh` to finish the processing. You can also plot the figures if needed (by adding the parameter "plot" to the Rscript). 4 | 5 | After you download the Alibaba Cloud and Tencent Cloud traces, modify `etc/common.sh` to set the paths of the both traces, and run the following to split the trace into independent csv files for each volume: 6 | ``` 7 | $ vim etc/common.sh # Edit the paths 8 | $ ./split.sh 9 | ``` 10 | 11 | Then you also need to select which set of traces you want to analyze, in `etc/common.sh`. 12 | 13 | ### About Motivation 14 | 15 | After the previous steps are finished, you can run the scripts to get the results of Observations 1-3. 16 | ``` 17 | $ ./run_obsv1.sh 18 | $ ./run_obsv2.sh 19 | $ ./run_obsv3.sh 20 | ``` 21 | 22 | + `src/obsv1.cc` will get the lifespans of user-written blocks. It provides the results for Figure 3. 23 | + `src/obsv2.cc` will get the CVs of the lifespans of blocks with similar (and high) frequency. It provides the results for Figure 4. 24 | + `src/obsv3.cc` will get the lifespans of rarely updated blocks. It provides the results for Figure 5. 25 | 26 | ### About Design 27 | 28 | You can run the scripts to get the results of design. 29 | ``` 30 | $ ./run_design.sh 31 | ``` 32 | 33 | + `src/design_uw.cc` will get the probability of $u<=u_0$ for the user writes conditioning on $v<=v_0$, under different $v_0$ and $u_0$ values as different fractions of the write WSS. 
34 | + `src/design_gw.cc` will get the probability of $u<=r_0+g_0$ for the user writes conditioning on $u>=g_0$, under different $r_0$ and $g_0$ values as different times of the write WSS. 35 | 36 | ### About Evaluation 37 | 38 | To get the result of FK, you need to run the following script to annotate the trace in advance: 39 | ``` 40 | $ ./run_annotate.sh 41 | ``` 42 | 43 | To generate the synthetic traces in Experiment 5, run the following: 44 | ``` 45 | $ ./synthetic_gen.sh 46 | ``` 47 | 48 | To calculate the percetage of traffic in 20\% of the LBAs, run the following; 49 | ``` 50 | $ ./run_exp5_hot20.sh 51 | ``` 52 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/etc/ali_selected_186.txt: -------------------------------------------------------------------------------- 1 | 783 2 | 404 3 | 209 4 | 363 5 | 124 6 | 219 7 | 631 8 | 269 9 | 100 10 | 726 11 | 126 12 | 205 13 | 310 14 | 354 15 | 740 16 | 138 17 | 206 18 | 808 19 | 32 20 | 68 21 | 728 22 | 483 23 | 262 24 | 733 25 | 445 26 | 361 27 | 264 28 | 99 29 | 212 30 | 175 31 | 737 32 | 348 33 | 714 34 | 227 35 | 242 36 | 276 37 | 177 38 | 466 39 | 316 40 | 12 41 | 93 42 | 197 43 | 717 44 | 256 45 | 293 46 | 753 47 | 207 48 | 220 49 | 623 50 | 752 51 | 228 52 | 374 53 | 754 54 | 150 55 | 260 56 | 257 57 | 96 58 | 223 59 | 23 60 | 202 61 | 14 62 | 117 63 | 194 64 | 226 65 | 424 66 | 748 67 | 7 68 | 40 69 | 244 70 | 455 71 | 467 72 | 26 73 | 727 74 | 0 75 | 283 76 | 391 77 | 468 78 | 724 79 | 679 80 | 133 81 | 97 82 | 232 83 | 176 84 | 34 85 | 208 86 | 198 87 | 54 88 | 29 89 | 211 90 | 580 91 | 697 92 | 4 93 | 160 94 | 440 95 | 654 96 | 122 97 | 178 98 | 144 99 | 52 100 | 107 101 | 141 102 | 58 103 | 225 104 | 780 105 | 37 106 | 701 107 | 41 108 | 272 109 | 651 110 | 435 111 | 771 112 | 460 113 | 261 114 | 427 115 | 38 116 | 280 117 | 470 118 | 804 119 | 123 120 | 195 121 | 263 122 | 555 123 | 132 124 | 730 125 | 751 126 | 745 127 | 749 128 | 221 129 | 759 130 | 345 131 | 731 132 | 275 133 | 538 134 | 725 135 | 736 136 | 231 137 | 145 138 | 59 139 | 248 140 | 18 141 | 28 142 | 103 143 | 16 144 | 27 145 | 201 146 | 111 147 | 61 148 | 13 149 | 43 150 | 213 151 | 116 152 | 130 153 | 94 154 | 36 155 | 67 156 | 200 157 | 47 158 | 39 159 | 35 160 | 22 161 | 254 162 | 166 163 | 74 164 | 136 165 | 303 166 | 20 167 | 106 168 | 187 169 | 169 170 | 109 171 | 15 172 | 25 173 | 11 174 | 742 175 | 148 176 | 165 177 | 79 178 | 507 179 | 810 180 | 278 181 | 434 182 | 265 183 | 10 184 | 746 185 | 190 186 | 45 187 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/etc/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ########### Modify here to set the directory of traces 4 | ALI_DOWNLOAD_FILE_PATH="" # The csv trace file you downloaded from the Alibaba GitHub 5 | TENCENT_DOWNLOAD_DIR_PATH="" # The directory you downloaded from the SNIA, containing tgz files 6 | ALI_TRACE_PATH="/sdb_data/ali_trace/" # The directory that contains the traces split by volumes 7 | TENCENT_TRACE_PATH="/sdc_data/tencent_trace/" # The directory that contains the traces split by volumes 8 | SYNTHETIC_PATH="" 9 | SELECTED="0" # 0 for analysis of Alibaba Cloud, or 1 for Tencent Cloud 10 | ######################################################## 11 | 12 | TRACE_PATH=$ALI_TRACE_PATH 13 | TRACE_PREFIX="ali" 14 | TRACE_PROPERTY="etc/ali_property.txt" 15 | TRACE_VOLUME_PATH="etc/ali_selected_186.txt" 
16 | TRACE_DISPLAY_NAME="AliCloud" 17 | 18 | # Both volumes use the format of AliCloud 19 | 20 | if [[ $SELECTED -eq 1 ]]; then 21 | TRACE_PATH=$TENCENT_TRACE_PATH 22 | TRACE_PREFIX="tc" 23 | TRACE_PROPERTY="etc/tc_property.txt" 24 | TRACE_VOLUME_PATH="etc/tc_selected_271.txt" 25 | TRACE_DISPLAY_NAME="TencentCloud" 26 | fi 27 | 28 | if [[ ! -d $TRACE_PATH ]]; then 29 | echo "TRACE_PATH not set; please set in etc/common.sh" 30 | exit 31 | fi 32 | 33 | analyze_multiple_files() { 34 | bin_suffix=$1; 35 | output_suffix=$2; 36 | src=$3; 37 | disp=$4; 38 | params=() 39 | 40 | if [[ $# -gt 4 ]]; then 41 | params=("${@:5}"); 42 | echo "${params[@]}" 43 | fi 44 | 45 | bin="bin/${TRACE_PREFIX}_${bin_suffix}" 46 | output_dir="result/${TRACE_PREFIX}_${output_suffix}" 47 | if [[ ! -d $output_dir ]]; then 48 | mkdir -p $output_dir 49 | fi 50 | if [[ ! -d bin ]]; then 51 | mkdir bin 52 | fi 53 | 54 | echo "Analyzing $TRACE_DISPLAY_NAME traces on $disp ... output at directory $output_dir" 55 | 56 | g++ $src -o $bin -std=c++11 -O3 -DALICLOUD -Werror -Wall 57 | if [[ $? -ne 0 ]]; then 58 | echo "Compile failed" 59 | exit 60 | fi 61 | 62 | total_traces=`wc -l ${trace_file_paths[$K]} | awk '{print $1;}'` 63 | current_traces=0 64 | 65 | cat $TRACE_VOLUME_PATH | while read line; do 66 | current_traces=$(( $current_traces + 1 )) 67 | echo "Processing volume ${current_traces} / ${total_traces}" 68 | 69 | trace_file=$TRACE_PATH/$line.csv 70 | output=${output_dir}/$line.data 71 | sz=`ls -s ${output} 2>/dev/null | awk '{print $1;}'` 72 | if [[ $? -ne 0 || $sz -eq 0 ]]; then # Not exist, or empty file 73 | $bin $line $trace_file $TRACE_PROPERTY ${params[@]} >> $output 74 | if [[ $? -ne 0 ]]; then 75 | echo "have error on volume $line, break" >> error_msg.txt 76 | sleep 2 77 | fi 78 | else 79 | echo "Volume $line in ${TRACE_DISPLAY_NAME} is analyzed before, skip" 80 | fi 81 | done 82 | } 83 | 84 | merge() { 85 | input_suffix=$1; 86 | output_suffix=$2; 87 | header=$3; 88 | 89 | output="result/${TRACE_PREFIX}_${output_suffix}" 90 | rm -f $output 91 | 92 | if [[ "${#header}" -gt 0 ]]; then 93 | echo "$header" >> $output 94 | fi 95 | 96 | cat $TRACE_VOLUME_PATH | while read line; do 97 | input="result/${TRACE_PREFIX}_${input_suffix}/${line}.data" 98 | if [[ ! 
-f $input ]]; then 99 | echo "Error: input $input not exist" 100 | return 1 101 | fi 102 | cat $input >> $output 103 | done 104 | } 105 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/etc/tc_selected_271.txt: -------------------------------------------------------------------------------- 1 | 1148 2 | 1152 3 | 1162 4 | 1163 5 | 1164 6 | 1182 7 | 1205 8 | 1214 9 | 1273 10 | 1293 11 | 1306 12 | 1326 13 | 1360 14 | 1362 15 | 1394 16 | 1437 17 | 1458 18 | 1557 19 | 1591 20 | 1599 21 | 1625 22 | 1655 23 | 1662 24 | 1722 25 | 1730 26 | 1784 27 | 1830 28 | 1847 29 | 1894 30 | 1911 31 | 2009 32 | 2030 33 | 2035 34 | 2063 35 | 2079 36 | 2127 37 | 2145 38 | 2212 39 | 2228 40 | 2230 41 | 2275 42 | 2286 43 | 2308 44 | 2313 45 | 2315 46 | 2337 47 | 2356 48 | 2372 49 | 2390 50 | 2438 51 | 2482 52 | 2583 53 | 2627 54 | 2637 55 | 2745 56 | 2761 57 | 2771 58 | 2863 59 | 2878 60 | 2976 61 | 2982 62 | 3036 63 | 3240 64 | 3246 65 | 3300 66 | 3310 67 | 3342 68 | 3512 69 | 3535 70 | 3557 71 | 3708 72 | 3762 73 | 3768 74 | 3770 75 | 3774 76 | 3806 77 | 3810 78 | 3847 79 | 3858 80 | 3879 81 | 3944 82 | 3963 83 | 4011 84 | 4019 85 | 4046 86 | 4111 87 | 4124 88 | 4145 89 | 4246 90 | 4287 91 | 4323 92 | 4332 93 | 4377 94 | 4385 95 | 4462 96 | 4523 97 | 4587 98 | 4620 99 | 4786 100 | 4833 101 | 4870 102 | 4883 103 | 4927 104 | 5046 105 | 5048 106 | 5178 107 | 5334 108 | 5346 109 | 5369 110 | 5372 111 | 5458 112 | 5473 113 | 5519 114 | 5565 115 | 5592 116 | 5615 117 | 5710 118 | 5732 119 | 5853 120 | 5913 121 | 6018 122 | 6108 123 | 6327 124 | 6378 125 | 6410 126 | 6485 127 | 6528 128 | 6550 129 | 6591 130 | 6676 131 | 7116 132 | 7255 133 | 7299 134 | 7315 135 | 7568 136 | 7584 137 | 7709 138 | 7859 139 | 7915 140 | 7931 141 | 8077 142 | 8169 143 | 8281 144 | 8319 145 | 8809 146 | 8825 147 | 8932 148 | 8937 149 | 8957 150 | 9118 151 | 9214 152 | 9324 153 | 9327 154 | 9440 155 | 9551 156 | 9850 157 | 10001 158 | 10120 159 | 10171 160 | 10935 161 | 11156 162 | 11487 163 | 11509 164 | 11622 165 | 11633 166 | 11822 167 | 12093 168 | 12186 169 | 12217 170 | 12300 171 | 12430 172 | 12447 173 | 12455 174 | 12621 175 | 12745 176 | 12771 177 | 13016 178 | 13057 179 | 13139 180 | 13167 181 | 13198 182 | 13201 183 | 13262 184 | 13270 185 | 13286 186 | 13379 187 | 13494 188 | 13789 189 | 14002 190 | 14207 191 | 14248 192 | 14300 193 | 14415 194 | 14447 195 | 14688 196 | 15293 197 | 15420 198 | 15538 199 | 15549 200 | 15581 201 | 15739 202 | 15864 203 | 16275 204 | 16357 205 | 16433 206 | 16546 207 | 16558 208 | 16639 209 | 16739 210 | 16742 211 | 16779 212 | 16788 213 | 16864 214 | 16897 215 | 16918 216 | 16923 217 | 17009 218 | 17044 219 | 17358 220 | 17389 221 | 17393 222 | 17452 223 | 17742 224 | 18292 225 | 18297 226 | 18417 227 | 18530 228 | 18581 229 | 18605 230 | 18671 231 | 18735 232 | 18986 233 | 19045 234 | 19105 235 | 19120 236 | 19202 237 | 19498 238 | 20042 239 | 20095 240 | 20291 241 | 20314 242 | 20320 243 | 20414 244 | 20519 245 | 20627 246 | 20656 247 | 20675 248 | 20699 249 | 20941 250 | 20974 251 | 21031 252 | 21040 253 | 21108 254 | 21720 255 | 21795 256 | 21805 257 | 21974 258 | 22054 259 | 22404 260 | 22587 261 | 22923 262 | 23374 263 | 23689 264 | 23952 265 | 24190 266 | 24245 267 | 24332 268 | 24725 269 | 25476 270 | 25640 271 | 25669 272 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/r/common.r: -------------------------------------------------------------------------------- 1 | 
display_names <- c("Alibaba Cloud", "Tencent Cloud"); 2 | prefices <- c("ali", "tc"); 3 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/r/design_calculation.r: -------------------------------------------------------------------------------- 1 | options(scipen=999) 2 | 3 | prob_calculation <- function(probs, n, t0, t1) { 4 | probs <- probs / sum(probs) 5 | denominator <- sum(probs * (1-(1-probs)^t0)); 6 | 7 | left <- 1 - (1 - probs) ^ t1; 8 | right <- (1 - (1 - probs) ^ t0) * probs; 9 | 10 | numerator <- sum(left * right) 11 | numerator / denominator 12 | } 13 | 14 | get_prob_results <- function(n, v0s, u0s, probs, zipf_factor = 1) { 15 | v0_values <- c(); 16 | u0_values <- c(); 17 | prob_values <- c(); 18 | for (v0_value in v0s) { # v0 19 | for (u0_value in u0s) { # u0 20 | v0_values <- c(v0_values, v0_value); 21 | u0_values <- c(u0_values, u0_value); 22 | prob <- prob_calculation(probs, n, v0_value, u0_value); 23 | prob_values <- c(prob_values, prob); 24 | } 25 | } 26 | 27 | data.frame( 28 | s = zipf_factor, 29 | v0 = v0_values / 1024 / 256, 30 | u0 = u0_values / 1024 / 256, 31 | prob = prob_values 32 | ) 33 | } 34 | 35 | n <- 2^20 * 2.5; 36 | v0_values <- n / 2.5 * (2^((-4):(0))); 37 | u0_values <- n / 2.5 * (2^c(-4, -2, 0)); 38 | 39 | filename <- "../result/zipf_hot_wss10gb.csv" 40 | 41 | run_func <- function() { 42 | t <- NULL; 43 | for (alpha in seq(0, 1, 0.2)) { 44 | print(paste0("User-Written Blocks: Zipf ", alpha, ":")) 45 | t <- rbind(t, get_prob_results(n, v0_values, u0_values, 1/((1:n)^alpha), alpha)); 46 | } 47 | write.table(t, file = filename, quote = F, row.names = F, col.names = T, sep = ','); 48 | } 49 | 50 | if (!file.exists(filename)) { 51 | run_func(); 52 | } 53 | 54 | ################################################ 55 | # Second Part: P(u <= t | u >= t0) 56 | 57 | prob_calculation <- function(probs, n, t0, t1) { # t0 = a, t1 = l 58 | t1 <- t0 + t1; 59 | 60 | probs <- probs / sum(probs) 61 | denominator <- sum(probs * (1-probs)^t0); 62 | numerator <- sum(probs * (1-probs)^t0 - probs * (1-probs)^t1) 63 | numerator / denominator 64 | } 65 | 66 | get_prob_results <- function(n, v0_cuts, l_cuts, probs, zipf_factor) { 67 | value <- c(); 68 | for (t0 in v0_cuts) { # v0 69 | for (t1 in l_cuts) { # l 70 | value <- c(value, prob_calculation(probs, n, t0, t1)); 71 | print(c(t0, t1, value[length(value)])) 72 | } 73 | } 74 | df <- expand.grid(r = l_cuts / 1024 / 256, v = v0_cuts / 1024 / 256); 75 | s <- rep(zipf_factor, length(l_cuts) * length(v0_cuts)) 76 | df <- cbind(s, df, value); 77 | print(df) 78 | } 79 | 80 | g0_values <- n / 2.5 * (2^c(-1, 0, 1, 2, 3)); # 2, 4, 8, 16, 32 GiB 81 | r0_values <- n / 2.5 * (2^c(-1, 0, 1)); # 2, 4, 8 GiB 82 | filename <- "../result/zipf_cold_wss10gb.csv" 83 | 84 | run_func <- function() { 85 | t <- NULL; 86 | for (zf in seq(0, 1, 0.2)) { 87 | print(paste0("GC-rewritten Blocks: Zipf ", zf, ":")) 88 | t <- rbind(t, get_prob_results(n, g0_values, r0_values, 1/((1:n)^zf), zf)); 89 | } 90 | write.table(t, file = filename, quote = F, row.names = F, col.names = T, sep = ','); 91 | } 92 | 93 | if (!file.exists(filename)) { 94 | run_func(); 95 | } 96 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/r/obsv1.r: -------------------------------------------------------------------------------- 1 | source("common.r"); 2 | 3 | filenames <- paste0("../result/", prefices, "_obsv1.data"); 4 | for (i in 1:length(filenames)) { 5 | filename 
<- filenames[i]; 6 | if (!file.exists(filename)) { 7 | next 8 | } 9 | df <- read.table(filename, header = T, stringsAsFactors = F); 10 | df <- df[!is.na(df$pct), ]; 11 | 12 | subs <- subset(df, wss == 0.8); 13 | subs <- subs[order(subs$pct), ]; 14 | print(paste0("50th percentile of 0-80% WSS in ", display_names[i], " is ", median(subs$pct))); 15 | subs <- subset(df, wss == 0.1); 16 | subs <- subs[order(subs$pct), ]; 17 | print(paste0("50th percentile of 0-10% WSS in ", display_names[i], " is ", median(subs$pct))); 18 | } 19 | 20 | args <- commandArgs(trailingOnly = T); 21 | if (length(args) < 1 || args[1] != "plot") { 22 | q() 23 | } 24 | 25 | print("Drawing Figure 3. Will stop if necessary packages are not installed"); 26 | print("---------------"); 27 | 28 | library(scales); 29 | source("common_graph.r"); 30 | options(scipen=999); 31 | 32 | pdf_width <- 3; 33 | pdf_height <- 1.4; 34 | axis.text.size <- 10; 35 | legend.text.size <- 10; 36 | 37 | breaks <- seq(0, 100, 25); 38 | xscale <- seq(0, 100, 20); 39 | xlabels <- xscale; 40 | xlimits <- c(0, 105); 41 | 42 | cdf_scale <- seq(0, 1, 0.25); 43 | cdf_labels <- cdf_scale * 100; 44 | cdf_limits <- c(0, 1.02); 45 | cdf_colors <- c(d1_line_color, d2_line_color, d3_line_color, d4_line_color); 46 | 47 | for (i in 1:length(filenames)) { 48 | filename <- filenames[i]; 49 | if (!file.exists(filename)) { 50 | next 51 | } 52 | print(paste0("Figure in ", display_names[i])); 53 | df <- read.table(filename, header = T, stringsAsFactors = F); 54 | df <- df[!is.na(df$pct), ]; 55 | 56 | wsses <- unique(df$wss); 57 | wsses_labels <- paste0("<", wsses * 100, "%"); 58 | wsses <- as.character(wsses[order(wsses)]); 59 | df$wss <- as.character(df$wss); 60 | df$wss <- factor(df$wss, levels = wsses); 61 | 62 | df_cdf <- toCdfFormat(df$pct, df$wss); 63 | 64 | t <- ggplot(df_cdf, aes(x = x, y = y, color = type)) + 65 | geom_line(stat = "identity") + 66 | scale_x_continuous(breaks = xscale, labels = xlabels) + 67 | scale_y_continuous(breaks = cdf_scale, labels = cdf_labels, expand = c(0.02, 0.02)) + 68 | scale_colour_manual(breaks = wsses, labels = wsses_labels, values = cdf_colors) + 69 | coord_cartesian(xlim = xlimits, ylim = cdf_limits) + 70 | xlab("% of User-Writes") + ylab("Cumulative (%)") + 71 | simplifiedTheme(c(0.23, 0.8), legend.direction = "vertical", hjust = 0.5, 72 | legend.text.size = legend.text.size, axis.text.size = axis.text.size); 73 | 74 | plot2pdf(paste0("../figure/", prefices[i], "_obsv1"), pdf_width, pdf_height, t); 75 | } 76 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/r/obsv2.r: -------------------------------------------------------------------------------- 1 | source("common.r"); 2 | 3 | filenames <- paste0("../result/", prefices, "_obsv2.data"); 4 | for (i in 1:length(filenames)) { 5 | filename <- filenames[i]; 6 | if (!file.exists(filename)) { 7 | next 8 | } 9 | 10 | df <- read.table(filename, header = T, stringsAsFactors = F); 11 | 12 | types <- unique(df$type); 13 | type_labels <- c("<1%", "1-5%", "5-10%", "10-20%"); 14 | i <- 1; 15 | 16 | print(paste0("In ", display_names[i], ":")); 17 | for (tp in unique(df$type)) { 18 | subs <- subset(df, type == tp); 19 | print(paste0("75th percentile of ", type_labels[i], ": ", quantile(subs$value, c(0.75)))); 20 | i <- i + 1; 21 | } 22 | } 23 | 24 | args <- commandArgs(trailingOnly = T); 25 | if (length(args) < 1 || args[1] != "plot") { 26 | q() 27 | } 28 | 29 | print("Drawing Figure 4. 
Will stop if necessary packages are not installed"); 30 | print("---------------"); 31 | 32 | library(scales); 33 | source("common_graph.r"); 34 | options(scipen=999); 35 | 36 | pdf_width <- 3; 37 | pdf_height <- 1.4; 38 | axis.text.size <- 10; 39 | legend.text.size <- 10; 40 | 41 | cdf_scale <- seq(0, 1, 0.25); 42 | cdf_labels <- cdf_scale * 100; 43 | cdf_limits <- c(0, 1.02); 44 | cdf_colors <- c(d1_line_color, d2_line_color, d3_line_color, d4_line_color); 45 | 46 | for (i in 1:length(filenames)) { 47 | filename <- filenames[i]; 48 | if (!file.exists(filename)) { 49 | next 50 | } 51 | print(paste0("Figure in ", display_names[i])); 52 | df <- read.table(filename, header = T, stringsAsFactors = F); 53 | 54 | types <- unique(df$type); 55 | xbreaks <- types; 56 | df$type <- factor(df$type, levels = types); 57 | type_labels <- c("< 1%", "1-5%", "5-10%", "10-20%"); 58 | breaks <- seq(0, 8, 1); 59 | 60 | df_cdf <- toCdfFormat(df$value, df$type); 61 | xscale <- breaks; 62 | xlabels <- xscale; 63 | xlimits <- c(min(xscale), max(xscale)); 64 | 65 | t <- ggplot(df_cdf, aes(x = x, y = y, color = type)) + 66 | geom_line(stat = "identity") + 67 | scale_x_continuous(breaks = xscale, labels = xlabels) + 68 | scale_y_continuous(breaks = cdf_scale, labels = cdf_labels, expand = c(0.02, 0.02)) + 69 | scale_colour_manual(breaks = types, labels = type_labels, values = cdf_colors) + 70 | coord_cartesian(xlim = xlimits, ylim = cdf_limits) + 71 | xlab("CVs") + ylab("Cumulative (%)") + 72 | simplifiedTheme(c(0.75, 0.35), legend.direction = "vertical", hjust = 0.5, 73 | legend.text.size = legend.text.size, axis.text.size = axis.text.size); 74 | plot2pdf(paste0("../figure/", prefices[i], "_obsv2"), pdf_width, pdf_height, t); 75 | } 76 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/r/obsv3.r: -------------------------------------------------------------------------------- 1 | source("common.r"); 2 | 3 | filenames <- paste0("../result/", prefices, "_obsv3.data"); 4 | for (i in 1:length(filenames)) { 5 | filename <- filenames[i]; 6 | if (!file.exists(filename)) { 7 | next 8 | } 9 | df <- read.table(filename, header = T, stringsAsFactors = F); 10 | 11 | for (lg in unique(df$log)) { 12 | subs <- subset(df, log == lg); 13 | df$pct[df$log == lg & df$type == "p1"] <- sum(subs$pct[subs$type %in% paste0("p", 1:3)]); 14 | df$pct[df$log == lg & df$type == "p2"] <- sum(subs$pct[subs$type %in% paste0("p", 4)]); 15 | df$pct[df$log == lg & df$type == "p3"] <- sum(subs$pct[subs$type %in% paste0("p", 5:6)]); 16 | df$pct[df$log == lg & df$type == "p4"] <- sum(subs$pct[subs$type %in% paste0("p", 7:8)]); 17 | df$pct[df$log == lg & df$type == "p5"] <- 1 - sum(subs$pct[subs$type %in% paste0("p", 1:8)]); 18 | } 19 | df <- subset(df, type %in% paste0("p", 1:5)); 20 | 21 | subs <- subset(df, type == "p1"); 22 | print(paste0("In 25% of the volumes of ", display_names[i], ", more than ", quantile(subs$pct, 0.75) * 100, 23 | "% of the rarely updated blocks have their lifespans shorter than 0.5x write WSS")); 24 | 25 | str <- "In the remaining 4 groups, the medians are "; 26 | for (tp in paste0("p", 2:5)) { 27 | subs <- subset(df, type == tp); 28 | str <- paste0(str, median(subs$pct) * 100, ifelse(tp == "p5", "%", "%, ")); 29 | } 30 | print(str); 31 | } 32 | 33 | args <- commandArgs(trailingOnly = T); 34 | if (length(args) < 1 || args[1] != "plot") { 35 | q() 36 | } 37 | 38 | print("Drawing Figure 5. 
Will stop if necessary packages are not installed"); 39 | print("---------------"); 40 | 41 | library(scales); 42 | source("common_graph.r"); 43 | options(scipen=999); 44 | 45 | pdf_width <- 3; 46 | pdf_height <- 1.4; 47 | axis.text.size <- 10; 48 | legend.text.size <- 10; 49 | 50 | cdf_scale <- seq(0, 1, 0.25); 51 | cdf_labels <- cdf_scale * 100; 52 | cdf_limits <- c(0, 1.02); 53 | cdf_colors <- c(d1_line_color, d2_line_color, d3_line_color, d4_line_color, d5_line_color); 54 | 55 | for (i in 1:length(filenames)) { 56 | filename <- filenames[i]; 57 | if (!file.exists(filename)) { 58 | next 59 | } 60 | print(paste0("Figure in ", display_names[i])); 61 | df <- read.table(filename, header = T, stringsAsFactors = F); 62 | for (lg in unique(df$log)) { 63 | subs <- subset(df, log == lg); 64 | df$pct[df$log == lg & df$type == "p1"] <- sum(subs$pct[subs$type %in% paste0("p", 1:3)]); 65 | df$pct[df$log == lg & df$type == "p2"] <- sum(subs$pct[subs$type %in% paste0("p", 4)]); 66 | df$pct[df$log == lg & df$type == "p3"] <- sum(subs$pct[subs$type %in% paste0("p", 5:6)]); 67 | df$pct[df$log == lg & df$type == "p4"] <- sum(subs$pct[subs$type %in% paste0("p", 7:8)]); 68 | df$pct[df$log == lg & df$type == "p5"] <- 1 - sum(subs$pct[subs$type %in% paste0("p", 1:8)]); 69 | } 70 | df <- subset(df, type %in% paste0("p", 1:5)); 71 | labels <- c(paste0(c("< 0.5", "0.5-1", "1-1.5", "1.5-2", ">2"), "x")); 72 | 73 | xscale <- seq(0, 1, 0.25); 74 | xlabels <- xscale * 100; 75 | xlimits <- c(0, 1.02); 76 | 77 | xlab_name <- "Percentage (%)"; 78 | ylab_name <- "Cumulative (%)"; 79 | 80 | types <- unique(df$type); 81 | df$type <- factor(df$type, types); 82 | 83 | df_cdf <- toCdfFormat(df$pct, df$type); 84 | 85 | t <- ggplot(data = df_cdf, aes(x = x, y = y, color = type)) + 86 | geom_line(stat = "identity") + 87 | coord_cartesian(xlim = xlimits, ylim = cdf_limits) + 88 | scale_x_continuous(breaks = xscale, labels = xlabels) + 89 | scale_y_continuous(breaks = cdf_scale, labels = cdf_labels) + 90 | scale_colour_manual(breaks = types, labels = labels, values = cdf_colors) + 91 | ylab(ylab_name) + xlab(xlab_name) + 92 | simplifiedTheme(c(0.82, 0.37), axis.text.size = axis.text.size, 93 | legend.text.size = legend.text.size, 94 | legend.direction = "vertical"); 95 | 96 | plot2pdf(paste0("../figure/", prefices[i], "_obsv3"), pdf_width, pdf_height, t); 97 | } 98 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/r/plot_design_boxplot.r: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | source("common.r") 3 | 4 | df <- read.table("../result/design_uw.data", header = T, stringsAsFactors = F); 5 | 6 | df <- subset(df, v0 %in% c(0.25, 0.5, 1, 2, 4) & u0 %in% c(0.25, 1, 4)); 7 | 8 | legend_colors <- c(d1_line_color, d2_line_color, d3_line_color) #c("#ff8888", "#7611E4", "#160184"); 9 | 10 | pdf_width <- 3.5; 11 | pdf_height <- 1.35; 12 | 13 | df$u0 <- as.character(df$u0); 14 | df$v0 <- as.character(df$v0); 15 | 16 | types <- unique(df$u0); 17 | df$u0 <- factor(df$u0, level = types); 18 | xscale <- unique(df$v0); 19 | df$v0 <- factor(df$v0, level = xscale); 20 | print(types); 21 | 22 | for (u00 in types) { 23 | subs <- subset(df, u00 == u0); 24 | for (v00 in xscale) { 25 | subsubs <- subset(subs, v00 == v0); 26 | print(unname(c(v00, u00, quantile(subsubs$prob, c(0.25, 0.5, 0.75))))); 27 | } 28 | } 29 | 30 | legend_breaks <- types; 31 | legend_labels <- paste0(legend_breaks, ""); 32 | legend_labels[1] <- 
expression("u"[0]~"= 0.25"); 33 | 34 | legend.position <- c(0.5, 0.97); 35 | axis.text.size <- 10; 36 | legend.text.size <- 10; 37 | 38 | t <- ggplot(df, aes(x = v0, y = prob, color = u0)) + 39 | geom_boxplot(outlier.shape = outlier.shape, outlier.size = outlier.size) + 40 | scale_y_continuous(breaks = seq(0,1,0.25), labels = seq(0, 100, 25)) + 41 | scale_colour_manual(breaks = legend_breaks, labels = legend_labels, values = legend_colors) + 42 | xlab("v"[0]~"(GiB)") + ylab("Probability (%)") + 43 | coord_cartesian(ylim = c(0, 1.08)) + 44 | simplifiedTheme(legend.position, legend.direction = "horizontal", axis.text.size = axis.text.size, legend.text.size = legend.text.size) 45 | plot2pdf("../figure/design_uw", pdf_width, pdf_height, t); 46 | 47 | #################### Cold 48 | filename <- "../result/design_gw.data"; 49 | df <- read.table(filename, header = T, stringsAsFactors = F); 50 | 51 | legend_colors <- c(d1_line_color, d2_line_color, d3_line_color, d4_line_color); #c("#ff8888", "#7611E4", "#160184"); 52 | 53 | df$r <- as.character(df$l); 54 | df$v <- as.character(df$t); 55 | 56 | df <- subset(df, v %in% c("1", "4", "16", "64") & r %in% c("2", "4", "8")); 57 | types <- unique(df$v); 58 | xscale <- unique(df$r); 59 | df$r <- factor(df$r, level = xscale); 60 | df$v <- factor(df$v, level = types); 61 | print(types); 62 | 63 | legend_breaks <- types; 64 | legend_labels <- paste0(legend_breaks, ""); 65 | legend_labels[1] <- expression("g"[0]~" = 1"); 66 | 67 | for (u00 in xscale) { 68 | subs <- subset(df, u00 == r); 69 | for (v00 in types) { 70 | subsubs <- subset(subs, v00 == v); 71 | print(unname(c(v00, u00, round(quantile(subsubs$value, c(0.25, 0.5, 0.75)), digits = 4)))); 72 | } 73 | } 74 | 75 | t <- ggplot(df, aes(x = r, y = value, color = v)) + 76 | geom_boxplot(outlier.shape = outlier.shape, outlier.size = outlier.size, width = 0.7) + 77 | scale_y_continuous(breaks = seq(0, 1, 0.25), labels = seq(0, 100, 25)) + 78 | scale_colour_manual(breaks = legend_breaks, labels = legend_labels, values = legend_colors) + 79 | xlab(expression("r"[0]~" (GiB)")) + ylab("Probability (%)") + 80 | coord_cartesian(ylim = c(0, 1.10)) + 81 | simplifiedTheme(legend.position, legend.direction = "horizontal", axis.text.size = axis.text.size, legend.text.size = legend.text.size) 82 | plot2pdf("../figure/design_gw", pdf_width, pdf_height, t); 83 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/r/plot_design_traces.r: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | source("common_graph.r"); 3 | source("common.r"); 4 | 5 | filenames <- paste0("../result/", prefices, "_design_uw.data"); 6 | pdf_width <- 3.5; 7 | pdf_height <- 1.23; 8 | legend.position <- c(0.5, 0.97); 9 | axis.text.size <- 8.5; 10 | legend.text.size <- 8.5; 11 | 12 | outlier.shape <- 4; 13 | outlier.color <- "#ff8888"; 14 | outlier.size <- 0.8; 15 | 16 | for (i in 1:length(filenames)) { 17 | filename <- filenames[i]; 18 | if (!file.exists(filename)) { 19 | next 20 | } 21 | 22 | df <- read.table(filename, header = T, stringsAsFactors = F); 23 | df <- subset(df, v0 %in% c(0.025, 0.05, 0.1, 0.2, 0.4) & u0 %in% c(0.025, 0.1, 0.4)); 24 | 25 | legend_colors <- c(d1_line_color, d2_line_color, d3_line_color) 26 | 27 | df$u0 <- as.character(df$u0); 28 | df$v0 <- as.character(df$v0); 29 | 30 | types <- unique(df$u0); 31 | df$u0 <- factor(df$u0, level = types); 32 | xscale <- unique(df$v0); 33 | df$v0 <- factor(df$v0, level = xscale); 34 
| 35 | legend_breaks <- types; 36 | legend_labels <- paste0(as.character(as.numeric(legend_breaks) * 100), "%"); 37 | legend_labels[1] <- expression("u"[0]~"= 2.5% write WSS"); 38 | 39 | t <- ggplot(df, aes(x = v0, y = prob, color = u0)) + 40 | geom_boxplot(outlier.shape = outlier.shape, outlier.size = outlier.size) + 41 | scale_y_continuous(breaks = seq(0,1,0.25), labels = seq(0, 100, 25)) + 42 | scale_colour_manual(breaks = legend_breaks, labels = legend_labels, values = legend_colors) + 43 | xlab("v"[0]~"(% of write WSS)") + ylab("Probability (%)") + 44 | coord_cartesian(ylim = c(0, 1.15)) + 45 | simplifiedTheme(legend.position, legend.direction = "horizontal", axis.text.size = axis.text.size, legend.text.size = legend.text.size) 46 | plot2pdf(paste0("../figure/", prefices[i], "_design_uw"), pdf_width, pdf_height, t); 47 | } 48 | 49 | #################### Cold 50 | filenames <- paste0("../result/", prefices, "_design_gw.data"); 51 | legend_colors <- c(d1_line_color, d2_line_color, d3_line_color, d4_line_color); 52 | 53 | for (i in 1:length(filenames)) { 54 | filename <- filenames[i]; 55 | if (!file.exists(filename)) { 56 | next 57 | } 58 | 59 | df <- read.table(filename, header = T, stringsAsFactors = F); 60 | df <- subset(df, g0 %in% c(0.8, 1.6, 3.2, 6.4) & r0 %in% c(0.4, 0.8, 1.6)); 61 | 62 | df$g0 <- as.character(df$g0); 63 | df$r0 <- as.character(df$r0); 64 | df$prob[is.nan(df$prob)] <- 0; 65 | 66 | types <- unique(df$g0); 67 | xscale <- unique(df$r0); 68 | df$g0 <- factor(df$g0, level = types); 69 | df$r0 <- factor(df$r0, level = xscale); 70 | 71 | legend_breaks <- types; 72 | legend_labels <- paste0(as.character(as.numeric(legend_breaks)), "x"); 73 | legend_labels[1] <- expression("g"[0]~" = 0.8x write WSS"); 74 | 75 | t <- ggplot(df, aes(x = r0, y = prob, color = g0)) + 76 | geom_boxplot(outlier.shape = outlier.shape, outlier.size = outlier.size, width = 0.65) + 77 | scale_y_continuous(breaks = seq(0, 1, 0.25), labels = seq(0, 100, 25)) + 78 | scale_colour_manual(breaks = legend_breaks, labels = legend_labels, values = legend_colors) + 79 | xlab(expression("r"[0]~" (times of write WSS)")) + ylab("Probability (%)") + 80 | coord_cartesian(ylim = c(0, 1.15)) + 81 | simplifiedTheme(legend.position, legend.direction = "horizontal", axis.text.size = axis.text.size, legend.text.size = legend.text.size) 82 | plot2pdf(paste0("../figure/", prefices[i], "_design_gw"), pdf_width, pdf_height, t); 83 | } 84 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/r/synthetic.r: -------------------------------------------------------------------------------- 1 | sequence_gen_zipf <- function(probs, n, m, permutation_interval, filename) { 2 | if (!file.exists(filename)) { 3 | i <- 0; 4 | ans <- c(); 5 | 6 | hot_length <- n %/% 5; 7 | probs_indices_hot <- 1:hot_length; 8 | probs_indices_cold <- (hot_length+1):n; 9 | 10 | while (i < m) { 11 | m_tmp <- min(permutation_interval, m - i); 12 | seqs <- sample(c(probs_indices_hot, probs_indices_cold), size = m_tmp, prob = probs, replace = T); 13 | ans <- c(ans, seqs); 14 | print(paste0("Processed ", i, " to ", i + m_tmp, " (", i / m * 100, " %)")); 15 | i <- i + permutation_interval; 16 | 17 | if (alpha > 0) { 18 | probs_indices_hot <- sample(1:hot_length, size = hot_length, replace = F); 19 | } 20 | 21 | if (length(ans) >= 500 * 1024 * 256) { # Flush to disk for every 500 GiB writes 22 | write.table(ans, file = filename, quote = F, row.names = F, col.names = F, append = T); 23 | ans <- c(); 24 | } 
25 | } 26 | 27 | if (length(ans) > 0) { # Flush to disk for the rest 28 | write.table(ans, file = filename, quote = F, row.names = F, col.names = F, append = T); 29 | ans <- c(); 30 | } 31 | 32 | } 33 | print(paste0("finished ", filename)); 34 | } 35 | 36 | n <- 50 * 1024 * 256; # 50 GiB write WSS 37 | interval <- 0.5; # Change the permutation for every 512 MiB writes 38 | args <- commandArgs(trailingOnly = T) 39 | if (length(args) < 2) { 40 | print("Usage: Rscript synthetic.r "); 41 | q() 42 | } 43 | 44 | print(args); 45 | alpha <- as.numeric(args[1]); 46 | 47 | print("Zipf - hotness change - 20 %"); 48 | print(paste0("alpha = ", alpha)); 49 | sequence_gen_zipf(1/(1:n)^alpha, n, n * 60, interval * 1024 * 256, paste0(args[2], "/alpha_", alpha, ".csv")); 50 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/run_annotate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source etc/common.sh 4 | 5 | # Both volumes use the format of AliCloud 6 | analyze_multiple_files "annotate" "annotate" "src/annotate_future_knowledge.cc" "annotate" "Annotate future knowledge" 7 | 8 | cd result/annotate 9 | for line in `ls *.data`; do 10 | nm=`echo $line | cut -d. -f 1` 11 | mv $line ${nm}.oracle 12 | done 13 | 14 | 15 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/run_design.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source etc/common.sh 4 | 5 | analyze_multiple_files "design_uw" "design_uw" "src/design_uw.cc" "design_uw" "Design Analysis for user writes" 6 | merge "design_uw" "design_uw.data" "log v0 u0 denominator numerator prob" 7 | 8 | analyze_multiple_files "design_gw" "design_gw" "src/design_gw.cc" "design_gw" "Design Analysis for GC writes" 9 | merge "design_gw" "design_gw.data" "log r0 g0 numerator denominator prob" 10 | 11 | cd r 12 | Rscript design_calculation.r 13 | Rscript plot_design_lines.r 14 | Rscript plot_design_traces.r 15 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/run_exp5_hot20.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source etc/common.sh 4 | 5 | analyze_multiple_files "exp5_hot20" "exp5_hot20" "src/exp5_hot20.cc" "exp5_hot20" "Traffic in Hot 20% blocks" 6 | merge "exp5_hot20" "exp5_hot20.data" "log pct" 7 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/run_obsv1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source etc/common.sh 4 | 5 | analyze_multiple_files "obsv1" "obsv1" "src/obsv1.cc" "obsv1" "Observation 1" 6 | merge "obsv1" "obsv1.data" "wss pct" 7 | 8 | cd r 9 | Rscript obsv1.r 10 | 11 | ############ Uncomment these lines if you want to plot the figures 12 | #if [[ ! 
-d ../figure ]] ; then 13 | # mkdir -p ../figure 14 | #fi 15 | #Rscript obsv1.r plot 16 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/run_obsv2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source etc/common.sh 4 | 5 | analyze_multiple_files "obsv2" "obsv2" "src/obsv2.cc" "obsv2" "Observation 2" 6 | merge "obsv2" "obsv2.data" "log value type" 7 | 8 | cd r 9 | Rscript obsv2.r 10 | 11 | ############ Uncomment these lines if you want to plot the figures 12 | #if [[ ! -d ../figure ]] ; then 13 | # mkdir -p ../figure 14 | #fi 15 | #Rscript obsv2.r plot 16 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/run_obsv3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source etc/common.sh 4 | 5 | analyze_multiple_files "obsv3" "obsv3" "src/obsv3.cc" "obsv3" "Observation 3" 6 | merge "obsv3" "obsv3.data" "log type pct" 7 | 8 | cd r 9 | Rscript obsv3.r 10 | 11 | ############ Uncomment these lines if you want to plot the figures 12 | #if [[ ! -d ../figure ]] ; then 13 | # mkdir -p ../figure 14 | #fi 15 | #Rscript obsv3.r plot 16 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/split.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source etc/common.sh 4 | 5 | if [[ ! -d bin ]]; then 6 | mkdir bin 7 | fi 8 | 9 | ################# Split the Alibaba Cloud traces 10 | g++ src/split.cc -o bin/split -std=c++11 -O3 -DALICLOUD 11 | bin/split $ALI_DOWNLOAD_FILE_PATH $ALI_TRACE_PATH 12 | 13 | ################# Split and transform timestamps of the Tencent Cloud traces 14 | g++ src/split.cc -o bin/split -std=c++11 -Wall -Werror -O3 -DTENCENTCLOUD 15 | if [[ ! -d $TENCENT_TRACE_PATH ]]; then 16 | echo "TENCENT_TRACE_PATH not set or not exist; please set in etc/common.sh" 17 | exit 18 | fi 19 | if [[ ! -d $TENCENT_DOWNLOAD_DIR_PATH ]]; then 20 | echo "TENCENT_DOWNLOAD_DIR_PATH not set or not exist; please set in etc/common.sh" 21 | exit 22 | fi 23 | 24 | for tgz_file in `ls ${TENCENT_DOWNLOAD_DIR_PATH}/*.tgz`; do 25 | echo "Extracting $tgz_file" 26 | name=`echo $tgz_file | rev | cut -d'/' -f 1 | rev | cut -d'.' -f 1` 27 | tar xzf $tgz_file 28 | INPUT="cbs_trace1/atc_2020_trace/trace_ori/${name}" 29 | echo "Spliting $INPUT" 30 | bin/split $INPUT $TENCENT_TRACE_PATH 31 | rm $INPUT 32 | done 33 | 34 | transform() { 35 | bin_suffix=$1; 36 | src=$2; 37 | 38 | bin="bin/tc_${bin_suffix}" 39 | if [[ ! -d bin ]]; then 40 | mkdir bin 41 | fi 42 | 43 | property_file="etc/tc_property.txt" 44 | echo "Transforming Tencent Cloud ... output at directory $TENCENT_TRACE_PATH" 45 | 46 | g++ $src -o $bin -std=c++11 -DTENCENTCLOUD 47 | if [[ $? -ne 0 ]]; then 48 | echo "Compile failed" 49 | exit 50 | fi 51 | 52 | cat etc/tc_selected_271.txt | while read line; do 53 | trace_file=$TENCENT_TRACE_PATH/$line.csv 54 | if [[ ! -f $trace_file ]]; then 55 | echo "$trace_file not exist; did you download the traces or use split.sh to split the traces?" 56 | exit 57 | fi 58 | echo "transforming $trace_file in TencentCloud" 59 | output=${TENCENT_TRACE_PATH}/${line}_tmp.csv 60 | sz=`ls -s ${output} 2>/dev/null | awk '{print $1;}'` 61 | if [[ $? 
-ne 0 || $sz -eq 0 ]]; then # Not exist, or empty file 62 | $bin $line $trace_file $property_file >> $output 63 | else 64 | echo "Volume $line in Tencent Cloud is transformed before, skip" 65 | fi 66 | mv $output $trace_file 67 | done 68 | } 69 | 70 | transform "transform" "src/transform_timestamp_tencentCloud.cc" 71 | 72 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/src/annotate_future_knowledge.cc: -------------------------------------------------------------------------------- 1 | #include "large_array.h" 2 | #include "trace.h" 3 | 4 | class Analyzer : Analyzer_base { 5 | LargeArray* indexMap_; 6 | LargeArray* fifo_; 7 | 8 | uint64_t n_blocks_ = -1ull; 9 | uint64_t currentId_ = 1; 10 | // For blocks whose lifespans are greater than 8-TiB data (due to memory constrain on a single machine), 11 | // we annotate it as infinity (represented as UINT64_MAX) 12 | // This will not affect the simulation because they are rare (the largest write traffic is 35TiB) 13 | // maintain a FIFO of 8-TiB data - 2048 million blocks - 16GiB memory overhead 14 | uint64_t size_ = 2ull * 1024 * 1024 * 1024; 15 | uint64_t head = 0, tail = 0, headAccessId = 1; 16 | 17 | public: 18 | 19 | // initialize properties 20 | void init(char *propertyFileName, char *volume) { 21 | std::string volume_id(volume); 22 | volume_id_ = volume_id; 23 | 24 | trace_.loadProperty(propertyFileName, volume); 25 | 26 | uint64_t maxLba = trace_.getMaxLba(volume_id); 27 | n_blocks_ = maxLba + 1; 28 | 29 | indexMap_ = new LargeArray(n_blocks_); 30 | fifo_ = new LargeArray(size_); 31 | } 32 | 33 | void analyze(char *inputTrace) { 34 | uint64_t offset, length, timestamp; 35 | bool isWrite; 36 | char line[100]; 37 | 38 | openTrace(inputTrace); 39 | 40 | while (trace_.readNextRequestFstream(*is_, timestamp, isWrite, offset, length, line)) { 41 | 42 | if (!isWrite) continue; 43 | 44 | for (uint64_t i = 0; i < length; i += 1) { 45 | uint64_t lastAccessId = indexMap_->get(offset + i); 46 | if (lastAccessId != 0) { 47 | uint64_t lifespan = currentId_ - lastAccessId; 48 | if (lastAccessId >= headAccessId) { 49 | uint64_t posAccessId = lastAccessId - headAccessId; 50 | uint64_t pos = head + posAccessId; 51 | if (pos >= size_) { 52 | pos -= size_; 53 | } 54 | fifo_->put(pos, lifespan); 55 | } 56 | } 57 | 58 | indexMap_->put(offset + i, currentId_++); 59 | fifo_->put(tail++, UINT64_MAX); 60 | if (tail == size_) { 61 | tail = 0; 62 | } 63 | 64 | if (head == tail) { 65 | uint64_t lifespan = fifo_->get(head++); 66 | headAccessId += 1; 67 | std::cout << lifespan << std::endl; 68 | if (head == size_) { 69 | head = 0; 70 | } 71 | } 72 | } 73 | } 74 | 75 | while (head != tail) { 76 | std::cout << fifo_->get(head++) << std::endl; 77 | if (head == size_) { 78 | head = 0; 79 | } 80 | } 81 | } 82 | }; 83 | 84 | int main(int argc, char *argv[]) { 85 | Analyzer analyzer; 86 | if (argc <= 3) { 87 | std::cerr << "Parameters not enough!\n"; 88 | std::cerr << "Usage: " << argv[0] << " \n"; 89 | return 1; 90 | } 91 | analyzer.init(argv[3], argv[1]); 92 | analyzer.analyze(argv[2]); 93 | return 0; 94 | } 95 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/src/exp5_hot20.cc: -------------------------------------------------------------------------------- 1 | #include "large_array.h" 2 | #include "trace.h" 3 | 4 | class Analyzer : Analyzer_base { 5 | const uint32_t max_dis = (uint32_t)(1) << 29; // 128 GiB as maximum 6 | 7 | LargeArray* 
lba_2_freq_; 8 | uint64_t traffic_ = 0; 9 | 10 | void summary() { 11 | std::vector freqs; 12 | uint64_t wss = 0; 13 | uint64_t hot20_traffic = 0; 14 | 15 | for (uint64_t i = 0; i < lba_2_freq_->getSize(); i++) { 16 | uint64_t value = lba_2_freq_->get(i); 17 | if (value) { 18 | freqs.push_back(value); 19 | wss++; 20 | } 21 | } 22 | std::sort(freqs.begin(), freqs.end(), std::greater()); 23 | 24 | for (uint64_t i = 0; i < wss / 5; i++) { 25 | hot20_traffic += freqs[i]; 26 | } 27 | std::cout << volume_id_ << " " << (double)hot20_traffic / traffic_ << std::endl; 28 | } 29 | 30 | public: 31 | 32 | // initialize properties 33 | void init(char *propertyFileName, char *volume) { 34 | std::string volume_id(volume); 35 | volume_id_ = volume_id; 36 | trace_.loadProperty(propertyFileName, volume); 37 | 38 | uint64_t maxLba = trace_.getMaxLba(volume_id); 39 | n_blocks_ = maxLba + 1; 40 | 41 | std::cerr << "nBlocks = " << n_blocks_ << std::endl; 42 | 43 | lba_2_freq_ = new LargeArray(n_blocks_); 44 | } 45 | 46 | void analyze(char *inputTrace) { 47 | uint64_t offset, length, timestamp; 48 | bool is_write; 49 | 50 | openTrace(inputTrace); 51 | 52 | trace_.myTimer(true, "hot 20% traffic"); 53 | 54 | while (trace_.readNextRequestFstream(*is_, timestamp, is_write, offset, length, line2_)) { 55 | if (!is_write) continue; 56 | 57 | traffic_ += length; 58 | for (uint64_t i = 0; i < length; i += 1) { 59 | lba_2_freq_->inc(offset + i); 60 | } 61 | 62 | trace_.myTimer(false, "hot 20% traffic"); 63 | } 64 | 65 | summary(); 66 | } 67 | }; 68 | 69 | int main(int argc, char *argv[]) { 70 | Analyzer analyzer; 71 | analyzer.init(argv[3], argv[1]); 72 | analyzer.analyze(argv[2]); 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/src/obsv1.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Observation 1: Test the update distance of all the user-written blocks 3 | */ 4 | 5 | #include "large_array.h" 6 | #include "trace.h" 7 | 8 | class Analyzer : Analyzer_base { 9 | char volume_cstr[100]; 10 | 11 | LargeArray* index_map_; 12 | LargeArray* lifespans_in_mb_; 13 | uint64_t max_lifespan_; 14 | double write_wss_mb_ = 0; 15 | uint64_t current_id_ = 1; 16 | 17 | uint64_t getLifespanMb(uint64_t current_id, uint64_t prev_id) { 18 | uint64_t distance_in_mb = (current_id - prev_id) / 256; 19 | if (distance_in_mb >= max_lifespan_) distance_in_mb = max_lifespan_; 20 | return distance_in_mb; 21 | } 22 | 23 | public: 24 | 25 | // initialize properties 26 | void init(char *propertyFileName, char *volume) { 27 | std::string volume_id(volume); 28 | volume_id_ = volume_id; 29 | 30 | strcpy(volume_cstr, volume); 31 | trace_.loadProperty(propertyFileName, volume); 32 | 33 | uint64_t maxLba = trace_.getMaxLba(volume_id); 34 | write_wss_mb_ = (double)trace_.getUniqueLba(volume_id) / 256.0; 35 | n_blocks_ = maxLba + 1; 36 | max_lifespan_ = (double)n_blocks_ * 8 / 256; 37 | 38 | index_map_ = new LargeArray(n_blocks_); 39 | lifespans_in_mb_ = new LargeArray(max_lifespan_ + 1); // In MiB 40 | } 41 | 42 | void obsv1_summary() { 43 | uint64_t levels[4], tmp_lifespan_cnts = 0, ptr = 0; 44 | double chkpts[4] = {0.1, 0.2, 0.4, 0.8}; 45 | for (int i = 0; i < 4; i++) levels[i] = 0; 46 | for (uint64_t i = 0; i < lifespans_in_mb_->getSize(); i++) { 47 | tmp_lifespan_cnts += lifespans_in_mb_->get(i); 48 | //printf("%s %lu %lu\n", volume_id_.c_str(), i, lifespans_in_mb_->get(i)); 49 | while (ptr < 4 && (double)i > chkpts[ptr] * 
write_wss_mb_) { 50 | levels[ptr] = tmp_lifespan_cnts; 51 | ptr++; 52 | } 53 | } 54 | 55 | for (int i = 0; i < 4; i++) { 56 | // tmp_lifespan_cnts is the number of lifespans 57 | fprintf(stderr, "%.1f %.6lf %lu %lu\n", chkpts[i], (double)levels[i] / tmp_lifespan_cnts * 100, levels[i], tmp_lifespan_cnts); 58 | printf("%.1f %.6lf\n", chkpts[i], (double)levels[i] / tmp_lifespan_cnts * 100); 59 | } 60 | } 61 | 62 | void analyze(char *input_trace_file) { 63 | uint64_t offset, length, timestamp; 64 | bool is_write; 65 | openTrace(input_trace_file); 66 | 67 | uint64_t prev_id; 68 | trace_.myTimer(true, "update distance"); 69 | 70 | while (trace_.readNextRequestFstream(*is_, timestamp, is_write, offset, length, line2_)) { 71 | if (!is_write) continue; 72 | 73 | for (uint64_t i = 0; i < length; i += 1) { 74 | prev_id = index_map_->get(offset + i); 75 | 76 | if (prev_id != 0) { 77 | lifespans_in_mb_->inc(getLifespanMb(current_id_, prev_id)); 78 | } 79 | 80 | index_map_->put(offset + i, current_id_++); 81 | } 82 | 83 | trace_.myTimer(false, "update distance"); 84 | } 85 | 86 | obsv1_summary(); 87 | } 88 | }; 89 | 90 | int main(int argc, char *argv[]) { 91 | Analyzer analyzer; 92 | analyzer.init(argv[3], argv[1]); 93 | analyzer.analyze(argv[2]); 94 | } 95 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/src/split.cc: -------------------------------------------------------------------------------- 1 | #include "large_array.h" 2 | #include "trace.h" 3 | 4 | class Split : Analyzer_base { 5 | public: 6 | void analyze(char *input_trace, char* output_prefix) { 7 | char filename[300]; 8 | openTrace(input_trace); 9 | 10 | std::map> left_strings; 11 | uint64_t saved = 0; 12 | 13 | trace_.myTimer(true, "split"); 14 | 15 | while (std::getline(*is_, line_)) { 16 | strcpy(line2_, line_.c_str()); 17 | int pos = strlen(line2_) - 1; 18 | for ( ; pos >= 0 && line2_[pos] != ','; pos--); 19 | std::string s = std::string(line2_, pos); 20 | 21 | int res = 0; 22 | for (int i = pos+1; line2_[i] != '\0'; i++) if (isdigit(line2_[i])) res = res*10 + (line2_[i] - '0'); 23 | 24 | left_strings[res].push_back(s); 25 | saved++; 26 | trace_.myTimer(false, "split"); 27 | } 28 | 29 | for(auto& it : left_strings) { 30 | sprintf(filename, "%s/%d.csv", output_prefix, it.first); 31 | std::ofstream fs; 32 | fs.open(filename, std::ofstream::out | std::ofstream::app); 33 | for(auto& it0 : it.second) { 34 | fs << it0 << "\n"; 35 | } 36 | fs.close(); 37 | } 38 | } 39 | }; 40 | 41 | int main(int argc, char *argv[]) { 42 | Split split; 43 | split.analyze(argv[1], argv[2]); 44 | } 45 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/src/transform_timestamp_tencentCloud.cc: -------------------------------------------------------------------------------- 1 | #include "large_array.h" 2 | #include "trace.h" 3 | 4 | class Split : public Analyzer_base { 5 | std::vector savedOffsets; 6 | std::vector savedLengths; 7 | std::vector savedIsWrites; 8 | uint64_t lastTimestamp; 9 | uint64_t ptr, maxPtr; 10 | 11 | void output() { 12 | char s[200]; 13 | if (ptr > 0) { 14 | double interval = (double)1000000.0 / ptr; 15 | for (int i = 0; i < ptr; i++) { 16 | // 0,R,126703661056,4096,1577808000000046 17 | sprintf(s, "%s,%c,%lu,%lu,%lu\n", volume_id_.c_str(), 18 | (savedIsWrites[i] ? 
'W' : 'R'), savedOffsets[i] * 4096, savedLengths[i] * 4096, 19 | (uint64_t)std::min((double)lastTimestamp / 10.0 + interval * i, (double)lastTimestamp / 10.0 + 999999.0)); 20 | std::cout << s; 21 | // std::cout << volume_id_ << "," 22 | // << ((savedIsWrites[i]) ? 'W' : 'R') << "," 23 | // << savedOffsets[i] << "," 24 | // << savedLengths[i] << "," 25 | // << (uint64_t)std::min((double)lastTimestamp / 10.0 + interval * i, (double)lastTimestamp / 10.0 + 999999.0) << std::endl; 26 | } 27 | } 28 | ptr = 0; 29 | } 30 | 31 | public: 32 | 33 | void analyze(char *inputTrace) { 34 | uint64_t offset, length, timestamp; 35 | bool isWrite; 36 | char filename[300]; 37 | openTrace(inputTrace); 38 | trace_.myTimer(true, "split"); 39 | 40 | bool first = true; 41 | uint64_t cnt = 0; 42 | ptr = maxPtr = 0; 43 | 44 | while (trace_.readNextRequestFstream(*is_, timestamp, isWrite, offset, length, line2_)) { 45 | if (!isWrite) continue; 46 | if (lastTimestamp != timestamp) { 47 | output(); 48 | } 49 | 50 | if (ptr < maxPtr) { 51 | savedOffsets[ptr] = offset; 52 | savedLengths[ptr] = length; 53 | savedIsWrites[ptr] = isWrite; 54 | ptr++; 55 | } else { 56 | savedOffsets.push_back(offset); 57 | savedLengths.push_back(length); 58 | savedIsWrites.push_back(isWrite); 59 | maxPtr = (uint64_t)savedOffsets.size(); 60 | ptr = maxPtr; 61 | } 62 | lastTimestamp = timestamp; 63 | 64 | trace_.myTimer(false, "split"); 65 | } 66 | 67 | output(); 68 | } 69 | }; 70 | 71 | int main(int argc, char *argv[]) { 72 | setbuf(stderr, NULL); 73 | Split split; 74 | split.init(argv[3], argv[1]); 75 | split.analyze(argv[2]); 76 | } 77 | -------------------------------------------------------------------------------- /trace_replay/analyze_script/synthetic_gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source etc/common.sh 4 | 5 | if [[ ! -d $SYNTHETIC_PATH ]]; then 6 | echo "SYNTHETIC_PATH not set or not exist; please set in etc/common.sh" 7 | exit 8 | fi 9 | 10 | alphas=("0" "0.2" "0.4" "0.6" "0.8" "1") 11 | 12 | for ((i=0; i<${#alphas[@]}; i++)); do 13 | alpha=${alphas[$i]} 14 | Rscript r/synthetic.r ${alpha} $SYNTHETIC_PATH 15 | input_path="$SYNTHETIC_PATH/alpha_${alpha}.csv" 16 | 17 | vol_num="$i" 18 | output="$SYNTHETIC_PATH/${vol_num}.csv" 19 | 20 | if [[ ! -f $output ]]; then 21 | awk 'BEGIN {s=1;} {print "'"$vol_num"',W," $1*4096 ",4096," s; s=s+1;}' $input_path > $output 22 | head $output 23 | else 24 | echo "exist" 25 | fi 26 | 27 | rm $input_path 28 | done 29 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/Note: -------------------------------------------------------------------------------- 1 | Grouping: 2 | The aim of grouping volumes is to fully enjoy the memory-efficient indexmap 3 | implementation. The rationale is that a group of volumes have diverse workload 4 | patterns (skewed and non-skewed), thus caching can have a good hit ratio 5 | (overall a skewed workload). 6 | 7 | Content: 8 | The selected volumes are listed in order of file sizes. As a result, the 9 | divide.py can evenly split them into groups so that we can parallel the 10 | simulation. 
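As a quick illustration of the grouping note above, the sketch below (a hypothetical helper, not part of this repository) sums the sizes of the trace files listed in each group file, so one can verify that the size-ordered, round-robin split produced by divide.py keeps the groups roughly balanced for parallel simulation. It assumes each group file lists one trace path per line (e.g. ali_tracess/10.csv), as in the etc/ali_groups/group* files that follow, and that those paths resolve under a user-supplied trace directory.

```python
#!/usr/bin/env python3
# balance_check.py -- hypothetical helper, not in the repository.
# Report the total on-disk size of the traces assigned to each group file,
# assuming one trace path per line (as in etc/ali_groups/group*).
import glob
import os
import sys

def group_size_bytes(group_file, trace_root):
    total = 0
    with open(group_file) as f:
        for line in f:
            rel = line.strip()
            if not rel:
                continue
            path = os.path.join(trace_root, rel)  # e.g. <trace_root>/ali_tracess/10.csv
            if os.path.exists(path):
                total += os.path.getsize(path)
    return total

if __name__ == "__main__":
    # Usage (hypothetical): run from etc/ali_groups, e.g.
    #   python3 balance_check.py /dir/containing/ali_tracess
    trace_root = sys.argv[1] if len(sys.argv) > 1 else "."
    for group in sorted(glob.glob("group*")):
        print(group, "%.2f GiB" % (group_size_bytes(group, trace_root) / 2**30))
```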
11 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/ali_selected.txt: -------------------------------------------------------------------------------- 1 | 10 2 | 4 3 | 38 4 | 225 5 | 679 6 | 124 7 | 740 8 | 206 9 | 144 10 | 58 11 | 79 12 | 177 13 | 141 14 | 107 15 | 178 16 | 804 17 | 52 18 | 40 19 | 7 20 | 810 21 | 32 22 | 228 23 | 256 24 | 148 25 | 293 26 | 262 27 | 68 28 | 631 29 | 207 30 | 276 31 | 12 32 | 195 33 | 54 34 | 391 35 | 507 36 | 466 37 | 727 38 | 190 39 | 23 40 | 219 41 | 435 42 | 651 43 | 263 44 | 746 45 | 283 46 | 354 47 | 117 48 | 126 49 | 26 50 | 226 51 | 37 52 | 197 53 | 208 54 | 133 55 | 242 56 | 232 57 | 176 58 | 198 59 | 205 60 | 138 61 | 34 62 | 97 63 | 264 64 | 460 65 | 483 66 | 748 67 | 160 68 | 737 69 | 257 70 | 445 71 | 427 72 | 122 73 | 254 74 | 303 75 | 169 76 | 455 77 | 74 78 | 733 79 | 467 80 | 361 81 | 348 82 | 434 83 | 136 84 | 166 85 | 751 86 | 749 87 | 538 88 | 736 89 | 103 90 | 730 91 | 221 92 | 39 93 | 200 94 | 43 95 | 201 96 | 111 97 | 231 98 | 145 99 | 345 100 | 731 101 | 116 102 | 109 103 | 745 104 | 16 105 | 96 106 | 130 107 | 248 108 | 213 109 | 759 110 | 13 111 | 106 112 | 22 113 | 67 114 | 35 115 | 165 116 | 187 117 | 275 118 | 20 119 | 36 120 | 28 121 | 211 122 | 94 123 | 59 124 | 725 125 | 61 126 | 27 127 | 47 128 | 18 129 | 123 130 | 100 131 | 0 132 | 223 133 | 132 134 | 316 135 | 14 136 | 265 137 | 742 138 | 227 139 | 29 140 | 45 141 | 15 142 | 404 143 | 269 144 | 310 145 | 25 146 | 11 147 | 470 148 | 272 149 | 278 150 | 41 151 | 202 152 | 261 153 | 220 154 | 580 155 | 424 156 | 194 157 | 701 158 | 363 159 | 654 160 | 726 161 | 93 162 | 728 163 | 244 164 | 150 165 | 724 166 | 697 167 | 209 168 | 555 169 | 623 170 | 783 171 | 374 172 | 771 173 | 714 174 | 717 175 | 212 176 | 752 177 | 175 178 | 440 179 | 99 180 | 280 181 | 260 182 | 754 183 | 468 184 | 780 185 | 753 186 | 808 187 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/divide.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | n_groups = sys.argv[1] 4 | 5 | files = [] 6 | for i in range(1, int(n_groups) + 1): 7 | files.append(open("group" + str(i), "w")) 8 | 9 | filelist = open("ali_selected.txt", "r") 10 | 11 | cnt = 0 12 | for l in filelist.readlines(): 13 | files[cnt % int(n_groups)].write(l) 14 | cnt += 1 15 | 16 | for f in files: 17 | f.close() 18 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group1: -------------------------------------------------------------------------------- 1 | ali_tracess/10.csv 2 | ali_tracess/12.csv 3 | ali_tracess/34.csv 4 | ali_tracess/221.csv 5 | ali_tracess/211.csv 6 | ali_tracess/202.csv 7 | ali_tracess/260.csv 8 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group10: -------------------------------------------------------------------------------- 1 | ali_tracess/58.csv 2 | ali_tracess/219.csv 3 | ali_tracess/445.csv 4 | ali_tracess/731.csv 5 | ali_tracess/100.csv 6 | ali_tracess/726.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group11: -------------------------------------------------------------------------------- 1 | ali_tracess/79.csv 2 | ali_tracess/435.csv 3 | ali_tracess/427.csv 4 | ali_tracess/116.csv 5 | ali_tracess/0.csv 6 | ali_tracess/93.csv 7 | 
-------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group12: -------------------------------------------------------------------------------- 1 | ali_tracess/177.csv 2 | ali_tracess/651.csv 3 | ali_tracess/122.csv 4 | ali_tracess/109.csv 5 | ali_tracess/223.csv 6 | ali_tracess/728.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group13: -------------------------------------------------------------------------------- 1 | ali_tracess/141.csv 2 | ali_tracess/263.csv 3 | ali_tracess/254.csv 4 | ali_tracess/745.csv 5 | ali_tracess/132.csv 6 | ali_tracess/244.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group14: -------------------------------------------------------------------------------- 1 | ali_tracess/107.csv 2 | ali_tracess/746.csv 3 | ali_tracess/303.csv 4 | ali_tracess/16.csv 5 | ali_tracess/316.csv 6 | ali_tracess/150.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group15: -------------------------------------------------------------------------------- 1 | ali_tracess/178.csv 2 | ali_tracess/283.csv 3 | ali_tracess/169.csv 4 | ali_tracess/96.csv 5 | ali_tracess/14.csv 6 | ali_tracess/724.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group16: -------------------------------------------------------------------------------- 1 | ali_tracess/804.csv 2 | ali_tracess/354.csv 3 | ali_tracess/455.csv 4 | ali_tracess/130.csv 5 | ali_tracess/265.csv 6 | ali_tracess/697.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group17: -------------------------------------------------------------------------------- 1 | ali_tracess/52.csv 2 | ali_tracess/117.csv 3 | ali_tracess/74.csv 4 | ali_tracess/248.csv 5 | ali_tracess/742.csv 6 | ali_tracess/209.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group18: -------------------------------------------------------------------------------- 1 | ali_tracess/40.csv 2 | ali_tracess/126.csv 3 | ali_tracess/733.csv 4 | ali_tracess/213.csv 5 | ali_tracess/227.csv 6 | ali_tracess/555.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group19: -------------------------------------------------------------------------------- 1 | ali_tracess/7.csv 2 | ali_tracess/26.csv 3 | ali_tracess/467.csv 4 | ali_tracess/759.csv 5 | ali_tracess/29.csv 6 | ali_tracess/623.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group2: -------------------------------------------------------------------------------- 1 | ali_tracess/4.csv 2 | ali_tracess/195.csv 3 | ali_tracess/97.csv 4 | ali_tracess/39.csv 5 | ali_tracess/94.csv 6 | ali_tracess/261.csv 7 | ali_tracess/754.csv 8 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group20: -------------------------------------------------------------------------------- 1 | ali_tracess/810.csv 2 | ali_tracess/226.csv 3 | ali_tracess/361.csv 4 | ali_tracess/13.csv 5 | ali_tracess/45.csv 6 | ali_tracess/783.csv 7 | 
-------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group21: -------------------------------------------------------------------------------- 1 | ali_tracess/32.csv 2 | ali_tracess/37.csv 3 | ali_tracess/348.csv 4 | ali_tracess/106.csv 5 | ali_tracess/15.csv 6 | ali_tracess/374.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group22: -------------------------------------------------------------------------------- 1 | ali_tracess/228.csv 2 | ali_tracess/197.csv 3 | ali_tracess/434.csv 4 | ali_tracess/22.csv 5 | ali_tracess/404.csv 6 | ali_tracess/771.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group23: -------------------------------------------------------------------------------- 1 | ali_tracess/256.csv 2 | ali_tracess/208.csv 3 | ali_tracess/136.csv 4 | ali_tracess/67.csv 5 | ali_tracess/269.csv 6 | ali_tracess/714.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group24: -------------------------------------------------------------------------------- 1 | ali_tracess/148.csv 2 | ali_tracess/133.csv 3 | ali_tracess/166.csv 4 | ali_tracess/35.csv 5 | ali_tracess/310.csv 6 | ali_tracess/717.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group25: -------------------------------------------------------------------------------- 1 | ali_tracess/293.csv 2 | ali_tracess/242.csv 3 | ali_tracess/751.csv 4 | ali_tracess/165.csv 5 | ali_tracess/25.csv 6 | ali_tracess/212.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group26: -------------------------------------------------------------------------------- 1 | ali_tracess/262.csv 2 | ali_tracess/232.csv 3 | ali_tracess/749.csv 4 | ali_tracess/187.csv 5 | ali_tracess/11.csv 6 | ali_tracess/752.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group27: -------------------------------------------------------------------------------- 1 | ali_tracess/68.csv 2 | ali_tracess/176.csv 3 | ali_tracess/538.csv 4 | ali_tracess/275.csv 5 | ali_tracess/470.csv 6 | ali_tracess/175.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group28: -------------------------------------------------------------------------------- 1 | ali_tracess/631.csv 2 | ali_tracess/198.csv 3 | ali_tracess/736.csv 4 | ali_tracess/20.csv 5 | ali_tracess/272.csv 6 | ali_tracess/440.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group29: -------------------------------------------------------------------------------- 1 | ali_tracess/207.csv 2 | ali_tracess/205.csv 3 | ali_tracess/103.csv 4 | ali_tracess/36.csv 5 | ali_tracess/278.csv 6 | ali_tracess/99.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group3: -------------------------------------------------------------------------------- 1 | ali_tracess/38.csv 2 | ali_tracess/54.csv 3 | ali_tracess/264.csv 4 | ali_tracess/200.csv 5 | ali_tracess/59.csv 6 | ali_tracess/220.csv 7 | ali_tracess/468.csv 8 | 
-------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group30: -------------------------------------------------------------------------------- 1 | ali_tracess/276.csv 2 | ali_tracess/138.csv 3 | ali_tracess/730.csv 4 | ali_tracess/28.csv 5 | ali_tracess/41.csv 6 | ali_tracess/280.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group4: -------------------------------------------------------------------------------- 1 | ali_tracess/225.csv 2 | ali_tracess/391.csv 3 | ali_tracess/460.csv 4 | ali_tracess/43.csv 5 | ali_tracess/725.csv 6 | ali_tracess/580.csv 7 | ali_tracess/780.csv 8 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group5: -------------------------------------------------------------------------------- 1 | ali_tracess/679.csv 2 | ali_tracess/507.csv 3 | ali_tracess/483.csv 4 | ali_tracess/201.csv 5 | ali_tracess/61.csv 6 | ali_tracess/424.csv 7 | ali_tracess/753.csv 8 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group6: -------------------------------------------------------------------------------- 1 | ali_tracess/124.csv 2 | ali_tracess/466.csv 3 | ali_tracess/748.csv 4 | ali_tracess/111.csv 5 | ali_tracess/27.csv 6 | ali_tracess/194.csv 7 | ali_tracess/808.csv 8 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group7: -------------------------------------------------------------------------------- 1 | ali_tracess/740.csv 2 | ali_tracess/727.csv 3 | ali_tracess/160.csv 4 | ali_tracess/231.csv 5 | ali_tracess/47.csv 6 | ali_tracess/701.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group8: -------------------------------------------------------------------------------- 1 | ali_tracess/206.csv 2 | ali_tracess/190.csv 3 | ali_tracess/737.csv 4 | ali_tracess/145.csv 5 | ali_tracess/18.csv 6 | ali_tracess/363.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/ali_groups/group9: -------------------------------------------------------------------------------- 1 | ali_tracess/144.csv 2 | ali_tracess/23.csv 3 | ali_tracess/257.csv 4 | ali_tracess/345.csv 5 | ali_tracess/123.csv 6 | ali_tracess/654.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/synthetic_groups/all: -------------------------------------------------------------------------------- 1 | synthetic_groups/0.csv 2 | synthetic_groups/1.csv 3 | synthetic_groups/2.csv 4 | synthetic_groups/3.csv 5 | synthetic_groups/4.csv 6 | synthetic_groups/5.csv 7 | -------------------------------------------------------------------------------- /trace_replay/etc/synthetic_property.txt: -------------------------------------------------------------------------------- 1 | 0 13107200 53687091200 2 | 1 13107200 53687091200 3 | 2 13107200 53687091200 4 | 3 13107200 53687091200 5 | 4 13107200 53687091200 6 | 5 13107200 53687091200 7 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/divide.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | n_groups = sys.argv[1] 4 | 5 | files = [] 6 | for i in range(1, int(n_groups) + 1): 7 | 
files.append(open("group" + str(i), "w")) 8 | 9 | filelist = open("selected_volumes.txt", "r") 10 | 11 | cnt = 0 12 | for l in filelist.readlines(): 13 | files[cnt % int(n_groups)].write(l) 14 | cnt += 1 15 | 16 | for f in files: 17 | f.close() 18 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group1: -------------------------------------------------------------------------------- 1 | 1458.csv 2 | 5048.csv 3 | 4377.csv 4 | 13057.csv 5 | 2063.csv 6 | 1655.csv 7 | 3806.csv 8 | 3036.csv 9 | 20095.csv 10 | 18530.csv 11 | 19045.csv 12 | 14300.csv 13 | 1847.csv 14 | 13262.csv 15 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group10: -------------------------------------------------------------------------------- 1 | 1326.csv 2 | 4287.csv 3 | 13286.csv 4 | 7255.csv 5 | 3535.csv 6 | 18417.csv 7 | 21031.csv 8 | 11156.csv 9 | 2228.csv 10 | 15293.csv 11 | 17742.csv 12 | 20519.csv 13 | 4587.csv 14 | 24725.csv 15 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group11: -------------------------------------------------------------------------------- 1 | 1730.csv 2 | 1662.csv 3 | 13198.csv 4 | 1273.csv 5 | 25669.csv 6 | 14248.csv 7 | 7116.csv 8 | 5334.csv 9 | 19498.csv 10 | 5346.csv 11 | 16923.csv 12 | 19120.csv 13 | 3770.csv 14 | 14207.csv 15 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group12: -------------------------------------------------------------------------------- 1 | 2583.csv 2 | 4246.csv 3 | 3240.csv 4 | 8937.csv 5 | 13201.csv 6 | 2356.csv 7 | 16433.csv 8 | 9214.csv 9 | 16897.csv 10 | 18297.csv 11 | 15420.csv 12 | 23952.csv 13 | 1162.csv 14 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group13: -------------------------------------------------------------------------------- 1 | 3342.csv 2 | 16918.csv 3 | 1214.csv 4 | 1148.csv 5 | 10001.csv 6 | 4124.csv 7 | 21805.csv 8 | 19105.csv 9 | 18986.csv 10 | 13016.csv 11 | 13139.csv 12 | 1722.csv 13 | 6550.csv 14 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group14: -------------------------------------------------------------------------------- 1 | 6410.csv 2 | 5565.csv 3 | 8077.csv 4 | 6327.csv 5 | 12300.csv 6 | 1894.csv 7 | 12186.csv 8 | 11822.csv 9 | 11487.csv 10 | 10120.csv 11 | 6108.csv 12 | 7859.csv 13 | 8169.csv 14 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group15: -------------------------------------------------------------------------------- 1 | 1394.csv 2 | 3963.csv 3 | 16357.csv 4 | 5519.csv 5 | 13167.csv 6 | 7584.csv 7 | 12771.csv 8 | 10935.csv 9 | 1306.csv 10 | 18735.csv 11 | 5458.csv 12 | 8319.csv 13 | 5615.csv 14 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group16: -------------------------------------------------------------------------------- 1 | 5592.csv 2 | 2627.csv 3 | 12217.csv 4 | 15549.csv 5 | 1293.csv 6 | 6591.csv 7 | 2976.csv 8 | 2761.csv 9 | 17393.csv 10 | 4011.csv 11 | 16275.csv 12 | 11622.csv 13 | 4620.csv 14 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group17: 
-------------------------------------------------------------------------------- 1 | 3557.csv 2 | 4385.csv 3 | 5046.csv 4 | 3246.csv 5 | 9327.csv 6 | 13789.csv 7 | 18605.csv 8 | 16639.csv 9 | 2482.csv 10 | 11509.csv 11 | 3847.csv 12 | 2863.csv 13 | 5732.csv 14 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group18: -------------------------------------------------------------------------------- 1 | 22923.csv 2 | 3768.csv 3 | 8809.csv 4 | 6528.csv 5 | 17389.csv 6 | 4883.csv 7 | 3310.csv 8 | 5913.csv 9 | 15581.csv 10 | 14688.csv 11 | 20941.csv 12 | 16779.csv 13 | 7915.csv 14 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group19: -------------------------------------------------------------------------------- 1 | 12455.csv 2 | 3708.csv 3 | 2230.csv 4 | 8281.csv 5 | 3858.csv 6 | 25476.csv 7 | 18292.csv 8 | 13379.csv 9 | 2009.csv 10 | 1784.csv 11 | 8957.csv 12 | 6018.csv 13 | 25640.csv 14 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group2: -------------------------------------------------------------------------------- 1 | 2982.csv 2 | 2313.csv 3 | 3300.csv 4 | 6378.csv 5 | 19202.csv 6 | 9551.csv 7 | 3879.csv 8 | 22054.csv 9 | 1362.csv 10 | 21795.csv 11 | 2372.csv 12 | 21974.csv 13 | 22404.csv 14 | 12093.csv 15 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group20: -------------------------------------------------------------------------------- 1 | 13494.csv 2 | 4145.csv 3 | 2337.csv 4 | 16546.csv 5 | 1182.csv 6 | 2127.csv 7 | 11633.csv 8 | 7299.csv 9 | 18671.csv 10 | 18581.csv 11 | 12621.csv 12 | 14415.csv 13 | 4462.csv 14 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group3: -------------------------------------------------------------------------------- 1 | 1163.csv 2 | 1205.csv 3 | 9440.csv 4 | 5369.csv 5 | 3762.csv 6 | 12447.csv 7 | 8932.csv 8 | 15864.csv 9 | 1360.csv 10 | 20291.csv 11 | 20675.csv 12 | 21040.csv 13 | 6485.csv 14 | 20656.csv 15 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group4: -------------------------------------------------------------------------------- 1 | 7315.csv 2 | 1164.csv 3 | 5473.csv 4 | 10171.csv 5 | 4523.csv 6 | 23689.csv 7 | 2438.csv 8 | 9118.csv 9 | 15739.csv 10 | 16864.csv 11 | 7931.csv 12 | 17452.csv 13 | 17358.csv 14 | 20414.csv 15 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group5: -------------------------------------------------------------------------------- 1 | 3810.csv 2 | 1152.csv 3 | 7568.csv 4 | 20699.csv 5 | 15538.csv 6 | 14002.csv 7 | 6676.csv 8 | 20320.csv 9 | 24190.csv 10 | 2745.csv 11 | 17009.csv 12 | 16558.csv 13 | 12745.csv 14 | 2315.csv 15 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group6: -------------------------------------------------------------------------------- 1 | 9850.csv 2 | 2079.csv 3 | 1437.csv 4 | 2030.csv 5 | 2275.csv 6 | 3944.csv 7 | 2878.csv 8 | 2637.csv 9 | 4870.csv 10 | 17044.csv 11 | 3774.csv 12 | 14447.csv 13 | 5853.csv 14 | 21720.csv 15 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group7: 
-------------------------------------------------------------------------------- 1 | 4786.csv 2 | 2308.csv 3 | 4111.csv 4 | 9324.csv 5 | 4332.csv 6 | 20627.csv 7 | 13270.csv 8 | 24332.csv 9 | 1557.csv 10 | 20974.csv 11 | 5178.csv 12 | 4019.csv 13 | 4046.csv 14 | 24245.csv 15 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group8: -------------------------------------------------------------------------------- 1 | 4927.csv 2 | 5372.csv 3 | 7709.csv 4 | 2771.csv 5 | 2286.csv 6 | 4833.csv 7 | 16788.csv 8 | 22587.csv 9 | 16739.csv 10 | 2145.csv 11 | 20042.csv 12 | 20314.csv 13 | 1911.csv 14 | 1599.csv 15 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/group9: -------------------------------------------------------------------------------- 1 | 1625.csv 2 | 5710.csv 3 | 2212.csv 4 | 8825.csv 5 | 4323.csv 6 | 1830.csv 7 | 12430.csv 8 | 16742.csv 9 | 2390.csv 10 | 3512.csv 11 | 23374.csv 12 | 21108.csv 13 | 1591.csv 14 | 2035.csv 15 | -------------------------------------------------------------------------------- /trace_replay/etc/tencent_groups/selected_volumes.txt: -------------------------------------------------------------------------------- 1 | 1458.csv 2 | 2982.csv 3 | 1163.csv 4 | 7315.csv 5 | 3810.csv 6 | 9850.csv 7 | 4786.csv 8 | 4927.csv 9 | 1625.csv 10 | 1326.csv 11 | 1730.csv 12 | 2583.csv 13 | 3342.csv 14 | 6410.csv 15 | 1394.csv 16 | 5592.csv 17 | 3557.csv 18 | 22923.csv 19 | 12455.csv 20 | 13494.csv 21 | 5048.csv 22 | 2313.csv 23 | 1205.csv 24 | 1164.csv 25 | 1152.csv 26 | 2079.csv 27 | 2308.csv 28 | 5372.csv 29 | 5710.csv 30 | 4287.csv 31 | 1662.csv 32 | 4246.csv 33 | 16918.csv 34 | 5565.csv 35 | 3963.csv 36 | 2627.csv 37 | 4385.csv 38 | 3768.csv 39 | 3708.csv 40 | 4145.csv 41 | 4377.csv 42 | 3300.csv 43 | 9440.csv 44 | 5473.csv 45 | 7568.csv 46 | 1437.csv 47 | 4111.csv 48 | 7709.csv 49 | 2212.csv 50 | 13286.csv 51 | 13198.csv 52 | 3240.csv 53 | 1214.csv 54 | 8077.csv 55 | 16357.csv 56 | 12217.csv 57 | 5046.csv 58 | 8809.csv 59 | 2230.csv 60 | 2337.csv 61 | 13057.csv 62 | 6378.csv 63 | 5369.csv 64 | 10171.csv 65 | 20699.csv 66 | 2030.csv 67 | 9324.csv 68 | 2771.csv 69 | 8825.csv 70 | 7255.csv 71 | 1273.csv 72 | 8937.csv 73 | 1148.csv 74 | 6327.csv 75 | 5519.csv 76 | 15549.csv 77 | 3246.csv 78 | 6528.csv 79 | 8281.csv 80 | 16546.csv 81 | 2063.csv 82 | 19202.csv 83 | 3762.csv 84 | 4523.csv 85 | 15538.csv 86 | 2275.csv 87 | 4332.csv 88 | 2286.csv 89 | 4323.csv 90 | 3535.csv 91 | 25669.csv 92 | 13201.csv 93 | 10001.csv 94 | 12300.csv 95 | 13167.csv 96 | 1293.csv 97 | 9327.csv 98 | 17389.csv 99 | 3858.csv 100 | 1182.csv 101 | 1655.csv 102 | 9551.csv 103 | 12447.csv 104 | 23689.csv 105 | 14002.csv 106 | 3944.csv 107 | 20627.csv 108 | 4833.csv 109 | 1830.csv 110 | 18417.csv 111 | 14248.csv 112 | 2356.csv 113 | 4124.csv 114 | 1894.csv 115 | 7584.csv 116 | 6591.csv 117 | 13789.csv 118 | 4883.csv 119 | 25476.csv 120 | 2127.csv 121 | 3806.csv 122 | 3879.csv 123 | 8932.csv 124 | 2438.csv 125 | 6676.csv 126 | 2878.csv 127 | 13270.csv 128 | 16788.csv 129 | 12430.csv 130 | 21031.csv 131 | 7116.csv 132 | 16433.csv 133 | 21805.csv 134 | 12186.csv 135 | 12771.csv 136 | 2976.csv 137 | 18605.csv 138 | 3310.csv 139 | 18292.csv 140 | 11633.csv 141 | 3036.csv 142 | 22054.csv 143 | 15864.csv 144 | 9118.csv 145 | 20320.csv 146 | 2637.csv 147 | 24332.csv 148 | 22587.csv 149 | 16742.csv 150 | 11156.csv 151 | 5334.csv 152 | 9214.csv 153 | 19105.csv 154 | 11822.csv 155 | 
10935.csv 156 | 2761.csv 157 | 16639.csv 158 | 5913.csv 159 | 13379.csv 160 | 7299.csv 161 | 20095.csv 162 | 1362.csv 163 | 1360.csv 164 | 15739.csv 165 | 24190.csv 166 | 4870.csv 167 | 1557.csv 168 | 16739.csv 169 | 2390.csv 170 | 2228.csv 171 | 19498.csv 172 | 16897.csv 173 | 18986.csv 174 | 11487.csv 175 | 1306.csv 176 | 17393.csv 177 | 2482.csv 178 | 15581.csv 179 | 2009.csv 180 | 18671.csv 181 | 18530.csv 182 | 21795.csv 183 | 20291.csv 184 | 16864.csv 185 | 2745.csv 186 | 17044.csv 187 | 20974.csv 188 | 2145.csv 189 | 3512.csv 190 | 15293.csv 191 | 5346.csv 192 | 18297.csv 193 | 13016.csv 194 | 10120.csv 195 | 18735.csv 196 | 4011.csv 197 | 11509.csv 198 | 14688.csv 199 | 1784.csv 200 | 18581.csv 201 | 19045.csv 202 | 2372.csv 203 | 20675.csv 204 | 7931.csv 205 | 17009.csv 206 | 3774.csv 207 | 5178.csv 208 | 20042.csv 209 | 23374.csv 210 | 17742.csv 211 | 16923.csv 212 | 15420.csv 213 | 13139.csv 214 | 6108.csv 215 | 5458.csv 216 | 16275.csv 217 | 3847.csv 218 | 20941.csv 219 | 8957.csv 220 | 12621.csv 221 | 14300.csv 222 | 21974.csv 223 | 21040.csv 224 | 17452.csv 225 | 16558.csv 226 | 14447.csv 227 | 4019.csv 228 | 20314.csv 229 | 21108.csv 230 | 20519.csv 231 | 19120.csv 232 | 23952.csv 233 | 1722.csv 234 | 7859.csv 235 | 8319.csv 236 | 11622.csv 237 | 2863.csv 238 | 16779.csv 239 | 6018.csv 240 | 14415.csv 241 | 1847.csv 242 | 22404.csv 243 | 6485.csv 244 | 17358.csv 245 | 12745.csv 246 | 5853.csv 247 | 4046.csv 248 | 1911.csv 249 | 1591.csv 250 | 4587.csv 251 | 3770.csv 252 | 1162.csv 253 | 6550.csv 254 | 8169.csv 255 | 5615.csv 256 | 4620.csv 257 | 5732.csv 258 | 7915.csv 259 | 25640.csv 260 | 4462.csv 261 | 13262.csv 262 | 12093.csv 263 | 20656.csv 264 | 20414.csv 265 | 2315.csv 266 | 21720.csv 267 | 24245.csv 268 | 1599.csv 269 | 2035.csv 270 | 24725.csv 271 | 14207.csv 272 | -------------------------------------------------------------------------------- /trace_replay/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | gc-simulator 8 | gc-simulator 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | 14 | 15 | org.apache.maven.plugins 16 | maven-compiler-plugin 17 | 18 | 11 19 | 11 20 | 21 | 22 | 23 | org.apache.maven.plugins 24 | maven-compiler-plugin 25 | 26 | 11 27 | 11 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | org.apache.maven.plugins 37 | maven-resources-plugin 38 | 2.6 39 | 40 | 41 | 42 | org.apache.hadoop 43 | hadoop-hdfs 44 | 3.1.1 45 | 46 | 47 | 48 | org.apache.hadoop 49 | hadoop-client 50 | 3.1.1 51 | 52 | 53 | 54 | org.apache.hadoop 55 | hadoop-common 56 | 3.1.1 57 | 58 | 59 | 60 | net.sf.trove4j 61 | trove4j 62 | 3.0.3 63 | 64 | 65 | 66 | org.iq80.leveldb 67 | leveldb-api 68 | 0.7 69 | 70 | 71 | 72 | org.iq80.leveldb 73 | leveldb 74 | 0.7 75 | 76 | 77 | 78 | com.googlecode.json-simple 79 | json-simple 80 | 1.1.1 81 | 82 | 83 | 84 | org.rocksdb 85 | rocksdbjni 86 | 6.6.4 87 | 88 | 89 | 90 | com.google.guava 91 | guava 92 | 29.0-jre 93 | 94 | 95 | 96 | commons-codec 97 | commons-codec 98 | 1.14 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /trace_replay/scripts/base.sh: -------------------------------------------------------------------------------- 1 | export MAVEN_OPTS="-XX:ParallelGCThreads=3 -Xmx10g -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./dumps" 2 | 3 | # data placement scheme 4 | separateMethod=$1 5 | # varying selection algorithm 6 | selectionAlgorithm=$2 7 | # varying GP 8 | garbageProportionThreshold=$3 9 | # 
varying segment sizes 10 | segmentSize=$4 11 | numPickSegs=$5 12 | # varying number of classes 13 | numClasses=$6 14 | 15 | # input and output 16 | inputTrace=$7 17 | temporaryDir=$8 18 | rawOutputFile=$9 19 | 20 | 21 | # enter working directory 22 | cd ../ 23 | 24 | # create directory 25 | mkdir -p $rawOutputFile 26 | rm -r $rawOutputFile 27 | 28 | run() { 29 | rm -r $temporaryDir 30 | mkdir -p $temporaryDir/segments 31 | mkdir -p $temporaryDir/indexmap 32 | mkdir -p $temporaryDir/ondiskfifo 33 | 34 | mvn exec:java -Dexec.mainClass="gcsimulator.Simulator" \ 35 | -Dexec.args="--path ${inputTrace} --outputPrefix ${temporaryDir} ${options} ${hardcodedOptions}" \ 36 | | tee $rawOutputFile 37 | } 38 | 39 | hardcodedOptions="--propertyPath ${propertyPath} --oraclePath ${oraclePath}" 40 | 41 | options="" 42 | options="${options} --setSeparateMethod ${separateMethod}" 43 | options="${options} --selectionAlgorithm ${selectionAlgorithm}" 44 | options="${options} --setSystemGarbageProportionThreshold ${garbageProportionThreshold}" 45 | options="${options} --setSegmentSize ${segmentSize} --setPickSegAmount ${numPickSegs}" 46 | options="${options} --setNumOpenSegments ${numClasses}" 47 | echo $options 48 | 49 | run 50 | -------------------------------------------------------------------------------- /trace_replay/scripts/obtain_removed_seg.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from parse import parse 3 | 4 | files = sys.argv[1:] 5 | mp = {} 6 | 7 | for filename in files: 8 | f = open(filename, "r") 9 | for line in f.readlines(): 10 | if line.startswith("segment"): 11 | logid = parse('segments/{}/{}', line)[0] 12 | valid = True 13 | if line.startswith("rmed_seg") and valid: 14 | valid = False 15 | if logid not in mp: 16 | mp[logid] = [] 17 | gp = parse('rmed_seg: {} {} {} {} {} {} {}', line)[0] 18 | 19 | mp[logid].append(gp) 20 | 21 | for logid in mp: 22 | print(logid, mp[logid]) 23 | -------------------------------------------------------------------------------- /trace_replay/scripts/process_fifo_len.py: -------------------------------------------------------------------------------- 1 | # (Exp#7) Memory Overhead: extract the number of unique LBAs in the FIFO queue 2 | import sys 3 | 4 | fifo_size_per_log = {} 5 | num_lbas_per_log = {} 6 | final_fifo_size_per_log = {} 7 | final_num_lbas_per_log = {} 8 | 9 | f = open(sys.argv[1], "r") 10 | for line in f.readlines(): 11 | words = line.rstrip().replace(" ", "").split(",") 12 | logid = words[0] 13 | fifo_size = words[1] 14 | num_lbas = words[2] 15 | 16 | key, value = fifo_size.split(":") 17 | if key == "currentFIFOSize": 18 | if logid not in fifo_size_per_log: 19 | fifo_size_per_log[logid] = [] 20 | fifo_size_per_log[logid].append(value) 21 | elif key == "finalFIFOsize": 22 | if logid not in final_fifo_size_per_log: 23 | final_fifo_size_per_log[logid] = value 24 | 25 | key, value = num_lbas.split(":") 26 | if key == "numLBA": 27 | if logid not in num_lbas_per_log: 28 | num_lbas_per_log[logid] = [] 29 | num_lbas_per_log[logid].append(value) 30 | elif key == "finalnumberofLBAs": 31 | if logid not in final_num_lbas_per_log: 32 | final_num_lbas_per_log[logid] = value 33 | 34 | 35 | print("volumeid", "avg_num_lbas", "max_num_lbas", "min_num_lbas", "final_num_lbas") 36 | for logid in final_num_lbas_per_log: 37 | if logid not in num_lbas_per_log: 38 | print(logid, final_num_lbas_per_log[logid], final_num_lbas_per_log[logid], final_num_lbas_per_log[logid], final_num_lbas_per_log[logid]) 39 | 
continue 40 | 41 | lbas = num_lbas_per_log[logid] 42 | lbas = lbas[int(len(lbas) * 0.1):] 43 | tot = 0 44 | mx = 0 45 | mi = sys.maxsize 46 | for lba in lbas: 47 | n = int(lba) 48 | tot += n 49 | mx = max(mx, n) 50 | mi = min(mi, n) 51 | print(logid, tot / len(lbas), mx, mi, final_num_lbas_per_log[logid]) 52 | -------------------------------------------------------------------------------- /trace_replay/scripts/run_exp1_selection.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Varying selection algorithms between Greedy and Cost-Benefit 3 | 4 | export propertyPath="./etc/ali_property.txt" 5 | export oraclePath="./traces/oracles/" 6 | schemes="NoSep SepGC DAC SFS MultiLog ETI MultiQueue SFR FADaC WARCIP SepBIT FK" 7 | options="0.15 536870912 1 6" 8 | 9 | for scheme in $schemes 10 | do 11 | echo "Running ${scheme}" 12 | for groupId in {1..30} 13 | do 14 | for selection in "Greedy" "CostBenefit" 15 | do 16 | bash base.sh ${scheme} ${selection} $options \ 17 | ./etc/ali_groups/group${groupId} \ 18 | tmp/ \ 19 | ./results/exp1/${groupId}/${scheme}_${selection}.result 20 | done 21 | done 22 | done 23 | 24 | cd ../results 25 | for scheme in $schemes 26 | do 27 | for selection in "Greedy" "CostBenefit" 28 | do 29 | for groupId in {1..30} 30 | do 31 | mkdir exp1/${scheme} 32 | ag "segment WA" exp1/${groupId}/${scheme}_${selection}.result > exp1/${scheme}/${groupId}_${selection}.result 33 | awk '{print $2$16}' exp1/${scheme}/${groupId}_${selection}.result > tmp; mv tmp exp1/${scheme}/${groupId}_${selection} 34 | done 35 | cat exp1/${scheme}/*_${selection} > exp1/${scheme}_${selection} 36 | done 37 | done 38 | 39 | echo "Please check the results in results/exp1 for the WAs of each scheme" 40 | -------------------------------------------------------------------------------- /trace_replay/scripts/run_exp2_segsize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Varying the segment sizes 3 | 4 | export propertyPath="./etc/ali_property.txt" 5 | export oraclePath="./traces/oracles/" 6 | 7 | schemes="NoSep SepGC DAC SFS MultiLog ETI MultiQueue SFR FADaC WARCIP SepBIT FK" 8 | for scheme in $schemes 9 | do 10 | for groupId in {1..30} 11 | do 12 | selection="CostBenefit" 13 | options="0.15 268435456 2 6" 14 | bash base.sh ${scheme} ${selection} $options \ 15 | ./etc/ali_groups/group${groupId} \ 16 | tmp/ \ 17 | ./results/exp2/${groupId}/${scheme}_256m.result 18 | 19 | options="0.15 134217728 4 6" 20 | bash base.sh ${scheme} ${selection} $options \ 21 | ./etc/ali_groups/group${groupId} \ 22 | tmp/ \ 23 | ./results/exp2/${groupId}/${scheme}_128m.result 24 | 25 | options="0.15 67108864 8 6" 26 | bash base.sh ${scheme} ${selection} $options \ 27 | ./etc/ali_groups/group${groupId} \ 28 | tmp/ \ 29 | ./results/exp2/${groupId}/${scheme}_64m.result 30 | done 31 | done 32 | 33 | cd ../results 34 | for scheme in $schemes 35 | do 36 | for segsize in "64m" "128" "256m" 37 | do 38 | for groupId in {1..30} 39 | do 40 | mkdir exp2/${scheme} 41 | ag "segment WA" exp2/${groupId}/${scheme}_${segsize}.result > exp2/${scheme}/${groupId}_${segsize}.result 42 | awk '{print $2$16}' exp2/${scheme}/${groupId}_${segsize}.result > tmp; mv tmp exp2/${scheme}/${groupId}_${segsize} 43 | done 44 | cat exp2/${scheme}/*_${segsize} > exp2/${scheme}_${segsize} 45 | done 46 | done 47 | 48 | echo "Please check the results in results/exp2 for the WAs of each scheme" 49 | 
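The ag/awk post-processing in run_exp1_selection.sh and run_exp2_segsize.sh above (and in the later run_exp*.sh scripts) only pulls each volume's write-amplification figure out of the per-log summary lines that GCScheduler.summary() prints (the lines containing "segment WA = ..."). For readers without ag installed, a rough Python sketch of the same extraction over the raw .result files is shown below; the script name, the example invocation, and the exact regular expression are illustrative assumptions rather than part of the repository.

```python
#!/usr/bin/env python3
# extract_wa.py -- hypothetical sketch, not in the repository.
# Print "<volumeId> <WA>" pairs from raw simulator .result files by matching the
# per-log summary lines, which look roughly like:
#   " 10: nBlocks: ..., nInvalidBlocks: ...  , garbage prop = 0.150  , segment WA = 1.234567"
import re
import sys

SUMMARY = re.compile(r"^\s*(\S+): .*segment WA = ([0-9.]+)")

for path in sys.argv[1:]:
    with open(path) as f:
        for line in f:
            m = SUMMARY.search(line)
            if m:
                print(m.group(1), m.group(2))

# Example invocation (paths follow run_exp2_segsize.sh above):
#   python3 extract_wa.py results/exp2/*/SepBIT_256m.result
```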
-------------------------------------------------------------------------------- /trace_replay/scripts/run_exp3_gp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Varying the overall GP for triggering GC 3 | 4 | export propertyPath="./etc/ali_property.txt" 5 | export oraclePath="./traces/oracles/" 6 | 7 | schemes="NoSep SepGC DAC SFS MultiLog ETI MultiQueue SFR FADaC WARCIP SepBIT FK" 8 | for scheme in $schemes 9 | do 10 | for groupId in {1..30} 11 | do 12 | selection="CostBenefit" 13 | for gp in 0.10 0.20 0.25 14 | do 15 | options="${gp} 536870912 1 6" 16 | bash base.sh ${scheme} ${selection} $options \ 17 | ./etc/ali_groups/group${groupId} \ 18 | tmp/ \ 19 | ./results/exp3/${groupId}/${scheme}_${gp}.result 20 | done 21 | done 22 | done 23 | 24 | cd ../results 25 | for scheme in $schemes 26 | do 27 | for gp in "0.10" "0.20" "0.25" 28 | do 29 | for groupId in {1..30} 30 | do 31 | mkdir exp3/${scheme} 32 | ag "segment WA" exp3/${groupId}/${scheme}_${gp}.result > exp3/${scheme}/${groupId}_${gp}.result 33 | awk '{print $2$16}' exp3/${scheme}/${groupId}_${gp}.result > tmp; mv tmp exp3/${scheme}/${groupId}_${gp} 34 | done 35 | cat exp3/${scheme}/*_${gp} > exp3/${scheme}_${gp} 36 | done 37 | done 38 | 39 | echo "Please check the results in results/exp3 for the WAs of each scheme" 40 | -------------------------------------------------------------------------------- /trace_replay/scripts/run_exp4_predictability.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Extract per-segment GP out of Exp1 results for predictability analysis 3 | # The extracted result is in the form of logid, array of [gps]; numeric analysis needs further hand-written scripts 4 | # Hint: Python can parse array of [gps] using ast.literal_eval 5 | 6 | schemes="NoSep SepGC DAC WARCIP SepBIT" 7 | cd ../results 8 | 9 | mkdir exp4 10 | for scheme in $schemes 11 | do 12 | python3 obtain_removed_seg.py exp1/*/${scheme}_CostBenefit.result > exp4/${scheme} 13 | done 14 | -------------------------------------------------------------------------------- /trace_replay/scripts/run_exp5_micro.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Microbenchmark that examines UW and GW 3 | export propertyPath="./etc/ali_property.txt"; export oraclePath="./traces/oracles/" 4 | schemes="UW GW" 5 | options="0.15 536870912 1 6" 6 | for scheme in $schemes 7 | do 8 | for groupId in {1..30} 9 | do 10 | selection="CostBenefit" 11 | bash base.sh ${scheme} ${selection} $options \ 12 | ./etc/ali_groups/group${groupId} \ 13 | tmp/ \ 14 | ./results/exp5/${groupId}/${scheme}_${selection}.result 15 | done 16 | done 17 | 18 | cd ../results 19 | for scheme in $schemes 20 | do 21 | for selection in "CostBenefit"; do 22 | for groupId in {1..30} 23 | do 24 | mkdir exp5/${scheme} 25 | ag "segment WA" exp5/${groupId}/${scheme}_${selection}.result > exp5/${scheme}/${groupId}_${selection}.result 26 | awk '{print $2$16}' exp5/${scheme}/${groupId}_${selection}.result > tmp; mv tmp exp5/${scheme}/${groupId}_${selection} 27 | done 28 | cat exp5/${scheme}/*_${selection} > exp5/${scheme}_${selection} 29 | done 30 | done 31 | 32 | echo "Please check the results in results/exp5 for the WAs of each scheme" 33 | -------------------------------------------------------------------------------- /trace_replay/scripts/run_exp6_tencent.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Run Tencent Cloud traces 3 | 4 | export propertyPath="./etc/tencent_property.txt" 5 | 
export oraclePath="./tencent_traces/oracles/" 6 | 7 | schemes="NoSep SepGC DAC SFS MultiLog ETI MultiQueue SFR FADaC WARCIP SepBIT FK" 8 | options="0.15 536870912 1 6" 9 | 10 | for scheme in $schemes 11 | do 12 | echo "Running ${scheme}" 13 | for groupId in {1..20} 14 | do 15 | for selection in "CostBenefit" 16 | do 17 | bash base.sh ${scheme} ${selection} $options \ 18 | ./etc/tencent_groups/group${groupId} \ 19 | tmp/ \ 20 | ./results/exp6/${groupId}/${scheme}_${selection}.result 21 | done 22 | done 23 | done 24 | 25 | cd ../results 26 | for scheme in $schemes 27 | do 28 | for selection in "CostBenefit" 29 | do 30 | for groupId in {1..20} 31 | do 32 | mkdir exp6/${scheme} 33 | ag "segment WA" exp6/${groupId}/${scheme}_${selection}.result > exp6/${scheme}/${groupId}_${selection}.result 34 | awk '{print $2$16}' exp6/${scheme}/${groupId}_${selection}.result > tmp; mv tmp exp6/${scheme}/${groupId}_${selection} 35 | done 36 | cat exp6/${scheme}/*_${selection} > exp6/${scheme}_${selection} 37 | done 38 | done 39 | 40 | echo "Please check the results in results/exp6 for the WAs of each scheme" 41 | -------------------------------------------------------------------------------- /trace_replay/scripts/run_exp7_skewness.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Run the experiments of skewness (synthetic) 3 | # For the per-volume skewness in the Alibaba Cloud traces, please refer to **analyze_script/run_exp5_hot20.sh** 4 | 5 | export propertyPath="./etc/synthetic_property.txt" 6 | export oraclePath="./synthetic_traces/oracles/" 7 | 8 | schemes="NoSep SepGC DAC SFS MultiLog ETI MultiQueue SFR FADaC WARCIP SepBIT FK" 9 | options="0.15 536870912 1 6" 10 | for scheme in $schemes 11 | do 12 | echo "Running ${scheme}" 13 | for selection in "Greedy" 14 | do 15 | bash base.sh ${scheme} ${selection} $options \ 16 | ./etc/synthetic_groups/all \ 17 | tmp/ \ 18 | ./results/exp7/${groupId}/${scheme}_${selection}.result 19 | done 20 | done 21 | 22 | cd ../results 23 | for scheme in $schemes 24 | do 25 | for selection in "Greedy" 26 | do 27 | mkdir exp7/${scheme} 28 | ag "segment WA" exp7/${groupId}/${scheme}_${selection}.result > exp7/${scheme}/${groupId}_${selection}.result 29 | awk '{print $2$16}' exp7/${scheme}/${groupId}_${selection}.result > tmp; mv tmp exp7/${scheme}/${groupId}_${selection} 30 | cat exp7/${scheme}/*_${selection} > exp7/${scheme}_${selection} 31 | done 32 | done 33 | 34 | echo "Please check the results in results/exp7 for the WAs of each scheme" 35 | -------------------------------------------------------------------------------- /trace_replay/scripts/run_exp8_memory.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # The number of LBAs in the FIFO for memory usage analysis 3 | ag "FIFO" ../results/exp1/*/SepBIT_CostBenefit.result > tmp 4 | awk -F':' 'BEGIN {OFS=FS} {$1=$2=$3=""; print substr($0,5)}' tmp > tmp1 5 | 6 | # sort by volume id 7 | sort -t, -k1 -s -n tmp1 > sort_res 8 | 9 | # output the results 10 | mkdir ../results/exp8 11 | python3 process_fifo_len.py sort_res > ../results/exp8/result 12 | 13 | # clean 14 | rm tmp tmp1 sort_res 15 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/BlockContainer.java: -------------------------------------------------------------------------------- 1 | package gcsimulator; 2 | 3 | public interface BlockContainer { 4 | long getnValidBlocks(); 5 | long 
getnInvalidBlocks(); 6 | long getnBlocks(); 7 | double getGarbageProportion(); 8 | double getAge(); 9 | double getLastAccessedTime(); 10 | } 11 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/Configs.java: -------------------------------------------------------------------------------- 1 | package gcsimulator; 2 | 3 | import java.io.*; 4 | import java.util.Scanner; 5 | import java.util.HashMap; 6 | 7 | public class Configs { 8 | public static HashMap volumeWSS = new HashMap<>(); 9 | public static HashMap volumeMaxLba = new HashMap<>(); 10 | 11 | public static String outputPrefix = "./"; 12 | 13 | // Simulator related 14 | public static long BLOCK_SIZE = 4096; 15 | public static long SEGMENT_SIZE = 512L * 1024 * 1024; 16 | 17 | // Garbage Collection Related 18 | public static double LOG_GARBAGE_PROPORTION = 0.15; 19 | public static double SEGMENT_GARBAGE_PROPORTION = 0.15; 20 | 21 | public enum RequestTypeEnum { 22 | ALI 23 | } 24 | 25 | public enum TraceReplayModeEnum { 26 | DEV_LIST 27 | } 28 | 29 | // Global configurations 30 | public static int numOpenSegments = 2; 31 | public static String tracePath = ""; 32 | public static String oraclePath = ""; 33 | public static TraceReplayModeEnum traceReplayMode = TraceReplayModeEnum.DEV_LIST; 34 | public static RequestTypeEnum requestType = RequestTypeEnum.ALI; 35 | public static boolean printGCInfo = true; 36 | public static boolean printSegmentInSummary = true; 37 | 38 | public static String selectionAlgorithm = "Greedy"; 39 | public static String separateMethod = "Null"; 40 | 41 | public static long endNIORequests = 0; 42 | public static long endNLBAs = 0; 43 | 44 | public static long randomSeed = 0; 45 | 46 | public static String indexMapType = "Persistent"; 47 | public static String indexMapCache = "GlobalPageCache"; 48 | 49 | // pickSegMode indicates how many data amount should be collected; 50 | public static int pickSegMode = 3; 51 | public static int pickSegAmountFactor = 1; 52 | 53 | public static long getPickSegmentAmount() { 54 | return SEGMENT_SIZE * pickSegAmountFactor; 55 | } 56 | 57 | public static long getSegmentMaxLen() { 58 | return SEGMENT_SIZE / BLOCK_SIZE; 59 | } 60 | 61 | // public static String propertyPath = ""; 62 | public static void loadProperty(String propertyPath) { 63 | File f = new File(propertyPath); 64 | try { 65 | Scanner input = new Scanner(f); 66 | while (input.hasNextLine()) { 67 | String s = input.nextLine(); 68 | String[] splitData = s.split("\\s+"); 69 | if (splitData.length < 3) { 70 | break; 71 | } 72 | String deviceId = splitData[0]; 73 | Long uniqueLbas = Long.parseLong(splitData[1]); 74 | Long maxLba = Long.parseLong(splitData[2]); 75 | volumeWSS.put(deviceId, uniqueLbas); 76 | volumeMaxLba.put(deviceId, maxLba); 77 | } 78 | } catch (IOException e) { 79 | e.printStackTrace(); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/GCScheduler.java: -------------------------------------------------------------------------------- 1 | package gcsimulator; 2 | 3 | import gcsimulator.Metadata; 4 | import gcsimulator.Log; 5 | import gcsimulator.Segment; 6 | import static gcsimulator.Configs.LOG_GARBAGE_PROPORTION; 7 | 8 | import java.util.*; 9 | 10 | public class GCScheduler { 11 | GCScheduler() { 12 | } 13 | 14 | public static void schedule() { 15 | if (Metadata.lastUpdateLog == null) { 16 | return; 17 | } 18 | 19 | if 
(Metadata.lastUpdateLog.getGarbageProportion() > LOG_GARBAGE_PROPORTION) { 20 | System.out.println("Before GC: " + Metadata.stat.getGarbageProportion()); 21 | Statistics.getInstance().incrementnGc(); 22 | 23 | GCWorker.doJob(Metadata.lastUpdateLog); 24 | 25 | System.out.println("After GC: " + Metadata.stat.getGarbageProportion()); 26 | System.out.println(); 27 | System.out.println(); 28 | } 29 | } 30 | 31 | public static void summary(boolean simple) { 32 | if (simple) { 33 | long nValidBlocksRealTime = 0; 34 | long nSegments = 0; 35 | Collection logs = Metadata.getLogs(); 36 | for (Log log : logs) { 37 | nValidBlocksRealTime += log.getnRealTimeValidBlocks(); 38 | nSegments += log.getSegments().size(); 39 | } 40 | 41 | System.out.println(" valid Blocks: " + nValidBlocksRealTime + ", num of segments: " + nSegments + 42 | " elapsed time (s): " + 43 | 1.0 * (System.currentTimeMillis() - Statistics.getInstance().getRealStartTimestamp().getTime()) / 1000); 44 | return; 45 | } 46 | 47 | Collection logs = Metadata.getLogs(); 48 | for (Log log : logs) { 49 | log.shutdown(); 50 | System.out.print(" " + log.getId().toString() + ": " + log.getStat().toString()); 51 | System.out.format(" , garbage prop = %.3f ", (log.getGarbageProportion())); 52 | System.out.format(" , segment WA = %.6f\n", log.getWA()); 53 | 54 | if (Configs.printSegmentInSummary) { 55 | Collection segments = log.getSegments(); 56 | for (Segment segment : segments) { 57 | System.out.println(" segment gp: " + segment.getGarbageProportion() + " , " + 58 | " age_in_s: " + segment.getAge() / 1000000 + " , lifespan_in_s: " + segment.getLifeSpan() / 1000000); 59 | System.out.println(segment.toString()); 60 | } 61 | } 62 | } 63 | } 64 | 65 | public static long[] getTotalnBlocksStat() { 66 | long nBlocks = 0; 67 | long nInvalidBlocks = 0; 68 | Collection logs = Metadata.getLogs(); 69 | for (Log log : logs) { 70 | nBlocks += log.getStat().nBlocks; 71 | nInvalidBlocks += log.getStat().nInvalidBlocks; 72 | } 73 | 74 | long[] longs = new long[2]; 75 | longs[0] = nBlocks; 76 | longs[1] = nInvalidBlocks; 77 | 78 | return longs; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/GCWorker.java: -------------------------------------------------------------------------------- 1 | package gcsimulator; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Collection; 6 | 7 | import gcsimulator.Configs; 8 | import gcsimulator.Simulator; 9 | import gcsimulator.Log; 10 | import gcsimulator.Segment; 11 | 12 | public class GCWorker { 13 | GCWorker() {} 14 | 15 | public static void doJob(Log log) { 16 | List pickedSegments = 17 | pickSegments(new ArrayList<>(log.getSegments())); 18 | 19 | if (pickedSegments.size() == 0) { 20 | return; 21 | } 22 | 23 | collectSegments(log, pickedSegments); 24 | 25 | if (Configs.printGCInfo && pickedSegments.size() > 0) 26 | System.out.print("rm_segs: " + pickedSegments.size() + "\n"); 27 | } 28 | 29 | public static List pickSegments(List input) { 30 | Simulator.selectionAlgorithm.filterEmptySegments(input) 31 | .filterNonSealedSegments(input) 32 | .filterSmallerThanThreshold(input, Configs.SEGMENT_GARBAGE_PROPORTION) 33 | .pick(input); 34 | 35 | Simulator.selectionAlgorithm.pickFirstK(input, (int) (Configs.getPickSegmentAmount() / Configs.SEGMENT_SIZE)); 36 | 37 | return input; 38 | } 39 | 40 | public static void collectSegments(Log log, List input) { 41 | for (Segment segment : input) { 42 | 
segment.summarize(); 43 | log.separator.collectSegment(segment); 44 | 45 | Collection entries = segment.getValidLbas(); 46 | 47 | for (Long lba : entries) { 48 | log.gcAppend(lba); 49 | } 50 | } 51 | 52 | if (Configs.printGCInfo) { 53 | System.out.println("Previous: " + log.getStat().toString()); 54 | } 55 | 56 | log.removeSegments(input); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/Metadata.java: -------------------------------------------------------------------------------- 1 | package gcsimulator; 2 | 3 | import gcsimulator.indexmap.IndexMap; 4 | import gnu.trove.map.hash.TIntIntHashMap; 5 | import gnu.trove.map.hash.TIntLongHashMap; 6 | import org.apache.commons.math3.util.Pair; 7 | 8 | import java.util.*; 9 | 10 | import static gcsimulator.Configs.*; 11 | 12 | 13 | public class Metadata { 14 | 15 | public static class Stat { 16 | long nRealTimeValidBlocks; 17 | long nBlocks; 18 | long nInvalidBlocks; 19 | 20 | long nBytesWriteToStorage; 21 | long nBytesWriteNormal; 22 | 23 | Stat() { 24 | nBlocks = 0; 25 | nInvalidBlocks = 0; 26 | nBytesWriteToStorage = 0; 27 | nBytesWriteNormal = 0; 28 | } 29 | 30 | @Override 31 | public String toString() { 32 | return "nBlocks: " + nBlocks + ", nInvalidBlocks: " + nInvalidBlocks; 33 | } 34 | 35 | public double getGarbageProportion() { 36 | return (double)nInvalidBlocks / nBlocks; 37 | } 38 | } 39 | 40 | 41 | public static HashMap logs = new HashMap<>(); 42 | public static Log lastUpdateLog = null; 43 | public static Stat stat = new Stat(); 44 | 45 | Metadata(GCScheduler GCScheduler) { 46 | } 47 | 48 | /** 49 | * Normal write: Append a request to the log 50 | * @param logId ID of log 51 | * @param LBA Logical Block Address 52 | */ 53 | static Log write(String logId, long offset, long length, long timestamp) { 54 | Log log = logs.get(logId); 55 | // Add a new segment if it doesn't appear before 56 | if (log == null) { 57 | log = new Log(logId, timestamp); 58 | logs.put(logId, log); 59 | } 60 | 61 | lastUpdateLog = null; 62 | 63 | if (log.appendRequest(offset, length, timestamp) > 0) { 64 | lastUpdateLog = log; 65 | } 66 | 67 | return null; 68 | } 69 | // 70 | static HashMap getLogStats() { // all logId, num of invalid blocks, num of total blocks 71 | HashMap ret = new HashMap<>(); 72 | for (HashMap.Entry segmentEntry : logs.entrySet()) { 73 | ret.put(segmentEntry.getKey(), segmentEntry.getValue().getStat()); 74 | } 75 | return ret; 76 | } 77 | 78 | public static Collection getLogs() { 79 | return logs.values(); 80 | } 81 | 82 | public static IndexMap getIndexMap(String logId) { 83 | return logs.get(logId).getIndexMap(); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/Segment.java: -------------------------------------------------------------------------------- 1 | package gcsimulator; 2 | 3 | import gcsimulator.segment.SegmentMeta; 4 | import org.apache.commons.math3.util.Pair; 5 | 6 | import java.util.Collection; 7 | import java.util.ArrayList; 8 | import java.util.Map; 9 | 10 | import static gcsimulator.Configs.*; 11 | 12 | /* 13 | gcsimulator.Segment is the basic unit of garbage collection 14 | */ 15 | public class Segment implements BlockContainer { 16 | private boolean sealed; 17 | private Log log; 18 | public SegmentMeta meta; 19 | 20 | // Only available when "printGCInfo_WithBlockLastAccess" is enabled. 
21 | // Update distance = next access id - invalidated access id 22 | private long totalUpdateDistanceOfInvalidBlocks = 0; 23 | 24 | public void summarize() { 25 | meta.summarize(); 26 | } 27 | 28 | public Map getValidPairs() { 29 | return meta.getValidPairs(); 30 | } 31 | 32 | public Collection getValidLbas() { 33 | return meta.getValidLbas(); 34 | } 35 | 36 | public long getCreatedAccessId() { 37 | return meta.createdAccessId; 38 | } 39 | 40 | public long getAgeFromFirstInvalidate() { 41 | if (getnInvalidBlocks() == 0) { 42 | return 0; 43 | } else { 44 | return log.accessId - meta.firstEvitcionAccessId; 45 | } 46 | } 47 | 48 | 49 | @Override 50 | public long getnValidBlocks() { 51 | return meta.nBlocks - meta.nInvalidBlocks; 52 | } 53 | 54 | @Override 55 | public long getnInvalidBlocks() { 56 | return meta.nInvalidBlocks; 57 | } 58 | 59 | @Override 60 | public long getnBlocks() { 61 | return meta.nBlocks; 62 | } 63 | 64 | @Override 65 | public double getGarbageProportion() { 66 | return (double)(meta.nInvalidBlocks) / meta.nBlocks; 67 | } 68 | 69 | @Override 70 | public double getAge() { 71 | return Simulator.globalTimestampInUs - meta.timestampModifiedInUs; 72 | } 73 | 74 | @Override 75 | public double getLastAccessedTime() { 76 | return meta.timestampModifiedInUs; 77 | } 78 | 79 | double getLifeSpan() { 80 | return meta.timestampModifiedInUs - meta.timestampCreatedInUs; 81 | } 82 | 83 | void addUpdateDistanceOfInvalidBlocks(long updateDistance) { 84 | totalUpdateDistanceOfInvalidBlocks += updateDistance; 85 | } 86 | 87 | double getAvgUpdateDistanceOfInvalidBlocks() { 88 | return (getnInvalidBlocks() == 0) ? 0.00 : 1.0 * totalUpdateDistanceOfInvalidBlocks / getnInvalidBlocks(); 89 | } 90 | 91 | public Segment(long timestamp, Log log) { 92 | this.log = log; 93 | meta = new SegmentMeta(log.getId().toString(), log.getNewSegmentId(), timestamp); 94 | meta.createdAccessId = log.accessId; 95 | } 96 | 97 | public void setTemperature(int t) { 98 | meta.temperature = t; 99 | } 100 | 101 | public long getSegmentId() { 102 | return meta.segmentId; 103 | } 104 | 105 | public boolean isSealed() { 106 | return meta.isSealed; 107 | } 108 | 109 | public void seal() { 110 | if (!sealed) { 111 | sealed = true; 112 | meta.seal(); 113 | meta.sealedAccessId = log.accessId; 114 | } 115 | } 116 | public Log getLog() { 117 | return this.log; 118 | } 119 | 120 | long append(long LBA, long timestamp) { 121 | long offset = meta.append(LBA, timestamp); 122 | return offset; 123 | } 124 | 125 | public void appendInfo(long[] o) { 126 | meta.appendInfo(o); 127 | } 128 | 129 | void invalidate(long offset) { 130 | if (getnInvalidBlocks() == 128) { 131 | meta.firstEvitcionAccessId = log.accessId; 132 | } 133 | meta.invalidate(offset); 134 | } 135 | 136 | void destroy() { 137 | meta.destroy(); 138 | } 139 | 140 | 141 | @Override 142 | public String toString() { 143 | return meta.toString() + ", " + (isSealed() ? 
"sealed" : "not sealed"); 144 | } 145 | 146 | } 147 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/Statistics.java: -------------------------------------------------------------------------------- 1 | package gcsimulator; 2 | 3 | import java.sql.Timestamp; 4 | 5 | public class Statistics { 6 | private static Statistics instance = null; 7 | private long nIORequests; 8 | private long nGC; 9 | 10 | private long nLBAs; 11 | private long nTotalInvalidBlocks = 0; 12 | private long nTotalBlocks = 1; 13 | 14 | private double accumulatedGarbageProportion = 0.0; 15 | private long nRemovedSegments = 0; 16 | 17 | private Timestamp realStartTimestamp; 18 | 19 | public void incrementnIORequests() { 20 | nIORequests++; 21 | } 22 | 23 | public void incrementnGc() { 24 | nGC++; 25 | } 26 | 27 | public void addnLBAs(long nLBAs) { 28 | this.nLBAs += nLBAs; 29 | } 30 | 31 | public long getnIORequests() { 32 | return nIORequests; 33 | } 34 | 35 | public long getnLBAs() { 36 | return nLBAs; 37 | } 38 | 39 | public void setnTotalInvalidBlocks(long nTotalInvalidBlocks) { 40 | this.nTotalInvalidBlocks = nTotalInvalidBlocks; 41 | } 42 | 43 | public void setnTotalBlocks(long nTotalBlocks) { 44 | this.nTotalBlocks = nTotalBlocks; 45 | } 46 | 47 | public double getOverallGarbageProp() { 48 | return 1.0 * nTotalInvalidBlocks / nTotalBlocks; 49 | } 50 | 51 | public void addRemovedSegmentGP(double removedSegmentGP) { 52 | accumulatedGarbageProportion += removedSegmentGP; 53 | nRemovedSegments++; 54 | } 55 | 56 | public Timestamp getRealStartTimestamp() { 57 | return this.realStartTimestamp; 58 | } 59 | 60 | public void start() { 61 | this.realStartTimestamp = new Timestamp(System.currentTimeMillis()); 62 | } 63 | 64 | public void summary() { 65 | System.out.println("SUMMARY: "); 66 | System.out.println(" Requests : " + getnIORequests()); 67 | System.out.println(" nGC : " + nGC); 68 | System.out.println(" LBAs : " + getnLBAs()); 69 | System.out.format (" bytes_to_System: %d\n" + 70 | " bytes_to_Storage: %d\n" + 71 | " ** WA ** : %f\n", 72 | Metadata.stat.nBytesWriteNormal, 73 | Metadata.stat.nBytesWriteToStorage, 74 | Metadata.stat.nBytesWriteToStorage * 1.0 / 75 | (Metadata.stat.nBytesWriteNormal == 0 ? 
1 : 76 | Metadata.stat.nBytesWriteNormal) 77 | ); 78 | System.out.println(" nBlocks: : " + nTotalBlocks); 79 | System.out.println(" nInvalidBlks : " + nTotalInvalidBlocks); 80 | System.out.println(" garb prop : " + getOverallGarbageProp()); 81 | System.out.println(" removed avg gp: " + (1.0 * accumulatedGarbageProportion / nRemovedSegments)); 82 | System.out.println(" "); 83 | 84 | Timestamp currentTime = new Timestamp(System.currentTimeMillis()); 85 | System.out.println("\n" + 86 | " Run time(s) : " + (currentTime.getTime() - realStartTimestamp.getTime()) * 1.0 / 1000); 87 | } 88 | 89 | private Statistics() { 90 | nIORequests = 0; 91 | nLBAs = 0; 92 | } 93 | 94 | public static Statistics getInstance() { 95 | if (instance == null) { 96 | instance = new Statistics(); 97 | } 98 | return instance; 99 | } 100 | 101 | } 102 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/fifo/OnDiskFIFO.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.fifo; 2 | 3 | import gcsimulator.Configs; 4 | 5 | import java.io.FileNotFoundException; 6 | import java.io.IOException; 7 | import java.io.RandomAccessFile; 8 | import java.nio.ByteBuffer; 9 | 10 | import java.util.Arrays; 11 | 12 | public class OnDiskFIFO { 13 | static int globalId = 0; 14 | RandomAccessFile circularBuffer; 15 | ByteBuffer tailArray; 16 | ByteBuffer headArray; 17 | long capacity; 18 | int bufferSize = 4096 * 4; // 16K * 8B = 128KB buffer 19 | long currentLoad = -1L; 20 | long head = 0; 21 | long tail = 0; 22 | 23 | public OnDiskFIFO() { 24 | try { 25 | circularBuffer = new RandomAccessFile(Configs.outputPrefix + "ondiskfifo/fifo_" + globalId, "rw"); 26 | } catch (FileNotFoundException e) { 27 | e.printStackTrace(); 28 | } 29 | globalId += 1; 30 | headArray = ByteBuffer.allocate(bufferSize * Long.BYTES); // Long 31 | tailArray = ByteBuffer.allocate(bufferSize * Long.BYTES); // Long 32 | } 33 | 34 | public void setSize(long size) { 35 | capacity = (size + bufferSize) / bufferSize * bufferSize; // align with buffer size 36 | } 37 | 38 | public void add(long key) { 39 | tailArray.putLong(key); 40 | tail += 1; 41 | if (tail % bufferSize == 0) { 42 | // if current tail buffer is filled, flush it. 43 | try { 44 | circularBuffer.getChannel().position((tail - bufferSize) * 8); 45 | circularBuffer.write(tailArray.array(), 0, bufferSize * 8); 46 | tailArray.clear(); 47 | } catch (IOException e) { 48 | e.printStackTrace(); 49 | } 50 | } 51 | if (tail == capacity) { 52 | tail = 0; 53 | } 54 | } 55 | 56 | public long removeFirst() { 57 | long headBlockId = head / bufferSize; 58 | long tailBlockId = tail / bufferSize; 59 | long value = 0; 60 | if (headBlockId == tailBlockId) { 61 | int pos = tailArray.position(); 62 | // rewind because the content of asLongBuffer will follow the current position 63 | tailArray.rewind(); 64 | value = tailArray.asLongBuffer().get(Math.toIntExact(head % bufferSize)); 65 | tailArray.position(pos); 66 | } else { 67 | // If the values are not in mem, fetch it. 
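// Only one head-side block (headArray, bufferSize longs) is kept in memory at a time;
// currentLoad records which block of the backing file it holds, so the file is re-read
// only when the head pointer crosses into a different block.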
68 | if (currentLoad != headBlockId) { 69 | try { 70 | circularBuffer.getChannel().position(head / bufferSize * bufferSize * 8); 71 | circularBuffer.read(headArray.array(), 0, bufferSize * 8); 72 | headArray.rewind(); 73 | currentLoad = headBlockId; 74 | } catch (IOException e) { 75 | e.printStackTrace(); 76 | } 77 | } 78 | value = headArray.asLongBuffer().get(Math.toIntExact(head % bufferSize)); 79 | } 80 | head += 1; 81 | if (head == capacity) { 82 | head = 0; 83 | } 84 | return value; 85 | } 86 | 87 | public long size() { 88 | return (tail + capacity - head) % capacity; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/indexmap/IndexMap.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.indexmap; 2 | 3 | import org.apache.commons.math3.util.Pair; 4 | 5 | import java.lang.reflect.Method; 6 | import java.util.Collection; 7 | 8 | public interface IndexMap { 9 | void setSize(long size); 10 | void put(long key, long value); 11 | long get(long key); 12 | boolean containsKey(long key); 13 | long size(); 14 | } 15 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/indexmap/IndexMapFactory.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.indexmap; 2 | 3 | public class IndexMapFactory { 4 | public static IndexMap getInstance(String type, String cache) { 5 | IndexMap map = null; 6 | switch (type) { 7 | case "Persistent": 8 | map = new PersistentIndexMap(); 9 | break; 10 | case "PageTable": 11 | map = new PageTableIndexMap(); 12 | break; 13 | case "PureInMem": 14 | map = new PureInMemIndexMap(); 15 | break; 16 | default: 17 | System.out.println("Wrong type for IndexMap"); 18 | break; 19 | } 20 | 21 | switch (cache) { 22 | case "Null": 23 | return map; 24 | case "GlobalPageCache": 25 | return new IndexMapWithGlobalPageCache(map); 26 | default: 27 | return map; 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/indexmap/LargeArray.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.indexmap; 2 | 3 | interface LargeArray { 4 | void put(long index, long value); 5 | long get(long index); 6 | } 7 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/indexmap/LargeNativeArray.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.indexmap; 2 | 3 | import java.nio.ByteBuffer; 4 | import java.nio.LongBuffer; 5 | import java.util.Arrays; 6 | import java.lang.ArrayIndexOutOfBoundsException; 7 | 8 | class LargeNativeArray implements LargeArray { 9 | long size; 10 | long[][] buffers; 11 | final int ARRAY_MAX_SIZE = 128 * 1024 * 1024; 12 | 13 | public LargeNativeArray(long size) { 14 | long nArrays = (size + ARRAY_MAX_SIZE - 1) / ARRAY_MAX_SIZE; 15 | long lastArraySize = size % ARRAY_MAX_SIZE; 16 | if (lastArraySize == 0) lastArraySize = ARRAY_MAX_SIZE; 17 | buffers = new long[Math.toIntExact(nArrays)][]; 18 | 19 | for (int i = 0; i < nArrays; ++i) { 20 | if (i == nArrays - 1) buffers[i] = new long[Math.toIntExact(lastArraySize)]; 21 | else buffers[i] = new long[ARRAY_MAX_SIZE]; 22 | Arrays.fill(buffers[i], -1L); 23 | } 24 | } 25 | 26 | @Override 27 | public 
void put(long index, long value) { 28 | long arrayId = index / ARRAY_MAX_SIZE; 29 | index = index % ARRAY_MAX_SIZE; 30 | 31 | buffers[Math.toIntExact(arrayId)][Math.toIntExact(index)] = value; 32 | } 33 | 34 | @Override 35 | public long get(long index) { 36 | long arrayId = index / ARRAY_MAX_SIZE; 37 | index = index % ARRAY_MAX_SIZE; 38 | 39 | long value = 0; 40 | try { 41 | value = buffers[Math.toIntExact(arrayId)][Math.toIntExact(index)]; 42 | } catch (ArrayIndexOutOfBoundsException e) { 43 | System.out.println(buffers.length); 44 | System.out.println("" + index + "" + arrayId + buffers[Math.toIntExact(arrayId)].length); 45 | throw e; 46 | } 47 | return value; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/indexmap/Page.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.indexmap; 2 | public class Page { 3 | long[] entries; 4 | }; 5 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/indexmap/PageTableIndexMap.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.indexmap; 2 | 3 | import org.apache.commons.math3.util.Pair; 4 | 5 | import java.lang.reflect.Method; 6 | import java.util.Collection; 7 | 8 | public class PageTableIndexMap implements IndexMap { 9 | 10 | class PageDirectory { 11 | T[] entries; 12 | } 13 | 14 | PageDirectory directory; 15 | int nLevels = 1; 16 | int nEntriesPerPageTable = 2048; 17 | long size; 18 | 19 | public PageTableIndexMap() { 20 | 21 | } 22 | 23 | PageDirectory createPageTable(boolean isDirectory) { 24 | PageDirectory pd = new PageDirectory<>(); 25 | if (isDirectory) { 26 | pd.entries = new PageDirectory[nEntriesPerPageTable]; 27 | } else { 28 | pd.entries = new Long[nEntriesPerPageTable]; 29 | } 30 | return pd; 31 | } 32 | 33 | @Override 34 | public void setSize(long size) { 35 | this.size = size; 36 | while (size > nEntriesPerPageTable) { 37 | size /= nEntriesPerPageTable; 38 | this.nLevels += 1; 39 | } 40 | directory = new PageDirectory<>(); 41 | directory.entries = new PageDirectory[(int) size]; 42 | } 43 | 44 | @Override 45 | public void put(long key, long value) { 46 | int[] offsets = getOffsets(key); 47 | PageDirectory pd = directory; 48 | for (int i = 0; i < nLevels; ++i) { 49 | if (i != nLevels - 1) { 50 | if (pd.entries[offsets[i]] == null) { 51 | pd.entries[offsets[i]] = createPageTable(i != nLevels - 2); 52 | } 53 | pd = (PageDirectory) pd.entries[offsets[i]]; 54 | } else { 55 | pd.entries[offsets[i]] = value; 56 | } 57 | } 58 | } 59 | 60 | @Override 61 | public long get(long key) { 62 | int[] offsets = getOffsets(key); 63 | PageDirectory pd = directory; 64 | Long value = null; 65 | for (int i = 0; i < nLevels; ++i) { 66 | if (i == nLevels - 1) { 67 | value = (Long) pd.entries[offsets[i]]; 68 | } else { 69 | pd = (PageDirectory) pd.entries[offsets[i]]; 70 | } 71 | } 72 | return value; 73 | } 74 | 75 | @Override 76 | public boolean containsKey(long key) { 77 | int[] offsets = getOffsets(key); 78 | PageDirectory pd = directory; 79 | for (int i = 0; i < nLevels; ++i) { 80 | if (pd.entries[offsets[i]] == null) 81 | return false; 82 | if (i != nLevels - 1) 83 | pd = (PageDirectory) pd.entries[offsets[i]]; 84 | } 85 | 86 | return true; 87 | } 88 | 89 | @Override 90 | public long size() { 91 | return 0; 92 | } 93 | 94 | private int[] getOffsets(long key) { 95 | int[] offsets = 
new int[nLevels]; 96 | for (int i = nLevels - 1; i >= 0; --i) { 97 | offsets[i] = (int) (key % nEntriesPerPageTable); 98 | key /= nEntriesPerPageTable; 99 | } 100 | 101 | return offsets; 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/indexmap/PersistentIndexMap.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.indexmap; 2 | 3 | import gcsimulator.Configs; 4 | import org.apache.commons.math3.util.Pair; 5 | 6 | import java.io.FileNotFoundException; 7 | import java.io.IOException; 8 | import java.io.RandomAccessFile; 9 | import java.lang.reflect.Method; 10 | import java.nio.ByteBuffer; 11 | import java.util.ArrayList; 12 | import java.util.Collection; 13 | 14 | /* 15 | This class implements an on-disk index data structure 16 | */ 17 | public class PersistentIndexMap implements IndexMap { 18 | static int globalId = 0; 19 | RandomAccessFile map; 20 | ByteBuffer buffer; 21 | ByteBuffer pageBuffer; 22 | long maxSize; 23 | 24 | public PersistentIndexMap() { 25 | try { 26 | map = new RandomAccessFile(Configs.outputPrefix + "indexmap/persistent_indexmap_" + globalId, "rw"); 27 | } catch (FileNotFoundException e) { 28 | e.printStackTrace(); 29 | } 30 | globalId += 1; 31 | buffer = ByteBuffer.allocate(Long.BYTES); // Long 32 | pageBuffer = ByteBuffer.allocate(512 * Long.BYTES); // Long 33 | } 34 | 35 | public void setSize(long size) { 36 | maxSize = size; 37 | try { 38 | ByteBuffer value = ByteBuffer.allocate(1024 * 1024 * 8); 39 | for (int i = 0; i < 1024 * 1024; ++i) { 40 | value.putLong(-1L); 41 | } 42 | for (int i = 0; i < size; i += 1024 * 1024) { 43 | map.write(value.array(), 0, (int)1024 * 1024 * 8); 44 | } 45 | } catch (IOException e) { 46 | e.printStackTrace(); 47 | } 48 | } 49 | 50 | @Override 51 | public void put(long key, long value) { 52 | buffer.putLong(0, value); 53 | try { 54 | map.getChannel().position(key * 8); 55 | map.write(buffer.array(), 0, 8); 56 | } catch (IOException e) { 57 | e.printStackTrace(); 58 | } 59 | } 60 | 61 | @Override 62 | public boolean containsKey(long key) { 63 | try { 64 | buffer.rewind(); 65 | map.getChannel().position(key * 8); 66 | map.read(buffer.array(), 0, 8); 67 | } catch (IOException e) { 68 | e.printStackTrace(); 69 | } 70 | return (buffer.asLongBuffer().get(0) != -1L); 71 | } 72 | 73 | @Override 74 | public long get(long key) { 75 | try { 76 | map.getChannel().position(key * 8); 77 | map.read(buffer.array(), 0, 8); 78 | } catch (IOException e) { 79 | e.printStackTrace(); 80 | } 81 | pageBuffer.rewind(); 82 | return buffer.asLongBuffer().get(0); 83 | } 84 | 85 | @Override 86 | public long size() { 87 | return 0; 88 | } 89 | 90 | public void putBulk(long key, long[] values) { 91 | pageBuffer.rewind(); 92 | pageBuffer.asLongBuffer().put(values); 93 | try { 94 | map.getChannel().position(key * Long.BYTES); 95 | map.write(pageBuffer.array(), 0, values.length * Long.BYTES); 96 | } catch (IOException e) { 97 | e.printStackTrace(); 98 | } 99 | } 100 | 101 | public long[] getBulk(long key, int length, long[] values) { 102 | try { 103 | map.getChannel().position(key * Long.BYTES); 104 | map.read(pageBuffer.array(), 0, length * Long.BYTES); 105 | } catch (IOException e) { 106 | e.printStackTrace(); 107 | } 108 | pageBuffer.rewind(); 109 | pageBuffer.asLongBuffer().get(values); 110 | 111 | return values; 112 | } 113 | 114 | } 115 | 
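For reference, a minimal usage sketch (not part of the repository) of how these index maps are obtained through IndexMapFactory and driven via the IndexMap interface; the "PureInMem" and "Null" arguments mirror the switch cases in IndexMapFactory, and unset slots read back as -1 by convention:

```java
import gcsimulator.indexmap.IndexMap;
import gcsimulator.indexmap.IndexMapFactory;

public class IndexMapUsageSketch {
    public static void main(String[] args) {
        // "PureInMem" keeps the whole mapping in an in-memory LargeNativeArray; "Null" means no page cache.
        IndexMap map = IndexMapFactory.getInstance("PureInMem", "Null");
        map.setSize(1L << 20);                 // number of LBA slots; all slots start at -1

        map.put(42L, 7L);                      // LBA 42 -> segment-local offset 7
        long offset = map.containsKey(42L) ? map.get(42L) : -1L;
        System.out.println("LBA 42 maps to offset " + offset);   // prints 7
    }
}
```

The "Persistent" and "PageTable" variants are drop-in replacements behind the same interface; "Persistent" additionally expects a writable indexmap/ directory under Configs.outputPrefix for its backing file.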
-------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/indexmap/PureInMemIndexMap.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.indexmap; 2 | 3 | import gcsimulator.Configs; 4 | import org.apache.commons.math3.util.Pair; 5 | 6 | import java.io.FileNotFoundException; 7 | import java.io.IOException; 8 | import java.io.RandomAccessFile; 9 | import java.lang.reflect.Method; 10 | import java.nio.ByteBuffer; 11 | import java.util.ArrayList; 12 | import java.util.Collection; 13 | 14 | /* 15 | */ 16 | public class PureInMemIndexMap implements IndexMap { 17 | private LargeArray map; 18 | private long size; 19 | 20 | public PureInMemIndexMap() {} 21 | 22 | public void setSize(long size) { 23 | this.size = size; 24 | map = new LargeNativeArray(size); 25 | } 26 | 27 | @Override 28 | public void put(long key, long value) { 29 | map.put(key, value); 30 | } 31 | 32 | @Override 33 | public long get(long key) { 34 | return map.get(key); 35 | } 36 | 37 | @Override 38 | public boolean containsKey(long key) { 39 | return (map.get(key) != -1L); 40 | } 41 | 42 | @Override 43 | public long size() { 44 | return 0; 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/iorequest/AliIORequest.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.iorequest; 2 | 3 | import gcsimulator.Log; 4 | 5 | import static gcsimulator.Configs.BLOCK_SIZE; 6 | 7 | public class AliIORequest extends IORequest { 8 | private void convertToRequest(String requestString) { 9 | String[] strs = requestString.split(","); 10 | this.logId = strs[0]; 11 | this.write = strs[1].equals("W"); 12 | this.LBA = Long.parseLong(strs[2]); 13 | this.length = Long.parseLong(strs[3]); 14 | this.timestampInUs = Long.parseLong(strs[4]); 15 | 16 | long begin = this.LBA; 17 | long end = this.LBA + length; 18 | this.LBA = begin / BLOCK_SIZE; 19 | this.length = ((end + BLOCK_SIZE - 1) / BLOCK_SIZE - this.LBA); 20 | } 21 | 22 | public AliIORequest() { 23 | this.timestampInUs = 0; 24 | } 25 | 26 | public AliIORequest(String requestString) { 27 | this.timestampInUs = 0; 28 | init(requestString); 29 | } 30 | 31 | public void init(String requestString) { 32 | this.str = requestString; 33 | setFromRequestString(requestString); 34 | } 35 | 36 | public void setFromRequestString(String requestString) { 37 | convertToRequest(requestString); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/iorequest/IORequest.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.iorequest; 2 | 3 | import gcsimulator.Log; 4 | 5 | public abstract class IORequest { 6 | public String logId = ""; 7 | public long timestampInUs; 8 | public String str = ""; 9 | long LBA; 10 | long length; 11 | boolean write; 12 | 13 | IORequest() {} 14 | 15 | public abstract void setFromRequestString(String input); 16 | 17 | @Override 18 | public String toString() { 19 | return "IORequest{" + 20 | "logId=" + logId + 21 | ", LBA=" + LBA + 22 | ", length=" + length + 23 | ", time=" + timestampInUs + 24 | ", r/w=" + (write ? 
"W" : "R"); 25 | } 26 | 27 | public boolean isWrite() { 28 | return write; 29 | } 30 | 31 | public long getLBA() { 32 | return LBA; 33 | } 34 | 35 | public long getLength() { 36 | return length; 37 | } 38 | 39 | public String getLogId() { 40 | return logId; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/placement/DAC.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.placement; 2 | 3 | import gcsimulator.Log; 4 | import gcsimulator.Configs; 5 | import gcsimulator.indexmap.IndexMapFactory; 6 | import gcsimulator.indexmap.IndexMap; 7 | 8 | public class DAC extends Separator { 9 | public IndexMap levels; 10 | 11 | public DAC() {} 12 | 13 | @Override 14 | public void init(Log log, int numOpenSegments) { 15 | super.init(log, numOpenSegments); 16 | 17 | levels = IndexMapFactory.getInstance(Configs.indexMapType, Configs.indexMapCache); 18 | levels.setSize(Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024); 19 | } 20 | 21 | @Override 22 | public void append(long lba) { 23 | if (!levels.containsKey(lba)) { 24 | levels.put(lba, 0); 25 | } else { 26 | long level = levels.get(lba); 27 | if (level < numOpenSegments - 1) levels.put(lba, level + 1); 28 | } 29 | } 30 | 31 | @Override 32 | public void gcAppend(long lba) { 33 | long level = levels.get(lba); 34 | if (level > 0) levels.put(lba, level - 1); 35 | } 36 | 37 | @Override 38 | public int classify(boolean isGcAppend, long lba) { 39 | return Math.toIntExact(levels.get(lba)); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/placement/FADaC.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.placement; 2 | 3 | import gcsimulator.indexmap.*; 4 | import gcsimulator.Log; 5 | import gcsimulator.Segment; 6 | import gcsimulator.Configs; 7 | import gcsimulator.Simulator; 8 | 9 | public class FADaC extends Separator { 10 | public IndexMap writeFrequency; 11 | double fadingAvg = 0.0; 12 | 13 | public FADaC() {} 14 | 15 | public void init(Log log, int numOpenSegments) { 16 | super.init(log, numOpenSegments); 17 | 18 | writeFrequency = IndexMapFactory.getInstance(Configs.indexMapType, Configs.indexMapCache); 19 | writeFrequency.setSize(Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024); 20 | } 21 | 22 | @Override 23 | public void append(long lba) { 24 | if (writeFrequency.containsKey(lba)) { 25 | writeFrequency.put(lba, writeFrequency.get(lba) + 1); 26 | } else { 27 | writeFrequency.put(lba, 1L); 28 | } 29 | } 30 | 31 | @Override 32 | public void gcAppend(long lba) { 33 | if (writeFrequency.containsKey(lba)) { 34 | if (writeFrequency.get(lba) > 0) { 35 | writeFrequency.put(lba, writeFrequency.get(lba) - 1); 36 | } 37 | } 38 | } 39 | 40 | @Override 41 | public void addRequest(long lba, long length) { 42 | long nValidBlocks = log.getnValidBlocks(); 43 | if (nValidBlocks == 0) return; 44 | fadingAvg = fadingAvg + (double)length / nValidBlocks - (double)fadingAvg / nValidBlocks; 45 | } 46 | 47 | @Override 48 | public int classify(boolean isGcAppend, long lba) { 49 | double base = fadingAvg; 50 | int level = 0; 51 | long count = 0; 52 | if (writeFrequency.containsKey(lba)) { 53 | count = writeFrequency.get(lba); 54 | } 55 | if (base != 0.0) { 56 | while (count > base) { 57 | base *= 10; 58 | level += 1; 59 | } 60 | } 61 | if (level >= numOpenSegments) { 62 | level = 
numOpenSegments - 1; 63 | } 64 | 65 | return level; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/placement/FK.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.placement; 2 | 3 | import gcsimulator.Log; 4 | import gcsimulator.Configs; 5 | import gcsimulator.indexmap.IndexMap; 6 | import gcsimulator.indexmap.IndexMapFactory; 7 | 8 | import java.util.Scanner; 9 | import java.math.BigInteger; 10 | import java.io.File; 11 | import java.io.FileNotFoundException; 12 | 13 | public class FK extends Separator { 14 | public IndexMap lastAccess; 15 | public IndexMap lifespans; 16 | public Scanner reader; 17 | 18 | public FK() {} 19 | 20 | @Override 21 | public void init(Log log, int numOpenSegments) { 22 | super.init(log, numOpenSegments); 23 | 24 | lastAccess = IndexMapFactory.getInstance(Configs.indexMapType, Configs.indexMapCache); 25 | lastAccess.setSize((Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024)); 26 | lifespans = IndexMapFactory.getInstance(Configs.indexMapType, Configs.indexMapCache); 27 | lifespans.setSize((Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024)); 28 | 29 | try { 30 | reader = new Scanner(new File(Configs.oraclePath + "/" + log.getId() + ".oracle")); 31 | } catch (FileNotFoundException e) { 32 | e.printStackTrace(); 33 | } 34 | } 35 | 36 | @Override 37 | public void append(long lba) { 38 | BigInteger tmp = reader.nextBigInteger(10); 39 | long lifespan = 0; 40 | if (tmp.compareTo(BigInteger.valueOf(Long.MAX_VALUE)) == 1) { 41 | lifespan = -1; 42 | } else { 43 | lifespan = tmp.longValue(); 44 | } 45 | if (lifespan == -1) lifespan = Long.MAX_VALUE - log.accessId; 46 | 47 | lastAccess.put(lba, log.accessId); 48 | lifespans.put(lba, lifespan); 49 | } 50 | 51 | @Override 52 | public int classify(boolean isGcAppend, long lba) { 53 | long remainingLifespan = 0; 54 | long level = 0; 55 | 56 | remainingLifespan = (lastAccess.get(lba) + lifespans.get(lba)) - log.accessId; 57 | level = remainingLifespan / Configs.getSegmentMaxLen(); 58 | if (level >= numOpenSegments) level = numOpenSegments - 1; 59 | 60 | return (int)level; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/placement/GW.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.placement; 2 | 3 | import gcsimulator.Log; 4 | import gcsimulator.Configs; 5 | import gcsimulator.Simulator; 6 | import gcsimulator.Segment; 7 | import gcsimulator.indexmap.IndexMap; 8 | import gcsimulator.indexmap.IndexMapFactory; 9 | 10 | import java.util.Map; 11 | import java.util.ArrayList; 12 | import java.util.Collections; 13 | import java.util.Random; 14 | 15 | public class GW extends Separator { 16 | public IndexMap lastAccess; 17 | 18 | public long currentUd = 0; 19 | public int currentCollectedTemp = 0; 20 | 21 | public double threshold = Double.MAX_VALUE; 22 | public double totLifespan = 0; 23 | public int numCollectedSegs = 0; 24 | 25 | public GW() {} 26 | 27 | @Override 28 | public void init(Log log, int numOpenSegments) { 29 | super.init(log, numOpenSegments); 30 | 31 | lastAccess = IndexMapFactory.getInstance(Configs.indexMapType, Configs.indexMapCache); 32 | lastAccess.setSize((Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024)); 33 | } 34 | 35 | @Override 36 | public void collectSegment(Segment segment) { 37 | int updateThreshold 
= 16; 38 | super.collectSegment(segment); 39 | 40 | int temp = segment.meta.temperature; 41 | currentCollectedTemp = temp; 42 | 43 | if (temp == 0) { 44 | totLifespan += (double)log.accessId - segment.meta.createdAccessId; 45 | numCollectedSegs += 1; 46 | 47 | if (numCollectedSegs == updateThreshold) { 48 | threshold = totLifespan / updateThreshold; 49 | numCollectedSegs = 0; 50 | totLifespan = 0; 51 | System.out.println("Log id: " + log.getId() + ", current threshold: " + threshold); 52 | } 53 | } 54 | } 55 | 56 | @Override 57 | public void append(long lba) { 58 | if (lastAccess.containsKey(lba)) { 59 | currentUd = log.accessId - lastAccess.get(lba); 60 | } else { 61 | currentUd = Long.MAX_VALUE; 62 | } 63 | 64 | lastAccess.put(lba, log.accessId); 65 | } 66 | 67 | @Override 68 | public void gcAppend(long lba) { 69 | currentUd = log.accessId - lastAccess.get(lba); 70 | } 71 | 72 | @Override 73 | public int classify(boolean isGcAppend, long lba) { 74 | int level = 0; 75 | 76 | if (!isGcAppend) { 77 | level = 0; 78 | } else { 79 | double age = log.accessId - lastAccess.get(lba); 80 | double base = threshold * 4; 81 | 82 | level = 1; 83 | while (age >= base && level < numOpenSegments - 1) { 84 | base *= 4; 85 | level += 1; 86 | } 87 | } 88 | nValidBlocks[level] += 1; 89 | nTotalBlocks[level] += 1; 90 | 91 | return level; 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/placement/MultiLog.java: -------------------------------------------------------------------------------- 1 | // MultiLog stoica.vldb13 2 | package gcsimulator.placement; 3 | 4 | import gcsimulator.indexmap.*; 5 | import gcsimulator.Log; 6 | import gcsimulator.Segment; 7 | import gcsimulator.Configs; 8 | import gcsimulator.Simulator; 9 | 10 | import java.util.Random; 11 | 12 | public class MultiLog extends Separator { 13 | public IndexMap levels; 14 | public IndexMap lastAccess; 15 | public int currentMaxLevel = 0; 16 | public int lastUserWriteLevel = 0; 17 | public int lastGcWriteLevel = 0; 18 | 19 | public Random rand; 20 | 21 | public void MultiLog() {} 22 | 23 | @Override 24 | public void init(Log log, int numOpenSegments) { 25 | super.init(log, numOpenSegments); 26 | 27 | levels = IndexMapFactory.getInstance(Configs.indexMapType, Configs.indexMapCache); 28 | levels.setSize(Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024); 29 | lastAccess = IndexMapFactory.getInstance(Configs.indexMapType, Configs.indexMapCache); 30 | lastAccess.setSize(Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024); 31 | 32 | nValidBlocks = new long[numOpenSegments]; 33 | rand = new Random(); 34 | } 35 | 36 | @Override 37 | public void invalidate(Segment segment, long lba) { 38 | if (segment.meta.temperature >= numOpenSegments) { 39 | return; 40 | } 41 | nValidBlocks[segment.meta.temperature] -= 1; 42 | if (nValidBlocks[segment.meta.temperature] == 0) { 43 | System.out.println("Temperature " + segment.meta.temperature + " has no valid blocks!"); 44 | } 45 | } 46 | 47 | @Override 48 | public void append(long lba) { 49 | long ud = 0; 50 | if (lastAccess.containsKey(lba)) { 51 | ud = log.accessId - lastAccess.get(lba); 52 | } else { 53 | ud = Long.MAX_VALUE; 54 | } 55 | lastAccess.put(lba, log.accessId); 56 | 57 | int level = 0; 58 | if (levels.containsKey(lba)) { 59 | level = Math.toIntExact(levels.get(lba)); 60 | double udExpected = nValidBlocks[level] * (1 - (double) 61 | nInvalidBlocksCollected[level] / nBlocksCollected[level]) / 2.0; 62 | double prob 
= (udExpected - ud) / udExpected; 63 | if (prob > 0 && level > 0 && rand.nextDouble() < prob) { 64 | levels.put(lba, level - 1); 65 | } 66 | } else { 67 | levels.put(lba, 0); 68 | } 69 | } 70 | 71 | @Override 72 | public void gcAppend(long lba) { 73 | int level = Math.toIntExact(levels.get(lba)); 74 | if (level == currentMaxLevel) { 75 | if (currentMaxLevel + 1 < numOpenSegments && 76 | nValidBlocks[level] >= Configs.getSegmentMaxLen()) { 77 | level += 1; 78 | currentMaxLevel += 1; 79 | } 80 | } else { 81 | level += 1; 82 | } 83 | levels.put(lba, level); 84 | } 85 | 86 | @Override 87 | public int classify(boolean isGcAppend, long lba) { 88 | int level = Math.toIntExact(levels.get(lba)); 89 | nValidBlocks[level] += 1; 90 | nTotalBlocks[level] += 1; 91 | if (!isGcAppend) { 92 | nBlocksWritten[level] += 1; 93 | } 94 | 95 | if (isGcAppend) { 96 | lastUserWriteLevel = level; 97 | } else { 98 | lastGcWriteLevel = level; 99 | } 100 | 101 | return level; 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/placement/NoSep.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.placement; 2 | 3 | import gcsimulator.Log; 4 | import gcsimulator.Configs; 5 | 6 | public class NoSep extends Separator { 7 | public NoSep() {} 8 | 9 | @Override 10 | public void init(Log log, int numOpenSegments) { 11 | super.init(log, numOpenSegments); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/placement/SFR.java: -------------------------------------------------------------------------------- 1 | /* 2 | * AutoStream.systor17: SFR, sequential, frequency, recency; 3 | * Normal writes are separated into four classes (0 - 3); 4 | * GC writes are appended to class 4; 5 | */ 6 | package gcsimulator.placement; 7 | 8 | import gcsimulator.indexmap.*; 9 | import gcsimulator.Log; 10 | import gcsimulator.Segment; 11 | import gcsimulator.Configs; 12 | import gcsimulator.Simulator; 13 | 14 | import java.util.ArrayList; 15 | import java.util.LinkedHashSet; 16 | 17 | public class SFR extends Separator { 18 | public long chunkSize = 512L; // 2MiB 19 | public IndexMap chunkLastTime; 20 | public IndexMap chunkWrites; 21 | public long prevEndLba = 0L; 22 | public int prevLevel = 0; 23 | public int decayPeriod = 16384; // 64MiB writes 24 | public boolean isSequential = false; 25 | 26 | public SFR() {} 27 | 28 | @Override 29 | public void init(Log log, int numOpenSegments) { 30 | super.init(log, numOpenSegments); 31 | 32 | chunkLastTime = IndexMapFactory.getInstance(Configs.indexMapType, Configs.indexMapCache); 33 | chunkLastTime.setSize((Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024) / chunkSize); 34 | chunkWrites = IndexMapFactory.getInstance(Configs.indexMapType, Configs.indexMapCache); 35 | chunkWrites.setSize((Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024) / chunkSize); 36 | } 37 | 38 | public void promote(long chunkId) { 39 | long numWrites, lastTime; 40 | if (chunkWrites.containsKey(chunkId)) { 41 | numWrites = chunkWrites.get(chunkId); 42 | lastTime = chunkLastTime.get(chunkId); 43 | } else { 44 | numWrites = 0; 45 | lastTime = log.accessId; 46 | } 47 | 48 | numWrites /= Math.pow(2, (log.accessId - lastTime) / decayPeriod); 49 | numWrites += 1; 50 | 51 | chunkWrites.put(chunkId, numWrites); 52 | chunkLastTime.put(chunkId, log.accessId); 53 | } 54 | 55 | @Override 56 | public void 
addRequest(long lba, long length) { 57 | long chunkId = lba / chunkSize; 58 | isSequential = false; 59 | if (lba == prevEndLba) { 60 | isSequential = true; 61 | } 62 | 63 | prevEndLba = lba + length; 64 | for (; chunkId * chunkSize < lba + length; ++chunkId) { 65 | promote(chunkId); 66 | } 67 | } 68 | 69 | @Override 70 | public int classify(boolean isGcAppend, long lba) { 71 | if (isGcAppend) { 72 | return numOpenSegments - 1; 73 | } else { 74 | int level = 0; 75 | if (isSequential) { 76 | level = prevLevel; 77 | } else { 78 | long chunkId = lba / chunkSize; 79 | long numWrites = chunkWrites.get(chunkId); 80 | level = (int)Math.log((double)numWrites); 81 | if (level >= numOpenSegments - 1) { 82 | level = numOpenSegments - 2; 83 | } 84 | prevLevel = level; 85 | } 86 | return level; 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/placement/SepGC.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.placement; 2 | 3 | import gcsimulator.Log; 4 | import gcsimulator.Configs; 5 | 6 | public class SepGC extends Separator { 7 | 8 | public SepGC() {} 9 | 10 | @Override 11 | public void init(Log log, int numOpenSegments) { 12 | super.init(log, numOpenSegments); 13 | } 14 | 15 | @Override 16 | public int classify(boolean isGcAppend, long Lba) { 17 | if (isGcAppend) return 1; 18 | else return 0; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/placement/SeparatorFactory.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.placement; 2 | 3 | import gcsimulator.Log; 4 | 5 | public class SeparatorFactory { 6 | public static Separator getInstance(String name) { 7 | System.out.println("Sep: " + name); 8 | switch (name) { 9 | case "NoSep": 10 | return new NoSep(); 11 | case "SepGC": 12 | return new SepGC(); 13 | case "FADaC": // fadac.systor19 14 | return new FADaC(); 15 | case "WARCIP": // warcip.systor19 16 | return new Warcip(); 17 | case "MultiQueue": // autostream.systor19 (mq) 18 | return new MultiQueue(); 19 | case "SFR": // autostream.systor19 (sfr) 20 | return new SFR(); 21 | case "UW": 22 | return new UW(); 23 | case "GW": 24 | return new GW(); 25 | case "SepBIT": 26 | return new SepBIT(); 27 | case "DAC": 28 | return new DAC(); 29 | case "ETI": 30 | return new ETI(); 31 | case "SFS": 32 | return new SFS(); 33 | case "MultiLog": 34 | return new MultiLog(); 35 | case "FK": 36 | return new FK(); 37 | case "Method1": 38 | return new Method1(); 39 | case "Method2": 40 | return new Method2(); 41 | case "BITGW": 42 | return new BITGW(); 43 | case "BITHalf": 44 | return new BITHalf(); 45 | case "BITDouble": 46 | return new BITDouble(); 47 | } 48 | 49 | return null; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/placement/UW.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.placement; 2 | 3 | import gcsimulator.Log; 4 | import gcsimulator.Configs; 5 | import gcsimulator.Simulator; 6 | import gcsimulator.Segment; 7 | import gcsimulator.indexmap.IndexMap; 8 | import gcsimulator.indexmap.IndexMapFactory; 9 | import gcsimulator.fifo.OnDiskFIFO; 10 | 11 | import java.util.Map; 12 | import java.util.ArrayList; 13 | 14 | public class UW extends Separator { 15 | 
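// UW (user-write separation only): a user write goes to class 0 if this LBA was rewritten
// within min(threshold, FIFO size) accesses ago, and to class 1 otherwise; GC rewrites go to
// class 2. The threshold is refreshed as the mean lifespan of every 16 collected class-0
// segments (see collectSegment()).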
public IndexMap lastAccess; 16 | 17 | // used to illustrate the memory overhead of SepBIT with simulation 18 | public OnDiskFIFO fifo; 19 | public IndexMap lba2fifo; 20 | // 21 | 22 | public long currentUd = 0; 23 | public int currentCollectedTemp = 0; 24 | 25 | public double threshold = Double.MAX_VALUE; 26 | public double totLifespan = 0; 27 | public int numCollectedSegs = 0; 28 | 29 | public long volumeMaxLBA = 0; 30 | 31 | public UW() {} 32 | 33 | @Override 34 | public void init(Log log, int numOpenSegments) { 35 | super.init(log, numOpenSegments); 36 | 37 | lastAccess = IndexMapFactory.getInstance(Configs.indexMapType, Configs.indexMapCache); 38 | lastAccess.setSize((Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024)); 39 | 40 | fifo = new OnDiskFIFO(); 41 | fifo.setSize((Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024)); 42 | 43 | lba2fifo = IndexMapFactory.getInstance(Configs.indexMapType, Configs.indexMapCache); 44 | lba2fifo.setSize((Configs.volumeMaxLba.get(log.getId()) / 4096 + 1024)); 45 | 46 | } 47 | 48 | @Override 49 | public void collectSegment(Segment segment) { 50 | int updateThreshold = 16; 51 | super.collectSegment(segment); 52 | 53 | int temp = segment.meta.temperature; 54 | currentCollectedTemp = temp; 55 | 56 | if (temp == 0) { 57 | totLifespan += (double)log.accessId - segment.meta.createdAccessId; 58 | numCollectedSegs += 1; 59 | 60 | if (numCollectedSegs == updateThreshold) { 61 | threshold = totLifespan / updateThreshold; 62 | numCollectedSegs = 0; 63 | totLifespan = 0; 64 | System.out.println("Log id: " + log.getId() + ", current threshold: " + threshold); 65 | System.out.println("Log id: " + log.getId() + ", current FIFO size: " + fifo.size() + ", num LBA: " + lba2fifo.size()); 66 | } 67 | } 68 | } 69 | 70 | @Override 71 | public int classify(boolean isGcAppend, long lba) { 72 | int level = 0; 73 | 74 | if (!isGcAppend) { 75 | if (log.accessId - lba2fifo.get(lba) < Double.min(threshold, fifo.size())) { 76 | // lbas that do not exist in FIFO will have a value of -1 and thus must exceed the rhs 77 | level = 0; 78 | } else { 79 | level = 1; 80 | } 81 | 82 | lastAccess.put(lba, log.accessId); 83 | lba2fifo.put(lba, log.accessId); 84 | fifo.add(lba); 85 | if (fifo.size() > Double.min(log.getnValidBlocks(), threshold)) { 86 | long accessId = log.accessId - fifo.size() + 1; 87 | long oldLba = fifo.removeFirst(); 88 | if (lba2fifo.get(oldLba) == accessId) { 89 | lba2fifo.put(oldLba, -1L); 90 | } 91 | // amortize the deque overhead to each append 92 | if (fifo.size() > threshold) { 93 | accessId += 1; 94 | oldLba = fifo.removeFirst(); 95 | if (lba2fifo.get(oldLba) == accessId) { 96 | lba2fifo.put(oldLba, -1L); 97 | } 98 | } 99 | } 100 | 101 | addBlocksWritten(level); 102 | } else { 103 | level = 2; 104 | } 105 | nValidBlocks[level] += 1; 106 | nTotalBlocks[level] += 1; 107 | 108 | return level; 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/selection/Basic.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.selection; 2 | 3 | import gcsimulator.BlockContainer; 4 | 5 | import java.util.*; 6 | 7 | class Basic extends SelectionAlgorithm { 8 | @Override 9 | public SelectionAlgorithm pick(List list) { 10 | return null; 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/selection/CostBenefit.java: 
-------------------------------------------------------------------------------- 1 | package gcsimulator.selection; 2 | 3 | import gcsimulator.BlockContainer; 4 | import gcsimulator.Configs; 5 | import gcsimulator.Segment; 6 | 7 | import java.util.Comparator; 8 | import java.util.List; 9 | 10 | /** 11 | */ 12 | public class CostBenefit extends SelectionAlgorithm { 13 | 14 | CostBenefit() { 15 | } 16 | 17 | double score(T object) { 18 | double u = 1.0 - object.getGarbageProportion(); 19 | 20 | double age = object.getAge(); 21 | 22 | if (u == 0.0) return Double.MAX_VALUE; 23 | return (1.0 - u) / u * Math.sqrt(age); 24 | } 25 | 26 | 27 | @Override 28 | public SelectionAlgorithm pick(List list) { 29 | list.sort(Comparator.comparing(this::score, Comparator.reverseOrder())); 30 | return this; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/selection/CostHotness.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.selection; 2 | 3 | import gcsimulator.BlockContainer; 4 | import gcsimulator.Configs; 5 | import gcsimulator.Segment; 6 | import gcsimulator.Simulator; 7 | 8 | import java.util.Comparator; 9 | import java.util.List; 10 | 11 | /** 12 | */ 13 | public class CostHotness extends SelectionAlgorithm { 14 | 15 | CostHotness() { 16 | } 17 | 18 | double score(T object) { 19 | double u = 1.0 - object.getGarbageProportion(); 20 | Segment segment = (Segment)object; 21 | 22 | double hotness = (double) segment.meta.totalWriteCount / 23 | ((double)segment.getnValidBlocks() * Simulator.globalTimestampInUs - 24 | segment.meta.totalLastModifiedTime); 25 | 26 | if (u == 0.0) return Double.MAX_VALUE; 27 | return (1.0 - u) / (u * hotness); 28 | } 29 | 30 | 31 | @Override 32 | public SelectionAlgorithm pick(List list) { 33 | list.sort(Comparator.comparing(this::score, Comparator.reverseOrder())); 34 | return this; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/selection/Greedy.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.selection; 2 | 3 | import gcsimulator.BlockContainer; 4 | 5 | import java.util.*; 6 | 7 | /** 8 | * Basic Greedy Algorithm: 9 | */ 10 | public class Greedy extends SelectionAlgorithm{ 11 | 12 | public Greedy() { 13 | } 14 | 15 | public SelectionAlgorithm pick(List list) { 16 | list.sort(Comparator.comparing(BlockContainer::getGarbageProportion, Comparator.reverseOrder())); 17 | 18 | return this; 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/selection/Lru.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.selection; 2 | 3 | import gcsimulator.BlockContainer; 4 | 5 | import java.util.Comparator; 6 | import java.util.List; 7 | 8 | /** 9 | * LRU Algorithm 10 | */ 11 | public class Lru extends SelectionAlgorithm { 12 | 13 | Lru() { 14 | } 15 | 16 | public SelectionAlgorithm pick(List list) { 17 | list.sort(Comparator.comparing(BlockContainer::getLastAccessedTime)); 18 | return this; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/selection/MultiLog.java: -------------------------------------------------------------------------------- 1 | package 
gcsimulator.selection; 2 | 3 | import gcsimulator.BlockContainer; 4 | import gcsimulator.Log; 5 | import gcsimulator.Segment; 6 | import gcsimulator.Configs; 7 | 8 | import java.util.Comparator; 9 | import java.util.List; 10 | 11 | /** 12 | * LRU Algorithm for MultiLog only 13 | * MultiLog requires to adjust the size of each partition via a customized LRU algorithm 14 | */ 15 | public class MultiLog extends SelectionAlgorithm { 16 | 17 | public MultiLog() { 18 | } 19 | 20 | // Note this function computes W(-(1+alpha) * exp(-(1+alpha))) 21 | double computeLambertW(double alpha) { 22 | return -Math.exp(-0.9 * alpha); 23 | } 24 | 25 | // z1 = 1 + op_1 / s_1 26 | // z2 = 1 + op_2 / s_2 (s_2 = 1 - s_1, op_2 = (op - op_1 * s_1) / s_2) 27 | double computeDerivative(double alpha, double beta, double f1, double s1, double f2, double s2) { 28 | double z1 = 1 + beta / s1, z2 = 1 + (alpha - beta) / s2; 29 | double w1 = computeLambertW(z1 - 1); 30 | double w2 = computeLambertW(z2 - 1); 31 | double result = f1 * w1 / (s1 * (w1 + 1) * (w1 + z1)) - f2 * w2 / (s2 * (w2 + 1) * (w2 + z2)); 32 | return result; 33 | } 34 | 35 | public SelectionAlgorithm pick(List list) { 36 | Segment s = (Segment)list.get(0); 37 | gcsimulator.placement.MultiLog sep = (gcsimulator.placement.MultiLog)(s.getLog().separator); 38 | 39 | boolean exists[] = new boolean[sep.numOpenSegments]; 40 | for (int i = 0; i < sep.numOpenSegments; ++i) exists[i] = false; 41 | 42 | for (T o : list) { 43 | Segment tmpSeg = (Segment) o; 44 | exists[tmpSeg.meta.temperature] = true; 45 | } 46 | 47 | int start = sep.lastUserWriteLevel; 48 | int targetClass = 0; 49 | double minDerivative = Double.MAX_VALUE; 50 | double alpha = Configs.SEGMENT_GARBAGE_PROPORTION / (1 - Configs.SEGMENT_GARBAGE_PROPORTION); 51 | for (int i = 0; i < sep.numOpenSegments; ++i) { 52 | if (!exists[i]) continue; 53 | double beta = sep.getOp(i); 54 | double s1 = sep.getValidBlockPercentage(i); 55 | double s2 = 1 - s1; 56 | double f1 = sep.getWritePercentage(i); 57 | double f2 = 1 - f1; 58 | double derivative = computeDerivative(alpha, beta, s1, f1, s2, f2); 59 | System.out.println("Current considered level: " + i + ", derivative: " + derivative); 60 | System.out.format("%f %f %f %f %f %f\n", alpha, beta, s1, s2, f1, f2); 61 | if (derivative < minDerivative) { 62 | targetClass = i; 63 | minDerivative = derivative; 64 | } 65 | } 66 | 67 | final int target = targetClass; 68 | list.removeIf(o -> ((Segment)o).meta.temperature != target); 69 | list.sort(Comparator.comparing(BlockContainer::getLastAccessedTime)); 70 | 71 | return this; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/selection/Random.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.selection; 2 | 3 | import gcsimulator.BlockContainer; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * Random Algorithm: 9 | */ 10 | public class Random extends SelectionAlgorithm { 11 | 12 | Random() { 13 | } 14 | 15 | public SelectionAlgorithm pick(List list) { 16 | java.util.Collections.shuffle(list, random); 17 | return this; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/selection/RandomGreedy.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.selection; 2 | 3 | import gcsimulator.BlockContainer; 4 | 5 | import java.util.ArrayList; 6 | 
import java.util.Comparator; 7 | import java.util.List; 8 | 9 | import static gcsimulator.Configs.*; 10 | 11 | /** 12 | * Random Greedy GC Algorithm: 13 | */ 14 | public class RandomGreedy extends SelectionAlgorithm { 15 | private static final int RANDOM_PERCENTAGE = 20; // Choose a small number 16 | 17 | RandomGreedy() { 18 | } 19 | 20 | public SelectionAlgorithm pick(List list) { 21 | return this; 22 | } 23 | 24 | public SelectionAlgorithm pickInternal(List list) { 25 | pickKRandomly(list, Math.max((int)Math.ceil(0.01 * RANDOM_PERCENTAGE * list.size()), 2)); 26 | list.sort(Comparator.comparing(BlockContainer::getGarbageProportion, Comparator.reverseOrder())); 27 | return this; 28 | } 29 | 30 | @Override 31 | public SelectionAlgorithm pickUntilReadBytes(List list, long untilBytes) { 32 | List candidates = new ArrayList<>(list); 33 | list.clear(); 34 | long accumulatedCapacity = 0; 35 | 36 | while (!candidates.isEmpty() && accumulatedCapacity < untilBytes) { 37 | List _candidates = new ArrayList<>(candidates); 38 | pickInternal(_candidates); 39 | T candidate = _candidates.get(0); 40 | accumulatedCapacity += candidate.getnBlocks() * BLOCK_SIZE; 41 | list.add(candidate); 42 | candidates.remove(candidate); 43 | } 44 | return this; 45 | } 46 | 47 | @Override 48 | public SelectionAlgorithm pickFirstK(List list, int k) { 49 | if (k < 0) return this; 50 | List candidates = new ArrayList<>(list); 51 | list.clear(); 52 | 53 | while (!candidates.isEmpty() && list.size() < k) { 54 | List _candidates = new ArrayList<>(candidates); 55 | pickInternal(_candidates); 56 | T candidate = _candidates.get(0); 57 | list.add(candidate); 58 | candidates.remove(candidate); 59 | } 60 | 61 | return this; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/selection/SelectionAlgorithmFactory.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.selection; 2 | 3 | import gcsimulator.BlockContainer; 4 | 5 | public class SelectionAlgorithmFactory { 6 | public static SelectionAlgorithm getSelectionAlgorithm(String algorithm) { 7 | switch (algorithm) { 8 | case "Basic": 9 | return new Basic<>(); 10 | case "Lru": 11 | return new Lru<>(); 12 | case "Random": 13 | return new Random<>(); 14 | case "Greedy": 15 | return new Greedy<>(); 16 | case "RandomGreedy": 17 | return new RandomGreedy<>(); 18 | case "WindowGreedy": 19 | return new WindowGreedy<>(); 20 | case "CostBenefit": 21 | return new CostBenefit<>(); 22 | case "MultiLog": 23 | return new MultiLog<>(); 24 | case "CostHotness": 25 | return new CostHotness<>(); 26 | } 27 | return null; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /trace_replay/src/main/java/gcsimulator/selection/WindowGreedy.java: -------------------------------------------------------------------------------- 1 | package gcsimulator.selection; 2 | 3 | import gcsimulator.BlockContainer; 4 | 5 | import java.util.Comparator; 6 | import java.util.List; 7 | 8 | /** 9 | * Random Greedy GC Algorithm: 10 | */ 11 | public class WindowGreedy extends SelectionAlgorithm { 12 | private static final int WINDOW_PERCENTAGE = 90; 13 | 14 | WindowGreedy() { 15 | } 16 | 17 | public SelectionAlgorithm pick(List list) { 18 | list.sort(Comparator.comparing(BlockContainer::getAge, Comparator.reverseOrder())); 19 | pickFirstK(list, (int)Math.ceil(0.01 * WINDOW_PERCENTAGE * list.size())); 20 | 
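// Within the retained window (the oldest WINDOW_PERCENTAGE% of segments), fall back to
// plain greedy ordering by garbage proportion.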
list.sort(Comparator.comparing(BlockContainer::getGarbageProportion, Comparator.reverseOrder())); 21 | 22 | return this; 23 | } 24 | } 25 | --------------------------------------------------------------------------------
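To make the two most common ranking rules above concrete, here is a small self-contained sketch (independent of the simulator classes; the segment values are made up, Java 17 record syntax) contrasting Greedy, which orders candidates purely by garbage proportion, with CostBenefit, which also weights in segment age via (1 - u) / u * sqrt(age), where u is the fraction of still-valid blocks:

```java
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class SelectionScoreSketch {
    // Hypothetical segment summary: garbage proportion in [0,1], age in microseconds.
    record Seg(String id, double garbageProportion, double ageInUs) {}

    // Same formula as CostBenefit.score(): u is the valid fraction; higher score = collect first.
    static double costBenefit(Seg s) {
        double u = 1.0 - s.garbageProportion();
        if (u == 0.0) return Double.MAX_VALUE;
        return (1.0 - u) / u * Math.sqrt(s.ageInUs());
    }

    public static void main(String[] args) {
        List<Seg> segs = new ArrayList<>(List.of(
                new Seg("A", 0.30, 9_000_000),   // less garbage, but old
                new Seg("B", 0.50, 1_000_000))); // more garbage, but young

        segs.sort(Comparator.comparingDouble(Seg::garbageProportion).reversed());
        System.out.println("Greedy order      : " + segs);  // B before A

        segs.sort(Comparator.comparingDouble(SelectionScoreSketch::costBenefit).reversed());
        System.out.println("CostBenefit order : " + segs);  // A before B
    }
}
```

With these numbers, Greedy reclaims B first because it holds more garbage, while CostBenefit prefers A because its age outweighs the difference in garbage proportion; this is exactly the trade-off the two selection classes above encode.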