├── .github └── workflows │ └── compile.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MAINTAINERS.md ├── README.md ├── README_cn.md ├── ROADMAP.md ├── S5bd ├── CMakeLists.txt ├── README.txt ├── doc │ ├── S5BD modification for IOPS density.pptx │ ├── S5BD performance improvement.pptx │ ├── S5BD_Line_Up_Enhancement.pptx │ └── s5bd-architecture.docx ├── include │ ├── bitarray.h │ ├── disable_warn.h │ ├── idgenerator.h │ ├── int_types.h │ ├── internal.h │ ├── libs5bd.h │ ├── s5_context.h │ ├── s5_meta.h │ ├── s5aiocompletion.h │ ├── s5imagectx.h │ ├── s5session.h │ └── tasknode.h ├── src │ ├── bitarray.c │ ├── disable_warn.c │ ├── idgenerator.c │ ├── libs5bd.c │ ├── s5_context.c │ ├── s5aiocompletion.c │ ├── s5imagectx.c │ ├── s5session.c │ └── tasknode.c └── unittest │ ├── CMakeLists.txt │ ├── import_test.data │ ├── libs5bd_api.cpp │ ├── libs5bd_conductor_test.cpp │ ├── line_up_test.cpp │ ├── performance_test.cpp │ ├── rt_info.c │ ├── rt_info.h │ ├── test_libs5bd.cpp │ ├── test_memory_item.cpp │ └── test_memory_leak.cpp ├── build_and_run.txt ├── common ├── CMakeLists.txt ├── README.txt ├── include │ ├── atomic_op.h │ ├── basetype.h │ ├── broker.h │ ├── cmdopt.h │ ├── cxxopts.hpp │ ├── hash │ │ ├── hashfunc.h │ │ ├── hashtable.h │ │ └── murmur.h │ ├── md5.h │ ├── parse_config.h │ ├── pf_aof.h │ ├── pf_aof_cache.h │ ├── pf_app_ctx.h │ ├── pf_buffer.h │ ├── pf_client_api.h │ ├── pf_client_priv.h │ ├── pf_client_store.h │ ├── pf_conf.h │ ├── pf_conf_utils.h │ ├── pf_connection.h │ ├── pf_connection_pool.h │ ├── pf_errno.h │ ├── pf_event_queue.h │ ├── pf_event_thread.h │ ├── pf_fixed_size_queue.h │ ├── pf_ioengine.h │ ├── pf_iotask.h │ ├── pf_ipcdef.h │ ├── pf_list.h │ ├── pf_lmt.h │ ├── pf_lock.h │ ├── pf_log.h │ ├── pf_mempool.h │ ├── pf_message.h │ ├── pf_mq.h │ ├── pf_mq_common.h │ ├── pf_mq_pack_unpack.h │ ├── pf_poller.h │ ├── pf_rdma_connection.h │ ├── pf_socket.h │ ├── pf_socket_impl.h │ ├── 
pf_strtol.h │ ├── pf_tcp_connection.h │ ├── pf_thread.h │ ├── pf_trace_defs.h │ ├── pf_utf8.h │ ├── pf_utils.h │ ├── pf_volume_type.h │ ├── pf_zk_client.h │ ├── special_vol_mgr.h │ ├── spy.h │ └── sqlite3 │ │ ├── s5_sql.h │ │ └── sqlite3.h ├── log4crc ├── src │ ├── broker.c │ ├── hash │ │ ├── hashtable.c │ │ └── murmur.c │ ├── libs5mq.c │ ├── pf_aof.cpp │ ├── pf_aof_cache.cpp │ ├── pf_aof_test_helper.cpp │ ├── pf_app_ctx.cpp │ ├── pf_buffer.cpp │ ├── pf_client_api.cpp │ ├── pf_client_store.cpp │ ├── pf_cmdopt.c │ ├── pf_conf.cpp │ ├── pf_conf_utils.cpp │ ├── pf_connection.cpp │ ├── pf_connection_pool.cpp │ ├── pf_errno.cpp │ ├── pf_event_queue.cpp │ ├── pf_event_thread.cpp │ ├── pf_ioengine.cpp │ ├── pf_iotask.cpp │ ├── pf_list.cpp │ ├── pf_md5.c │ ├── pf_message.cpp │ ├── pf_mq_pack_unpack.c │ ├── pf_performance_profiler.c │ ├── pf_pfdd.cpp │ ├── pf_poller.cpp │ ├── pf_rdma_connection.cpp │ ├── pf_socket.cpp │ ├── pf_socket_impl.cpp │ ├── pf_spdk_ring.cpp │ ├── pf_strtol.cpp │ ├── pf_tcp_connection.cpp │ ├── pf_thread.cpp │ ├── pf_utf8.cpp │ ├── pf_utils.cpp │ ├── pf_zk_client.cpp │ ├── special_vol_mgr.c │ ├── spy.cpp │ ├── spy_client.cpp │ └── sqlite3 │ │ ├── s5_sql.c │ │ └── sqlite3.c └── unittest │ ├── CMakeLists.txt │ ├── Makefile │ ├── Makefile-socket-c │ ├── clt_socket.c │ ├── common_gtest.cpp │ ├── common_gtest2.cpp │ ├── conf_file │ └── s5unittest.conf │ ├── gtest_conf.cpp │ ├── hash │ ├── main.c │ ├── test.h │ └── timer.h │ ├── s5_autotest_gcov_s5mq.sh │ ├── session_clt_socket.c │ ├── sqlite_db_create.py │ ├── srv_socket.c │ ├── test.db │ ├── test_clt_socket.cpp │ ├── test_cndct.c │ ├── test_s5list.c │ ├── test_s5mq_msg_pack_unpack.c │ ├── test_s5mq_trans_status_by_hb_msg.c │ ├── test_s5session.cpp │ ├── test_s5sql.c │ ├── test_srv_socket.cpp │ ├── test_worker.c │ └── util_test.cpp ├── deploy.md ├── docker-based-develope-env.txt ├── docker ├── Dockerfile ├── Dockerfile.base ├── Dockerfile.base.2204 ├── Dockerfile.dev ├── Dockerfile.dev.2204 ├── 
apt-ali-sources-2204-arm.list ├── apt-ali-sources-2204.list ├── apt-origin.list ├── build-all.sh ├── build-docker.sh ├── conf │ ├── pf.conf │ ├── pfc.conf │ └── pfs.conf ├── mariadb │ ├── 50-server.cnf │ ├── init_s5metadb.sql │ └── mariadb.cnf ├── restart-pfc.sh ├── restart-pfs.sh ├── run-all.sh ├── sources.list └── zoo.cfg ├── docs ├── BlockDirectlyArchitecture.md ├── aof.md ├── design_ref_cnt.md ├── heart_beat_design.md ├── images │ └── arch.png └── testoutline.md ├── pfs ├── CMakeLists.txt ├── include │ ├── pf_adaptor.h │ ├── pf_atslock.h │ ├── pf_bgtask_manager.h │ ├── pf_bitmap.h │ ├── pf_block_tray.h │ ├── pf_cluster.h │ ├── pf_dispatcher.h │ ├── pf_error_handler.h │ ├── pf_flash_store.h │ ├── pf_iouring_engine.h │ ├── pf_main.h │ ├── pf_md5.h │ ├── pf_redolog.h │ ├── pf_replica.h │ ├── pf_replicator.h │ ├── pf_request.h │ ├── pf_restful_api.h │ ├── pf_scrub.h │ ├── pf_server.h │ ├── pf_spdk.h │ ├── pf_spdk_engine.h │ ├── pf_stat.h │ ├── pf_sync_replicator.h │ ├── pf_tcp_server.h │ ├── pf_threadpool.h │ ├── pf_trace_defs.h │ ├── pf_tray.h │ ├── pf_volume.h │ ├── s5socket.h │ └── s5socket_impl.h ├── log4crc ├── pfs_template.conf └── src │ ├── pf_atslock.cpp │ ├── pf_bgtask_manager.cpp │ ├── pf_bitmap.cpp │ ├── pf_block_tray.cpp │ ├── pf_cluster.cpp │ ├── pf_dispatcher.cpp │ ├── pf_error_handler.cpp │ ├── pf_flash_store.cpp │ ├── pf_iouring_engine.cpp │ ├── pf_main.cpp │ ├── pf_md5.cpp │ ├── pf_rdma_server.cpp │ ├── pf_redolog.cpp │ ├── pf_replica.cpp │ ├── pf_replicator.cpp │ ├── pf_request.cpp │ ├── pf_restful_api.cpp │ ├── pf_restful_server.cpp │ ├── pf_s5message.cpp │ ├── pf_scrub.cpp │ ├── pf_server.cpp │ ├── pf_socket.cpp │ ├── pf_socket_impl.cpp │ ├── pf_spdk_engine.cpp │ └── pf_volume.cpp ├── pre_build_libs ├── centos_7_x86_64 │ ├── libhashtable.a │ ├── libzookeeper_mt.a │ ├── zookeeper.jute.c │ └── zookeeper.jute.h ├── modules │ └── 5.15.0-73-generic │ │ ├── os.txt │ │ └── pfkd.ko ├── ubuntu_20.04_x86_64 │ ├── libhashtable.a │ ├── libzookeeper_mt.a │ 
├── zookeeper.jute.c │ └── zookeeper.jute.h ├── ubuntu_22.04_aarch64 │ ├── libhashtable.a │ ├── libzkmt.a │ └── libzookeeper_mt.a └── ubuntu_22.04_x86_64 │ ├── libhashtable.a │ ├── libzookeeper_mt.a │ ├── zookeeper.jute.c │ └── zookeeper.jute.h ├── run-from-docker.txt ├── scripts ├── env.sh ├── osname.sh ├── pfc_supervisor.conf ├── pfs_supervisor.conf └── tar-client-libs.sh ├── sld ├── CMakeLists.txt ├── bdd │ └── bdd.c ├── driver │ ├── Makefile │ ├── s5k_basetype.h │ ├── s5k_blkdev.c │ ├── s5k_conductor.c │ ├── s5k_conductor.h │ ├── s5k_imagectx.c │ ├── s5k_imagectx.h │ ├── s5k_log.h │ ├── s5k_message.h │ ├── s5k_miscdev.c │ ├── s5k_spy.c │ └── s5k_spy.h ├── include │ ├── bdd_log.h │ ├── bdd_message.h │ └── s5ioctl.h └── s5bd-cli │ └── cmd_parse.c └── testing ├── create_vol.sh ├── startvm.sh ├── test_1.sh ├── test_2_store_error.sh ├── test_3_recovery.sh ├── test_4_move_replica.sh ├── test_5_aof_io.sh ├── test_6_recovery_snapshot.sh ├── test_7_restart.sh ├── test_fio.sh ├── test_vm.sh └── utils.sh /.github/workflows/compile.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | jobs: 10 | 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Build the Docker image 18 | run: docker build docker --file docker/Dockerfile --tag my-image-name:$(date +%s) --build-arg PFREPO=github 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | cmake-build-debug 3 | /ninja-build 4 | .vs 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "thirdParty/zookeeper"] 2 | path = thirdParty/zookeeper 3 
| url = https://gitee.com/cocalele/zookeeper.git 4 | [submodule "thirdParty/mongoose"] 5 | path = thirdParty/mongoose 6 | url = https://gitee.com/cocalele/mongoose.git 7 | branch = 6.7.1 8 | [submodule "pfs/nlohmann_json"] 9 | path = pfs/nlohmann_json 10 | url = https://gitee.com/cocalele/json.git 11 | [submodule "thirdParty/isa-l_crypto"] 12 | path = thirdParty/isa-l_crypto 13 | url = https://gitee.com/mirrors/isa-l_crypto.git 14 | [submodule "thirdParty/spdk"] 15 | path = thirdParty/spdk 16 | url = https://gitee.com/mirrors/spdk.git 17 | [submodule "thirdParty/sg3_utils"] 18 | path = thirdParty/sg3_utils 19 | url = https://gitee.com/mirrors/sg3_utils 20 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Community Code of Conduct 2 | 3 | This project follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md). -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ### Contributing to ViveNAS 2 | 3 | - Please certify your [Developer Certificate of Origin (DCO)](https://developercertificate.org/), 4 | by signing off your commit with `git commit -s` and with your real name. 5 | 6 | - Please squash commits in meaningful granularity. -------------------------------------------------------------------------------- /MAINTAINERS.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | This document contains a list of maintainers in this repo. 
4 | 5 | ## Current Maintainers 6 | 7 | | Maintainer | GitHub ID | Email | 8 | |---------------------| --------------------------------------------------------- | ----------------------- | 9 | | Lele Liu | [cocalele](https://github.com/cocalele) | cocalele@gmail.com | -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | # Roadmap 2 | 3 | This document provides information on PureFlash development in current and upcoming releases. Community and contributor involvement is vital for successfully implementing all desired items for each release. We hope that the items listed below will inspire further engagement from the community to keep PureFlash progressing and shipping exciting and valuable features. 4 | 5 | PureFlash follows a lean project management approach by splitting the development items into current, near term and future categories. 6 | 7 | ## Completed 8 | The following features have been completed and included in version 1.8.2 9 | - Volume in replication mode, supports 1, 2 or 3 replicas 10 | - Snapshot 11 | - Failover between replicas 12 | - Cluster manager, node discovery and state monitoring 13 | - TCP protocol supporting 14 | - qemu, fio, nbd, iSCSI integration 15 | - Manually data balance 16 | - Manually data recovery after failure 17 | - aio and io_uring engine to access NVMe SSD 18 | - AOF(Append Only File) API, to use volume as AOF 19 | - Client multiple path and auto switch on network failure 20 | 21 | ## Current 22 | This is the features in developing: 23 | - heartbeat between store nodes, client and store nodes 24 | - deploy with k8s operator 25 | - MetaDB HA with help of k8s 26 | - CSI driver 27 | 28 | 29 | ## Near Term 30 | 31 | Typically the items under this category fall under next major release (after the current. e.g 1.9.0). 
To name a few backlogs (not in any particular order) on the near-term radar, where we are looking for additional help: 32 | - Auto balance & recovery 33 | - Resource group, to separate physical disks into groups 34 | - RDMA protocol 35 | - Snapshot consistency group 36 | - NoF interface 37 | - Black list 38 | - Multi-Queue to access NVMe SSD 39 | - OpenStack Cinder driver 40 | 41 | ## Future 42 | As the name suggests this bucket contains items that are planned for the future. 43 | - QoS and client SLA 44 | - EC & Dedup 45 | - Stretch Cluster 46 | - Remote disaster redundancy in async mode 47 | - Volume Clone 48 | - Support to use NVMe-SSD, HDD, SMR-HDD, ZNS-SSD, Tape as the underlying media. 49 | - Support Erasure Code on all types of media 50 | - Quick failover with help of sharable CXL memory pool 51 | - Support to use CXL memory pool as a distributed cache 52 | 53 | # Getting involved with Contributions 54 | 55 | We are always looking for more contributions. If you see anything above that you would love to work on, we welcome you to become a contributor and maintainer of the areas that you love. You can get started by commenting on the related issue or by creating a new issue. 
# Build script for the s5bd shared library and its unit tests.
cmake_minimum_required(VERSION 2.8)
if(COMMAND cmake_policy)
	# CMP0015 NEW: relative paths given to link_directories() are interpreted
	# relative to the current source directory.
	cmake_policy(SET CMP0015 NEW)
endif()

PROJECT(s5bd)


# Sibling source trees this project builds against.
# NOTE(review): the repository tree lists a lower-case `common` directory and no
# `S5manager` directory; on a case-sensitive filesystem `../Common` will not
# resolve. Confirm these paths. S5MANAGER_SRC is not referenced again in this file.
set(S5COMMON_SRC ${s5bd_SOURCE_DIR}/../Common)
set(S5MANAGER_SRC ${s5bd_SOURCE_DIR}/../S5manager)

# C_FLAG_GCOV / CXX_FLAG_GCOV / GCOV are expected to be supplied by the caller
# (they expand to nothing when unset). Note that these set() calls replace any
# flags passed in from the command line or a parent project.
set(CMAKE_C_FLAGS   "-Wall -Wconversion -fPIC -I/usr/include ${C_FLAG_GCOV}")
#message ("\n ***** C_FLAG_GCV=${C_FLAG_GCOV} *****\n")
set(CMAKE_C_FLAGS_DEBUG  "-O0 -g ")

set(CMAKE_CXX_FLAGS   "-Wall -Wconversion -fPIC ${CXX_FLAG_GCOV}")
#message ("\n ***** C_FLAG_GCV=${CXX_FLAG_GCOV} *****\n")
set(CMAKE_CXX_FLAGS_DEBUG  "-O0 -g ")
# NOTE(review): the compiler is forced after PROJECT() has already run compiler
# detection; confirm this is intended.
set(CMAKE_CXX_COMPILER g++)

include_directories(${s5bd_SOURCE_DIR}/include)
include_directories(${S5COMMON_SRC}/include)

link_directories(${S5COMMON_SRC}/lib)
link_directories(/usr/local/lib)

#add_subdirectory(unittest)
# Headers are globbed only so they can be listed among the library sources below.
file(GLOB_RECURSE INCS "*.h")


add_library(s5bd SHARED
	src/libs5bd.c
	src/s5imagectx.c
	src/s5session.c
	src/s5aiocompletion.c
	src/tasknode.c
	src/idgenerator.c
	src/bitarray.c
	src/s5_context.c
	src/disable_warn.c
	${INCS})

target_link_libraries(s5bd s5common pthread log4c ${GCOV})
#message ("\n ***** GCOV=${GCOV} *****\n")
# Do not embed a build-tree RPATH in the produced library.
set_target_properties(s5bd PROPERTIES SKIP_BUILD_RPATH true)

add_subdirectory(unittest)
-------------------------------------------------------------------------------- /S5bd/doc/S5BD modification for IOPS density.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/S5bd/doc/S5BD modification for IOPS density.pptx -------------------------------------------------------------------------------- /S5bd/doc/S5BD performance improvement.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/S5bd/doc/S5BD performance improvement.pptx -------------------------------------------------------------------------------- /S5bd/doc/S5BD_Line_Up_Enhancement.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/S5bd/doc/S5BD_Line_Up_Enhancement.pptx -------------------------------------------------------------------------------- /S5bd/doc/s5bd-architecture.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/S5bd/doc/s5bd-architecture.docx -------------------------------------------------------------------------------- /S5bd/include/bitarray.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C), 2014-2015. 3 | * @file 4 | * This file declares the basic operation on the bit array 5 | */ 6 | 7 | #ifndef __BIT_ARRAY_H__ 8 | #define __BIT_ARRAY_H__ 9 | 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | #include "basetype.h" 16 | 17 | typedef void *bitarray; 18 | 19 | /** 20 | * Init bit array. 21 | * 22 | * This function is used to init bit array. 
#ifndef _DISABLE_WARN_H__
#define _DISABLE_WARN_H__


#include <sys/select.h>	/* fd_set, FD_SET */

/**
 * Add a descriptor to a descriptor set.
 *
 * Thin wrapper around FD_SET. The definition lives in disable_warn.c, which
 * switches off -Wsign-conversion, so callers built with -Wconversion can add
 * descriptors without tripping that warning at every call site.
 *
 * The prototype is deliberately NOT declared `inline`: the only definition is
 * an ordinary external one in disable_warn.c, and an `inline` declaration with
 * no definition in the including translation unit is not valid C99.
 *
 * @param[in]      fd   descriptor to add.
 * @param[in, out] set  descriptor set to update.
 */
void s5_fd_set(int fd, fd_set *set);


#endif
3 | * @file 4 | * The file declares the operations for id generator 5 | */ 6 | 7 | #ifndef __ID_GENERATOR_H___ 8 | #define __ID_GENERATOR_H___ 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | #ifndef INVALID_ID 15 | #define INVALID_ID -1 16 | #endif 17 | 18 | typedef void *idgenerator; 19 | 20 | /** 21 | * Init id generator. 22 | * 23 | * This function is used to init id generator. 24 | * 25 | * @param[in] sz init sz as id could be alloced. 26 | * @param[in, out] idg idgenerator to be initlized. 27 | * 28 | * @return 0 on success, negative error code on failure 29 | * @retval 0 success 30 | * @retval -ENOMEM run out of memory. 31 | */ 32 | int init_id_generator(size_t sz, idgenerator *idg); 33 | 34 | /** 35 | * Release id generator. 36 | * 37 | * This function is used to release id generator. 38 | * 39 | * @param[in, out] idg idgenerator to release. 40 | * 41 | * @return 0 on success, negative error code on failure 42 | * @retval 0 success 43 | * @retval -EINVAL invalid argument. 44 | */ 45 | int release_id_generator(idgenerator idg); 46 | 47 | /** 48 | * Alloc id. 49 | * 50 | * This function is used to alloc id. 51 | * 52 | * @param[in, out] idg idgenerator to alloc. 53 | * 54 | * @return 0<=id<=sz on success, INVALID_ID on failure 55 | * @retval 0<=id<=sz success 56 | * @retval INVALID_ID no more id. 57 | */ 58 | int alloc_id(idgenerator idg); 59 | 60 | /** 61 | * Free id. 62 | * 63 | * This function is used to free id. 64 | * 65 | * @param[in] idg id generator. 66 | * @param[in] id id to be free. 67 | */ 68 | void free_id(idgenerator idg, int id); 69 | 70 | 71 | #ifdef __cplusplus 72 | } 73 | #endif 74 | 75 | #endif //__ID_GENERATOR_H___ 76 | -------------------------------------------------------------------------------- /S5bd/include/int_types.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C), 2014-2015. 
3 | * @file 4 | * This file defines the common type for s5bd 5 | */ 6 | 7 | #ifndef S5BD_INTTYPES_H 8 | #define S5BD_INTTYPES_H 9 | 10 | #include 11 | //#include "acconfig.h" 12 | 13 | #ifdef HAVE_LINUX_TYPES_H 14 | #include 15 | #endif 16 | 17 | /* 18 | * Get 64b integers either from inttypes.h or glib.h 19 | */ 20 | #ifdef HAVE_INTTYPES_H 21 | # include 22 | //#else 23 | //# ifdef HAVE_GLIB 24 | //# include 25 | //# endif 26 | #endif 27 | 28 | /* 29 | * C99 says inttypes.h includes stdint.h, but that's not true on all 30 | * systems. If it's there, include it always - just in case. 31 | */ 32 | #ifdef HAVE_STDINT_H 33 | #include 34 | #endif 35 | 36 | /* 37 | * Emergency replacements for PRI*64 modifiers. Some systems have 38 | * an inttypes.h that doesn't define all the PRI[doxu]64 macros. 39 | */ 40 | #if !defined(PRIu64) 41 | # if defined(HAVE_INTTYPES_H) || defined(HAVE_GLIB) 42 | /* If we have inttypes or glib, assume we have 64-bit long long int */ 43 | # define PRIu64 "llu" 44 | # define PRIi64 "lli" 45 | # define PRIx64 "llx" 46 | # define PRIX64 "llX" 47 | # define PRIo64 "llo" 48 | # define PRId64 "lld" 49 | # else 50 | /* Assume that we don't have long long, so use long int modifiers */ 51 | # define PRIu64 "lu" 52 | # define PRIi64 "li" 53 | # define PRIx64 "lx" 54 | # define PRIX64 "lX" 55 | # define PRIo64 "lo" 56 | # define PRId64 "ld" 57 | # endif 58 | #endif 59 | 60 | #ifdef HAVE_SYS_TYPES_H 61 | #include 62 | #endif 63 | 64 | #ifndef HAVE___U8 65 | typedef uint8_t __u8; 66 | #endif 67 | 68 | #ifndef HAVE___S8 69 | typedef int8_t __s8; 70 | #endif 71 | 72 | #ifndef HAVE___U16 73 | typedef uint16_t __u16; 74 | #endif 75 | 76 | #ifndef HAVE___S16 77 | typedef int16_t __s16; 78 | #endif 79 | 80 | #ifndef HAVE___U32 81 | typedef uint32_t __u32; 82 | #endif 83 | 84 | #ifndef HAVE___S32 85 | typedef int32_t __s32; 86 | #endif 87 | 88 | /* 89 | #ifndef HAVE___U64 90 | typedef uint64_t __u64; 91 | #endif 92 | 93 | #ifndef HAVE___S64 94 | typedef int64_t 
/**
 * Copyright (C), 2014-2015.
 * @file
 * This file declares the callback completion data structure, and the APIs that operate on completions.
 */

#ifndef __S5AIOCOMPLETION_H__
#define __S5AIOCOMPLETION_H__

#ifdef __cplusplus
extern "C" {
#endif

#include "basetype.h"
#include "s5imagectx.h"
#include "libs5bd.h"

/**
 * Callback type invoked when a completion fires (alias of s5bd_callback_t).
 */
typedef s5bd_callback_t callback_t;

/**
 * Asynchronous IO completion: carries the result of one IO request and the
 * means to notify whoever is interested in it (callback and/or condition variable).
 */
typedef struct s5_aiocompletion
{
	pthread_mutex_t mutex;      ///< The mutex protecting `done` and paired with `cond`
	pthread_cond_t cond;        ///< The condition variable signalled when the completion is done
	volatile BOOL done;         ///< Whether the aiocompletion is done or not
	uint32 nlba;                ///< The number of LBA_LENGTH units this completion is waiting for.
	uint32 filled;              ///< The number of finished LBA_LENGTH units.
	callback_t complete_cb;     ///< The callback for this completion; may be NULL, in which case no callback is invoked.
	void *complete_arg;         ///< The argument passed to complete_cb.
	BOOL sync_or_not;           ///< FALSE: s5_aiocompletion_complete() destroys and frees this completion itself; otherwise it is left for the caller to release.
	int status;                 ///< Return status of IO, 0 for success, other value means error
} s5_aiocompletion_t;

/**
 * Create an async IO completion
 *
 * Allocates a completion, initialises its mutex, condition variable, `done`
 * and `status` fields, and records the callback and its argument.
 * `nlba` and `filled` are not set here.
 * NOTE(review): presumably the IO path fills `nlba`/`filled` before the completion fires; confirm against the callers.
 *
 * @param[in] cb_arg        Opaque argument handed back to cb_complete.
 * @param[in] cb_complete   The user specified callback function.
 * @param[in] sync_or_not   When FALSE the completion is destroyed automatically by
 *                          s5_aiocompletion_complete(); otherwise the caller owns it and
 *                          must delete it with s5_aio_release_completion().
 * @return The pointer to the created async IO completion.
 */
s5_aiocompletion_t *s5_aio_create_completion(void *cb_arg, callback_t cb_complete, BOOL sync_or_not);

/**
 * Delete one async IO completion
 *
 * Destroys the completion's mutex and condition variable and frees its memory.
 *
 * @param[in] aiocompletion  The pointer to an existing aio completion.
 * @return No return.
 */
void s5_aio_release_completion(s5_aiocompletion_t* aiocompletion);

/**
 * Wait for the aio completion to be done.
 *
 * Blocks the calling thread until the completion has been marked done by
 * s5_aiocompletion_complete(), then returns.
 *
 * @param[in] aiocompletion  The pointer to an existing aio completion.
 * @return 0 Success
 */
int s5_aiocompletion_wait_for_complete(s5_aiocompletion_t* aiocompletion);

/**
 * Notify that the aio completion is done.
 *
 * Invokes the registered callback (if any), marks the completion done and
 * signals a thread waiting in s5_aiocompletion_wait_for_complete(). The callback
 * receives the finished byte count on success, or 0 when `status` is non-zero.
 * When the completion was created with sync_or_not == FALSE it is destroyed
 * before this function returns and must not be used afterwards.
 *
 * @param[in] aiocompletion  The pointer to an existing aio completion.
 * @return No return.
 */
void s5_aiocompletion_complete(s5_aiocompletion_t* aiocompletion);

/**
 * Return the finished operation data length in bytes.
 *
 * Each IO request has a user expected operation data length. This function
 * returns the finished length, computed as `filled * LBA_LENGTH`; it does not
 * look at `status`.
 *
 * @param[in] aiocompletion  The pointer to an existing aio completion.
 * @return The byte number of finished operation data length.
 */
ssize_t s5_aiocompletion_get_return_value(s5_aiocompletion_t* aiocompletion);

#ifdef __cplusplus
}
#endif

#endif
struct s5_unitnode* s5_unitnode_queue_tail(struct s5_unitnode_queue *queue); 75 | 76 | int s5_unitnode_queue_length(struct s5_unitnode_queue *queue); 77 | 78 | BOOL s5_unitnode_queue_empty(struct s5_unitnode_queue *queue); 79 | 80 | void s5_unitnode_queue_enqueue(struct s5_unitnode_queue *queue, struct s5_unitnode *unode); 81 | 82 | struct s5_unitnode* s5_unitnode_queue_dequeue(struct s5_unitnode_queue *queue); 83 | 84 | 85 | typedef struct s5_blocknode 86 | { 87 | int32 flag; 88 | uint32 running_num; 89 | struct s5_unitnode_queue readyqueue; 90 | bitarray barr; 91 | } s5_blocknode_t; 92 | 93 | void s5_blocknode_init(struct s5_blocknode* bnode); 94 | 95 | void s5_blocknode_release(struct s5_blocknode* bnode); 96 | 97 | #ifdef __cplusplus 98 | } 99 | #endif 100 | 101 | #endif //__TASKNODE_H__ 102 | 103 | -------------------------------------------------------------------------------- /S5bd/src/disable_warn.c: -------------------------------------------------------------------------------- 1 | #include "disable_warn.h" 2 | 3 | #pragma GCC diagnostic ignored "-Wsign-conversion" 4 | 5 | void s5_fd_set(int fd, fd_set *set) 6 | { 7 | FD_SET(fd, set); 8 | } 9 | 10 | -------------------------------------------------------------------------------- /S5bd/src/idgenerator.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "idgenerator.h" 5 | #include "s5log.h" 6 | 7 | 8 | typedef struct id_entry 9 | { 10 | struct id_entry* next; 11 | } id_entry_t; 12 | 13 | typedef struct idmanager 14 | { 15 | struct id_entry* id_pool; 16 | struct id_entry* id_head; 17 | struct id_entry* id_tail; 18 | size_t sz; 19 | } idmanager_t; 20 | 21 | int init_id_generator(size_t sz, idgenerator *idg) 22 | { 23 | int ret = 0; 24 | int i = 0; 25 | struct idmanager *idm = (struct idmanager*)malloc(sizeof(struct idmanager)); 26 | if(!idm) 27 | { 28 | ret = -ENOMEM; 29 | *idg = NULL; 30 | S5LOG_ERROR("Failed to malloc 
idmanager."); 31 | goto out; 32 | } 33 | 34 | idm->sz = sz; 35 | idm->id_pool = (struct id_entry*)malloc(sizeof(struct id_entry) * (sz + 1)); 36 | if(!idm->id_pool) 37 | { 38 | ret = -ENOMEM; 39 | *idg = NULL; 40 | S5LOG_ERROR("Failde to malloc id_entry."); 41 | goto release_idm; 42 | } 43 | 44 | for(i = 0; i < sz; ++i) 45 | { 46 | idm->id_pool[i].next = &(idm->id_pool[i + 1]); 47 | } 48 | 49 | idm->id_head = &(idm->id_pool[0]); 50 | idm->id_tail = &(idm->id_pool[sz]); 51 | idm->id_tail->next = NULL; 52 | *idg = (idgenerator)idm; 53 | goto out; 54 | 55 | release_idm: 56 | free(idm); 57 | 58 | out: 59 | return ret; 60 | } 61 | 62 | int release_id_generator(idgenerator idg) 63 | { 64 | struct idmanager *idm = NULL; 65 | if(idg == NULL) 66 | { 67 | return -EINVAL; 68 | } 69 | 70 | idm = (struct idmanager*)idg; 71 | if(idm->id_pool != NULL) 72 | free(idm->id_pool); 73 | free(idm); 74 | 75 | return 0; 76 | } 77 | 78 | int alloc_id(idgenerator idg) 79 | { 80 | struct idmanager *idm = (struct idmanager*)idg; 81 | struct id_entry* p = NULL; 82 | struct id_entry* q = NULL; 83 | 84 | do 85 | { 86 | p = idm->id_head; 87 | q = idm->id_head->next; 88 | if(NULL == q) 89 | return INVALID_ID; 90 | } 91 | while(! __sync_bool_compare_and_swap(&(idm->id_head), p, q)); 92 | 93 | int id = (int)(p - idm->id_pool); 94 | assert(0 <= id && id <= idm->sz); 95 | return id; 96 | } 97 | 98 | void free_id(idgenerator idg, int id) 99 | { 100 | struct idmanager *idm = (struct idmanager*)idg; 101 | assert(0 <= id && id <= idm->sz); 102 | struct id_entry* q = &(idm->id_pool[id]); 103 | q->next = NULL; 104 | struct id_entry* p = idm->id_tail; 105 | struct id_entry* oldp = p; 106 | 107 | do 108 | { 109 | while(p->next != NULL) 110 | p = p->next; 111 | } 112 | while(! 
__sync_bool_compare_and_swap(&p->next, NULL, q)); 113 | __sync_bool_compare_and_swap(&(idm->id_tail), oldp, q); 114 | } 115 | -------------------------------------------------------------------------------- /S5bd/src/s5aiocompletion.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C), 2014-2015. 3 | * @file 4 | * This file implements the APIs to operate completions. 5 | */ 6 | 7 | #include 8 | #include "s5aiocompletion.h" 9 | 10 | void s5_aiocompletion_set_complete_cb(s5_aiocompletion_t* aiocompletion, void *cb_arg, callback_t cb) 11 | { 12 | aiocompletion->complete_cb = cb; 13 | aiocompletion->complete_arg = cb_arg; 14 | } 15 | 16 | void s5_aiocompletion_init(s5_aiocompletion_t* aiocompletion, BOOL sync_or_not) 17 | { 18 | aiocompletion->done = FALSE; 19 | aiocompletion->sync_or_not = sync_or_not; 20 | aiocompletion->status = 0; 21 | pthread_mutex_init(&aiocompletion->mutex, NULL); 22 | pthread_cond_init(&aiocompletion->cond, NULL); 23 | } 24 | 25 | void s5_aiocompletion_destroy(s5_aiocompletion_t* aiocompletion) 26 | { 27 | pthread_mutex_destroy(&aiocompletion->mutex); 28 | pthread_cond_destroy(&aiocompletion->cond); 29 | 30 | free(aiocompletion); 31 | } 32 | 33 | s5_aiocompletion_t *s5_aio_create_completion(void *cb_arg, callback_t cb_complete, BOOL sync_or_not) 34 | { 35 | s5_aiocompletion_t *c = (s5_aiocompletion_t*)malloc(sizeof(s5_aiocompletion_t)); 36 | s5_aiocompletion_init(c, sync_or_not); 37 | s5_aiocompletion_set_complete_cb(c, cb_arg, cb_complete); 38 | return c; 39 | } 40 | 41 | void s5_aio_release_completion(s5_aiocompletion_t* aiocompletion) 42 | { 43 | s5_aiocompletion_destroy(aiocompletion); 44 | } 45 | 46 | int s5_aiocompletion_wait_for_complete(s5_aiocompletion_t* aiocompletion) 47 | { 48 | pthread_mutex_lock(&aiocompletion->mutex); 49 | while(!aiocompletion->done) 50 | { 51 | pthread_cond_wait(&aiocompletion->cond, &aiocompletion->mutex); 52 | } 53 | 
pthread_mutex_unlock(&aiocompletion->mutex); 54 | 55 | return 0; 56 | } 57 | 58 | void s5_aiocompletion_complete(s5_aiocompletion_t* aiocompletion) 59 | { 60 | BOOL tmp_sync_or_not; 61 | pthread_mutex_lock(&aiocompletion->mutex); 62 | if(aiocompletion->complete_cb) 63 | { 64 | aiocompletion->complete_cb(aiocompletion->complete_arg, 65 | aiocompletion->status == 0 ? aiocompletion->filled * LBA_LENGTH : 0);//return length 0 to indicate error 66 | } 67 | aiocompletion->done = TRUE; 68 | tmp_sync_or_not = aiocompletion->sync_or_not; 69 | pthread_cond_signal(&aiocompletion->cond); 70 | pthread_mutex_unlock(&aiocompletion->mutex); 71 | 72 | 73 | if(tmp_sync_or_not == FALSE) 74 | { 75 | s5_aiocompletion_destroy(aiocompletion); 76 | } 77 | 78 | } 79 | 80 | ssize_t s5_aiocompletion_get_return_value(s5_aiocompletion_t* aiocompletion) 81 | { 82 | return aiocompletion->filled * LBA_LENGTH;// LBA_LENGTH to byte 83 | } 84 | 85 | -------------------------------------------------------------------------------- /S5bd/src/tasknode.c: -------------------------------------------------------------------------------- 1 | #include "tasknode.h" 2 | 3 | void s5_unitnode_reset(s5_unitnode_t *unode) 4 | { 5 | unode->task_id = -1; 6 | unode->flag = NODE_IDLE; 7 | unode->nlba = 0; 8 | unode->len = 0; 9 | unode->ofs = 0; 10 | unode->comp = NULL; 11 | unode->ictx = NULL; 12 | unode->readbuf = NULL; 13 | } 14 | 15 | void s5_unitnode_queue_init(s5_unitnode_queue_t* queue) 16 | { 17 | queue->head = NULL; 18 | queue->tail = NULL; 19 | queue->length = 0; 20 | } 21 | 22 | void s5_unitnode_queue_release(s5_unitnode_queue_t* queue) 23 | { 24 | } 25 | 26 | s5_unitnode_t* s5_unitnode_queue_head(s5_unitnode_queue_t* queue) 27 | { 28 | return queue->head; 29 | } 30 | 31 | s5_unitnode_t* s5_unitnode_queue_tail(s5_unitnode_queue_t* queue) 32 | { 33 | return queue->tail; 34 | } 35 | 36 | int s5_unitnode_queue_length(s5_unitnode_queue_t* queue) 37 | { 38 | return queue->length; 39 | } 40 | 41 | BOOL 
s5_unitnode_queue_empty(s5_unitnode_queue_t* queue) 42 | { 43 | if(queue->length == 0) 44 | return TRUE; 45 | else 46 | return FALSE; 47 | } 48 | /* Appends unode at the tail of the queue and increments the length; unode->next is cleared first. */ 49 | void s5_unitnode_queue_enqueue(s5_unitnode_queue_t* queue, s5_unitnode_t* unode) 50 | { 51 | unode->next = NULL; 52 | if(s5_unitnode_queue_empty(queue)) 53 | { 54 | queue->tail = unode; 55 | queue->head = queue->tail; 56 | } 57 | else 58 | { 59 | queue->tail->next = unode; 60 | queue->tail = unode; 61 | } 62 | ++queue->length; 63 | 64 | return; 65 | } 66 | /* Removes and returns the head node (its next pointer cleared), or NULL when the queue is empty. */ 67 | s5_unitnode_t* s5_unitnode_queue_dequeue(s5_unitnode_queue_t* queue) 68 | { 69 | s5_unitnode_t* ret = NULL; 70 | if(s5_unitnode_queue_empty(queue)) 71 | { 72 | return NULL; 73 | } 74 | ret = queue->head; 75 | queue->head = queue->head->next; if(queue->head == NULL) queue->tail = NULL; /* queue drained: do not leave tail pointing at the removed node */ 76 | --queue->length; 77 | ret->next = NULL; 78 | 79 | return ret; 80 | } 81 | 82 | /* Marks the block node idle, zeroes running_num, and initializes its ready queue and bit array. */ 83 | void s5_blocknode_init(s5_blocknode_t* bnode) 84 | { 85 | bnode->flag = NODE_IDLE; 86 | bnode->running_num = 0; 87 | s5_unitnode_queue_init(&bnode->readyqueue); 88 | bitarray_init(&bnode->barr, SLOT_SIZE); 89 | } 90 | /* Releases the bit array and the ready queue owned by the block node. */ 91 | void s5_blocknode_release(s5_blocknode_t* bnode) 92 | { 93 | bitarray_release(bnode->barr); 94 | s5_unitnode_queue_release(&bnode->readyqueue); 95 | } 96 | 97 | -------------------------------------------------------------------------------- /S5bd/unittest/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | USE_S5LOG() 2 | USE_GTEST() 3 | include_directories(${s5bd_SOURCE_DIR}/include) 4 | include_directories(${S5COMMON_SRC}/include) 5 | include_directories(${CEPH_SRC}/src) 6 | link_directories(${S5COMMON_SRC}/lib) 7 | link_directories(${CEPH_SRC}/src/.libs) 8 | link_directories(/usr/local/lib) 9 | 10 | #add_executable(test_libs5bd test_libs5bd.cpp) 11 | #set_target_properties(test_libs5bd PROPERTIES SKIP_BUILD_RPATH true) 12 | #target_link_libraries(test_libs5bd s5bd s5common gtest log4c) 13 | 14 | #add_executable(test_threadpool test_threadpool.cpp) 15 | 
#set_target_properties(test_threadpool PROPERTIES SKIP_BUILD_RPATH true) 16 | #target_link_libraries(test_threadpool s5bd gtest log4c) 17 | 18 | #add_executable(test_memory_item test_memory_item.cpp) 19 | #set_target_properties(test_memory_item PROPERTIES SKIP_BUILD_RPATH true) 20 | #target_link_libraries(test_memory_item s5bd gtest log4c) 21 | 22 | #add_executable(performance_test performance_test.cpp) 23 | #set_target_properties(performance_test PROPERTIES SKIP_BUILD_RPATH true) 24 | #target_link_libraries(performance_test s5bd) 25 | 26 | #add_executable(line_up_test line_up_test.cpp) 27 | #set_target_properties(line_up_test PROPERTIES SKIP_BUILD_RPATH true) 28 | #target_link_libraries(line_up_test s5bd) 29 | 30 | add_executable(s5bd_api_unittest libs5bd_api.cpp) 31 | set_target_properties(s5bd_api_unittest PROPERTIES SKIP_BUILD_RPATH true) 32 | target_link_libraries(s5bd_api_unittest s5bd s5common gtest log4c s5manager) 33 | add_custom_target(test_data ALL 34 | COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/import_test.data ${CMAKE_BINARY_DIR} 35 | ) 36 | 37 | add_executable(s5bd_memleak_unittest test_memory_leak.cpp rt_info.c) 38 | set_target_properties(s5bd_memleak_unittest PROPERTIES SKIP_BUILD_RPATH true) 39 | target_link_libraries(s5bd_memleak_unittest s5bd s5common gtest log4c s5manager) 40 | 41 | -------------------------------------------------------------------------------- /S5bd/unittest/import_test.data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/S5bd/unittest/import_test.data -------------------------------------------------------------------------------- /S5bd/unittest/rt_info.h: -------------------------------------------------------------------------------- 1 | #ifndef __RT_INFO_HEADER__ 2 | #define __RT_INFO_HEADER__ 3 | #ifdef __cplusplus 4 | extern "C"{ 5 | #endif 6 | 7 | #define VMRSS_LINE 15//VMRSS所在行 8 | #define 
PROCESS_ITEM 14//index of the item where the process CPU times start 9 | 10 | typedef struct //declares an "occupy" structure 11 | { 12 | unsigned int user; //time spent in user mode, accumulated from system boot until now, excluding processes with a negative nice value. 13 | unsigned int nice; //CPU time used by processes with a negative nice value, accumulated from system boot until now 14 | unsigned int system;//time spent in kernel mode, accumulated from system boot until now 15 | unsigned int idle; //wait time other than IO wait, accumulated from system boot until now; iowait (12256): IO wait time accumulated from system boot until now (since 2.5.41) 16 | }total_cpu_occupy_t; 17 | 18 | typedef struct 19 | { 20 | pid_t pid;//pid number 21 | unsigned int utime; //time this task has run in user mode, in jiffies 22 | unsigned int stime; //time this task has run in kernel mode, in jiffies 23 | unsigned int cutime;//time all dead threads have run in user mode, in jiffies 24 | unsigned int cstime; //time all dead threads have run in kernel mode, in jiffies 25 | }process_cpu_occupy_t; 26 | 27 | int get_phy_mem(const pid_t p);//get the physical memory in use 28 | int get_total_mem();//get the total system memory 29 | unsigned int get_cpu_total_occupy();//get the total CPU time 30 | unsigned int get_cpu_process_occupy(const pid_t p);//get the CPU time of a process 31 | const char* get_items(const char* buffer,int ie);//get the start address of the specified item in the buffer 32 | 33 | extern double get_pcpu(pid_t p);//get the CPU usage of a process 34 | extern double get_pmem(pid_t p);//get the memory usage of a process 35 | extern int get_rmem(pid_t p);//get the real physical memory 36 | 37 | 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /S5bd/unittest/test_memory_item.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include "threadpool.h" 9 | #include "atomic_op.h" 10 | #include "memory_item.h" 11 | 12 | 13 | int add(const int &a, const int &b) 14 | { 15 | return a + b; 16 | } 17 | 18 | TEST(threadpool, add) 19 | { 20 | EXPECT_EQ(2, add(1, 1)); 21 | EXPECT_EQ(8, add(3, 5)); 22 | } 23 | 24 | enum TestTaskType 25 | { 26 | TEST_TYPE_SEND_TASK = 0, 27 | TEST_TYPE_RECV_TASK, 28 | TEST_TYPE_MAX 29 | }; 30 | 31 | Threadpool* tp[TEST_TYPE_MAX]; 32 | 33 | uint32 get_core_num() 34 | { 35 | int processor_num; 36 | std::ifstream iff; 37 | /// 
number of logical processors 38 | if(system(" cat /proc/cpuinfo | grep \"processor\" | wc -l>/tmp/processor_count.txt") == -1) 39 | { 40 | std::cerr << " number of logical processors FAILED" << std::endl; 41 | } 42 | iff.open("/tmp/processor_count.txt"); 43 | iff >> processor_num; 44 | iff.close(); 45 | std::remove("/tmp/processor_count.txt"); 46 | 47 | return processor_num; 48 | } 49 | 50 | struct info 51 | { 52 | int id; 53 | int num; 54 | }; 55 | 56 | memory_item *minfo; 57 | 58 | void* send_thread_entry(void *node) 59 | { 60 | int *n = (int*)node; 61 | for(int i = 0; i != *n; ++i) 62 | { 63 | info *mi = minfo->item_alloc(); 64 | while(NULL == mi) 65 | { 66 | printf("warning-alloc info error in send_thread_entry.\n"); 67 | usleep(1000); 68 | mi = minfo->item_alloc(); 69 | } 70 | tp[TEST_TYPE_RECV_TASK]->feed_task(mi); 71 | } 72 | 73 | return NULL; 74 | } 75 | 76 | void* recv_thread_entry(void *node) 77 | { 78 | info *mi = (info*)node; 79 | minfo->item_free(mi); 80 | 81 | return NULL; 82 | } 83 | 84 | int32 init_tpm() 85 | { 86 | uint32 core_num = get_core_num(); 87 | tp[TEST_TYPE_SEND_TASK] = new Threadpool(core_num); 88 | tp[TEST_TYPE_RECV_TASK] = new Threadpool(core_num); 89 | 90 | tp[TEST_TYPE_SEND_TASK]->set_entry_func(send_thread_entry); 91 | tp[TEST_TYPE_RECV_TASK]->set_entry_func(recv_thread_entry); 92 | 93 | minfo = new memory_item; 94 | return 0; 95 | } 96 | 97 | int32 release_tpm() 98 | { 99 | delete tp[TEST_TYPE_SEND_TASK]; 100 | delete tp[TEST_TYPE_RECV_TASK]; 101 | 102 | delete minfo; 103 | return 0; 104 | } 105 | 106 | TEST(memory_item, memory_alloc_free) 107 | { 108 | init_tpm(); 109 | int a[1000]; 110 | for(int i = 0; i != 1000; ++i) 111 | { 112 | a[i] = i; 113 | tp[TEST_TYPE_SEND_TASK]->feed_task(a + i); 114 | } 115 | 116 | sleep(1); 117 | 118 | release_tpm(); 119 | } 120 | 121 | 122 | int main(int argc, char **argv) 123 | { 124 | ::testing::InitGoogleTest(&argc, argv); 125 | return RUN_ALL_TESTS(); 126 | } 127 | 
-------------------------------------------------------------------------------- /common/README.txt: -------------------------------------------------------------------------------- 1 | This is common project in S5 software. 2 | This project include common and util code like Log, Util, TCP reader/writer. 3 | -------------------------------------------------------------------------------- /common/include/atomic_op.h: -------------------------------------------------------------------------------- 1 | #ifndef __ATOMIC_OP_H__ 2 | #define __ATOMIC_OP_H__ 3 | 4 | #include "basetype.h" 5 | #include "machine.h" 6 | 7 | template 8 | class AtomicType 9 | { 10 | private: 11 | volatile T value_; 12 | 13 | public: 14 | AtomicType(T v = 0) : value_(v) 15 | {} 16 | 17 | AtomicType(AtomicType &v ) 18 | { 19 | value_ = v.value_; 20 | } 21 | 22 | operator T () 23 | { 24 | return MachineExchangeAdd(&value_, 0); 25 | } 26 | 27 | T operator ()() 28 | { 29 | return MachineExchangeAdd(&value_, 0); 30 | } 31 | 32 | 33 | AtomicType& operator = (T v) 34 | { 35 | MachineExchange(&value_, v); 36 | return *this; 37 | } 38 | 39 | AtomicType& operator = (AtomicType& v) 40 | { 41 | MachineExchange(&value_, v.value_); 42 | return *this; 43 | } 44 | 45 | T compare_exchange(T to_exchange, T to_compare) 46 | { 47 | return MachineCompareExchange(&value_, to_exchange, to_compare); 48 | } 49 | 50 | T fetch_and_add(T addend) 51 | { 52 | return MachineExchangeAdd(&value_,addend); 53 | } 54 | 55 | T fetch_and_store(T val) 56 | { 57 | return MachineExchange(&value_, val); 58 | } 59 | 60 | T operator++() 61 | { 62 | // prefix 63 | return MachineIncrement(&value_); 64 | } 65 | 66 | T operator++(int) 67 | { 68 | //postfix 69 | T before(value_); 70 | MachineIncrement(&value_); 71 | return before; 72 | } 73 | 74 | T operator--() 75 | { 76 | // prefix 77 | return MachineDecrement(&value_); 78 | } 79 | 80 | T operator--(int) 81 | { 82 | //postfix 83 | T before(value_); 84 | MachineDecrement(&value_); 85 | return 
before; 86 | } 87 | 88 | T operator+=(T right) 89 | { 90 | return MachineExchangeAdd(&value_, right); 91 | } 92 | 93 | T operator+=(const AtomicType& right) 94 | { 95 | return MachineExchangeAdd(&value_, right.value_); 96 | } 97 | 98 | T operator-=(T right) 99 | { 100 | return MachineExchangeSub(&value_, right); 101 | } 102 | 103 | T operator-=(AtomicType& right) 104 | { 105 | return MachineExchangeSub(&value_, right.value_); 106 | } 107 | 108 | 109 | int operator == (AtomicType& rhs) 110 | { 111 | return value_ == rhs.value_; 112 | } 113 | 114 | 115 | int operator == (T rhs) 116 | { 117 | return value_ == rhs; 118 | } 119 | 120 | int operator < (AtomicType& rhs) 121 | { 122 | return value_ < rhs.value_; 123 | } 124 | 125 | int operator != (AtomicType& rhs) 126 | { 127 | return value_ != rhs.value_; 128 | } 129 | 130 | int operator >= (AtomicType& rhs) 131 | { 132 | return value_ >= rhs.value_; 133 | } 134 | 135 | int operator > (AtomicType& rhs) 136 | { 137 | return value_ > rhs.value_; 138 | } 139 | 140 | int operator<= (AtomicType& rhs) 141 | { 142 | return value_ <= rhs.value_; 143 | } 144 | 145 | T value() 146 | { 147 | return value_ ; 148 | } 149 | 150 | }; 151 | 152 | typedef AtomicType AtomicInt8; 153 | typedef AtomicType AtomicUInt8; 154 | typedef AtomicType AtomicInt16; 155 | typedef AtomicType AtomicUInt16; 156 | typedef AtomicType AtomicInt32; 157 | typedef AtomicType AtomicUInt32; 158 | typedef AtomicType AtomicInt64; 159 | typedef AtomicType AtomicUInt64; 160 | 161 | #define atomic AtomicType 162 | 163 | 164 | #endif //__ATOMIC_OP_H__ 165 | -------------------------------------------------------------------------------- /common/include/basetype.h: -------------------------------------------------------------------------------- 1 | #ifndef __BASE_TYPE_H__ 2 | #define __BASE_TYPE_H__ 3 | 4 | /** 5 | * Copyright (C), 2014-2020. 6 | * @file 7 | * Base type definitions. 8 | * 9 | * This file includes all base type definition, which are used by S5 modules. 
10 | */ 11 | 12 | 13 | #include 14 | #include 15 | 16 | typedef unsigned int BOOL; 17 | #define TRUE 1 18 | #define FALSE 0 19 | 20 | #define SHARD_LBA_CNT (16LL << 20) //a LBA is 4K, 1< 9 | 10 | #include "hashfunc.h" 11 | 12 | HashFunc MurmurHash3_x86_32; 13 | HashFunc MurmurHash3_x86_128; 14 | HashFunc MurmurHash3_x64_128; 15 | 16 | #endif // _MURMURHASH3_H_ 17 | -------------------------------------------------------------------------------- /common/include/md5.h: -------------------------------------------------------------------------------- 1 | #ifndef MD5_H 2 | #define MD5_H 3 | 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | #define READ_DATA_SIZE 1024 19 | #define MD5_SIZE 16 20 | #define MD5_STR_LEN (MD5_SIZE * 2) 21 | 22 | typedef struct 23 | { 24 | unsigned int count[2]; 25 | unsigned int state[4]; 26 | unsigned char buffer[64]; 27 | } MD5_CTX; 28 | 29 | 30 | #define F(x,y,z) ((x & y) | (~x & z)) 31 | #define G(x,y,z) ((x & z) | (y & ~z)) 32 | #define H(x,y,z) (x^y^z) 33 | #define I(x,y,z) (y ^ (x | ~z)) 34 | #define ROTATE_LEFT(x,n) ((x << n) | (x >> (32-n))) 35 | 36 | #define FF(a,b,c,d,x,s,ac) \ 37 | { \ 38 | a += F(b,c,d) + x + ac; \ 39 | a = ROTATE_LEFT(a,s); \ 40 | a += b; \ 41 | } 42 | #define GG(a,b,c,d,x,s,ac) \ 43 | { \ 44 | a += G(b,c,d) + x + ac; \ 45 | a = ROTATE_LEFT(a,s); \ 46 | a += b; \ 47 | } 48 | #define HH(a,b,c,d,x,s,ac) \ 49 | { \ 50 | a += H(b,c,d) + x + ac; \ 51 | a = ROTATE_LEFT(a,s); \ 52 | a += b; \ 53 | } 54 | #define II(a,b,c,d,x,s,ac) \ 55 | { \ 56 | a += I(b,c,d) + x + ac; \ 57 | a = ROTATE_LEFT(a,s); \ 58 | a += b; \ 59 | } 60 | void MD5Init(MD5_CTX *context); 61 | void MD5Update(MD5_CTX *context, unsigned char *input, unsigned int inputlen); 62 | void MD5Final(MD5_CTX *context, unsigned char digest[16]); 63 | void MD5Transform(unsigned int state[4], unsigned char block[64]); 64 | void 
MD5Encode(unsigned char *output, unsigned int *input, unsigned int len); 65 | void MD5Decode(unsigned int *output, unsigned char *input, unsigned int len); 66 | int compute_file_md5(const char *file_path, char *md5_str, int buf_len); 67 | 68 | 69 | #ifdef __cplusplus 70 | } 71 | #endif 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /common/include/parse_config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ===================================================================================== 3 | * 4 | * Filename: parse_config.h 5 | * 6 | * Description: 7 | * 8 | * Version: 1.0 9 | * Created: 2015年09月14日 15时24分00秒 10 | * Revision: none 11 | * Compiler: gcc 12 | * 13 | * Author: FanXiaoGuang (), solar_ambitious@126.com 14 | * Organization: 15 | * 16 | * ===================================================================================== 17 | */ 18 | #ifndef _PARSE_CONFIG_H__ 19 | #define _PARSE_CONFIG_H__ 20 | 21 | int GetProfileString(const char *profile, const char *AppName, char *KeyName, char *KeyVal ); 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /common/include/pf_aof.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_aof_h__ 2 | #define pf_aof_h__ 3 | 4 | #include 5 | #include 6 | #include "pf_client_api.h" 7 | class PfAof; 8 | 9 | PfAof* pf_open_aof(const char* volume_name, const char* snap_name, int flags, const char* cfg_filename, int lib_ver); 10 | int pf_aof_access(const char* volume_name, const char* cfg_filename); 11 | int pf_ls_aof_children(const char* tenant_name, const char* cfg_filename, std::vector* result); 12 | int pf_rename_aof(const char* volume_name, const char* new_name, const char* pf_cfg_file); 13 | int pf_delete_aof(const char* volume_name, const char* pf_cfg_file); 14 | 15 | //#define _DATA_DBG 16 | class SimpleCache; 17 | 18 | 
class PfAof 19 | { 20 | public: 21 | PfClientVolume* volume; 22 | private: 23 | void* append_buf; 24 | off_t append_tail;//append tail in buffer 25 | ssize_t file_len; 26 | // mutable void* read_buf;//a small buffer to read unaligned part 27 | union{ 28 | void* head_buf; 29 | struct PfAofHead* head; 30 | }; 31 | ssize_t append_buf_size; 32 | public: 33 | PfAof(ssize_t append_buf_size = 2 << 20); 34 | ssize_t append(const void* buf, ssize_t len); 35 | ssize_t read(void* buf, ssize_t len, off_t offset) const; 36 | void sync(); 37 | inline ssize_t file_length() { return file_len; } 38 | const char* path(); //return file full path include name 39 | int reader_cnt = 0; 40 | int writer_cnt = 0; 41 | int ref_count = 1; 42 | inline void add_ref() { 43 | __sync_fetch_and_add(&ref_count, 1); 44 | } 45 | inline void dec_ref() { 46 | if (__sync_sub_and_fetch(&ref_count, 1) == 0) { 47 | delete this; 48 | } 49 | } 50 | 51 | private: 52 | ~PfAof(); 53 | int open(); 54 | friend PfAof* pf_open_aof(const char* volume_name, const char* snap_name, int flags, const char* cfg_filename, int lib_ver); 55 | #ifdef _DATA_DBG 56 | int localfd; 57 | #else 58 | int _holder; 59 | #endif 60 | }; 61 | 62 | struct PfAofHead 63 | { 64 | uint32_t magic; 65 | uint32_t version; 66 | uint64_t length; 67 | uint64_t modify_time; 68 | uint64_t access_time; 69 | uint64_t create_time; 70 | }; 71 | #endif // pf_aof_h__ 72 | -------------------------------------------------------------------------------- /common/include/pf_aof_cache.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_aof_cache_h__ 2 | #define pf_aof_cache_h__ 3 | #include 4 | #include 5 | #include //for off_t type 6 | 7 | class PfAof; 8 | 9 | #define SLOT_CNT 8 10 | //#define SLOT_SIZE (PF_MAX_IO_SIZE ) 11 | #define SLOT_SIZE (64<<10) 12 | 13 | class CacheLine 14 | { 15 | public: 16 | //size_t buf_size; //always SLOT_SIZE 17 | off_t off_in_file; 18 | void* buf; 19 | PfAof* aof; 20 | 
std::shared_mutex lock; 21 | 22 | int hit_cnt; 23 | 24 | int init(PfAof* aof); 25 | size_t pread(void* buf, size_t len, off_t offset/*, int* disk_accessed*/); 26 | int fetch_data(off_t offset); 27 | int bg_fetch_data(off_t offset); 28 | }; 29 | 30 | class AofWindowCache 31 | { 32 | public: 33 | 34 | CacheLine slots[SLOT_CNT]; //a 2M buffer 35 | PfAof* aof; 36 | 37 | bool prefetch; 38 | 39 | int init(PfAof* aof, bool prefetch=false); 40 | size_t pread(void* buf, size_t len, off_t offset); 41 | 42 | 43 | }; 44 | #endif // pf_aof_cache_h__ -------------------------------------------------------------------------------- /common/include/pf_app_ctx.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_app_ctx_h__ 2 | #define pf_app_ctx_h__ 3 | #include 4 | 5 | #include "pf_buffer.h" 6 | #include "pf_mempool.h" 7 | #include "pf_volume_type.h" 8 | #include "pf_client_api.h" 9 | 10 | #include "pf_rdma_connection.h" 11 | 12 | 13 | enum { 14 | AIO, 15 | IO_URING, 16 | SPDK, 17 | }; 18 | 19 | enum RDMA_CQ_PROC_MODEL { 20 | EVENT, 21 | POLLING, 22 | NONE_MODEL, 23 | }; 24 | 25 | class BufferDescriptor; 26 | class PfIoDesc 27 | { 28 | public: 29 | BufferDescriptor* io_cmd; 30 | BufferDescriptor* io_data; 31 | BufferDescriptor* io_reply; 32 | }; 33 | class PfAppCtx 34 | { 35 | public: 36 | int io_desc_count; 37 | BigMemPool cow_buf_pool; 38 | 39 | //BufferPool cmd_pool; 40 | //BufferPool data_pool; 41 | //BufferPool reply_pool; 42 | //BufferPool handshake_pool; 43 | 44 | //ObjectMemoryPool cmd_pool; 45 | //ObjectMemoryPool data_pool; 46 | //ObjectMemoryPool reply_pool; 47 | //ObjectMemoryPool handshake_pool; 48 | 49 | std::string conf_file_name; 50 | conf_file_t conf; 51 | int engine; 52 | RDMA_CQ_PROC_MODEL cq_proc_model; 53 | bool shard_to_replicator = false; 54 | struct PfRdmaDevContext *dev_ctx[MAX_RDMA_DEVICE]; 55 | virtual int PfRdmaRegisterMr(struct PfRdmaDevContext *dev_ctx) = 0 ; 56 | virtual void PfRdmaUnRegisterMr() = 0; 57 
| bool rdma_client_only; 58 | PfAppCtx():cow_buf_pool(COW_OBJ_SIZE), engine(AIO), cq_proc_model(EVENT), shard_to_replicator(false) 59 | { 60 | for (int i = 0 ; i < MAX_RDMA_DEVICE; i++) 61 | dev_ctx[i] = NULL; 62 | 63 | rdma_client_only = false; 64 | } 65 | virtual ~PfAppCtx(){} 66 | }; 67 | 68 | extern PfAppCtx* g_app_ctx; 69 | extern bool spdk_engine; 70 | static inline __attribute__((always_inline)) bool spdk_engine_used() 71 | { 72 | return spdk_engine == true; 73 | } 74 | void spdk_engine_set(bool use_spdk); 75 | #endif // pf_app_ctx_h__ 76 | -------------------------------------------------------------------------------- /common/include/pf_buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef _S5_BUFFER_H_ 2 | #define _S5_BUFFER_H_ 3 | /** 4 | * Copyright (C), 2019. 5 | * @endcode GBK 6 | * @file 7 | * һ��buffer����һ���������ڴ档һ�������һ��ָ�����һ�����ȾͿ�������һ���ڴ档��ʹ��RDMA����ʱ 8 | * ����ڴ���Ҫ�������Ϣ����local key, remote key, offset���ҶԲ�ͬ��RDMA�豸��ע�����и��Զ�Ӧ 9 | * ��local key, remote key������������ͬһ��buffer������ͬ���豸ʹ�þ�Ҫ�ṩ��ͬ�ķ���Ҫ�ء������ 10 | * buffer_descriptor ���ڵ����塣buffer_descriptor�����¼�����buffer���ֳ����µķ�����Ϣ�� 11 | * 12 | * buffer �ᰴ����󳤶ȷ��䣬����������������IO��64K byte�� ��ôbufer�������64K byte��Ҳ����buf_size 13 | * ��65536��Ȼ��������һ��4K byte IO����ʱ����Ч���ݵij���,��data_len��4096 14 | */ 15 | 16 | #include "pf_fixed_size_queue.h" 17 | #include 18 | 19 | class BufferPool; 20 | class PfConnection; 21 | 22 | //Work complete status 23 | enum WcStatus { 24 | WC_SUCCESS = 0, 25 | WC_FLUSH_ERR = 5, 26 | }; 27 | const char* WcStatusToStr(WcStatus s); 28 | 29 | //Work request op code 30 | enum WrOpcode { 31 | TCP_WR_SEND = 0, 32 | TCP_WR_RECV = 128, 33 | RDMA_WR_SEND = 129, 34 | RDMA_WR_RECV = 130, 35 | RDMA_WR_WRITE = 131, 36 | RDMA_WR_READ = 132 37 | }; 38 | const char* OpCodeToStr(WrOpcode op) ; 39 | 40 | typedef void(*completion_handler)(int status, int opcode, void* data); 41 | 42 | 
struct BufferDescriptor 43 | { 44 | WrOpcode wr_op;// work request op code 45 | union { 46 | void* buf; 47 | struct PfMessageHead* cmd_bd; //valid if thie BD used for command 48 | struct PfMessageReply* reply_bd; //valid if this BD used for message reply 49 | }; 50 | int data_len; /// this is the validate data len in the buffer. 51 | union { 52 | struct PfClientIocb *client_iocb; 53 | struct PfServerIocb *server_iocb; 54 | }; 55 | //int(*on_work_complete)(BufferDescriptor* bd, WcStatus complete_status, PfConnection* conn, void* cbk_data); 56 | void* cbk_data; 57 | int buf_capacity; /// this is the size, i.e. max size of buf 58 | struct ibv_mr* mrs[4]; 59 | BufferPool* owner_pool; 60 | PfConnection* conn; 61 | }; 62 | 63 | class BufferPool 64 | { 65 | public: 66 | BufferPool():dma_buffer_used(false){}; 67 | size_t buf_size; 68 | int buf_count; 69 | bool dma_buffer_used; 70 | struct ibv_mr* mrs[4]; 71 | int init(size_t buffer_size, int count); 72 | inline BufferDescriptor* alloc() { return free_bds.dequeue(); } 73 | inline int free(BufferDescriptor* bd){ bd->client_iocb = NULL; return free_bds.enqueue(bd); } 74 | void destroy(); 75 | void* data_buf; 76 | BufferDescriptor* data_bds; 77 | int rmda_register_mr(struct ibv_pd* pd, int idx, int access_mode); 78 | void rmda_unregister_mr(); 79 | private: 80 | PfFixedSizeQueue free_bds; 81 | }; 82 | 83 | struct disp_mem_pool 84 | { 85 | BufferPool cmd_pool; 86 | BufferPool data_pool; 87 | BufferPool reply_pool; 88 | }; 89 | 90 | struct replicator_mem_pool 91 | { 92 | BufferPool cmd_pool; 93 | BufferPool reply_pool; 94 | }; 95 | #endif //_S5_BUFFER_H_ 96 | 97 | -------------------------------------------------------------------------------- /common/include/pf_connection.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_connection_h__ 2 | #define pf_connection_h__ 3 | #include "pf_buffer.h" 4 | 5 | #define CONN_INIT 0 6 | #define CONN_OK 1 7 | #define CONN_CLOSED 2 8 | #define 
CONN_CLOSING 3 9 | 10 | const char* ConnState2Str(int conn_state); 11 | 12 | 13 | #define PROTOCOL_VER 1 14 | class PfClientVolume; 15 | class PfDispatcher; 16 | class PfVolume; 17 | class PfReplicator; 18 | class PfClientAppCtx; 19 | 20 | enum connection_type { 21 | TCP_TYPE = 0, 22 | RDMA_TYPE = 1 23 | }; 24 | 25 | typedef int(*work_complete_handler)(BufferDescriptor* bd, WcStatus complete_status, PfConnection* conn, void* cbk_data); 26 | class PfConnection 27 | { 28 | public: 29 | static int total_count; 30 | static int closed_count; 31 | static int released_count; 32 | int ref_count = 0; 33 | work_complete_handler on_work_complete; 34 | 35 | union { 36 | PfClientAppCtx* client_ctx; //used in client side 37 | PfVolume* srv_vol; //used in server side 38 | PfReplicator* replicator; 39 | void* master; 40 | }; 41 | PfDispatcher* dispatcher; 42 | int state; 43 | connection_type conn_type; 44 | uint64_t last_heartbeat_time; 45 | int io_depth; 46 | std::string connection_info; 47 | std::string peer_ip; 48 | int peer_port; 49 | int inflying_heartbeat; 50 | int inflying_io; 51 | bool unclean_closed = false; 52 | uint64_t close_time = 0; 53 | PfConnection(); 54 | virtual ~PfConnection(); 55 | virtual int post_recv(BufferDescriptor* buf)=0; 56 | virtual int post_send(BufferDescriptor* buf)=0; 57 | virtual int post_read(BufferDescriptor* buf)=0; 58 | virtual int post_write(BufferDescriptor* buf)=0; 59 | virtual int do_close() = 0; 60 | int close(); 61 | int send_heartbeat(); 62 | 63 | void (*on_close)(PfConnection*); 64 | void (*on_destroy)(PfConnection*); 65 | 66 | inline void add_ref() {__sync_fetch_and_add(&ref_count, 1); 67 | //S5LOG_INFO("add_ref conn:0x%x ref_cnt:%d", this, ref_count); 68 | } 69 | inline void dec_ref() { 70 | if (__sync_sub_and_fetch(&ref_count, 1) == 0) 71 | { 72 | //S5LOG_INFO("dec_ref conn:0x%x ref_cnt:%d", this, ref_count); 73 | if (state == CONN_OK) 74 | { 75 | close(); 76 | } 77 | if(on_destroy) 78 | on_destroy(this); 79 | delete this; 80 | } 81 
| } 82 | 83 | inline bool get_throttle() 84 | { 85 | int current_cnt = __sync_fetch_and_add(&inflying_io, 1); 86 | if (current_cnt < io_depth) 87 | return true; 88 | else{ 89 | 90 | //S5LOG_DEBUG(" throttle by infly:%d iod:%d", inflying_io, io_depth); 91 | __sync_fetch_and_sub(&inflying_io, 1); 92 | } 93 | return false; 94 | 95 | return true; 96 | } 97 | inline void put_throttle() 98 | { 99 | //S5LOG_DEBUG("put throttle"); 100 | __sync_fetch_and_sub(&inflying_io, 1); 101 | } 102 | }; 103 | 104 | int parse_net_address(const char* ipv4, unsigned short port, /*out*/struct sockaddr_in* ipaddr); 105 | #endif // pf_connection_h__ 106 | -------------------------------------------------------------------------------- /common/include/pf_connection_pool.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_connection_pool_h__ 2 | #define pf_connection_pool_h__ 3 | #include 4 | #include 5 | #include 6 | #include "pf_connection.h" 7 | 8 | class PfConnection; 9 | class PfPoller; 10 | 11 | typedef void (*conn_close_handler)(PfConnection*); 12 | class PfConnectionPool 13 | { 14 | public: 15 | enum connection_type conn_type; 16 | PfConnectionPool() : pool_size(0){ } 17 | int init(int size, PfPoller* poller, void* owner, uint64_t vol_id, int io_depth, enum connection_type type, work_complete_handler _handler, conn_close_handler close_handler); 18 | PfConnection* get_conn(const std::string& ip, enum connection_type) noexcept ; 19 | void close_all(); 20 | public: 21 | std::map ip_id_map; 22 | std::mutex mtx; 23 | int pool_size; 24 | int io_depth; 25 | PfPoller* poller; 26 | union{ 27 | PfClientVolume* volume; //used in client side 28 | PfReplicator* replicator; 29 | void* owner; 30 | }; 31 | uint64_t vol_id; 32 | work_complete_handler on_work_complete; 33 | conn_close_handler on_conn_closed; 34 | }; 35 | 36 | #endif // pf_connection_pool_h__ 37 | -------------------------------------------------------------------------------- 
/common/include/pf_errno.h: -------------------------------------------------------------------------------- 1 | #ifndef __S5_ERRNO_H 2 | #define __S5_ERRNO_H 3 | 4 | /** 5 | * Copyright (C), 2014-2015. 6 | * @file 7 | * s5errno type and API definitions. 8 | * 9 | * This file includes all s5error types and interfaces, which are used by S5 modules. 10 | */ 11 | 12 | #include 13 | 14 | #ifdef __cplusplus 15 | #include 16 | std::string cpp_strerror(int err); 17 | #endif 18 | 19 | /** 20 | * S5 error codes start from S5_ESTART, and their names follow the S5_Exxxx pattern. 21 | */ 22 | #define S5_ESTART 512 ///< S5 error code start. 23 | #define S5_EMISSALIGN (S5_ESTART+0) ///< data address or length is not aligned to 4k. 24 | #define S5_ETENANT_ID_NONEXIST (S5_ESTART+1) ///< tenant id does not exist. 25 | #define S5_EQUOTASET_ID_NONEXIST (S5_ESTART+2) ///< quotaset id does not exist. 26 | #define S5_ENEED_INIT (S5_ESTART+3) ///< object needs to be initialized. 27 | #define S5_ENETWORK_EXCEPTION (S5_ESTART+4) ///< network exception. 28 | #define S5_CONF_ERR (S5_ESTART+5) ///< S5 config file has a problem 29 | #define S5_BIND_ERR (S5_ESTART+6) ///< Error occurred when binding port and IP 30 | 31 | #define S5_INTERNAL_ERR (S5_ESTART+7) ///< For errors of S5 which may be caused by unfixed bugs 32 | 33 | #define S5_MQ_ERR (S5_ESTART+100) ///< For errors of S5 which may be caused by s5mq 34 | #define S5_E_NO_MASTER (S5_ESTART+101) ///< There is no master conductor in the cluster, 35 | ///< so a new worker cannot register in the cluster. 
36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /common/include/pf_event_thread.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_event_thread_h__ 2 | #define pf_event_thread_h__ 3 | #include 4 | #include "pf_event_queue.h" 5 | 6 | struct pf_thread_stats { 7 | uint64_t busy_tsc; 8 | uint64_t idle_tsc; 9 | }; 10 | 11 | /** 12 | * Pollers should always return a value of this type 13 | * indicating whether they did real work or not. 14 | */ 15 | enum pf_thread_poller_rc { 16 | PF_POLLER_IDLE, 17 | PF_POLLER_BUSY, 18 | }; 19 | 20 | class PfEventThread 21 | { 22 | public: 23 | pfqueue *event_queue; 24 | pthread_t tid; 25 | char name[32]; 26 | uint16_t poller_id; 27 | uint32_t proceessed_events; 28 | uint64_t tsc_rate; 29 | uint64_t tsc_last; 30 | pf_thread_stats stats; 31 | 32 | int (*func_priv)(int *, void *) = NULL; 33 | void *arg_v; 34 | 35 | bool inited; 36 | bool exiting = false; 37 | int init(const char* name, int queue_depth, uint16_t p_id); 38 | PfEventThread(); 39 | void destroy(); 40 | virtual ~PfEventThread(); 41 | virtual int process_event(int event_type, int arg_i, void* arg_p, void* arg_q) = 0; 42 | int start(); 43 | void stop(); 44 | void * (*thread_proc)(void* arg); 45 | 46 | int sync_invoke(std::function _f); 47 | virtual int commit_batch(){return 0;}; 48 | }; 49 | 50 | typedef void (*pf_event_fn)(void *ctx); 51 | 52 | struct thread_stat { 53 | std::string name; 54 | pthread_t tid; 55 | pf_thread_stats stats; 56 | }; 57 | 58 | struct get_stats_ctx { 59 | pf_event_fn fn; 60 | int next_thread_id; 61 | int num_threads; 62 | std::vector threads; 63 | std::vector thread_stats; 64 | uint64_t now; 65 | }; 66 | 67 | int get_thread_stats(pf_thread_stats *stats); 68 | PfEventThread * get_current_thread(); 69 | #endif // pf_event_thread_h__ 70 | -------------------------------------------------------------------------------- /common/include/pf_ioengine.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | 7 | 8 | #ifndef PUREFLASH_PF_IOENGINE_H 9 | #define PUREFLASH_PF_IOENGINE_H 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "basetype.h" 17 | 18 | class PfFlashStore; 19 | class IoSubTask; 20 | class PfAppCtx; 21 | 22 | #define MAX_AIO_DEPTH 4096 23 | 24 | struct ns_entry; 25 | uint64_t fd_get_cap(int fd); 26 | class PfIoEngine 27 | { 28 | public: 29 | std::string disk_name; 30 | PfIoEngine(const char* name):disk_name(name){}; 31 | virtual int init()=0; 32 | virtual int submit_io(struct IoSubTask* io, int64_t media_offset, int64_t media_len) = 0; 33 | virtual int submit_cow_io(struct CowTask* io, int64_t media_offset, int64_t media_len) = 0; 34 | virtual int submit_batch(){return 0;}; 35 | //virtual int poll_io(int *completions) = 0; 36 | virtual uint64_t sync_read(void *buffer, uint64_t buf_size, uint64_t offset) = 0; 37 | virtual uint64_t sync_write(void *buffer, uint64_t buf_size, uint64_t offset) = 0; 38 | virtual uint64_t get_device_cap() = 0; 39 | virtual ~PfIoEngine(){} 40 | }; 41 | 42 | #define BATCH_IO_CNT 512 43 | class PfAioEngine : public PfIoEngine 44 | { 45 | public: 46 | int fd; 47 | io_context_t aio_ctx; 48 | struct iocb* batch_iocb[BATCH_IO_CNT]; 49 | int batch_io_cnt=0; 50 | PfAppCtx *app_ctx; 51 | public: 52 | PfAioEngine(const char* name, int _fd, PfAppCtx* ctx) :PfIoEngine(name), fd(_fd), app_ctx(ctx) {}; 53 | ~PfAioEngine(); 54 | int init(); 55 | int submit_io(struct IoSubTask* io, int64_t media_offset, int64_t media_len); 56 | int submit_cow_io(struct CowTask* io, int64_t media_offset, int64_t media_len); 57 | virtual int submit_batch(); 58 | std::thread aio_poller; 59 | void polling_proc(); 60 | 61 | uint64_t sync_read(void *buffer, uint64_t buf_size, uint64_t offset); 62 | uint64_t sync_write(void *buffer, 
uint64_t buf_size, uint64_t offset); 63 | uint64_t get_device_cap(); 64 | //int poll_io(int *completions); 65 | }; 66 | 67 | 68 | BOOL is_disk_clean(PfIoEngine* eng); 69 | 70 | #endif //PUREFLASH_PF_IOENGINE_H 71 | -------------------------------------------------------------------------------- /common/include/pf_iotask.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_iotask_h__ 2 | #define pf_iotask_h__ 3 | 4 | #include 5 | #include 6 | 7 | #include "pf_message.h" 8 | #include "pf_iotask.h" 9 | #include "pf_buffer.h" 10 | 11 | class PfServerIocb; 12 | class PfClientIocb; 13 | struct BufferDescriptor; 14 | struct SubTask; 15 | struct IoSubTask; 16 | #define PF_MAX_SUBTASK_CNT 5 //1 local, 2 sync rep, 1 remote replicating, 1 rebalance 17 | struct PfIocb { 18 | public: 19 | BufferDescriptor* cmd_bd; 20 | BufferDescriptor* data_bd; 21 | BufferDescriptor* reply_bd; 22 | uint32_t task_mask; 23 | 24 | 25 | 26 | SubTask* subtasks[PF_MAX_SUBTASK_CNT]; 27 | }; 28 | 29 | struct TaskCompleteOps 30 | { 31 | 32 | void (*complete)(SubTask* t, PfMessageStatus comp_status); 33 | void (*complete_meta_ver)(SubTask* t, PfMessageStatus comp_status, uint16_t meta_ver); 34 | 35 | }; 36 | 37 | struct SubTask 38 | { 39 | PfOpCode opcode; 40 | //NOTE: any added member should be initialized either in PfDispatcher::init_mempools, or in PfServerIocb::setup_subtask 41 | PfIocb* parent_iocb; 42 | uint64_t rep_id; 43 | uint64_t store_id; 44 | uint32_t task_mask; 45 | uint32_t rep_index; //task_mask = 1 << rep_index; 46 | uint64_t submit_time; 47 | uint64_t reply_time; 48 | PfMessageStatus complete_status; 49 | TaskCompleteOps *ops = NULL; 50 | //virtual PfEventQueue* half_complete(PfMessageStatus comp_status); 51 | 52 | SubTask() :opcode(PfOpCode(0)), parent_iocb(NULL), task_mask(0), rep_index(0), 53 | submit_time(0), reply_time(0), complete_status((PfMessageStatus)0) {} 54 | }; 55 | 56 | struct IoSubTask : public SubTask 57 | { 58 | struct iovec 
uring_iov; 59 | iocb aio_cb; //aio cb to perform io, union with uring_iov 60 | 61 | IoSubTask* next;//used for chain waiting io 62 | inline void complete_read_with_zero(); 63 | 64 | IoSubTask() :next(NULL) {} 65 | 66 | }; 67 | struct CowTask : public IoSubTask { 68 | off_t src_offset; 69 | off_t dst_offset; 70 | void* buf; 71 | int size; 72 | sem_t sem; 73 | }; 74 | inline void IoSubTask::complete_read_with_zero() { 75 | // PfMessageHead* cmd = parent_iocb->cmd_bd->cmd_bd; 76 | BufferDescriptor* data_bd = parent_iocb->data_bd; 77 | 78 | memset(data_bd->buf, 0, data_bd->data_len); 79 | ops->complete(this, PfMessageStatus::MSG_STATUS_SUCCESS); 80 | } 81 | 82 | #endif // pf_iotask_h__ -------------------------------------------------------------------------------- /common/include/pf_ipcdef.h: -------------------------------------------------------------------------------- 1 | #ifndef __S5IPCDEF__ 2 | #define __S5IPCDEF__ 3 | 4 | /** 5 | * Copyright (C), 2014-2015. 6 | * @file 7 | * s5ipc C API. 8 | * 9 | * This file includes all s5ipc data structures and APIs. 10 | */ 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | #include "pf_message.h" 17 | #include 18 | #include 19 | #include 20 | 21 | #define MOE_NOTIFY_ITEMS_MAX 128 ///< moe items count. 22 | #define SEM_NAME_MOE "pf_lock-moe" ///< request lock's name of semaphore. 23 | #define SEM_NAME_MOE_ACK "pf_lock-moe-ack" ///< reply lock's name of semaphore. 24 | 25 | #define MOE_DEFAULT_FILE "/usr/tmp/.pf_shr_moe" ///< ipc's file name. 26 | 27 | /** 28 | * moe share memory type. 29 | */ 30 | typedef enum shm_type 31 | { 32 | SHM_TYPE_MOE = 0, ///< request share mem type. 33 | SHM_TYPE_MOE_ACK = 1, ///< reply share memory type 34 | SHM_TYPE_MAX 35 | } shm_type_t; 36 | 37 | /** 38 | * moe's share memory structure. 
39 | */ 40 | typedef struct moe_notify_shm 41 | { 42 | int count; 43 | pf_message_head_t msg_head[MOE_NOTIFY_ITEMS_MAX]; 44 | } moe_notify_shm_t; 45 | 46 | 47 | 48 | /** 49 | * init semaphore lock for protecting moe's share memory. 50 | * 51 | * @param[in] type share memory type. 52 | * @param[in] value the default value of lock. 53 | * @return pointer to sem_t on success. 54 | * @retval NULL SEM_FAILED the result of sem_open(). 55 | */ 56 | sem_t* s5shm_init_lock(shm_type_t type, int value); 57 | 58 | #define s5shm_release_lock(lock) sem_close(lock) ///< release lock. 59 | #define s5shm_wait(lock) sem_wait(lock) ///< wait lock. 60 | #define s5shm_post(lock) sem_post(lock) ///< post lock. 61 | 62 | /** 63 | * open the moe share memory of the given type. 64 | * 65 | * the length of share memory is sizeof(moe_notify_shm_t). 66 | * @param[in] type share memory type. 67 | * @return pointer to share memory on success. 68 | * @retval NULL MAP_FAILED the result of mmap(). 69 | */ 70 | void* s5shm_open(shm_type_t type); 71 | 72 | 73 | /** 74 | * close the moe share memory. 75 | * 76 | * @param[in] shrmem pointer to share memory. 77 | * @param[in] len the length of share memory. 78 | * @return the result of munmap(). 79 | */ 80 | int s5shm_close(void* shrmem, size_t len); 81 | 82 | 83 | /** 84 | * sync the moe share memory. 85 | * 86 | * @param[in] start start address of the share memory to sync. 87 | * @param[in] len the length of the share memory to sync. 88 | * @return the result of msync(). 
89 | */ 90 | int s5shm_sync(void* start, size_t len); 91 | 92 | 93 | #ifdef __cplusplus 94 | } 95 | #endif 96 | 97 | #endif /*__S5IPCDEF__*/ 98 | 99 | -------------------------------------------------------------------------------- /common/include/pf_lock.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_lock_h__ 2 | #define pf_lock_h__ 3 | #include 4 | 5 | class AutoSpinLock { 6 | pthread_spinlock_t* lock; 7 | public: 8 | inline AutoSpinLock(pthread_spinlock_t* l){ 9 | this->lock = l; 10 | pthread_spin_lock(lock); 11 | } 12 | inline ~AutoSpinLock() { 13 | pthread_spin_unlock(lock); 14 | } 15 | 16 | }; 17 | 18 | class AutoMutexLock { 19 | pthread_mutex_t* lock; 20 | public: 21 | inline AutoMutexLock(pthread_mutex_t* l){ 22 | this->lock = l; 23 | pthread_mutex_lock(lock); 24 | } 25 | inline ~AutoMutexLock() { 26 | pthread_mutex_unlock(lock); 27 | } 28 | 29 | }; 30 | #endif // pf_lock_h__ 31 | -------------------------------------------------------------------------------- /common/include/pf_log.h: -------------------------------------------------------------------------------- 1 | #ifndef _S5LOG_H_ 2 | #define _S5LOG_H_ 3 | 4 | /** 5 | * Copyright (C), 2014-2019. 6 | * @file 7 | * s5log macros. 8 | * 9 | * This file defines s5log's macros. 10 | */ 11 | 12 | void s5log(int level, const char * format, ...); 13 | 14 | #define S5LOG_LEVEL_FATAL 0 15 | #define S5LOG_LEVEL_ERROR 1 16 | #define S5LOG_LEVEL_WARN 2 17 | #define S5LOG_LEVEL_INFO 3 18 | #define S5LOG_LEVEL_DEBUG 4 19 | /** 20 | * log the fatal type information. 21 | */ 22 | #define S5LOG_FATAL(fmt,args...) \ 23 | s5log(S5LOG_LEVEL_FATAL, fmt "(%s:%d:%s) " , ##args, __FILE__ , __LINE__ , __FUNCTION__ ) 24 | 25 | /** 26 | * log the error type information. 27 | */ 28 | #define S5LOG_ERROR(fmt,args...) \ 29 | s5log(S5LOG_LEVEL_ERROR, fmt "(%s:%d:%s) " , ##args, __FILE__ , __LINE__ , __FUNCTION__ ) 30 | 31 | /** 32 | * log the warn type information. 
33 | */ 34 | #define S5LOG_WARN(fmt,args...) \ 35 | s5log(S5LOG_LEVEL_WARN, fmt "(%s:%d:%s) " , ##args, __FILE__ , __LINE__ , __FUNCTION__ ) 36 | 37 | 38 | /** 39 | * log the info type information. 40 | */ 41 | #define S5LOG_INFO(fmt,args...) \ 42 | s5log(S5LOG_LEVEL_INFO, fmt "(%s:%d:%s) " , ##args, __FILE__ , __LINE__ , __FUNCTION__ ) 43 | 44 | 45 | /** 46 | * log the debug type information. 47 | */ 48 | #define S5LOG_DEBUG(fmt,args...) \ 49 | s5log(S5LOG_LEVEL_DEBUG, fmt "(%s:%d:%s) " , ##args, __FILE__ , __LINE__ , __FUNCTION__ ) 50 | 51 | 52 | #endif //_S5LOG_H_ 53 | -------------------------------------------------------------------------------- /common/include/pf_mempool.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_mempool_h__ 2 | #define pf_mempool_h__ 3 | #include 4 | #include 5 | 6 | #include "pf_fixed_size_queue.h" 7 | #include "pf_lock.h" 8 | #include "spdk/env.h" 9 | 10 | template 11 | class ObjectMemoryPool 12 | { 13 | public: 14 | pthread_spinlock_t lock; 15 | PfFixedSizeQueue free_obj_queue; 16 | U* data; 17 | int obj_count; 18 | 19 | public: 20 | ObjectMemoryPool() :data(NULL), obj_count(0) {} 21 | int init(int cap) { 22 | int rc = 0; 23 | obj_count = cap; 24 | rc = pthread_spin_init(&lock, 0); 25 | if (rc) 26 | return -rc; 27 | rc = free_obj_queue.init(cap); 28 | if(rc) 29 | { 30 | S5LOG_ERROR("Failed init queue in memory pool, rc:%d", rc); 31 | goto release1; 32 | } 33 | data = (U*)calloc(cap, sizeof(U)); 34 | if(data == NULL) 35 | { 36 | rc = -ENOMEM; 37 | S5LOG_ERROR("Failed alloc memory pool, rc:%d, count:%d, size:%d", rc, sizeof(U), cap); 38 | goto release2; 39 | } 40 | for(int i=0;ibuf_size = buf_size; } 102 | void* alloc(size_t s, bool dma_buf) { 103 | if (dma_buf) 104 | return spdk_dma_zmalloc(s, 4096, NULL); 105 | else 106 | return memalign(4096, s); 107 | } 108 | void free(void* p, bool dma_buf) { 109 | if (dma_buf) 110 | spdk_dma_free(p); 111 | else 112 | ::free(p); 113 | } 114 | 
private: 115 | size_t buf_size; 116 | }; 117 | #endif // pf_mempool_h__ 118 | -------------------------------------------------------------------------------- /common/include/pf_mq_pack_unpack.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ===================================================================================== 3 | * 4 | * Filename: s5mq_pack_unpack.h 5 | * 6 | * Description: 7 | * 8 | * Version: 1.0 9 | * Created: 2015年09月28日 14时20分50秒 10 | * Revision: none 11 | * Compiler: gcc 12 | * 13 | * Author: FanXiaoGuang (), solar_ambitious@126.com 14 | * Organization: 15 | * 16 | * ===================================================================================== 17 | */ 18 | #ifndef __S5MQ_PACK_UNPACk_H_H 19 | #define __S5MQ_PACK_UNPACk_H_H 20 | 21 | #include 22 | int pack_s5msg_to_zmsg(const pf_message_t *s5msg, zmsg_t *zmsg); 23 | int unpack_zmsg_to_s5msg(zmsg_t *zmsg, pf_message_t *s5msg); 24 | 25 | int pack_worker_to_zmsg(const worker_self_t *worker, zmsg_t *zmsg); 26 | int unpack_zmsg_to_worker(zmsg_t *zmsg, worker_self_t *worker); 27 | 28 | int pack_cndct_to_zmsg(const cndct_self_t *cndct, zmsg_t *zmsg); 29 | int unpack_zmsg_to_cndct(zmsg_t *zmsg, cndct_self_t *cndct); 30 | 31 | int pack_mqmsg_head(mq_head_t *msghead, zmsg_t *zmsg); 32 | int unpack_mqmsg_head(zmsg_t *zmsg, mq_head_t *msghead); 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /common/include/pf_poller.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_poller_h__ 2 | #define pf_poller_h__ 3 | #include 4 | #include 5 | 6 | #include "pf_event_queue.h" 7 | #include "pf_mempool.h" 8 | 9 | typedef void (*epoll_evt_handler)(int fd, uint32_t event, void* user_arg); 10 | struct PollerFd 11 | { 12 | int fd; 13 | struct epoll_event events_to_watch; 14 | epoll_evt_handler handler; 15 | void* cbk_arg; 16 | }; 17 | class PfPoller 18 | { 19 | 
public: 20 | int epfd; 21 | struct PfEventQueue ctrl_queue; 22 | ObjectMemoryPool desc_pool; 23 | pthread_t tid; 24 | char name[32]; 25 | int max_fd; 26 | struct PfRdmaDevContext *dev_ctx; 27 | 28 | PfPoller(); 29 | ~PfPoller(); 30 | int init(const char* name, int max_fd_count); 31 | int add_fd(int fd, uint32_t events, epoll_evt_handler callback, void* callback_data); 32 | int del_fd(int fd); 33 | void destroy(); 34 | void run(); 35 | static void* thread_entry(void* arg); 36 | private: 37 | int async_add_fd(int fd, uint32_t events, epoll_evt_handler callback, void* callback_data); 38 | int async_del_fd(int fd); 39 | }; 40 | 41 | #endif // pf_poller_h__ 42 | -------------------------------------------------------------------------------- /common/include/pf_tcp_connection.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_tcp_connection_h__ 2 | #define pf_tcp_connection_h__ 3 | #include "pf_message.h" 4 | #include "pf_connection.h" 5 | #include "pf_event_queue.h" 6 | #include "pf_buffer.h" 7 | #include "pf_poller.h" 8 | #include "pf_utils.h" 9 | 10 | class PfPoller; 11 | class PfClientVolume; 12 | class BufferDescriptor; 13 | 14 | class PfTcpConnection : public PfConnection 15 | { 16 | public: 17 | PfTcpConnection(bool is_client); 18 | virtual ~PfTcpConnection(); 19 | virtual int post_recv(BufferDescriptor* buf); 20 | virtual int post_send(BufferDescriptor* buf); 21 | virtual int post_read(BufferDescriptor* buf); 22 | virtual int post_write(BufferDescriptor* buf); 23 | virtual int do_close(); 24 | 25 | int start_send(BufferDescriptor* bd); 26 | int start_send(BufferDescriptor* bd, void* buf); 27 | int start_recv(BufferDescriptor* bd); 28 | int start_recv(BufferDescriptor* bd, void* buf); 29 | 30 | static void on_send_q_event(int fd, uint32_t event, void* c); 31 | static void on_recv_q_event(int fd, uint32_t event, void* c); 32 | static void on_socket_event(int fd, uint32_t event, void* c); 33 | static PfTcpConnection* 
connect_to_server(const std::string& ip, int port, PfPoller *poller, 34 | uint64_t vol_id, int& io_depth, int timeout_sec); 35 | 36 | int init(int sock_fd, PfPoller *poller, int send_q_depth, int recv_q_depth); 37 | 38 | int socket_fd; 39 | PfPoller *poller; 40 | 41 | void* recv_buf; 42 | int recved_len; ///< how many has received. 43 | int wanted_recv_len; ///< want received length. 44 | BufferDescriptor* recv_bd; 45 | 46 | void* send_buf; 47 | int sent_len; ///< how many has sent 48 | int wanted_send_len; ///< want to send length of data. 49 | BufferDescriptor* send_bd; 50 | 51 | BOOL readable; ///< socket buffer have data to read yes or no. 52 | BOOL writeable; ///< socket buffer can send data yes or no. 53 | 54 | BOOL need_reconnect; 55 | 56 | PfEventQueue recv_q; 57 | PfEventQueue send_q; 58 | 59 | bool is_client; //is this a client side connection 60 | 61 | private: 62 | int do_receive(); 63 | int do_send(); 64 | int rcv_with_error_handle(); 65 | int send_with_error_handle(); 66 | void flush_wr(); 67 | }; 68 | 69 | #endif // pf_tcp_connection_h__ 70 | -------------------------------------------------------------------------------- /common/include/pf_thread.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_thread_h__ 2 | #define pf_thread_h__ 3 | 4 | #include 5 | #include 6 | #include "pf_utils.h" 7 | #include "spdk/nvme.h" 8 | #include "spdk/util.h" 9 | #include "spdk/env.h" 10 | #include "spdk/stdinc.h" 11 | #include "spdk/version.h" 12 | #include "spdk/assert.h" 13 | #include "spdk/init.h" 14 | #include "spdk/log.h" 15 | #include "spdk/thread.h" 16 | #include "spdk/trace.h" 17 | #include "spdk/string.h" 18 | #include "spdk/scheduler.h" 19 | #include "spdk/rpc.h" 20 | #include "spdk/util.h" 21 | #include "spdk/string.h" 22 | #include "rte_mempool.h" 23 | 24 | /* 25 | * try spdk thread furture 26 | */ 27 | 28 | struct pf_thread_context { 29 | bool spdk_engine; 30 | void *(*f)(void *); 31 | void *arg; 32 | 
pthread_attr_t *a; 33 | pthread_t *t; 34 | bool affinitize; 35 | uint32_t core; 36 | int ret; 37 | }; 38 | 39 | 40 | void pf_thread_create(struct pf_thread_context *tc); 41 | 42 | void pf_thread_init(struct pf_thread_context *tc, void *(*f)(void *), void *arg, bool spdk_engine); 43 | 44 | void pf_thread_ptreahd_att(struct pf_thread_context *tc, pthread_t *tid, pthread_attr_t *a); 45 | 46 | void pf_thread_set_aff(struct pf_thread_context *tc, uint32_t core); 47 | 48 | #endif -------------------------------------------------------------------------------- /common/include/pf_trace_defs.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_trace_defs_h__ 2 | #define pf_trace_defs_h__ 3 | 4 | /* Owner definitions */ 5 | #define OWNER_PFS_CIENT_IO 0x1 6 | 7 | /* Object definitions */ 8 | #define OBJECT_CLIENT_IO 0x1 9 | 10 | /* Trace group definitions */ 11 | #define TRACE_GROUP_CLIENT 0x1 12 | 13 | /* Client io tracepoint definitions */ 14 | 15 | #define TRACE_IO_EVENT_STAT SPDK_TPOINT_ID(TRACE_GROUP_CLIENT, 0x0) 16 | 17 | static inline uint64_t get_us_from_tsc(uint64_t tsc, uint64_t tsc_rate) 18 | { 19 | return tsc * 1000 * 1000 / tsc_rate; 20 | } 21 | 22 | #endif /* SPDK_INTERNAL_TRACE_DEFS */ -------------------------------------------------------------------------------- /common/include/pf_utf8.h: -------------------------------------------------------------------------------- 1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 2 | // vim: ts=8 sw=2 smarttab 3 | /* 4 | * Ceph - scalable distributed file system 5 | * 6 | * Copyright (C) 2011 New Dream Network 7 | * 8 | * This is free software; you can redistribute it and/or 9 | * modify it under the terms of the GNU Lesser General Public 10 | * License version 2.1, as published by the Free Software 11 | * Foundation. See file COPYING. 
12 | * 13 | */ 14 | 15 | #ifndef __S5_UTF8_H 16 | #define __S5_UTF8_H 17 | 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | /* Checks if a buffer is valid UTF-8. 23 | * Returns 0 if it is, and one plus the offset of the first invalid byte 24 | * if it is not. 25 | */ 26 | int check_utf8(const char *buf, int len); 27 | 28 | /* Checks if a null-terminated string is valid UTF-8. 29 | * Returns 0 if it is, and one plus the offset of the first invalid byte 30 | * if it is not. 31 | */ 32 | int check_utf8_cstr(const char *buf); 33 | 34 | /* Returns true if 'ch' is a control character. 35 | * We do count newline as a control character, but not NULL. 36 | */ 37 | int is_control_character(int ch); 38 | 39 | /* Checks if a buffer contains control characters. 40 | */ 41 | int check_for_control_characters(const char *buf, int len); 42 | 43 | /* Checks if a null-terminated string contains control characters. 44 | */ 45 | int check_for_control_characters_cstr(const char *buf); 46 | 47 | #ifdef __cplusplus 48 | } 49 | #endif 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /common/include/pf_zk_client.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by lele on 10/26/19. 
3 | // 4 | 5 | #ifndef PUREFLASH_S5_ZK_CLIENT_H 6 | #define PUREFLASH_S5_ZK_CLIENT_H 7 | #include "string.h" 8 | #include 9 | #include 10 | #include "zookeeper.h" 11 | 12 | class PfZkClient { 13 | public: 14 | PfZkClient(){ zkhandle = NULL; } 15 | ~PfZkClient(); 16 | int init(const char* zk_ip, int zk_timeout, const char* cluster_name); 17 | int create_node(const std::string& node_path, bool is_ephemeral, const char* node_data); 18 | int delete_node(const std::string& node_path); 19 | int wait_lock(const std::string& lock_path, const char* myid); 20 | std::string get_data_port(int store_id, int port_idx); 21 | int watch_disk_owner(const char* disk_uuid, std::function on_new_owner); 22 | //members: 23 | zhandle_t *zkhandle; 24 | std::string cluster_name; 25 | }; 26 | 27 | #endif //PUREFLASH_S5_ZK_CLIENT_H 28 | -------------------------------------------------------------------------------- /common/log4crc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 0 8 | 9 | 0 10 | 1 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /common/src/libs5mq.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/common/src/libs5mq.c -------------------------------------------------------------------------------- /common/src/pf_app_ctx.cpp: -------------------------------------------------------------------------------- 1 | #include "pf_app_ctx.h" 2 | 3 | // g_app_ctx has global info, such as rdma device info 4 | PfAppCtx* g_app_ctx=NULL; 5 | 6 | bool spdk_engine = false; 7 | 8 | 9 | 10 | void spdk_engine_set(bool use_spdk) 11 | { 12 | spdk_engine = use_spdk; 13 | } 14 | 
-------------------------------------------------------------------------------- /common/src/pf_buffer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "pf_buffer.h" 5 | 6 | #include "spdk/env.h" 7 | 8 | using namespace std; 9 | 10 | int BufferPool::init(size_t buffer_size, int count) 11 | { 12 | int rc = 0; 13 | Cleaner clean; 14 | this->buf_size = buffer_size; 15 | this->buf_count = count; 16 | memset(mrs, 0, 4*sizeof(struct ibv_mr*)); 17 | rc = free_bds.init(count); 18 | if(rc != 0) 19 | throw std::runtime_error(format_string("init memory pool failed, rc:%d", rc)); 20 | clean.push_back([this](){free_bds.destroy(); }); 21 | if (dma_buffer_used) { 22 | data_buf = spdk_dma_zmalloc(buffer_size*count, 4096, NULL); 23 | if(data_buf == NULL) 24 | throw std::runtime_error(format_string("Failed to alloc memory of:%d bytes", buffer_size*count)); 25 | clean.push_back([this](){ ::spdk_dma_free(data_buf); }); 26 | } else { 27 | data_buf = memalign(4096, buffer_size*count); 28 | if(data_buf == NULL) 29 | throw std::runtime_error(format_string("Failed to alloc memory of:%d bytes", buffer_size*count)); 30 | clean.push_back([this](){ ::free(data_buf); }); 31 | } 32 | 33 | data_bds = (BufferDescriptor*)calloc(count, sizeof(BufferDescriptor)); 34 | if(data_bds == NULL) 35 | throw std::runtime_error(format_string("Failed to alloc memory of:%d bytes", count * sizeof(BufferDescriptor))); 36 | clean.push_back([this](){ ::free(data_bds); }); 37 | for(int i = 0; i < count; i++) 38 | { 39 | data_bds[i].buf = (char*)data_buf + buffer_size * i; 40 | data_bds[i].buf_capacity = (int)buffer_size; 41 | data_bds[i].owner_pool = this; 42 | free_bds.enqueue(&data_bds[i]); 43 | } 44 | clean.cancel_all(); 45 | return 0; 46 | } 47 | 48 | 49 | void BufferPool::destroy() 50 | { 51 | ::free(data_bds); 52 | if (dma_buffer_used) { 53 | spdk_dma_free(data_buf); 54 | } else { 55 | ::free(data_buf); 56 | } 57 | 58 | 
free_bds.destroy(); 59 | } 60 | 61 | int BufferPool::rmda_register_mr(struct ibv_pd* pd, int idx, int access_mode) 62 | { 63 | if (mrs[idx] != NULL) 64 | { 65 | S5LOG_ERROR("pool->mrs[%d] is not NULL", idx); 66 | return -EEXIST; 67 | } 68 | struct ibv_mr *mr = mrs[idx] = ibv_reg_mr(pd, data_buf, buf_size * buf_count, access_mode); 69 | if (!mr) { 70 | S5LOG_ERROR("ibv_reg_mr failed, idx;%d, errno:%d", idx, errno); 71 | return -errno; 72 | } 73 | for (int i = 0 ;i < buf_count; i++) { 74 | data_bds[i].mrs[idx] = mr; 75 | } 76 | 77 | return 0; 78 | } 79 | 80 | void BufferPool::rmda_unregister_mr() 81 | { 82 | for (int i = 0 ; i < 4; i++) { 83 | if (mrs[i] != NULL) 84 | ibv_dereg_mr(mrs[i]); 85 | } 86 | 87 | return ; 88 | } 89 | 90 | const char* WcStatusToStr(WcStatus s) { 91 | switch(s){ 92 | case WC_SUCCESS: 93 | return "WC_SUCCESS"; 94 | case WC_FLUSH_ERR: 95 | return "WC_FLUSH_ERR"; 96 | } 97 | S5LOG_ERROR("Unknown WcStatus:%d", s); 98 | return "Unknown"; 99 | } 100 | const char* OpCodeToStr(WrOpcode op) { 101 | switch(op) { 102 | case TCP_WR_SEND: 103 | return "TCP_WR_SEND"; 104 | case TCP_WR_RECV: 105 | return "TCP_WR_RECV"; 106 | case RDMA_WR_SEND: 107 | return "RDMA_WR_SEND"; 108 | case RDMA_WR_RECV: 109 | return "RDMA_WR_RECV"; 110 | case RDMA_WR_WRITE: 111 | return "RDMA_WR_WRITE"; 112 | case RDMA_WR_READ: 113 | return "RDMA_WR_READ"; 114 | } 115 | S5LOG_ERROR("Unknown op code:%d", op); 116 | return "Unknown"; 117 | } 118 | -------------------------------------------------------------------------------- /common/src/pf_connection.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "pf_connection.h" 6 | #include "pf_app_ctx.h" 7 | #include "pf_message.h" 8 | 9 | int PfConnection::total_count=0; 10 | int PfConnection::closed_count=0; 11 | int PfConnection::released_count=0; 12 | 13 | PfConnection::PfConnection():ref_count(0),master(NULL), state(0), on_destroy(NULL) 14 | { 
15 | total_count++; 16 | } 17 | 18 | PfConnection::~PfConnection() 19 | { 20 | released_count++; 21 | } 22 | 23 | int PfConnection::close() 24 | { 25 | if (__sync_val_compare_and_swap(&state, CONN_OK, CONN_CLOSED) != CONN_OK) 26 | { 27 | return 0;//connection already closed 28 | } 29 | 30 | S5LOG_INFO("Close connection conn:%p, %s", this, connection_info.c_str()); 31 | closed_count++; 32 | do_close(); 33 | if (on_close) 34 | on_close(this); 35 | close_time = now_time_usec(); 36 | return 0; 37 | } 38 | 39 | int parse_net_address(const char* ipv4, unsigned short port, /*out*/struct sockaddr_in* ipaddr) 40 | { 41 | struct addrinfo *addr; 42 | int rc = getaddrinfo(ipv4, NULL, NULL, &addr); 43 | if (rc) 44 | { 45 | S5LOG_ERROR("Failed to getaddrinfo: %s, %s", ipv4, gai_strerror(rc)); 46 | return -1; 47 | } 48 | *ipaddr = *(struct sockaddr_in*)addr->ai_addr; 49 | ipaddr->sin_port = htons(port); 50 | freeaddrinfo(addr); 51 | return rc; 52 | } 53 | 54 | int PfConnection::send_heartbeat() 55 | { 56 | S5LOG_FATAL("send_heartbeat not implemented"); 57 | return 0; 58 | } 59 | 60 | #define C_NAME(x) case x: return #x; 61 | const char* ConnState2Str(int conn_state) 62 | { 63 | static __thread char buf[64]; 64 | 65 | switch(conn_state){ 66 | C_NAME(CONN_INIT) 67 | C_NAME(CONN_OK) 68 | C_NAME(CONN_CLOSED) 69 | C_NAME(CONN_CLOSING) 70 | default: 71 | sprintf(buf, "%d", conn_state); 72 | return buf; 73 | } 74 | } -------------------------------------------------------------------------------- /common/src/pf_connection_pool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "pf_connection_pool.h" 3 | #include "pf_connection.h" 4 | #include "pf_tcp_connection.h" 5 | #include "pf_rdma_connection.h" 6 | 7 | using namespace std; 8 | 9 | int PfConnectionPool::init(int size, PfPoller* poller, void* owner, uint64_t vol_id, int io_depth, enum connection_type type, 10 | work_complete_handler _handler, conn_close_handler 
close_handler) 11 | { 12 | pool_size = size; 13 | this->poller = poller; 14 | this->owner = owner; 15 | this->io_depth = io_depth; 16 | on_work_complete = _handler; 17 | on_conn_closed = close_handler; 18 | this->vol_id = vol_id; 19 | this->conn_type = type; 20 | return 0; 21 | } 22 | 23 | 24 | PfConnection* PfConnectionPool::get_conn(const std::string& ip, enum connection_type conn_type) noexcept 25 | { 26 | std::lock_guard _l(mtx); 27 | auto pos = ip_id_map.find(ip); 28 | if (pos != ip_id_map.end()) { 29 | auto c = pos->second; 30 | if(c->state == CONN_OK) 31 | return c; 32 | else { 33 | S5LOG_WARN("Connection:%s in state:%s, will reconnect.", c->connection_info.c_str(), ConnState2Str(c->state)); 34 | ip_id_map.erase(pos); 35 | c->dec_ref(); 36 | } 37 | } 38 | 39 | try { 40 | S5LOG_INFO("Connect to server %s://%s, io_depth:%d ...", conn_type == TCP_TYPE?"TCP":"RDMA", ip.c_str(), io_depth); 41 | 42 | if (conn_type == TCP_TYPE) { 43 | PfTcpConnection *c = PfTcpConnection::connect_to_server(ip, 49162, poller, vol_id, io_depth, 4/*connection timeout*/); 44 | c->add_ref(); //this ref hold by pool, decreased when remove from connection pool 45 | c->on_work_complete = on_work_complete; 46 | c->on_close = on_conn_closed; 47 | c->master = this->owner; 48 | c->conn_type = TCP_TYPE; 49 | c->io_depth = io_depth; 50 | ip_id_map[ip] = c; 51 | return c; 52 | } 53 | else if (conn_type == RDMA_TYPE) { 54 | #ifdef WITH_RDMA 55 | PfRdmaConnection *c = PfRdmaConnection::connect_to_server(ip, 49160, poller, vol_id, io_depth, 4/*connection timeout*/); 56 | c->add_ref(); //this ref hold by pool, decreased when remove from connection pool 57 | c->on_work_complete = on_work_complete; 58 | c->on_close = on_conn_closed; 59 | c->master = this->owner; 60 | c->conn_type = RDMA_TYPE; 61 | c->io_depth = io_depth; 62 | ip_id_map[ip] = c; 63 | return c; 64 | #else 65 | S5LOG_FATAL("RDMA not enabled, please compile with -DWITH_RDMA=1"); 66 | #endif 67 | } 68 | } 69 | catch(std::exception& e) { 70 
| S5LOG_ERROR("Error connect to:%s, exception:%s", ip.c_str(), e.what()); 71 | } 72 | return NULL; 73 | } 74 | 75 | void PfConnectionPool::close_all() 76 | { 77 | S5LOG_INFO("Close all connection in pool, %d connections to release", ip_id_map.size()); 78 | 79 | for(auto it = ip_id_map.begin(); it != ip_id_map.end(); ++it) { 80 | auto c = it->second; 81 | c->close(); 82 | c->dec_ref(); 83 | } 84 | ip_id_map.clear(); 85 | } 86 | -------------------------------------------------------------------------------- /common/src/pf_errno.cpp: -------------------------------------------------------------------------------- 1 | #include "pf_errno.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | std::string cpp_strerror(int err) 8 | { 9 | char buf[128]; 10 | 11 | if (err < 0) 12 | err = -err; 13 | std::ostringstream oss; 14 | oss << "(" << err << ") " << strerror_r(err, buf, sizeof(buf)); 15 | 16 | return oss.str(); 17 | } 18 | -------------------------------------------------------------------------------- /common/src/pf_iotask.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/common/src/pf_iotask.cpp -------------------------------------------------------------------------------- /common/src/pf_message.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "pf_message.h" 4 | 5 | 6 | const char* PfOpCode2Str(PfOpCode op) 7 | { 8 | switch (op) 9 | { 10 | case S5_OP_READ: 11 | return "S5_OP_READ"; 12 | case S5_OP_WRITE: 13 | return "S5_OP_WRITE"; 14 | case S5_OP_REPLICATE_WRITE: 15 | return "S5_OP_REPLICATE_WRITE"; 16 | case S5_OP_COW_READ: 17 | return "S5_OP_COW_READ"; 18 | case S5_OP_COW_WRITE: 19 | return "S5_OP_COW_WRITE"; 20 | case S5_OP_RECOVERY_READ: 21 | return "S5_OP_RECOVERY_READ"; 22 | case S5_OP_RECOVERY_WRITE: 23 | return "S5_OP_RECOVERY_WRITE"; 24 | case S5_OP_HEARTBEAT: 25 | return 
"S5_OP_HEARTBEAT"; 26 | 27 | default: 28 | return "UNKNOWN_TYPE"; 29 | break; 30 | } 31 | } 32 | 33 | #define C_NAME(x) case x: return #x; 34 | const char* PfMessageStatus2Str(PfMessageStatus msg_st) 35 | { 36 | static __thread char buf[64]; 37 | switch(msg_st) { 38 | C_NAME(MSG_STATUS_SUCCESS) 39 | C_NAME(MSG_STATUS_INVALID_OPCODE) 40 | C_NAME(MSG_STATUS_INVALID_FIELD) 41 | C_NAME(MSG_STATUS_CMDID_CONFLICT) 42 | C_NAME(MSG_STATUS_DATA_XFER_ERROR) 43 | C_NAME(MSG_STATUS_POWER_LOSS) 44 | C_NAME(MSG_STATUS_INTERNAL) 45 | C_NAME(MSG_STATUS_ABORT_REQ) 46 | C_NAME(MSG_STATUS_INVALID_IO_TIMEOUT) 47 | C_NAME(MSG_STATUS_INVALID_STATE) 48 | C_NAME(MSG_STATUS_LBA_RANGE) 49 | C_NAME(MSG_STATUS_NS_NOT_READY) 50 | C_NAME(MSG_STATUS_NOT_PRIMARY) 51 | C_NAME(MSG_STATUS_NOSPACE) 52 | C_NAME(MSG_STATUS_READONLY) 53 | C_NAME(MSG_STATUS_CONN_LOST) 54 | C_NAME(MSG_STATUS_AIOERROR) 55 | C_NAME(MSG_STATUS_ERROR_HANDLED) 56 | C_NAME(MSG_STATUS_ERROR_UNRECOVERABLE) 57 | C_NAME(MSG_STATUS_AIO_TIMEOUT) 58 | C_NAME(MSG_STATUS_REPLICATING_TIMEOUT) 59 | C_NAME(MSG_STATUS_NODE_LOST) 60 | C_NAME(MSG_STATUS_LOGFAILED) 61 | C_NAME(MSG_STATUS_METRO_REPLICATING_FAILED) 62 | C_NAME(MSG_STATUS_RECOVERY_FAILED) 63 | C_NAME(MSG_STATUS_SSD_ERROR) 64 | C_NAME(MSG_STATUS_REP_TO_PRIMARY) 65 | C_NAME(MSG_STATUS_NO_RESOURCE) 66 | C_NAME(MSG_STATUS_DEGRADE) 67 | C_NAME(MSG_STATUS_REOPEN) 68 | default: 69 | sprintf(buf, "Unknown status:%d", msg_st); 70 | return buf; 71 | } 72 | // MSG_STATUS_DEGRADE = 0x2000, 73 | // MSG_STATUS_REOPEN = 0x4000, 74 | 75 | } 76 | 77 | -------------------------------------------------------------------------------- /common/src/pf_performance_profiler.c: -------------------------------------------------------------------------------- 1 | 2 | #include "gperftools/heap-profiler.h" 3 | #include "gperftools/profiler.h" 4 | #include "pf_performance_profiler.h" 5 | 6 | #define SA_NODEFER 0x40000000 7 | #define SA_RESETHAND 0x80000000 8 | 9 | #define SA_NOMASK SA_NODEFER 10 | #define 
SA_ONESHOT SA_RESETHAND 11 | 12 | #define APP_PREFIX_LEN 64 13 | #define PROFILE_LEN 512 14 | char heap_profile_name_buf[PROFILE_LEN] = {}; 15 | char cpu_profile_name_buf[PROFILE_LEN] = {}; 16 | 17 | void sig_start_profile(int sig) 18 | { 19 | printf("Receive signal 'SIGUSR1'\n"); 20 | #ifdef HEAP_PROFILE 21 | HeapProfilerStart(heap_profile_name_buf); 22 | #endif 23 | #ifdef CPU_PROFILE 24 | ProfilerStart(cpu_profile_name_buf); 25 | #endif 26 | } 27 | 28 | void sig_stop_profile(int sig) 29 | { 30 | printf("Receive signal 'SIGUSR2'\n"); 31 | #ifdef HEAP_PROFILE 32 | HeapProfilerStop(); 33 | #endif 34 | #ifdef CPU_PROFILE 35 | ProfilerStop(); 36 | #endif 37 | } 38 | 39 | int pf_profiler_init(const char* prefix) 40 | { 41 | int prefix_len = strlen(prefix); 42 | if (prefix_len >= APP_PREFIX_LEN) 43 | { 44 | S5LOG_ERROR("Prefix for profile file name is too long!\n"); 45 | return -EINVAL; 46 | } 47 | sprintf(heap_profile_name_buf, "/tmp/%s_heap.pro", prefix); 48 | sprintf(cpu_profile_name_buf, "/tmp/%s_cpu.pro", prefix); 49 | 50 | struct sigaction act1; 51 | act1.sa_handler = sig_start_profile; 52 | sigemptyset(&act1.sa_mask); 53 | act1.sa_flags = SA_RESETHAND; 54 | int ret = sigaction(SIGUSR1, &act1, 0); 55 | if (ret < 0) 56 | { 57 | S5LOG_ERROR("Failed to register signal 'SIGUSR1' to op: heap_profile start!\n"); 58 | return -errno; 59 | } 60 | 61 | struct sigaction act2; 62 | act2.sa_handler = sig_stop_profile; 63 | sigemptyset(&act2.sa_mask); 64 | act2.sa_flags = SA_RESETHAND; 65 | ret = sigaction(SIGUSR2, &act2, 0); 66 | if (ret < 0) 67 | { 68 | S5LOG_ERROR("Failed to register signal 'SIGUSR2' to op: heap_profile stop!"); 69 | return -errno; 70 | } 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /common/src/pf_thread.cpp: -------------------------------------------------------------------------------- 1 | #include "pf_thread.h" 2 | 3 | /* Thread entry point passed to spdk_env_thread_launch_pinned(): calls the user function stored in the context. */ 4 | int spdk_thread_run(void *_arg) 5 | { 6 | struct pf_thread_context
*arg = (struct pf_thread_context *)_arg; 7 | arg->f(arg->arg); 8 | return 0; /* the function is declared int, so it must return a value; the user function's result is not propagated */ 9 | } 10 | /* Creates the pthread described by the context; the pthread_create() status is stored in arg->ret. */ 11 | void* p_thread_run(void *_arg) 12 | { 13 | struct pf_thread_context *arg = (struct pf_thread_context *)_arg; 14 | arg->ret = pthread_create(arg->t, arg->a, arg->f, arg->arg); 15 | return NULL; /* status is reported through arg->ret, not through the return value */ 16 | } 17 | 18 | 19 | void pf_thread_create(struct pf_thread_context *tc) 20 | { 21 | if (tc->spdk_engine) { 22 | if (tc->affinitize) { 23 | tc->ret = spdk_env_thread_launch_pinned(tc->core, spdk_thread_run, tc); 24 | }else{ 25 | spdk_call_unaffinitized(p_thread_run, tc); 26 | } 27 | }else{ 28 | p_thread_run(tc); 29 | } 30 | } 31 | 32 | void pf_thread_init(struct pf_thread_context *tc, void *(*f)(void *), void *arg, bool spdk_engine) 33 | { 34 | tc->f = f; 35 | tc->arg = arg; 36 | tc->ret = 0; 37 | tc->spdk_engine = spdk_engine; 38 | } 39 | 40 | void pf_thread_ptreahd_att(struct pf_thread_context *tc, pthread_t *tid, pthread_attr_t *a) 41 | { 42 | tc->t = tid; 43 | tc->a = a; 44 | } 45 | 46 | void pf_thread_set_aff(struct pf_thread_context *tc, uint32_t core) 47 | { 48 | tc->affinitize = true; 49 | tc->core = core; 50 | } -------------------------------------------------------------------------------- /common/src/special_vol_mgr.c: -------------------------------------------------------------------------------- 1 | #include "special_vol_mgr.h" 2 | #include 3 | 4 | int pf_mount(const char *source 5 | , const char *target 6 | , const char *filesystemtype 7 | , unsigned long mountflags 8 | , const void *data) 9 | { 10 | return mount(source, target, filesystemtype, mountflags, data); 11 | } 12 | 13 | int pf_remount(const char *source 14 | , const char *target 15 | , unsigned long mountflags 16 | , const void *data) 17 | { 18 | return mount(source, target, NULL, MS_REMOUNT | mountflags, data); 19 | } 20 | 21 | int pf_umount(const char *target, int flag) 22 | { 23 | if (flag) 24 | { 25 | return umount2(target, flag); 26 | } 27 | else 28 | { 29 | return umount(target); 30 | } 31 | } 32 | 33 |
-------------------------------------------------------------------------------- /common/src/spy_client.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include // For inet_addr() 10 | #include // For sockaddr_in 11 | #include 12 | #include 13 | #include 14 | using namespace std; 15 | 16 | void usageExit() 17 | { 18 | printf("Usage: spy [var_name] ...\n cmd can be 'list', 'read'\n"); 19 | exit(1); 20 | } 21 | 22 | int main(int argc, const char **argv) 23 | { 24 | if(argc < 4) 25 | usageExit(); 26 | 27 | int port = atoi(argv[2]); 28 | 29 | 30 | ostringstream oss; 31 | for(int i=3;i 2 | #include "s5log.h" 3 | 4 | S5LOG_INIT("LogTest2"); 5 | 6 | TEST(LogTest, Basic2) 7 | { 8 | S5LOG_DEBUG("Debug with 1 int arg=%d", 1); 9 | S5LOG_INFO("Info with 1 int arg=%d, str arg=%s", 1, "'Hello world'"); 10 | S5LOG_WARN("Warn with no args"); 11 | S5LOG_ERROR("Error with no args"); 12 | S5LOG_TRACE("Trace with no args"); 13 | } -------------------------------------------------------------------------------- /common/unittest/conf_file/s5unittest.conf: -------------------------------------------------------------------------------- 1 | ############################################################## 2 | # unit test for common library 3 | ############################################################## 4 | [global] 5 | db_path = /etc/s5/s5meta.db 6 | 7 | [daemon.0] 8 | front_port = 3000 9 | back_port = 3000.000000 10 | front_port_for_fail = 3000 11 | -------------------------------------------------------------------------------- /common/unittest/hash/test.h: -------------------------------------------------------------------------------- 1 | /// @file test.h 2 | /// @copyright BSD 2-clause. See LICENSE.txt for the complete license text. 
3 | /// @author Dane Larsen 4 | /// @brief A _very_ simple set of functions for running tests 5 | 6 | 7 | 8 | #ifndef TEST_H 9 | #define TEST_H 10 | 11 | #include 12 | 13 | static int SUCCESS_COUNT = 0; 14 | static int FAIL_COUNT = 0; 15 | 16 | #ifdef TEST 17 | 18 | /// @brief Prints the successful test message and 19 | /// increments the number of successful 20 | /// tests 21 | #define test_success(M, ...) { fprintf(stderr, "[SUCCESS] " M "\n", ##__VA_ARGS__); SUCCESS_COUNT += 1; } 22 | 23 | /// @brief Prints the failed test message and 24 | /// increments the number of failed 25 | /// tests 26 | #define test_fail(M, ...) { fprintf(stderr, "*** FAIL *** " M "\n\n", ##__VA_ARGS__); FAIL_COUNT += 1; } 27 | 28 | /// @def test(A, M, ...) 29 | /// @brief A convenient test function 30 | /// @param A A boolean value, 31 | /// true will cause the test to be successful, 32 | /// false will cause the test to fail 33 | /// @param M The message to print upon test completion (accepts format string) 34 | /// @param ... variables for the format string 35 | #define test(A, M, ...) if(A) { test_success(M, ##__VA_ARGS__); } else { test_fail(M, ##__VA_ARGS__); } 36 | 37 | #else 38 | 39 | #define test_success(M, ...) 40 | #define test_fail(M, ...) 41 | #define test(A, M, ...) 
42 | 43 | #endif //TEST 44 | 45 | /// @brief Returns the number of tests that passed 46 | int successes() { return SUCCESS_COUNT; } 47 | /// @brief Returns the number of tests that failed 48 | int failures() { return FAIL_COUNT; } 49 | 50 | /// @brief Reports the results of all of the tests to stderr 51 | /// @returns 0 if all tests passed, otherwise the number of failed tests 52 | int report_results() 53 | { 54 | fprintf(stderr, "Test results: [%d successes, %d failures]\n", SUCCESS_COUNT, FAIL_COUNT); 55 | 56 | if(FAIL_COUNT > 0) 57 | return FAIL_COUNT; 58 | else 59 | return 0; 60 | } 61 | 62 | #endif //TEST_H 63 | -------------------------------------------------------------------------------- /common/unittest/hash/timer.h: -------------------------------------------------------------------------------- 1 | /// @file timer.h 2 | /// @copyright BSD 2-clause. See LICENSE.txt for the complete license text. 3 | /// @author Dane Larsen 4 | /// @brief Two functions for profiling function call time. 5 | 6 | #ifndef TIMER_H 7 | #define TIMER_H 8 | 9 | #include 10 | 11 | /// @brief A wrapper for getting the current time. 12 | /// @returns The current time. 13 | struct timespec snap_time() 14 | { 15 | struct timespec t; 16 | clock_gettime(CLOCK_REALTIME, &t); 17 | return t; 18 | } 19 | 20 | /// @brief Calculates the time difference between two struct timespecs 21 | /// @param t1 The first time. 22 | /// @param t2 The second time. 23 | /// @returns The difference between the two times (t2 - t1), in seconds.
24 | double get_elapsed(struct timespec t1, struct timespec t2) 25 | { 26 | double ft1 = t1.tv_sec + ((double)t1.tv_nsec / 1000000000.0); 27 | double ft2 = t2.tv_sec + ((double)t2.tv_nsec / 1000000000.0); 28 | return ft2 - ft1; 29 | } 30 | 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /common/unittest/s5_autotest_gcov_s5mq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | function check_process() 4 | { 5 | while true 6 | do 7 | echo "Testind s5mq msg send" 8 | ps -ef|grep $1 |grep -v grep 9 | if [ $? -ne 0 ] 10 | then 11 | echo "Finish test $2" 12 | break; 13 | fi 14 | sleep 1 15 | done 16 | } 17 | 18 | ./test_cndct 0 > test_cndct_send.log & 19 | sleep 1 20 | ./test_worker 0 > test_worker_send.log & 21 | 22 | check_process "test_cndct" "s5mq message send" 23 | check_process "test_worker" "s5mq message send" 24 | 25 | 26 | ./test_cndct 1 > test_cndct_send.log & 27 | sleep 1 28 | ./test_worker 1 > test_worker_send.log & 29 | 30 | check_process "test_cndct" "s5mq message asend" 31 | check_process "test_worker" "s5mq message asend" 32 | 33 | ./test_cndct 2 > test_cndct_send.log & 34 | sleep 1 35 | ./test_worker 0 > test_worker_send.log & 36 | check_process "test_cndct" "s5mq message send with no data" 37 | check_process "test_worker" "s5mq message send with no data" 38 | -------------------------------------------------------------------------------- /common/unittest/sqlite_db_create.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sqlite3 3 | 4 | cx = sqlite3.connect("test.db"); 5 | cur = cx.cursor(); 6 | 7 | cur.execute("pragma foreign_keys=on"); 8 | 9 | cur.execute("create table t_student(idx integer primary key autoincrement , bin_info blob)"); 10 | cx.commit(); 11 | 12 | cx.close(); 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- 
/common/unittest/test.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/common/unittest/test.db -------------------------------------------------------------------------------- /common/unittest/test_s5list.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/common/unittest/test_s5list.c -------------------------------------------------------------------------------- /common/unittest/test_s5mq_trans_status_by_hb_msg.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/common/unittest/test_s5mq_trans_status_by_hb_msg.c -------------------------------------------------------------------------------- /common/unittest/test_s5session.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "s5log.h" 3 | #include "cmdopt.h" 4 | #include "s5session.h" 5 | #include "s5message.h" 6 | 7 | pthread_mutex_t mutex; 8 | pthread_cond_t cond; 9 | 10 | void io_callback(void* arg) 11 | { 12 | pthread_mutex_lock(&mutex); 13 | pthread_cond_signal(&cond); 14 | pthread_mutex_unlock(&mutex); 15 | } 16 | 17 | TEST(S5Session, Read4k) 18 | { 19 | pthread_mutex_init(&mutex, NULL); 20 | pthread_cond_init(&cond, NULL); 21 | s5_message_t* msg = s5msg_create(4096); 22 | msg->head.nlba=1; 23 | msg->head.msg_type=MSG_TYPE_FLUSH_READ; 24 | s5_session_t s5session; 25 | s5session_conf_t conf; 26 | conf.retry_delay_ms = 50; 27 | conf.rge_io_depth = 256; 28 | conf.s5_io_depth = 512; 29 | conf.rge_io_max_lbas = 256; 30 | 31 | ASSERT_EQ(0, s5session_init(&s5session, "127.0.0.1", 10000, CONNECT_TYPE_STABLE, &conf)); 32 | ASSERT_EQ(0, s5session_aio_read(&s5session, msg, 
io_callback, msg)); 33 | pthread_mutex_lock(&mutex); 34 | pthread_cond_wait(&cond, &mutex); 35 | pthread_mutex_unlock(&mutex); 36 | } 37 | -------------------------------------------------------------------------------- /common/unittest/test_s5sql.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "s5_sql.h" 5 | #include "s5utils.h" 6 | #include "s5log.h" 7 | 8 | #define DB_PATH "/var/tmp/test.db" 9 | 10 | typedef struct student_binary_info 11 | { 12 | char name[32]; 13 | int age; 14 | int gender; 15 | }student_binary_info_t; 16 | 17 | typedef struct student_info 18 | { 19 | int idx; 20 | student_binary_info_t info; 21 | }student_info_t; 22 | 23 | int student_sel_cb(rd_cb_param_t* param, int n_column, char** column_values, char** column_names); 24 | 25 | int main() 26 | { 27 | int ofs = 0; 28 | student_binary_info_t bin_info[5]; 29 | int i; 30 | for (i = 0; i < 5; i++) 31 | { 32 | ofs = snprintf(bin_info[i].name, 32, "stundent_%d", i); 33 | bin_info[i].name[ofs] = 0; 34 | bin_info[i].gender = i % 2; 35 | bin_info[i].age = 20 + i; 36 | } 37 | 38 | db_t test_db; 39 | test_db.name = DB_PATH; 40 | int ret = s5c_open_db(&test_db); 41 | S5ASSERT(ret == 0); 42 | exec_sql_ctx_t sql_ctx; 43 | char sql[1024]; 44 | for (i = 0; i < 5; i++) 45 | { 46 | ofs = snprintf(sql, 1024, "insert into t_student(bin_info) values(?);"); 47 | sql[ofs] = 0; 48 | sql_ctx.sql = sql; 49 | sql_ctx.param.blob_param.blob_data = &bin_info[i]; 50 | sql_ctx.param.blob_param.blob_size = sizeof(student_binary_info_t); 51 | sql_ctx.type = SQL_BLOB_DML; 52 | ret = s5c_exec_sql(&test_db, &sql_ctx); 53 | S5ASSERT(ret == 0); 54 | } 55 | 56 | //select info from db 57 | student_info_t students[5]; 58 | ofs = snprintf(sql, 1024, "select * from t_student;"); 59 | sql[ofs] = 0; 60 | sql_ctx.type = SQL_BLOB_QUERY; 61 | sql_ctx.param.cb_param.res_buf = students; 62 | sql_ctx.param.cb_param.res_cnt = 0; 63 | 
sql_ctx.param.cb_param.buf_size = 5 * sizeof(student_info_t); 64 | sql_ctx.cb_fun = student_sel_cb; 65 | 66 | ret = s5c_exec_sql(&test_db, &sql_ctx); 67 | S5ASSERT(ret == 0); 68 | 69 | //printf info 70 | for (i = 0; i < 5; i++) 71 | { 72 | printf("Student %d Info:\n", i); 73 | printf("Id: %d\n", students[i].idx); 74 | printf("Name: %s\n", students[i].info.name); 75 | printf("Gender: %d\n", students[i].info.gender); 76 | printf("Age: %d\n", students[i].info.age); 77 | } 78 | 79 | } 80 | 81 | int student_sel_cb(rd_cb_param_t* param, int n_column, char** column_values, char** column_names) 82 | { 83 | rd_cb_param_t * rd_cb_param = ( rd_cb_param_t*)param; 84 | S5ASSERT(rd_cb_param->buf_size >= rd_cb_param->res_cnt); 85 | int idx = rd_cb_param->res_cnt; 86 | student_info_t* student = (student_info_t*)rd_cb_param->res_buf + idx ; 87 | student->idx = atoi(column_values[0]); 88 | void* read_pos = (void*)column_values[1]; 89 | int size = *((int*)read_pos); 90 | read_pos += sizeof(int); 91 | S5ASSERT(size == sizeof(student_binary_info_t)); 92 | memcpy(&student->info, read_pos, sizeof(student_binary_info_t)); 93 | rd_cb_param->res_cnt++; 94 | return 0; 95 | } 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /common/unittest/util_test.cpp: -------------------------------------------------------------------------------- 1 | #include "s5utils.h" 2 | 3 | #include 4 | 5 | TEST(S5Session, Iptest) 6 | { 7 | const char* ip0 = "www.163.com"; 8 | const char* ip1 = "192.168"; 9 | const char* ip2 = "192.168.0"; 10 | const char* ip3 = "256.168.0.1"; 11 | const char* ip4 = "192.168.0.1"; 12 | const char* ip5 = "255.255.255.255"; 13 | 14 | ASSERT_EQ(FALSE, isIpValid(ip0)); 15 | ASSERT_EQ(FALSE, isIpValid(ip1)); 16 | ASSERT_EQ(FALSE, isIpValid(ip2)); 17 | ASSERT_EQ(FALSE, isIpValid(ip3)); 18 | ASSERT_EQ(TRUE, isIpValid(ip4)); 19 | ASSERT_EQ(TRUE, isIpValid(ip5)); 20 | } 21 | 22 | int main(int argc, char* argv[]) 23 | { 24 | 
testing::InitGoogleTest(&argc, argv); 25 | return RUN_ALL_TESTS(); 26 | } 27 | 28 | -------------------------------------------------------------------------------- /deploy.md: -------------------------------------------------------------------------------- 1 | PureFlash Deployment 2 | ==================== 3 | 4 | ## Config Zookeeper 5 | 1. To make ZooKeeper respond quickly when a node goes down, we suggest changing `tickTime` and `syncLimit` in _zoo.cfg_: 6 | ``` 7 | # The number of milliseconds of each tick 8 | tickTime=300 9 | # The number of ticks that can pass between 10 | # sending a request and getting an acknowledgement 11 | syncLimit=3 12 | ``` 13 | -------------------------------------------------------------------------------- /docker-based-develope-env.txt: -------------------------------------------------------------------------------- 1 | 使用docker快速搭建开发环境 2 | ============================ 3 | 4 | 1. pull docker image 5 | 6 | ``` 7 | # docker pull pureflash/pfs-dev:1.6 8 | ``` 9 | 10 | 如果这个镜像拉取失败,可以从百度网盘下载:https://pan.baidu.com/s/1nGYIpP-WFOUXxv3vfQwPxQ?pwd=4564 11 | 然后导入: 12 | ``` 13 | # gunzip pfs-dev.tar.gz 14 | # docker load < pfs-dev.tar 15 | ``` 16 | 17 | 2. check out code 18 | ``` 19 | # mkdir ~/pf && cd ~/pf 20 | # git clone https://gitee.com/cocalele/PureFlash.git 21 | # export PFHOME=$(pwd)/PureFlash 22 | # cd PureFlash 23 | # git submodule init 24 | # git submodule update 25 | # cd .. #to pf again 26 | # git clone https://gitee.com/cocalele/jconductor.git 27 | ``` 28 | 29 | 3.
run docker 30 | ``` 31 | # docker run -ti --ulimit core=-1 --privileged --hostname pfs-d --net pfnet --ip 172.1.1.2 --rm -v ~/pf:/root/pf --name pfs-d -e TZ=Asia/Shanghai pureflash/pfs-dev:1.6 /bin/bash 32 | ``` 33 | 上面命令里的-v ~/pf:/root/pf 把刚才的源代码目录mount到了容器里面的/root/pf目录上。然后就可以参照build_and_run.txt文档里面的cmake, ant步骤分别编译PureFlash和jconductor 34 | 35 | -------------------------------------------------------------------------------- /docker/Dockerfile.base: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | LABEL version="1.0" 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | ENV TZ=Asia/Shanghai 5 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone 6 | 7 | COPY sources.list /etc/apt/sources.list 8 | RUN apt update 9 | 10 | RUN apt install -y cgdb curl gzip jq libaio1 libcurl4 libibverbs1 libicu66 libjsoncpp1 librdmacm1 libreadline5 libstdc++6 libtool liburcu6 libuuid1 tar unzip util-linux vim wget net-tools 11 | 12 | #To install mariadb-server 13 | RUN apt install -y apt-transport-https curl 14 | RUN curl -o /etc/apt/trusted.gpg.d/mariadb_release_signing_key.asc 'https://mariadb.org/mariadb_release_signing_key.asc' 15 | RUN sh -c "echo 'deb https://mirrors.aliyun.com/mariadb/repo/10.5/ubuntu focal main' >>/etc/apt/sources.list" 16 | RUN apt update 17 | RUN apt install -y mariadb-server 18 | 19 | #To install java for zk and jconductor 20 | RUN apt install -y default-jre 21 | -------------------------------------------------------------------------------- /docker/Dockerfile.base.2204: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | LABEL version="1.0" 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | ENV TZ=Asia/Shanghai 5 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone 6 | 7 | COPY apt-ali-sources-2204-arm.list /etc/apt/sources.list 8 | RUN apt update 9 | 10 | RUN apt install -y cgdb curl gzip jq libaio1
libcurl4 libibverbs1 libicu-dev libjsoncpp25 librdmacm1 readline-common libstdc++6 libtool libuuid1 tar unzip util-linux vim wget net-tools 11 | 12 | #To install mariadb-server 13 | RUN apt install -y mariadb-server 14 | 15 | #To install java for zk and jconductor 16 | RUN apt install -y default-jre 17 | -------------------------------------------------------------------------------- /docker/Dockerfile.dev: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | LABEL version="1.8" 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | ENV TZ=Asia/Shanghai 5 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone 6 | 7 | RUN apt update 8 | RUN apt install -y cgdb curl gzip jq libaio1 libcurl4 libibverbs1 libicu66 libjsoncpp1 librdmacm1 libreadline5 libstdc++6 libtool liburcu6 libuuid1 tar unzip util-linux vim wget net-tools ninja-build libcurl4-openssl-dev libcppunit-dev uuid-dev libaio-dev nasm autoconf cmake librdmacm-dev pkg-config g++ default-jdk ssh 9 | 10 | #RUN apt install -y openssh-server 11 | #RUN mkdir /run/sshd; chmod 700 /run/sshd 12 | #RUN passwd root 13 | 14 | ##To install mariadb-server 15 | #RUN apt install -y apt-transport-https curl 16 | #RUN curl -o /etc/apt/trusted.gpg.d/mariadb_release_signing_key.asc 'https://mariadb.org/mariadb_release_signing_key.asc' 17 | #RUN sh -c "echo 'deb https://mirrors.aliyun.com/mariadb/repo/10.5/ubuntu focal main' >>/etc/apt/sources.list" 18 | #RUN apt update 19 | #RUN apt install -y mariadb-server 20 | # 21 | 22 | ## to install ant 23 | RUN apt install -y ant 24 | 25 | #COPY mariadb/mariadb.cnf /etc/mysql/mariadb.cnf 26 | #COPY mariadb/50-server.cnf /etc/mysql/mariadb.conf.d/50-server.cnf 27 | #COPY pfcli /opt/pureflash/pfcli 28 | #COPY pfdd /opt/pureflash/pfdd 29 | #COPY qemu-img /opt/pureflash/qemu-img 30 | #COPY pfs /opt/pureflash/pfs 31 | #COPY fio /opt/pureflash/fio 32 | #COPY run-all.sh /opt/pureflash/run-all.sh 33 | #COPY restart-pfc.sh
/opt/pureflash/restart-pfc.sh 34 | #COPY restart-pfs.sh /opt/pureflash/restart-pfs.sh 35 | -------------------------------------------------------------------------------- /docker/Dockerfile.dev.2204: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | LABEL version="1.9.0" 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | ENV TZ=Asia/Shanghai 5 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone 6 | #COPY apt-ali-sources-2204.list /etc/apt/sources.list 7 | #COPY apt-ali-sources-2204-arm.list /etc/apt/sources.list 8 | RUN apt update 9 | RUN apt install -y python3 10 | RUN apt install -y cgdb curl gzip jq libaio1 libcurl4 libibverbs1 libicu-dev libjsoncpp25 librdmacm1 readline-common libstdc++6 libtool libuuid1 tar unzip util-linux vim wget net-tools ninja-build libcurl4-openssl-dev libcppunit-dev uuid-dev libaio-dev nasm autoconf cmake librdmacm-dev pkg-config g++ default-jdk ant meson libssl-dev ncurses-dev libnuma-dev help2man vim rsync 11 | RUN apt install -y python3 git 12 | RUN apt install -y python3-pip 13 | RUN pip install pyelftools 14 | RUN apt install -y linux-kernel-headers 15 | 16 | RUN apt install -y openssh-server ssh 17 | RUN mkdir /run/sshd; chmod 700 /run/sshd 18 | RUN sed -i 's/#Port 22/Port 6636/' /etc/ssh/sshd_config 19 | RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config 20 | #RUN passwd root 21 | 22 | ##To install mariadb-server 23 | #RUN apt install -y apt-transport-https curl 24 | #RUN curl -o /etc/apt/trusted.gpg.d/mariadb_release_signing_key.asc 'https://mariadb.org/mariadb_release_signing_key.asc' 25 | #RUN sh -c "echo 'deb https://mirrors.aliyun.com/mariadb/repo/10.5/ubuntu focal main' >>/etc/apt/sources.list" 26 | #RUN apt update 27 | #RUN apt install -y mariadb-server 28 | # 29 | 30 | ## to install ant 31 | 32 | 33 | #COPY mariadb/mariadb.cnf /etc/mysql/mariadb.cnf 34 | #COPY mariadb/50-server.cnf
/etc/mysql/mariadb.conf.d/50-server.cnf 35 | #COPY pfcli /opt/pureflash/pfcli 36 | #COPY pfdd /opt/pureflash/pfdd 37 | #COPY qemu-img /opt/pureflash/qemu-img 38 | #COPY pfs /opt/pureflash/pfs 39 | #COPY fio /opt/pureflash/fio 40 | #COPY run-all.sh /opt/pureflash/run-all.sh 41 | #COPY restart-pfc.sh /opt/pureflash/restart-pfc.sh 42 | #COPY restart-pfs.sh /opt/pureflash/restart-pfs.sh 43 | -------------------------------------------------------------------------------- /docker/apt-origin.list: -------------------------------------------------------------------------------- 1 | deb http://archive.ubuntu.com/ubuntu/ jammy main restricted universe multiverse 2 | # deb-src http://archive.ubuntu.com/ubuntu/ jammy main restricted universe multiverse 3 | 4 | deb http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted universe multiverse 5 | # deb-src http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted universe multiverse 6 | 7 | deb http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse 8 | # deb-src http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse 9 | 10 | deb http://security.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse 11 | # deb-src http://security.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse 12 | 13 | # 预发布软件源,不建议启用 14 | # deb http://archive.ubuntu.com/ubuntu/ jammy-proposed main restricted universe multiverse 15 | # deb-src http://archive.ubuntu.com/ubuntu/ jammy-proposed main restricted universe multiverse 16 | 17 | -------------------------------------------------------------------------------- /docker/build-all.sh: -------------------------------------------------------------------------------- 1 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 2 | DIR=$(pwd) 3 | 4 | function fatal { 5 | echo -e "\033[31m[`date`] $* \033[0m" 6 | exit 1 7 | } 8 | function info { 9 | echo -e "\033[32m[`date`] $* \033[0m" 10 | } 11 | 
12 | function assert() 13 | { 14 | local cmd=$* 15 | echo "Run:$cmd" > /dev/stderr 16 | eval '${cmd}' 17 | if [ $? -ne 0 ]; then 18 | fatal "Failed to run:$cmd" 19 | fi 20 | } 21 | 22 | 23 | info "This build script need run in pureflash develop container." 24 | info "Start container with command:\n docker run -ti --rm --network host docker.io/pureflash/pureflash-dev:1.9.1-x64 /bin/bash" 25 | #give time to read above tips 26 | sleep 2 27 | 28 | set -v 29 | PFREPO=${PFREPO:-gitee} 30 | 31 | #export JAVA_HOME=/usr/lib/jvm/jdk-15 32 | #export PATH=$JAVA_HOME/bin/:$PATH 33 | 34 | info "build jconductor from repo:${PFREPO}" 35 | assert git clone https://${PFREPO}.com/cocalele/jconductor.git 36 | cd jconductor/ 37 | assert git submodule update --init 38 | assert ant -f jconductor.xml 39 | 40 | info "build PureFlash" 41 | cd $DIR 42 | assert git clone https://${PFREPO}.com/cocalele/PureFlash.git 43 | cd PureFlash/ 44 | assert git submodule update --init --recursive 45 | mkdir build 46 | cd build 47 | assert cmake -GNinja -DCMAKE_BUILD_TYPE=Debug .. 
48 | assert ninja 49 | 50 | info "build fio with pfbd" 51 | cd $DIR 52 | assert git clone https://${PFREPO}.com/cocalele/fio.git 53 | cd fio 54 | #./configure --pfbd-include=$DIR/PureFlash/common/include --pfbd-lib=$DIR/PureFlash/build/bin 55 | assert ./configure --pfbd-include=$DIR/PureFlash/common/include --pfbd-lib=$DIR/PureFlash/build/bin/ --spdk-lib=$DIR/PureFlash/thirdParty/spdk/build/lib --dpdk-lib=$DIR/PureFlash/thirdParty/spdk/dpdk/build/lib 56 | assert make 57 | 58 | cd $DIR 59 | info "build qemu with pfbd" 60 | assert apt install -y libglib2.0-dev libpixman-1-dev python3 git python3-pip libslirp-dev 61 | assert pip3 install -U pip 62 | # apt install -y libfdt-dev #need on ARM 63 | assert git clone https://${PFREPO}.com/cocalele/qemu.git 64 | cd qemu 65 | git checkout v8.1.2-pfbd 66 | PUREFLASH_HOME=$DIR/PureFlash 67 | OSNAME=`$DIR/PureFlash/scripts/osname.sh` 68 | mkdir /usr/include/pfbd 69 | cp -f ${PUREFLASH_HOME}/common/include/pf_client_api.h /usr/include/pfbd/pf_client_api.h 70 | cp -f ${PUREFLASH_HOME}/build/bin/libs5common.a /usr/lib/libs5common.a 71 | cp -f $DIR/PureFlash/pre_build_libs/$OSNAME/libzookeeper_mt.a /usr/lib/libzookeeper_mt.a 72 | cp -f $DIR/PureFlash/pre_build_libs/$OSNAME/libhashtable.a /usr/lib/libhashtable.a 73 | ( cd ${PUREFLASH_HOME}/build/bin; cp -rp libspdk* /usr/lib/) 74 | ( cd ${PUREFLASH_HOME}/build/bin; cp -rp librte* /usr/lib/) 75 | ( cd ${PUREFLASH_HOME}/build/bin; cp -rp dpdk /usr/lib/) 76 | mkdir build 77 | cd build 78 | #assert ../configure --enable-debug --enable-kvm --target-list=x86_64-softmmu --disable-linux-io-uring 79 | assert ../configure --enable-kvm --target-list=x86_64-softmmu --disable-linux-io-uring 80 | assert ninja 81 | 82 | #info "Begin build docker" 83 | #cd $DIR/PureFlash/docker 84 | #assert ./build-docker.sh $DIR/jconductor $DIR/PureFlash/build $DIR/qemu/build $DIR/fio -------------------------------------------------------------------------------- /docker/build-docker.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | function fatal { 3 | echo -e "\033[31m$* \033[0m" 4 | exit 1 5 | } 6 | function assert() 7 | { 8 | local cmd=$* 9 | echo "Run:$cmd" > /dev/stderr 10 | eval '${cmd}' 11 | if [ $? -ne 0 ]; then 12 | fatal "Failed to run:$cmd" 13 | fi 14 | } 15 | COND_HOME=$1 16 | PFS_BUILD=$2 17 | QEMU_BUILD=$3 18 | FIO_BUILD=$4 19 | 20 | if [[ "$COND_HOME" == "" || "$PFS_BUILD" == "" || "$QEMU_BUILD" == "" || "$FIO_BUILD" == "" ]]; then 21 | echo "Usage: build-docker.sh " 22 | exit 1; 23 | fi 24 | 25 | #COND_HOME=/root/v2/jconductor 26 | #PFS_BUILD=/root/v2/ViveNAS/PureFlash/build 27 | 28 | rm -rf jconductor/com 29 | mkdir jconductor 30 | assert cp -rp $COND_HOME/pfconductor.jar jconductor/ 31 | assert cp -rp $COND_HOME/lib jconductor/ 32 | 33 | assert tar xzf $COND_HOME/res/apache-zookeeper-3.5.9-bin.tar.gz 34 | assert mv apache-zookeeper-3.5.9-bin/conf/zoo_sample.cfg apache-zookeeper-3.5.9-bin/conf/zoo.cfg 35 | assert cp -rp $COND_HOME/res/init_s5metadb.sql mariadb/ 36 | assert cp -rp $COND_HOME/pfcli . 37 | assert cp -rp $PFS_BUILD/bin/pfs . 38 | assert cp -rp $PFS_BUILD/bin/pfdd . 39 | #assert cp -rp $QEMU_BUILD/qemu-img . 40 | assert cp -rp $FIO_BUILD/fio . 41 | 42 | PUREFLASH_HOME=../ 43 | assert cp -f $PUREFLASH_HOME/thirdParty/spdk/dpdk/build/lib/librte_eal.so.23 . 44 | assert cp -f $PUREFLASH_HOME/thirdParty/spdk/dpdk/build/lib/librte_mempool.so.23 . 45 | assert cp -f $PUREFLASH_HOME/thirdParty/spdk/dpdk/build/lib/librte_ring.so.23 . 46 | assert cp -f $PUREFLASH_HOME/thirdParty/spdk/dpdk/build/lib/librte_bus_pci.so.23 . 47 | assert cp -f $PUREFLASH_HOME/thirdParty/spdk/dpdk/build/lib/librte_kvargs.so.23 . 48 | assert cp -f $PUREFLASH_HOME/thirdParty/spdk/dpdk/build/lib/librte_telemetry.so.23 . 49 | assert cp -f $PUREFLASH_HOME/thirdParty/spdk/dpdk/build/lib/librte_pci.so.23 . 50 | 51 | 52 | docker build -f Dockerfile -t pureflash/pureflash:1.9.1 . 
53 | -------------------------------------------------------------------------------- /docker/conf/pf.conf: -------------------------------------------------------------------------------- 1 | [cluster] 2 | name=cluster1 3 | [zookeeper] 4 | ip=172.21.0.13:2181 5 | [conductor] 6 | mngt_ip=172.21.0.13 7 | [db] 8 | ip=127.0.0.1 9 | user=pureflash 10 | pass=123456 11 | db_name=s5 12 | [client] 13 | conn_type=tcp 14 | -------------------------------------------------------------------------------- /docker/conf/pfc.conf: -------------------------------------------------------------------------------- 1 | [cluster] 2 | name=cluster1 3 | [zookeeper] 4 | ip=127.0.0.1:2181 5 | [conductor] 6 | mngt_ip=127.0.0.1 7 | [db] 8 | ip=127.0.0.1 9 | user=pureflash 10 | pass=123456 11 | db_name=s5 12 | -------------------------------------------------------------------------------- /docker/conf/pfs.conf: -------------------------------------------------------------------------------- 1 | [cluster] 2 | name=cluster1 3 | [zookeeper] 4 | ip=127.0.0.1:2181 5 | 6 | [afs] 7 | mngt_ip=127.0.0.1 8 | id=1 9 | meta_size=10737418240 10 | [engine] 11 | name=aio 12 | #__TRAY_PLACEHOLDER__ 13 | #[tray.0] 14 | # dev = /opt/pureflash/disk1.dat # path of physical flash device 15 | [port.0] 16 | ip=127.0.0.1 17 | [rep_port.0] 18 | ip=127.0.0.1 19 | [tcp_server] 20 | poller_count=8 21 | [replicator] 22 | conn_type=tcp 23 | count=4 24 | -------------------------------------------------------------------------------- /docker/mariadb/mariadb.cnf: -------------------------------------------------------------------------------- 1 | # The MariaDB configuration file 2 | # 3 | # The MariaDB/MySQL tools read configuration files in the following order: 4 | # 0. "/etc/mysql/my.cnf" symlinks to this file, reason why all the rest is read. 5 | # 1. "/etc/mysql/mariadb.cnf" (this file) to set global defaults, 6 | # 2. "/etc/mysql/conf.d/*.cnf" to set global options. 7 | # 3. 
"/etc/mysql/mariadb.conf.d/*.cnf" to set MariaDB-only options. 8 | # 4. "~/.my.cnf" to set user-specific options. 9 | # 10 | # If the same option is defined multiple times, the last one will apply. 11 | # 12 | # One can use all long options that the program supports. 13 | # Run program with --help to get a list of available options and with 14 | # --print-defaults to see which it would actually understand and use. 15 | # 16 | # If you are new to MariaDB, check out https://mariadb.com/kb/en/basic-mariadb-articles/ 17 | 18 | # 19 | # This group is read both by the client and the server 20 | # use it for options that affect everything 21 | # 22 | [client-server] 23 | # Port or socket location where to connect 24 | port = 3306 25 | socket = /run/mysqld/mysqld.sock 26 | 27 | # Import all .cnf files from configuration directory 28 | !includedir /etc/mysql/conf.d/ 29 | !includedir /etc/mysql/mariadb.conf.d/ 30 | -------------------------------------------------------------------------------- /docker/restart-pfc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -m 3 | 4 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | 6 | #JAVA_HOME=/opt/pureflash/jdk-17.0.6 7 | export PATH=/opt/pureflash:$PATH 8 | 9 | OLD_PID=$(ps -f |grep jconductor |grep java|awk '{print $2}') 10 | if [ "$OLD_PID" != "" ]; then 11 | echo "OLD_PID:$OLD_PID " 12 | kill -2 $OLD_PID 13 | fi 14 | 15 | echo "Restart PureFlash jconductor..." 16 | JCROOT=$DIR/jconductor 17 | nohup java -classpath $JCROOT/pfconductor.jar:$JCROOT/lib/* \ 18 | -Dorg.slf4j.simpleLogger.showDateTime=true \ 19 | -Dorg.slf4j.simpleLogger.dateTimeFormat="[yyyy/MM/dd H:mm:ss.SSS]" \ 20 | -XX:+HeapDumpOnOutOfMemoryError \ 21 | -Xmx2G \ 22 | com.netbric.s5.conductor.Main -c /etc/pureflash/pfc.conf > /var/log/pfc.log 2>&1 & 23 | status=$? 
24 | if [ $status -ne 0 ]; then 25 | echo "Failed to start jconductor: $status" 26 | exit $status 27 | fi 28 | 29 | 30 | -------------------------------------------------------------------------------- /docker/restart-pfs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -m 3 | 4 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | 6 | JAVA_HOME=/usr/lib/jvm/jdk-15/ 7 | export PATH=/opt/pureflash:$JAVA_HOME/bin:$PATH 8 | 9 | 10 | OLD_PID=$(pidof pfs) 11 | if [ "$OLD_PID" != "" ]; then 12 | echo "OLD_PID:$OLD_PID " 13 | kill -2 $OLD_PID 14 | fi 15 | 16 | ulimit -c unlimited 17 | echo "/var/crash/core-%p-%e-%t" > /proc/sys/kernel/core_pattern 18 | 19 | export LD_LIBRARY_PATH=$DIR:$LD_LIBRARY_PATH 20 | echo "Restart PureFlash store..." 21 | nohup $DIR/pfs -c /etc/pureflash/pfs.conf > /var/log/pfs.log 2>&1 & 22 | status=$? 23 | if [ $status -ne 0 ]; then 24 | echo "Failed to start pfs: $status" 25 | exit $status 26 | fi 27 | 28 | -------------------------------------------------------------------------------- /docker/run-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -m 3 | 4 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | 6 | #JAVA_HOME=/opt/pureflash/jdk-17.0.6 7 | #export PATH=/opt/pureflash:$JAVA_HOME/bin:$PATH 8 | export PATH=/opt/pureflash:$PATH 9 | 10 | echo "Start MariaDB..." 11 | mysql_install_db --user=mysql --ldata=/var/lib/mysql 12 | /usr/bin/mysqld_safe --user=mysql --datadir='/var/lib/mysql' --log-error=/var/log/mysql/error.log & 13 | status=$? 14 | if [ $status -ne 0 ]; then 15 | echo "Failed to start mysql: $status" 16 | exit $status 17 | fi 18 | if [ "$PFS_DISKS" != "" ]; then 19 | echo "Use disk $PFS_DISKS specified from environment variable PFS_DISKS"; 20 | else 21 | echo "Use data file /opt/pureflash/disk1.dat as disk, only for testing" 22 | if [ ! 
-f /opt/pureflash/disk1.dat ]; then 23 | echo "Create disk file ..." 24 | truncate -s 20G /opt/pureflash/disk1.dat 25 | fi 26 | export PFS_DISKS="/opt/pureflash/disk1.dat" 27 | fi 28 | 29 | i=0 30 | for d in ${PFS_DISKS//,/ }; do 31 | sed -i "/__TRAY_PLACEHOLDER__/i [tray.$i]\n\tdev = $d" /etc/pureflash/pfs.conf 32 | i=$((i+1)) 33 | done 34 | sed -i "/__TRAY_PLACEHOLDER__/d" /etc/pureflash/pfs.conf 35 | 36 | echo "Waiting mysql start ..." 37 | sleep 3 38 | if ! mysql -e "use s5" ; then 39 | echo "initialize database s5 ..." 40 | mysql -e "source /opt/pureflash/mariadb/init_s5metadb.sql" 41 | mysql -e "GRANT ALL PRIVILEGES ON *.* TO 'pureflash'@'%' IDENTIFIED BY '123456'" 42 | fi 43 | 44 | 45 | echo "Start Zookeeper..." 46 | ZK_HOME=$DIR/apache-zookeeper-3.5.9-bin 47 | $ZK_HOME/bin/zkServer.sh start 48 | status=$? 49 | if [ $status -ne 0 ]; then 50 | echo "Failed to start zookeeper: $status" 51 | exit $status 52 | fi 53 | sleep 2 54 | while ! lsof -i -P -n | grep 2181 ; do echo waiting zk; sleep 1; done 55 | echo "Start PureFlash jconductor..." 56 | JCROOT=$DIR/jconductor 57 | java -classpath $JCROOT/pfconductor.jar:$JCROOT/lib/* \ 58 | -Dorg.slf4j.simpleLogger.showDateTime=true \ 59 | -Dorg.slf4j.simpleLogger.dateTimeFormat="[yyyy/MM/dd H:mm:ss.SSS]" \ 60 | -XX:+HeapDumpOnOutOfMemoryError \ 61 | -Xmx2G \ 62 | com.netbric.s5.conductor.Main -c /etc/pureflash/pfc.conf &> /var/log/pfc.log & 63 | status=$? 64 | if [ $status -ne 0 ]; then 65 | echo "Failed to start jconductor: $status" 66 | exit $status 67 | fi 68 | sleep 3 69 | ulimit -c unlimited 70 | echo "/var/crash/core-%p-%e-%t" > /proc/sys/kernel/core_pattern 71 | 72 | export LD_LIBRARY_PATH=$DIR:$LD_LIBRARY_PATH 73 | echo "Start PureFlash store..." 74 | $DIR/pfs -c /etc/pureflash/pfs.conf &> /var/log/pfs.log & 75 | status=$? 
76 | if [ $status -ne 0 ]; then 77 | echo "Failed to start pfs: $status" 78 | exit $status 79 | fi 80 | 81 | echo -n "Waiting disk to be initialized for first run, this may take 60 seconds or longer accord to you disk ..." 82 | while ! pfcli list_disk &>/dev/null ; do 83 | sleep 1 84 | echo -n "." 85 | done 86 | echo "Disk ready" 87 | 88 | echo "Welcome to PureFlash(https://github.com/cocalele/PureFlash) all-in-one box!" 89 | cd 90 | if [ "$NOBASH" == "" ] ; then 91 | bash 92 | fi 93 | 94 | 95 | -------------------------------------------------------------------------------- /docker/sources.list: -------------------------------------------------------------------------------- 1 | deb http://mirrors.aliyun.com/ubuntu/ focal main restricted 2 | deb http://mirrors.aliyun.com/ubuntu/ focal-updates main restricted 3 | deb http://mirrors.aliyun.com/ubuntu/ focal universe 4 | deb http://mirrors.aliyun.com/ubuntu/ focal-updates universe 5 | deb http://mirrors.aliyun.com/ubuntu/ focal multiverse 6 | deb http://mirrors.aliyun.com/ubuntu/ focal-updates multiverse 7 | deb http://mirrors.aliyun.com/ubuntu/ focal-backports main restricted universe multiverse 8 | deb http://mirrors.aliyun.com/ubuntu/ focal-security main restricted 9 | deb http://mirrors.aliyun.com/ubuntu/ focal-security universe 10 | deb http://mirrors.aliyun.com/ubuntu/ focal-security multiverse 11 | -------------------------------------------------------------------------------- /docker/zoo.cfg: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=500 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | # the directory where the snapshot is stored. 10 | # do not use /tmp for storage, /tmp here is just 11 | # example sakes. 
12 | dataDir=/tmp/zookeeper 13 | # the port at which the clients will connect 14 | clientPort=2181 15 | # the maximum number of client connections. 16 | # increase this if you need to handle more clients 17 | #maxClientCnxns=60 18 | # 19 | # Be sure to read the maintenance section of the 20 | # administrator guide before turning on autopurge. 21 | # 22 | # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance 23 | # 24 | # The number of snapshots to retain in dataDir 25 | #autopurge.snapRetainCount=3 26 | # Purge task interval in hours 27 | # Set to "0" to disable auto purge feature 28 | #autopurge.purgeInterval=1 29 | 30 | -------------------------------------------------------------------------------- /docs/aof.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/docs/aof.md -------------------------------------------------------------------------------- /docs/design_ref_cnt.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/docs/design_ref_cnt.md -------------------------------------------------------------------------------- /docs/heart_beat_design.md: -------------------------------------------------------------------------------- 1 | # 心跳机制 2 | 3 | 4 | ## 设计和实现 5 | - [x] 建立连接时(TCP或RDMA), 记录连接信息到服务端(PfRdmaServer或PfTcpServer)的客户端连接MAP记录中(client_ip_conn_map) 6 | - [ ] 通过连接下发IO时, 在IO完成时, 更新连接上的最后通信时间 7 | - [x] 开启线程, 检测记录中的连接(目前仅打印) 8 | - [ ] 如果连接状态正常,且最后通信时间与当前时间相隔一定的时间后, 触发心跳发送 9 | - [ ] TCP和RDMA实现心跳发送的事件类型 10 | - [ ] 如果重发心跳一定的次数内, 都失败了, 说明网络异常了, 则关闭连接 11 | 12 | 13 | ## 详情 14 | ### TCP 15 | - 服务端在接受连接成功时记录客户端连接信息(PfTcpServer::accept_connection()) 16 | - 在(int PfTcpServer::init())中开启连接检查线程 17 | 18 | 19 | ### RDMA 20 | 服务端在接受连接成功时记录客户端连接信息(on_connect_request) 21 | 22 | 23 | 
-------------------------------------------------------------------------------- /docs/images/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/docs/images/arch.png -------------------------------------------------------------------------------- /docs/testoutline.md: -------------------------------------------------------------------------------- 1 | # Test outline of PureFlash 2 | 3 | ## 1. basic IO 4 | 5 | ## 2. cluster management 6 | 7 | ## 3. Snapshot 8 | - create snapshot when volume in degraded state, 9 | point 1: some node in OFFLINE state, can jconductor handle `set_snapseq` failure correctly? 10 | - CoW fail in cow reading, 11 | ``` 12 | app_context.error_handler->submit_error(t, PfMessageStatus::MSG_STATUS_AIOERROR); 13 | ``` 14 | this line may fail since t is not a fully initialized SubTask 15 | 16 | ## 4. redolog test 17 | -------------------------------------------------------------------------------- /pfs/include/pf_adaptor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | /** 7 | * Copyright (C), 2014-2015. 8 | * @file 9 | * The common definition of s5afs 10 | */ 11 | 12 | #ifndef _ADAPTOR_H_ 13 | #define _ADAPTOR_H_ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | /** 25 | * @brief Define the size of one node in S5afs 26 | * 27 | * Defines the size of one node in S5afs, which is measured in MB. 28 | * This Macro is used for computing available capacity. 
29 | */ 30 | #define NODE_SIZE_IN_MB 4 31 | 32 | #define HASH_BUCKET_COUNT 10 33 | #define HASH_NODE_COUNT 128 34 | 35 | /** 36 | * @brief Macro pthread_mutex_t 37 | * 38 | * Declare the afs lock 39 | */ 40 | #define afs_lock_mutex pthread_mutex_t 41 | 42 | /** 43 | * @brief Macro initialize afs lock 44 | */ 45 | #define afs_lock_init(lock) pthread_mutex_init(lock, NULL) 46 | 47 | /** 48 | * @brief Macro lock afs mutex 49 | */ 50 | #define afs_lock(lock) pthread_mutex_lock(lock) 51 | 52 | /** 53 | * @brief Macro unlock afs mutex 54 | */ 55 | #define afs_unlock(lock) pthread_mutex_unlock(lock) 56 | 57 | #endif /* _ADAPTOR_H_ */ 58 | 59 | -------------------------------------------------------------------------------- /pfs/include/pf_atslock.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_atslock_h__ 2 | #define pf_atslock_h__ 3 | 4 | int pf_ats_lock(int devfd, int64_t lock_location); 5 | 6 | int pf_ats_unlock(int devfd, int64_t lock_location); 7 | #endif // pf_atslock_h__ 8 | -------------------------------------------------------------------------------- /pfs/include/pf_bgtask_manager.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | // 7 | // Created by liu_l on 10/24/2020. 
8 | // 9 | 10 | #ifndef PUREFLASH_PF_BGTASK_MANAGER_H 11 | #define PUREFLASH_PF_BGTASK_MANAGER_H 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "pf_threadpool.h" 20 | 21 | enum TaskType {RECOVERY, SCRUB, GC}; 22 | enum TaskStatus{WAITING, RUNNING, SUCCEEDED, FAILED }; 23 | const char* TaskStatusToStr(TaskStatus s); 24 | 25 | class RestfulReply; 26 | class BackgroundTask; 27 | 28 | typedef std::function TaskExecutor; 29 | class BackgroundTask{ 30 | public: 31 | int64_t id; 32 | TaskType type; 33 | std::string desc; 34 | TaskStatus status; 35 | std::time_t start_time; 36 | std::time_t finish_time; 37 | RestfulReply* result; 38 | 39 | void* arg; 40 | TaskExecutor exec; 41 | }; 42 | class BackgroundTaskManager { 43 | public: 44 | long id_seed; 45 | std::unordered_map task_map; 46 | ThreadPool recovery_thread_pool; 47 | BackgroundTaskManager():id_seed(0), recovery_thread_pool(1) 48 | {} 49 | 50 | BackgroundTask* initiate_task(TaskType type, std::string desc, TaskExecutor exe, void* arg); 51 | void commit_task(BackgroundTask*); 52 | }; 53 | 54 | #endif //PUREFLASH_PF_BGTASK_MANAGER_H 55 | -------------------------------------------------------------------------------- /pfs/include/pf_bitmap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | // 7 | // Created by liu_l on 10/18/2020. 
8 | // 9 | 10 | #ifndef PUREFLASH_PF_BITMAP_H 11 | #define PUREFLASH_PF_BITMAP_H 12 | #include 13 | #include 14 | #include 15 | 16 | class PfBitmap { 17 | public: 18 | uint64_t *bits_data; 19 | int bit_count; 20 | PfBitmap(int bit_count) { 21 | this->bit_count = bit_count; 22 | bits_data = (uint64_t*)calloc(bit_count/8/sizeof(uint64_t)+1, sizeof(uint64_t)); 23 | } 24 | ~PfBitmap() { 25 | free(bits_data); 26 | bits_data=NULL; 27 | } 28 | inline __attribute__((always_inline)) void set_bit(int index) { 29 | bits_data[index/(8*sizeof(uint64_t))] |= (1UL << (index%(8*sizeof(uint64_t)))); 30 | } 31 | inline bool is_empty() { 32 | int len = (int)(bit_count/8/sizeof(uint64_t)+1); 33 | for(int i=0;i 15 | #include "pf_dispatcher.h" 16 | #include "pf_restful_api.h" 17 | 18 | class RestfulReply; 19 | class PfErrorHandler : public PfEventThread { 20 | public: 21 | int submit_error(IoSubTask* t, PfMessageStatus sc); 22 | //int submit_error(PfServerIocb* io, uint64_t rep_id, PfMessageStatus sc); 23 | PfErrorHandler(); 24 | int report_error_to_conductor(uint64_t rep_id, int sc, ErrorReportReply& reply); 25 | 26 | virtual int process_event(int event_type, int arg_i, void* arg_p, void* arg_q) override; 27 | std::string zk_ip; 28 | std::string cluster_name; 29 | int http_timeout; 30 | std::string conductor_ip; 31 | 32 | }; 33 | 34 | 35 | #endif //PUREFLASH_PFERRORHANDLER_H 36 | -------------------------------------------------------------------------------- /pfs/include/pf_iouring_engine.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_iouring_engine_h__ 2 | #define pf_iouring_engine_h__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include "pf_ioengine.h" 11 | 12 | class PfIouringEngine : public PfIoEngine 13 | { 14 | int fd; 15 | struct io_uring uring; 16 | int seg_cnt_per_dispatcher; 17 | public: 18 | PfIouringEngine(const char* name, int _fd) :PfIoEngine(name), fd(_fd) {}; 19 | int init(); 20 | int 
submit_io(struct IoSubTask* io, int64_t media_offset, int64_t media_len); 21 | int submit_cow_io(struct CowTask* io, int64_t media_offset, int64_t media_len); 22 | std::thread iouring_poller; 23 | void polling_proc(); 24 | 25 | uint64_t sync_read(void* buffer, uint64_t buf_size, uint64_t offset); 26 | uint64_t sync_write(void* buffer, uint64_t buf_size, uint64_t offset); 27 | uint64_t get_device_cap(); 28 | //int poll_io(int *completions); 29 | }; 30 | #endif // pf_iouring_engine_h__ -------------------------------------------------------------------------------- /pfs/include/pf_main.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | /** 7 | * Copyright (C), 2014-2015. 8 | * @file 9 | * 10 | * This file defines the data structure: toedaemon, and defines its initialization and release func. 11 | */ 12 | 13 | #ifndef AFS_MAIN_H 14 | #define AFS_MAIN_H 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "pf_zk_client.h" 26 | #include "pf_app_ctx.h" 27 | #include "pf_flash_store.h" 28 | #include "pf_replicator.h" 29 | #include "pf_dispatcher.h" 30 | #include "pf_error_handler.h" 31 | #include "pf_bgtask_manager.h" 32 | 33 | 34 | class PfTcpServer; 35 | class PfRdmaServer; 36 | 37 | #define MAX_TRAY_COUNT 32 38 | #define MAX_PORT_COUNT 4 39 | #define MAX_DISPATCHER_COUNT 10 40 | #define MAX_REPLICATOR_COUNT 10 41 | #define IO_POOL_SIZE 1024 42 | //#define IO_POOL_SIZE 128 43 | 44 | #define DATA_PORT 0 45 | #define REP_PORT 1 46 | 47 | //STATIC_ASSERT(DEFAULT_OBJ_SIZE == (1< client_ip_conn_map; 56 | std::mutex conn_map_lock; 57 | 58 | std::string mngt_ip; 59 | int store_id; 60 | PfZkClient zk_client; 61 | int64_t meta_size; 62 | int rep_conn_type; 63 | 64 | PfTcpServer* tcp_server; 65 | PfRdmaServer* rdma_server; 66 | std::vector trays; 67 
| std::vector disps; 68 | std::vector replicators; 69 | 70 | //int dis_index; 71 | 72 | pthread_mutex_t lock; 73 | std::map opened_volumes; 74 | PfErrorHandler* error_handler; 75 | 76 | 77 | BigMemPool recovery_buf_pool; 78 | BufferPool recovery_io_bd_pool; 79 | 80 | BackgroundTaskManager bg_task_mgr; 81 | int next_client_disp_id; //to assign shared client connection to dispatcher 82 | std::thread cron_thread; 83 | 84 | PfVolume* get_opened_volume(uint64_t vol_id); 85 | int get_ssd_index(std::string ssd_uuid); 86 | int PfRdmaRegisterMr(struct PfRdmaDevContext *dev_ctx); 87 | void PfRdmaUnRegisterMr(); 88 | PfAfsAppContext(); 89 | 90 | PfDispatcher *get_dispatcher(); 91 | void remove_connection(PfConnection* _conn); 92 | void add_connection(PfConnection* _conn); 93 | 94 | int next_shard_replicator_id; //to assign volume shard to replicator 95 | PfReplicator *get_replicator(); 96 | }; 97 | extern PfAfsAppContext app_context; 98 | #endif 99 | 100 | -------------------------------------------------------------------------------- /pfs/include/pf_md5.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 
5 | */ 6 | #ifndef pf_md5_h__ 7 | #define pf_md5_h__ 8 | #include 9 | #include "pf_utils.h" 10 | #include "pf_tray.h" 11 | #include "pf_ioengine.h" 12 | #include "isa-l_crypto/md5_mb.h" 13 | 14 | class MD5_CTX; 15 | typedef int dev_handle_t; 16 | 17 | class MD5Stream 18 | { 19 | public: 20 | union { 21 | int fd; 22 | struct { 23 | PfIoEngine *ioengine; 24 | } nvme; 25 | }; 26 | off_t base_offset; 27 | char* buffer; 28 | bool spdk_engine; 29 | size_t data_len; 30 | public: 31 | MD5Stream(int fd); 32 | ~MD5Stream(); 33 | void spdk_eng_init(PfIoEngine *eng); 34 | int alloc_buffer(); 35 | void destroy(); 36 | void reset(off_t offset); 37 | int read(void *buf, size_t count, off_t offset); 38 | int write(void *buf, size_t count, off_t offset); 39 | virtual int write_calc(void *buf, size_t count, off_t offset) = 0; 40 | virtual int read_calc(void *buf, size_t count, off_t offset) = 0; 41 | virtual int finalize(char *result, int is_read) = 0; 42 | }; 43 | 44 | class MD5Stream_ISA_L : public MD5Stream 45 | { 46 | public: 47 | MD5Stream_ISA_L(int fd):MD5Stream(fd){}; 48 | MD5_HASH_CTX ctxpool; 49 | MD5_HASH_CTX_MGR *mgr; 50 | int init(); 51 | int write_calc(void *buf, size_t count, off_t offset); 52 | int read_calc(void *buf, size_t count, off_t offset); 53 | int finalize(char *result, int is_read); 54 | }; 55 | #endif // pf_md5_h__ 56 | -------------------------------------------------------------------------------- /pfs/include/pf_redolog.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 
5 | */ 6 | #ifndef pf_redolog_h__ 7 | #define pf_redolog_h__ 8 | #include 9 | #include 10 | 11 | #include "pf_flash_store.h" 12 | #include "pf_tray.h" 13 | 14 | class PfRedoLog 15 | { 16 | enum class ItemType : uint32_t { 17 | ALLOCATE_OBJ = 1, 18 | TRIM_OBJ = 2, 19 | FREE_OBJ = 3, 20 | SNAP_SEQ_CHANGE = 4, 21 | STATUS_CHANGE = 5, 22 | 23 | _REDO_ITEM_TYPE_COUNT_ 24 | }; 25 | 26 | struct Item{ 27 | int64_t phase; 28 | ItemType type; 29 | union { 30 | struct { 31 | struct lmt_key bkey; 32 | struct lmt_entry bentry; 33 | int free_queue_head; 34 | } allocation; 35 | struct { 36 | struct lmt_key bkey; 37 | struct lmt_entry bentry; 38 | int trim_queue_tail; 39 | }trim; 40 | struct { 41 | int obj_id; 42 | int trim_queue_head; 43 | int free_queue_tail; 44 | }free; 45 | struct { 46 | struct lmt_key bkey; 47 | struct lmt_entry bentry; 48 | uint32_t old_snap_seq; 49 | }snap_seq_change; 50 | struct { 51 | struct lmt_key bkey; 52 | struct lmt_entry bentry; 53 | int old_status; 54 | }state_change; 55 | 56 | }; 57 | }; 58 | 59 | public: 60 | int disk_fd; 61 | int64_t phase; 62 | size_t size; 63 | struct PfFlashStore* store; 64 | off_t start_offset; 65 | off_t current_offset; 66 | void* entry_buff; 67 | std::thread auto_save_thread; 68 | 69 | int init(struct PfFlashStore* s); 70 | int set_log_phase(int64_t phase, uint64_t offset); 71 | int replay(int64_t start_phase, int which); 72 | int discard(); 73 | int log_allocation(const struct lmt_key* key, const struct lmt_entry* entry, int free_list_head); 74 | int log_free(int block_id, int trim_list_head, int free_list_tail); 75 | int log_trim(const struct lmt_key* key, const struct lmt_entry* entry, int trim_list_tail); 76 | int log_status_change(const lmt_key* key, const lmt_entry* entry, EntryStatus old_state); 77 | 78 | int redo_allocation(Item* e); 79 | int redo_trim(Item* e); 80 | int redo_free(Item* e); 81 | int log_snap_seq_change(const struct lmt_key* key, const struct lmt_entry* entry, int old_seq); 82 | int 
redo_snap_seq_change(PfRedoLog::Item* e); 83 | int redo_state_change(PfRedoLog::Item* e); 84 | int stop(); 85 | int start(); 86 | private: 87 | int write_entry(); 88 | }; 89 | 90 | #endif // pf_redolog_h__ 91 | -------------------------------------------------------------------------------- /pfs/include/pf_replica.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | #ifndef pf_replica_h__ 7 | #define pf_replica_h__ 8 | 9 | #include "pf_volume.h" 10 | #include "pf_replicator.h" 11 | 12 | class IoSubTask; 13 | class PfFlashStore; 14 | 15 | 16 | class PfLocalReplica : public PfReplica 17 | { 18 | public: 19 | virtual int submit_io(IoSubTask* subtask); 20 | public: 21 | PfFlashStore* disk; 22 | }; 23 | 24 | class PfSyncRemoteReplica : public PfReplica 25 | { 26 | public: 27 | virtual int submit_io(IoSubTask* subtask); 28 | public: 29 | PfReplicator* replicator; 30 | }; 31 | 32 | #endif // pf_replica_h__ -------------------------------------------------------------------------------- /pfs/include/pf_replicator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 
5 | */ 6 | #ifndef pf_replicator_h__ 7 | #define pf_replicator_h__ 8 | #include 9 | 10 | #include "pf_poller.h" 11 | #include "pf_connection_pool.h" 12 | #include "pf_client_priv.h" 13 | 14 | class RecoverySubTask; 15 | class PfDelayThread : public PfEventThread 16 | { 17 | 18 | public: 19 | //PfDelayThread(PfReplicator* r) { replicator = r; } 20 | PfReplicator* replicator; 21 | int process_event(int event_type, int arg_i, void* arg_p, void* arg_q); 22 | }; 23 | 24 | class PfReplicator : public PfEventThread 25 | { 26 | class PeerAddr 27 | { 28 | public: 29 | int store_id; 30 | PfConnection *conn; 31 | int curr_ip_idx; 32 | std::vector ip; 33 | }; 34 | class PfRepConnectionPool : public PfConnectionPool 35 | { 36 | public: 37 | std::unordered_map peers; 38 | void add_peer(int store_id, std::string ip1, std::string ip2); 39 | void connect_peer(int store_id); 40 | PfConnection* get_conn(int store_id); 41 | }; 42 | 43 | public: 44 | int init(int index, uint16_t* p_id); 45 | int process_event(int event_type, int arg_i, void* arg_p, void* arg_q); 46 | int begin_replicate_io(IoSubTask* t); 47 | int begin_recovery_read_io(RecoverySubTask* t); 48 | inline PfClientIocb* pick_iocb(uint16_t cid, uint32_t cmd_seq){ 49 | //TODO: check cmd_seq 50 | return &iocb_pool.data[cid]; 51 | } 52 | int process_io_complete(PfClientIocb* iocb, int _complete_status); 53 | int handle_conn_close(PfConnection* c); 54 | 55 | int rep_index; 56 | 57 | PfPoller *tcp_poller; 58 | //PfPoller *rdma_poller; 59 | PfRepConnectionPool *conn_pool; 60 | ObjectMemoryPool iocb_pool; 61 | struct replicator_mem_pool mem_pool; 62 | 63 | PfDelayThread delay_thread; 64 | }; 65 | #endif // pf_replicator_h__ 66 | -------------------------------------------------------------------------------- /pfs/include/pf_request.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 
5 | */ 6 | /** 7 | * Copyright (C), 2014-2015. 8 | * @file 9 | * This file defines the apis to handle requests. 10 | */ 11 | 12 | #ifndef __TOEDAEMON_CACHE_MGR__ 13 | #define __TOEDAEMON_CACHE_MGR__ 14 | 15 | 16 | #include "pf_adaptor.h" 17 | #include "s5message.h" 18 | struct toedaemon; 19 | /** 20 | * Initialize cache management data for S5afs 21 | * 22 | * Initialize afsc_st, register spy variables. 23 | * 24 | * @param[in] tray_set_count int32_t,the total number of tray_set 25 | * 26 | */ 27 | void register_spy_variables(); 28 | void unregister_spy_variables(); 29 | 30 | /* Release cache management data for S5afs 31 | * 32 | * Release afsc_st, and unregister spy variables. 33 | * 34 | * @return No return. 35 | */ 36 | void cachemgr_release(); 37 | 38 | /** 39 | * The function to handle write request. 40 | * 41 | * @param[in] msg The write request from client. 42 | * @param[in] socket The client socket, used for replying msg. 43 | * @return The length of real write. 44 | * @retval >=0 The length of real write. 45 | * @retval -ENOENT No available nodes in afs for write request. 46 | * @retval -ENOMEM No memory left to allocate variables. 47 | */ 48 | int cachemgr_write_request(pf_message_t *msg, PS5CLTSOCKET socket); 49 | 50 | /** 51 | * The function to handle read request. 52 | * 53 | * @param[in] msg The read request from client. 54 | * @param[in] socket The client socket, used for replying msg. 55 | * @return The length of real read. 56 | * @retval >=0 The length of real read. 57 | * @retval -ENOENT read data does not exist. 58 | * @retval -ENOMEM No memory left to allocate variables. 59 | */ 60 | int cachemgr_read_request(pf_message_t *msg, PS5CLTSOCKET socket); 61 | 62 | /** 63 | * The function to handle delete request. 64 | * 65 | * @param[in] msg The delete request from client. 66 | * @param[in] socket The client socket, used for replying msg. 67 | * @return 0 Success. 
68 | */ 69 | int cachemgr_block_delete_request(pf_message_t *msg, PS5CLTSOCKET socket); 70 | 71 | 72 | int cachemgr_nic_client_info_request(pf_message_t *msg, PS5CLTSOCKET socket); 73 | int flash_store_config(struct toedaemon* toe_daemon, conf_file_t fp); 74 | 75 | 76 | #endif //__TOEDAEMON_CACHE_MGR__ 77 | 78 | -------------------------------------------------------------------------------- /pfs/include/pf_scrub.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | // 7 | // Created by liu_l on 12/26/2020. 8 | // 9 | 10 | #ifndef PUREFLASH_PF_SCRUB_H 11 | #define PUREFLASH_PF_SCRUB_H 12 | 13 | #include 14 | #include "isa-l_crypto.h" 15 | #include "pf_volume.h" 16 | #include "pf_restful_api.h" 17 | 18 | #define DIGEST_NWORDS MD5_DIGEST_NWORDS 19 | #define MB_BUFS MD5_MAX_LANES 20 | #define HASH_CTX_MGR MD5_HASH_CTX_MGR 21 | #define HASH_CTX MD5_HASH_CTX 22 | 23 | #define OSSL_THREAD_FUNC md5_ossl_func 24 | #define OSSL_HASH_FUNC MD5 25 | #define MB_THREAD_FUNC md5_mb_func 26 | #define CTX_MGR_INIT md5_ctx_mgr_init 27 | #define CTX_MGR_SUBMIT md5_ctx_mgr_submit 28 | #define CTX_MGR_FLUSH md5_ctx_mgr_flush 29 | #define rounds_buf MD5_MAX_LANES 30 | 31 | class Scrub { 32 | public: 33 | Scrub() noexcept; 34 | ~Scrub(); 35 | int feed_data(void* buf, size_t len, size_t off); 36 | std::string cal_replica(PfFlashStore* s, replica_id_t rep_id); 37 | static int cal_object(PfFlashStore* s, replica_id_t rep_id, int64_t obj_idx, std::list& rst); 38 | private: 39 | HASH_CTX_MGR *mgr = NULL; 40 | HASH_CTX *ctxpool = NULL, *ctx = NULL; 41 | }; 42 | 43 | 44 | #endif //PUREFLASH_PF_SCRUB_H 45 | -------------------------------------------------------------------------------- /pfs/include/pf_server.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 
3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | /** 7 | * Copyright (C), 2014-2015. 8 | * @file 9 | * 10 | * S5afs is short for S5 all flash simulator. It is used as the server role to receive the requests from S5 client 11 | * This file defines the apis to initialize/release this server, and the functions to handle request messages. 12 | */ 13 | 14 | #ifndef __S5D_SRV_TOE__ 15 | #define __S5D_SRV_TOE__ 16 | 17 | #include 18 | #include "pf_main.h" 19 | #include "pf_rdma_connection.h" 20 | #include 21 | #define TCP_PORT_BASE 49162 ///tray_name), ns(_ns) {}; 45 | int init(); 46 | int submit_io(struct IoSubTask* io, int64_t media_offset, int64_t media_len); 47 | static int poll_io(int* completions, void* arg); 48 | static void spdk_io_complete(void* ctx, const struct spdk_nvme_cpl* cpl); 49 | int submit_cow_io(struct CowTask* io, int64_t media_offset, int64_t media_len); 50 | static void spdk_cow_io_complete(void* ctx, const struct spdk_nvme_cpl* cpl); 51 | int submit_scc(uint64_t media_len, off_t src, off_t dest, void* (*scc_cb)(void* ctx), void* arg); 52 | static void scc_complete(void* arg, const struct spdk_nvme_cpl* cpl); 53 | 54 | uint64_t sync_write(void* buffer, uint64_t buf_size, uint64_t offset); 55 | uint64_t sync_read(void* buffer, uint64_t buf_size, uint64_t offset); 56 | uint64_t get_device_cap(); 57 | 58 | void spdk_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair* qpair, void* poll_group_ctx); 59 | uint64_t spdk_nvme_bytes_to_blocks(uint64_t offset_bytes, uint64_t* offset_blocks, 60 | uint64_t num_bytes, uint64_t* num_blocks); 61 | int pf_spdk_io_channel_open(int num_qpairs); 62 | int pf_spdk_io_channel_close(struct pf_io_channel *pic); 63 | }; 64 | 65 | #endif // pf_spdk_engine_h__ -------------------------------------------------------------------------------- /pfs/include/pf_stat.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_stat_h__ 2 | #define pf_stat_h__ 3 | 4 | struct DispatchStat 
5 | { 6 | int64_t rd_cnt; 7 | int64_t wr_cnt; 8 | int64_t rep_wr_cnt; 9 | 10 | int64_t rd_bytes; 11 | int64_t wr_bytes; 12 | int64_t rep_wr_bytes; 13 | DispatchStat& operator+=(const DispatchStat& rhs) 14 | { 15 | this->rd_cnt += rhs.rd_cnt; 16 | this->wr_cnt += rhs.wr_cnt; 17 | this->rep_wr_cnt += rhs.rep_wr_cnt; 18 | 19 | this->rd_bytes += rhs.rd_bytes; 20 | this->wr_bytes += rhs.wr_bytes; 21 | this->rep_wr_bytes += rhs.rep_wr_bytes; 22 | return *this; // return the result by reference 23 | } 24 | }; 25 | 26 | 27 | #endif // pf_stat_h__ 28 | -------------------------------------------------------------------------------- /pfs/include/pf_sync_replicator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | 7 | -------------------------------------------------------------------------------- /pfs/include/pf_tcp_server.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 
5 | */ 6 | 7 | -------------------------------------------------------------------------------- /pfs/include/pf_trace_defs.h: -------------------------------------------------------------------------------- 1 | #ifndef pf_trace_defs_h__ 2 | #define pf_trace_defs_h__ 3 | 4 | /* Owner definitions */ 5 | #define OWNER_PFS_SPDK_IO 0x2 6 | 7 | /* Object definitions */ 8 | #define OBJECT_SPDK_IO 0x2 9 | 10 | /* Trace group definitions */ 11 | #define TRACE_GROUP_SPDK 0x2 12 | 13 | /* Owner definitions */ 14 | #define OWNER_PFS_DISP_IO 0x3 15 | 16 | /* Object definitions */ 17 | #define OBJECT_DISP_IO 0x3 18 | 19 | /* Trace group definitions */ 20 | #define TRACE_GROUP_DISP 0x3 21 | 22 | /* spdk io tracepoint definitions */ 23 | #define TRACE_DISK_IO_STAT SPDK_TPOINT_ID(TRACE_GROUP_SPDK, 0x0) 24 | 25 | /* disp io tracepoint definitions */ 26 | #define TRACE_DISP_IO_STAT SPDK_TPOINT_ID(TRACE_GROUP_DISP, 0x0) 27 | #define TRACE_DISP_REP_IO_STAT SPDK_TPOINT_ID(TRACE_GROUP_DISP, 0x1) 28 | /* Convert a tick count `tsc` to microseconds, where `tsc_rate` is the number of ticks per second (must be non-zero). The whole-second quotient and the sub-second remainder are scaled separately so that the multiplication by 1000000 cannot wrap 64 bits for large tick counts; the result equals tsc * 1000000 / tsc_rate whenever that expression does not overflow. */ static inline uint64_t get_us_from_tsc(uint64_t tsc, uint64_t tsc_rate) 29 | { 30 | return (tsc / tsc_rate) * 1000 * 1000 + (tsc % tsc_rate) * 1000 * 1000 / tsc_rate; 31 | } 32 | 33 | #endif /* pf_trace_defs_h__ */ 34 | -------------------------------------------------------------------------------- /pfs/include/pf_tray.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | #ifndef __S5_TRAY_H__ 7 | #define __S5_TRAY_H__ 8 | 9 | #include 10 | 11 | /* This is a base class for device operations like pread, pwrite, aios. 12 | * The purpose of this class is to separate OS calls from main store class.
13 | */ 14 | 15 | class PfTray { 16 | public: 17 | virtual ~PfTray() {} 18 | virtual int init(const char *name) = 0; 19 | virtual void destroy() = 0; 20 | virtual int get_num_blocks(long *number) = 0; 21 | virtual ssize_t sync_read(void *buffer, size_t size, __off_t offset) = 0; 22 | virtual ssize_t sync_write(const void *buffer, size_t size, __off_t offset) = 0; 23 | virtual ssize_t async_read(void *buffer, size_t size, __off_t offset, void *callback) = 0; 24 | virtual ssize_t async_write(const void *buffer, size_t size, __off_t offset, void *callback) = 0; 25 | }; 26 | 27 | #endif /* __S5_DEVICE_H__ */ 28 | -------------------------------------------------------------------------------- /pfs/include/pf_volume.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | #ifndef afs_volume_h__ 7 | #define afs_volume_h__ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "basetype.h" 15 | #include "pf_fixed_size_queue.h" 16 | #include "pf_volume_type.h" 17 | 18 | class IoSubTask; 19 | class PfFlashStore; 20 | class BufferDescriptor; 21 | 22 | //Replica represent a replica of shard 23 | class PfReplica 24 | { 25 | public: 26 | enum HealthStatus status; 27 | uint64_t id; 28 | uint64_t store_id; 29 | bool is_local; 30 | bool is_primary; 31 | int rep_index; 32 | int ssd_index; 33 | public: 34 | virtual ~PfReplica() {} //to avoid compile warning 35 | virtual int submit_io(IoSubTask* subtask) = 0; //override in pf_replica.h 36 | }; 37 | 38 | 39 | //Shard represent a shard of volume 40 | struct PfShard 41 | { 42 | uint64_t id; 43 | int shard_index; 44 | struct PfReplica* replicas[MAX_REP_COUNT]; 45 | int primary_replica_index; 46 | int duty_rep_index; //which replica the current store node is responsible for 47 | BOOL is_primary_node; //is current node the primary node of this shard 48 | int 
rep_count; 49 | int snap_seq; 50 | enum HealthStatus status; 51 | uint64_t meta_ver; 52 | 53 | ~PfShard(); 54 | }; 55 | //Volume represent a Volume, 56 | struct PfVolume 57 | { 58 | char name[128]; 59 | uint64_t id; 60 | uint64_t size; 61 | int rep_count; 62 | int shard_count; 63 | std::vector shards; 64 | int snap_seq; 65 | enum HealthStatus status; 66 | uint64_t meta_ver; 67 | 68 | // PfFixedSizeQueue io_buffers; 69 | 70 | PfVolume() : _ref_count(1) {/*other member will inited in convert_argument_to_volume*/} 71 | inline void add_ref() { __sync_fetch_and_add(&_ref_count, 1); } 72 | inline void dec_ref() { 73 | /* Drop one reference and delete the object when the count reaches zero. The decision uses the value returned by the atomic decrement itself; re-reading _ref_count afterwards would let two concurrent releasers both observe 0 (double delete) or read the counter after another thread already deleted the object. */ 74 | if(__sync_sub_and_fetch(&_ref_count, 1) == 0) 75 | delete this; 76 | } 77 | PfVolume& operator=(PfVolume&& nv); 78 | private: 79 | ~PfVolume(); 80 | int _ref_count; //name similar with rep_count, so add prefix with _ 81 | }; 82 | 83 | #endif // afs_volume_h__ 84 | -------------------------------------------------------------------------------- /pfs/log4crc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 0 8 | 9 | 0 10 | 1 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /pfs/pfs_template.conf: -------------------------------------------------------------------------------- 1 | [cluster] 2 | name=cluster1 3 | [zookeeper] 4 | ip=127.0.0.1:2181 5 | 6 | [afs] 7 | mngt_ip=192.168.3.62 8 | id=1 9 | meta_size=5368709120 10 | [tray.0] 11 | dev = /dev/loop0 # path of physical flash device 12 | [port.0] 13 | ip=127.0.0.1 14 | [rep_port.0] 15 | ip=127.0.0.1 16 | -------------------------------------------------------------------------------- /pfs/src/pf_bgtask_manager.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This
code is licensed under the GPL. 5 | */ 6 | // 7 | // Created by liu_l on 10/25/2020. 8 | // 9 | 10 | #include 11 | #include "pf_bgtask_manager.h" 12 | #include "pf_restful_api.h" 13 | 14 | const char* TaskStatusToStr(TaskStatus s) 15 | { 16 | #define C_NAME(x) case x: return #x; 17 | static __thread char buf[64]; 18 | switch(s) { 19 | C_NAME(WAITING) 20 | C_NAME(RUNNING) 21 | C_NAME(SUCCEEDED) 22 | C_NAME(FAILED) 23 | default: 24 | snprintf(buf, sizeof(buf), "Unknown TaskStatus:%d", s); 25 | return buf; 26 | } 27 | } 28 | 29 | BackgroundTask* BackgroundTaskManager::initiate_task(TaskType type, std::string desc, TaskExecutor exe, void* arg){ 30 | BackgroundTask *t = new BackgroundTask(); 31 | t->id = ++id_seed; 32 | t->type = type; 33 | t->desc = desc; 34 | t->start_time = std::time(nullptr); 35 | t->arg = arg; 36 | t->exec = exe; 37 | t->status = TaskStatus::WAITING; 38 | 39 | S5LOG_INFO("Initiate background task id:%d", t->id); 40 | task_map[t->id] = t; 41 | return t; 42 | } 43 | void BackgroundTaskManager::commit_task(BackgroundTask* t) 44 | { 45 | recovery_thread_pool.commit([t]()->int { 46 | t->status = TaskStatus::RUNNING; 47 | S5LOG_DEBUG("Begin background task id:%d, %s", t->id, t->desc.c_str()); 48 | t->result = t->exec(t); 49 | S5LOG_DEBUG("Finish background task id:%d, ret_code:%d", t->id, t->result->ret_code); 50 | t->finish_time = std::time(nullptr); 51 | t->status = (t->result->ret_code == 0 ? TaskStatus::SUCCEEDED : TaskStatus::FAILED); 52 | return 0; 53 | }); 54 | 55 | } -------------------------------------------------------------------------------- /pfs/src/pf_bitmap.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | // 7 | // Created by Liu Lele on 10/18/2020. 
8 | // 9 | 10 | #include "pf_bitmap.h" 11 | -------------------------------------------------------------------------------- /pfs/src/pf_block_tray.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "pf_block_tray.h" 14 | #include "pf_log.h" 15 | 16 | BlockTray::BlockTray() 17 | { 18 | } 19 | 20 | BlockTray::~BlockTray() 21 | { 22 | } 23 | 24 | int BlockTray::init(const char *name) 25 | { 26 | fd = open(name, O_RDWR|O_DIRECT); 27 | return fd; 28 | } 29 | 30 | void BlockTray::destroy() 31 | { 32 | close(fd); 33 | } 34 | 35 | int BlockTray::get_num_blocks(long *number) 36 | { 37 | struct stat fst; 38 | int rc = fstat(fd, &fst); 39 | if(rc != 0) 40 | { 41 | rc = -errno; 42 | S5LOG_ERROR("Failed fstat, rc:%d", rc); 43 | return rc; 44 | } 45 | if(S_ISBLK(fst.st_mode )) 46 | return ioctl(fd, BLKGETSIZE, number); 47 | else 48 | { 49 | *number = fst.st_size/512; 50 | } 51 | return 0; 52 | } 53 | 54 | ssize_t BlockTray::sync_read(void *buffer, size_t size, __off_t offset) 55 | { 56 | return pread(fd, buffer, size, offset); 57 | } 58 | 59 | ssize_t BlockTray::sync_write(const void *buffer, size_t size, __off_t offset) 60 | { 61 | return pwrite(fd, buffer, size, offset); 62 | } 63 | 64 | ssize_t BlockTray::async_read(void *buffer, size_t size, __off_t offset, void *callback) 65 | { 66 | return 0; 67 | } 68 | 69 | ssize_t BlockTray::async_write(const void *buffer, size_t size, __off_t offset, void *callback) 70 | { 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /pfs/src/pf_error_handler.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the 
GPL. 5 | */ 6 | // 7 | // Created by lele on 5/24/20. 8 | // 9 | #include "pf_main.h" 10 | #include "pf_restful_api.h" 11 | #include "pf_client_priv.h" 12 | #include "pf_error_handler.h" 13 | 14 | int PfErrorHandler::report_error_to_conductor(uint64_t rep_id, int sc,ErrorReportReply& reply) 15 | { 16 | 17 | 18 | int retry_times = 5; 19 | for (int i = 0; i < retry_times; i++) 20 | { 21 | if(conductor_ip.empty()) { 22 | conductor_ip = get_master_conductor_ip(zk_ip.c_str(), cluster_name.c_str()); 23 | } 24 | std::string query = format_string("http://%s:49180/s5c/?op=handle_error&rep_id=%lld&sc=%d", 25 | conductor_ip.c_str(), rep_id, sc); 26 | void* reply_buf = pf_http_get(query, http_timeout, 1); 27 | if( reply_buf != NULL) { 28 | DeferCall _rel([reply_buf]() { free(reply_buf); }); 29 | auto j = nlohmann::json::parse((char*)reply_buf); 30 | if(j["ret_code"].get() != 0) { 31 | throw std::runtime_error(format_string("Failed %s, reason:%s", query.c_str(), j["reason"].get().c_str())); 32 | } 33 | j.get_to(reply); 34 | return 0; 35 | } 36 | if (i < retry_times - 1) 37 | { 38 | conductor_ip.clear(); 39 | S5LOG_ERROR("Failed query %s, will retry", query.c_str()); 40 | ::sleep(DEFAULT_HTTP_QUERY_INTERVAL); 41 | } 42 | } 43 | 44 | return -1; 45 | } 46 | 47 | //submit_error should work in asynchronous mode, though now it in synchronized mode 48 | int PfErrorHandler::submit_error(IoSubTask* t, PfMessageStatus sc) 49 | { 50 | int rc = this->event_queue->post_event(EVT_ASK_CONDUCTOR, sc, t); 51 | if(unlikely(rc)){ 52 | S5LOG_ERROR("Failed to submit error, rc:%d", rc); 53 | } 54 | return rc; 55 | } 56 | //int PfErrorHandler::submit_error(PfServerIocb* io, uint64_t rep_id, PfMessageStatus sc) 57 | //{ 58 | // assert(sc == PfMessageStatus::MSG_STATUS_NOT_PRIMARY); 59 | // int rc = this->event_queue->post_event(EVT_ASK_CONDUCTOR, sc, (void*) rep_id, io); 60 | // if (unlikely(rc)) { 61 | // S5LOG_ERROR("Failed to submit error, rc:%d", rc); 62 | // } 63 | // return rc; 64 | // 65 | //} 
66 | 67 | PfErrorHandler::PfErrorHandler() 68 | { 69 | this->zk_ip = conf_get(app_context.conf, "zookeeper", "ip", "", TRUE);; 70 | cluster_name = conf_get(app_context.conf, "cluster", "name", "cluster1", FALSE); 71 | http_timeout = conf_get_int(app_context.conf, "client", "handle_error_timeout", 30, FALSE); 72 | } 73 | 74 | int PfErrorHandler::process_event(int event_type, int arg_i, void* arg_p, void* arg_q){ 75 | switch(event_type){ 76 | case EVT_ASK_CONDUCTOR: 77 | { 78 | ErrorReportReply r; 79 | IoSubTask* t = (IoSubTask *)arg_p; 80 | int rc = 0; 81 | PfMessageStatus sc = (PfMessageStatus)arg_i; 82 | rc = report_error_to_conductor(t->rep_id, sc, r); 83 | 84 | 85 | if (rc) { 86 | S5LOG_ERROR("Failed report error to conductor, rc:%d", rc); 87 | t->ops->complete(t, PfMessageStatus::MSG_STATUS_INTERNAL); 88 | } 89 | else { 90 | S5LOG_INFO("Error report get sc:%s, meta_ver:%d", PfMessageStatus2Str(r.action_code), r.meta_ver); 91 | t->ops->complete_meta_ver(t, r.action_code, r.meta_ver); 92 | } 93 | 94 | return rc; 95 | 96 | } 97 | break; 98 | default: 99 | S5LOG_ERROR("Unknown event:%d", event_type); 100 | } 101 | return 0; 102 | } -------------------------------------------------------------------------------- /pfs/src/pf_replica.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 
5 | */ 6 | #include "pf_replica.h" 7 | #include "pf_flash_store.h" 8 | 9 | int PfLocalReplica::submit_io(IoSubTask* subtask) 10 | { 11 | return disk->event_queue->post_event(EVT_IO_REQ, 0, subtask); 12 | } 13 | 14 | int PfSyncRemoteReplica::submit_io(IoSubTask* subtask) 15 | { 16 | return replicator->event_queue->post_event(EVT_IO_REQ, 0, subtask); 17 | } -------------------------------------------------------------------------------- /pfs/src/pf_s5message.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 5 | */ 6 | #include 7 | #include 8 | #include "pf_message.h" 9 | 10 | -------------------------------------------------------------------------------- /pfs/src/pf_volume.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Liu Lele(liu_lele@126.com) 3 | * 4 | * This code is licensed under the GPL. 
5 | */ 6 | #include "pf_volume.h" 7 | #include "pf_utils.h" 8 | 9 | static const char* status_strs[] = { "OK", "ERROR", "DEGRADED" }; 10 | HealthStatus health_status_from_str(const std::string& status_str) 11 | { 12 | for(int i=0;imeta_ver = vol.meta_ver; 39 | this->size = vol.size; 40 | this->snap_seq = vol.snap_seq; 41 | this->status = vol.status; 42 | for (int i = 0; i < shard_count; i++) { 43 | PfShard* s = shards[i]; 44 | for (int j = 0; j < s->rep_count; j++) { 45 | if (s->replicas[j] == NULL) { 46 | continue; 47 | } 48 | if (s->replicas[j]->status == HealthStatus::HS_RECOVERYING && vol.shards[i]->replicas[j]->status == HealthStatus::HS_ERROR) { 49 | vol.shards[i]->replicas[j]->status = HealthStatus::HS_RECOVERYING; //keep recoverying continue 50 | } 51 | } 52 | 53 | this->shards[i] = vol.shards[i]; 54 | vol.shards[i] = NULL; 55 | delete s; 56 | } 57 | 58 | for (int i = shard_count; i < vol.shards.size(); i++) { //enlarged shard 59 | shards.push_back(vol.shards[i]); 60 | vol.shards[i] = NULL; 61 | } 62 | this->shard_count = vol.shard_count; 63 | return *this; 64 | } 65 | 66 | PfVolume::~PfVolume() 67 | { 68 | S5LOG_DEBUG("Desctruct PfVolume, %d shards", shards.size()); 69 | for(int i=0;ishard_index); 79 | for(int i=0;i< MAX_REP_COUNT; i++) 80 | { 81 | if (replicas[i] == NULL) { 82 | continue; 83 | } 84 | delete replicas[i]; 85 | replicas[i] = NULL; 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /pre_build_libs/centos_7_x86_64/libhashtable.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/pre_build_libs/centos_7_x86_64/libhashtable.a -------------------------------------------------------------------------------- /pre_build_libs/centos_7_x86_64/libzookeeper_mt.a: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/pre_build_libs/centos_7_x86_64/libzookeeper_mt.a -------------------------------------------------------------------------------- /pre_build_libs/modules/5.15.0-73-generic/os.txt: -------------------------------------------------------------------------------- 1 | Ubuntu 22.04.2 LTS 2 | -------------------------------------------------------------------------------- /pre_build_libs/modules/5.15.0-73-generic/pfkd.ko: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/pre_build_libs/modules/5.15.0-73-generic/pfkd.ko -------------------------------------------------------------------------------- /pre_build_libs/ubuntu_20.04_x86_64/libhashtable.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/pre_build_libs/ubuntu_20.04_x86_64/libhashtable.a -------------------------------------------------------------------------------- /pre_build_libs/ubuntu_20.04_x86_64/libzookeeper_mt.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/pre_build_libs/ubuntu_20.04_x86_64/libzookeeper_mt.a -------------------------------------------------------------------------------- /pre_build_libs/ubuntu_22.04_aarch64/libhashtable.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/pre_build_libs/ubuntu_22.04_aarch64/libhashtable.a -------------------------------------------------------------------------------- /pre_build_libs/ubuntu_22.04_aarch64/libzkmt.a: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/pre_build_libs/ubuntu_22.04_aarch64/libzkmt.a -------------------------------------------------------------------------------- /pre_build_libs/ubuntu_22.04_aarch64/libzookeeper_mt.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/pre_build_libs/ubuntu_22.04_aarch64/libzookeeper_mt.a -------------------------------------------------------------------------------- /pre_build_libs/ubuntu_22.04_x86_64/libhashtable.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/pre_build_libs/ubuntu_22.04_x86_64/libhashtable.a -------------------------------------------------------------------------------- /pre_build_libs/ubuntu_22.04_x86_64/libzookeeper_mt.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/pre_build_libs/ubuntu_22.04_x86_64/libzookeeper_mt.a -------------------------------------------------------------------------------- /run-from-docker.txt: -------------------------------------------------------------------------------- 1 | 1. Run PureFlash from docker 2 | ============================ 3 | 4 | ``` 5 | # docker pull pureflash/pureflash:latest 6 | # docker run -it --rm pureflash/pureflash:latest 7 | ``` 8 | For first run, this may take about 60 seconds to initialize a fake disk file. 9 | 10 | 2. 
PureFlash command line tool 11 | ============================== 12 | 13 | `pfcli` is the command line tool to manage the PureFlash storage system, you can use it like below: 14 | 15 | # pfcli list_store 16 | +----+---------------+--------+ 17 | | Id | Management IP | Status | 18 | +----+---------------+--------+ 19 | | 1 | 127.0.0.1 | OK | 20 | +----+---------------+--------+ 21 | 22 | # pfcli list_disk 23 | +----------+--------------------------------------+--------+ 24 | | Store ID | uuid | Status | 25 | +----------+--------------------------------------+--------+ 26 | | 1 | 9ae5b25f-a1b7-4b8d-9fd0-54b578578333 | OK | 27 | +----------+--------------------------------------+--------+ 28 | 29 | 3. Test PureFlash in docker 30 | =========================== 31 | 32 | # pfcli create_volume -v test_v1 -s 2G --rep 1 33 | +------------+---------+------------+----------+--------+ 34 | | Id | Name | Size | RepCount | Status | 35 | +------------+---------+------------+----------+--------+ 36 | | 1107296256 | test_v1 | 2147483648 | 1 | OK | 37 | +------------+---------+------------+----------+--------+ 38 | 39 | # pfdd --rw write --if /dev/zero -v test_v1 --bs 4k --count 10 40 | 41 | # fio -name=test -ioengine=pfbd -volume=test_v1 -iodepth=2 -rw=randwrite -size=2G -bs=4k -direct=1 42 | 43 | pfdd is a tool like dd on Linux systems, and fio (https://gitee.com/cocalele/fio.git) is a fio build with the pfbd ioengine enabled. 44 | -------------------------------------------------------------------------------- /scripts/env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | export PATH=$DIR/bin:$PATH 4 | export LD_LIBRARY_PATH=$DIR/bin:$LD_LIBRARY_PATH 5 | export LOG4C_RCPATH=$DIR/bin 6 | 7 | echo "dont forget to run env-java.sh & env-pfc.sh also!"
8 | 9 | -------------------------------------------------------------------------------- /scripts/osname.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | OSNAME=$(awk -F= '{if ($1 == "ID") OS=$2; else if($1 == "VERSION_ID") VER=$2;} END {print OS "_" VER} ' /etc/os-release ) 3 | echo -n ${OSNAME//\"/}_$(uname -m) 4 | -------------------------------------------------------------------------------- /scripts/pfc_supervisor.conf: -------------------------------------------------------------------------------- 1 | [program:pfc] 2 | autostart=true 3 | startretries=10 4 | startsecs=3 5 | command=/usr/bin/pfc -c /etc/pureflash/pfc.conf 6 | redirect_stderr=true 7 | stdout_logfile=/var/log/pfc.log 8 | stopsignal=INT 9 | -------------------------------------------------------------------------------- /scripts/pfs_supervisor.conf: -------------------------------------------------------------------------------- 1 | [program:pfs] 2 | autostart=true 3 | startretries=10 4 | startsecs=3 5 | command=/usr/bin/pfs -c /etc/pureflash/pfs.conf 6 | redirect_stderr=true 7 | stdout_logfile=/var/log/pfs.log 8 | stopsignal=INT 9 | -------------------------------------------------------------------------------- /scripts/tar-client-libs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | PUREFLASH_HOME=$(realpath $DIR/..) 
4 | tmpdir="deps-`$DIR/osname.sh`" 5 | mkdir $tmpdir 6 | 7 | 8 | cp ${PUREFLASH_HOME}/common/include/pf_client_api.h $tmpdir/pf_client_api.h 9 | cp ${PUREFLASH_HOME}/build/bin/libs5common.a $tmpdir/libs5common.a 10 | cp ${PUREFLASH_HOME}/build/bin/libzookeeper_mt.a $tmpdir/libzookeeper_mt.a 11 | cp ${PUREFLASH_HOME}/build/bin/libhashtable.a $tmpdir/libhashtable.a 12 | ( cd ${PUREFLASH_HOME}/thirdParty/spdk/build/lib; cp libspdk_rpc.a libspdk_nvme.a libspdk_env_dpdk.a libspdk_util.a libspdk_log.a libspdk_sock.a libspdk_trace.a libspdk_json.a libspdk_jsonrpc.a $DIR/$tmpdir) 13 | ( cd ${PUREFLASH_HOME}/thirdParty/spdk/dpdk/build/lib; cp -rp librte_eal.a librte_mempool.a librte_ring.a librte_telemetry.a librte_kvargs.a librte_pci.a librte_bus_pci.a librte_mempool_ring.a $DIR/$tmpdir ) 14 | 15 | tar czf pureflash-$tmpdir.tgz $tmpdir 16 | -------------------------------------------------------------------------------- /sld/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | if(COMMAND cmake_policy) 3 | cmake_policy(SET CMP0015 NEW) 4 | endif() 5 | 6 | PROJECT(sld) 7 | file(GLOB_RECURSE INCS "*.h") 8 | INCLUDE_DIRECTORIES(include ../s5afs/include ) 9 | set (BDD_SRC 10 | bdd/bdd.c 11 | ) 12 | add_executable(bdd ${BDD_SRC} ${INCS}) 13 | 14 | set (S5BD_SRC 15 | s5bd-cli/cmd_parse.c 16 | ) 17 | add_executable(s5bd ${S5BD_SRC} ${INCS}) 18 | 19 | add_custom_target(TARGET ALL 20 | COMMAND echo "==========compiling s5bd driver===============" 21 | COMMAND make clean -C ${CMAKE_CURRENT_SOURCE_DIR}/driver 22 | COMMAND make -C ${CMAKE_CURRENT_SOURCE_DIR}/driver 23 | COMMAND cp -rpfu ${CMAKE_CURRENT_SOURCE_DIR}/driver/s5bd.ko ${CMAKE_BINARY_DIR}/ 24 | COMMAND echo "==========copy s5bd driver to bin=============" 25 | ) 26 | 27 | -------------------------------------------------------------------------------- /sld/driver/Makefile: 
-------------------------------------------------------------------------------- 1 | MODULE_NAME := s5bd 2 | $(MODULE_NAME)-objs:= s5k_miscdev.o s5k_blkdev.o s5k_imagectx.o s5k_conductor.o 3 | EXTRA_CFLAGS=-I$(PWD)/../include 4 | EXTRA_CFLAGS += -D _S5BD_KERNEL_ 5 | CURRENT = $(shell uname -r) 6 | KDIR = /lib/modules/$(CURRENT)/build/ 7 | 8 | obj-m :=s5bd.o 9 | 10 | SRCS = s5k_miscdev.c s5k_blkdev.c s5k_imagectx.c s5k_conductor.c 11 | 12 | OBJS = $(SRCS:.c=.o) 13 | ccflags-y += -g 14 | 15 | default: 16 | $(MAKE) -C $(KDIR) M=`pwd` modules 17 | clean: 18 | $(MAKE) -C $(KDIR) M=`pwd` clean 19 | $(RM) Module.markers modules.order *.o 20 | 21 | -------------------------------------------------------------------------------- /sld/driver/s5k_basetype.h: -------------------------------------------------------------------------------- 1 | #ifndef __S5K_BASETYPE_H__ 2 | #define __S5K_BASETYPE_H__ 3 | #include 4 | 5 | #ifndef EOK 6 | #define EOK 0 7 | #endif 8 | 9 | #define MERGE_(a,b) a##b 10 | #define LABEL_(a) MERGE_(__check_only, a) 11 | #define __CHECK_ONLY LABEL_(__COUNTER__) 12 | #define ASSERT_SIZE(t,size) typedef char __CHECK_ONLY[ (sizeof(t) == size) ? 
1 : -1]; 13 | 14 | 15 | typedef unsigned char uchar; 16 | typedef unsigned int BOOL; 17 | #define TRUE 1 18 | #define FALSE 0 19 | 20 | 21 | #define S5ASSERT(x) \ 22 | if(!(x)) \ 23 | { \ 24 | while(1) \ 25 | { \ 26 | LOG_ERROR(__FILE__ ":%d S5ASSERT:%s", __LINE__, #x); \ 27 | schedule_timeout_uninterruptible(HZ*30); \ 28 | } \ 29 | } \ 30 | 31 | #endif //__S5K_BASETYPE_H__ 32 | -------------------------------------------------------------------------------- /sld/driver/s5k_imagectx.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cocalele/PureFlash/79d05bec816f7cac34cc7d3f33cbc66787fbf3d8/sld/driver/s5k_imagectx.c -------------------------------------------------------------------------------- /sld/driver/s5k_log.h: -------------------------------------------------------------------------------- 1 | #ifndef __S5K_LOG_H__ 2 | #define __S5K_LOG_H__ 3 | 4 | #include 5 | //KERN_EMERG 0/*紧急事件消息,系统崩溃之前提示,表示系统不可用*/ 6 | //KERN_ALERT 1/*报告消息,表示必须立即采取措施*/ 7 | //KERN_CRIT 2/*临界条件,通常涉及严重的硬件或软件操作失败*/ 8 | //KERN_ERR 3/*错误条件,驱动程序常用KERN_ERR来报告硬件的错误*/ 9 | //KERN_WARNING 4/*警告条件,对可能出现问题的情况进行警告*/ 10 | //KERN_NOTICE 5/*正常但又重要的条件,用于提醒*/ 11 | //KERN_INFO 6/*提示信息,如驱动程序启动时,打印硬件信息*/ 12 | //KERN_DEBUG 7/*调试级别的消息*/ 13 | 14 | #define DEBUG_LOG 15 | #ifdef DEBUG_LOG 16 | extern int printk(const char *fmt, ...); 17 | 18 | //#define LOG_DETAIL 19 | #ifdef LOG_DETAIL 20 | #define LOG_FATAL(fmt, arg...) \ 21 | do { \ 22 | printk(KERN_EMERG fmt " %s()-%d.\n", ##arg, __FUNCTION__, __LINE__); \ 23 | }while(0) 24 | 25 | #define LOG_ERROR(fmt, arg...) \ 26 | do { \ 27 | printk(KERN_ERR fmt " %s()-%d.\n", ##arg, __FUNCTION__, __LINE__); \ 28 | }while(0) 29 | 30 | #define LOG_WARN(fmt, arg...) \ 31 | do { \ 32 | printk(KERN_WARNING fmt " %s()-%d.\n", ##arg, __FUNCTION__, __LINE__); \ 33 | }while(0) 34 | 35 | #define LOG_INFO(fmt, arg...) 
\ 36 | do { \ 37 | printk(KERN_NOTICE fmt " %s()-%d.\n", ##arg, __FUNCTION__, __LINE__); \ 38 | }while(0) 39 | 40 | #define LOG_DEBUG(fmt, arg...) \ 41 | do { \ 42 | printk(KERN_INFO fmt " %s()-%d.\n", ##arg, __FUNCTION__, __LINE__); \ 43 | }while(0) 44 | 45 | #define LOG_TRACE(fmt, arg...) \ 46 | do { \ 47 | printk(KERN_DEBUG fmt " %s()-%d.\n", ##arg, __FUNCTION__, __LINE__); \ 48 | }while(0) 49 | #else 50 | #define LOG_FATAL(fmt, arg...) \ 51 | do { \ 52 | printk(KERN_EMERG fmt "\n", ##arg); \ 53 | }while(0) 54 | 55 | #define LOG_ERROR(fmt, arg...) \ 56 | do { \ 57 | printk(KERN_ERR fmt "\n", ##arg); \ 58 | }while(0) 59 | 60 | #define LOG_WARN(fmt, arg...) \ 61 | do { \ 62 | printk(KERN_WARNING fmt "\n", ##arg); \ 63 | }while(0) 64 | 65 | #define LOG_INFO(fmt, arg...) \ 66 | do { \ 67 | printk(KERN_NOTICE fmt "\n", ##arg); \ 68 | }while(0) 69 | 70 | #define LOG_DEBUG(fmt, arg...) \ 71 | do { \ 72 | printk(KERN_INFO fmt "\n", ##arg); \ 73 | }while(0) 74 | 75 | #define LOG_TRACE(fmt, arg...) \ 76 | do { \ 77 | printk(KERN_DEBUG fmt "\n", ##arg); \ 78 | }while(0) 79 | #endif 80 | 81 | #else 82 | #define LOG_FATAL(fmt, arg...) 83 | #define LOG_ERROR(fmt, arg...) 84 | #define LOG_WARN(fmt, arg...) 85 | #define LOG_INFO(fmt, arg...) 86 | #define LOG_DEBUG(fmt, arg...) 87 | #define LOG_TRACE(fmt, arg...) 
88 | #endif 89 | 90 | #endif //__S5K_LOG_H__ 91 | -------------------------------------------------------------------------------- /sld/driver/s5k_spy.h: -------------------------------------------------------------------------------- 1 | #ifndef spy_h__ 2 | #define spy_h__ 3 | #include "s5k_basetype.h" 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define IO_STAT_IOPS "iops" 9 | #define IO_STAT_BANDWIDTH "band_width" 10 | #define IO_STAT_HITRATIO "hit_ratio" 11 | #define IO_STAT_AVERAGE_LATENCY "average_latency" 12 | 13 | #define STAT_IO_TIME_INTERVAL 2 14 | #define STAT_VAR_LEN 150 15 | #define SPY_BUF_LEN_MAX 4096 16 | 17 | #define FLOAT_PRECISION (10000) 18 | #define POINT_LEFT(addr) ((long)*(double*)(addr)) 19 | #define POINT_RIGHT(addr) ((long)((*(double*)(addr)-(long)*(double*)(addr)) * FLOAT_PRECISION)) 20 | 21 | #define DEF_SPY_PORT 2002 22 | 23 | enum variable_type 24 | { 25 | vt_uint32, 26 | vt_int32, 27 | vt_int64, 28 | vt_prop_int64, 29 | vt_cstr, 30 | vt_float 31 | }; 32 | typedef int64_t (*property_getter)(void* obj); 33 | void spy_start(int spy_port); 34 | void spy_end(void); 35 | //EXPORT_SYMBOL(spy_end); 36 | int spy_get_port(void); 37 | int* get_spy_port_ptr(void); 38 | 39 | /** 40 | * register a variable to be read at run time. 41 | * 42 | * 43 | * @param[in] name variable's name, the spy client uses this name to read this variable 44 | * @param[in] var_addr variable's address, must be globally available memory that can 45 | * be accessed at any time 46 | * @param[in] var_type variable's type, vt_uint32, vt_int32, vt_int64 or vt_float 47 | * @param[in] comment info about the variable 48 | */ 49 | void spy_register_variable(const char* name, const void* var_addr, enum variable_type var_type, const char* comment); 50 | 51 | /** 52 | * register a property. 53 | * 54 | * 55 | * A property is a value accessed via a getter function.
For example, *depth* is a property of queue 56 | to get a queue's depth, one should use code: 57 | int64_t depth = queue_get_depth(queue); 58 | to register such a property, call spy_register_property like 59 | int64_t s5_get_qdepth(s5session* s5s); 60 | spy_register_property("s5_qdepth", s5_get_qdepth, s5session, vt_prop_int64, info); 61 | so far, only property type int64_t is supported. i.e. prop_type must be vt_prop_int64 62 | * 63 | * @param[in] name variable's name, spy clinet use this name to read this variable 64 | * @param[in] get_func fun pointer of type property_getter, spy call this fun to get variable 65 | * @param[in] obj_to_get param of fun get_func 66 | * @param[in] prop_type must be vt_prop_int64 67 | * @param[in] comment info about the variable 68 | */ 69 | void spy_register_property(const char* name, 70 | property_getter get_func, 71 | void* obj_to_get, 72 | enum variable_type prop_type, 73 | const char* comment 74 | ); 75 | /** 76 | * unregister a varable that registered by fun spy_register_varable or spy_register_property 77 | */ 78 | void spy_unregister(const char* name); 79 | #ifdef __cplusplus 80 | } 81 | #endif 82 | 83 | #endif // spy_h__ 84 | -------------------------------------------------------------------------------- /sld/include/bdd_log.h: -------------------------------------------------------------------------------- 1 | #ifndef __BDD_LOG_H__ 2 | #define __BDD_LOG_H__ 3 | #include 4 | #include 5 | 6 | 7 | #define BDD_LOG_FILE "/var/log/bdd.log" 8 | 9 | #define LOG2FILE 10 | #ifdef LOG2FILE 11 | 12 | #define LOG_ERROR(fmt, arg...) \ 13 | do { \ 14 | time_t rawtime = time(NULL); \ 15 | char stime[64]; \ 16 | strftime(stime, sizeof(stime), "%Y-%m-%d %H:%M:%S", localtime(&rawtime)); \ 17 | FILE* logfile = fopen(BDD_LOG_FILE, "a+"); \ 18 | fprintf(logfile, "[%s] Error: " fmt "\n", stime, ##arg); \ 19 | fclose(logfile); \ 20 | printf("[%s] Error: " fmt "\n", stime, ##arg); \ 21 | }while(0) 22 | #define LOG_WARN(fmt, arg...) 
\ 23 | do { \ 24 | time_t rawtime = time(NULL); \ 25 | char stime[64]; \ 26 | strftime(stime, sizeof(stime), "%Y-%m-%d %H:%M:%S", localtime(&rawtime)); \ 27 | FILE* logfile = fopen(BDD_LOG_FILE, "a+"); \ 28 | if (logfile != NULL) { /* fopen can fail; skip the file copy instead of crashing */ fprintf(logfile, "[%s] Warning: " fmt "\n", stime, ##arg); \ 29 | fclose(logfile); } \ 30 | printf("[%s] Warning: " fmt "\n", stime, ##arg); \ 31 | }while(0) 32 | #define LOG_INFO(fmt, arg...) \ 33 | do { \ 34 | time_t rawtime = time(NULL); \ 35 | char stime[64]; \ 36 | strftime(stime, sizeof(stime), "%Y-%m-%d %H:%M:%S", localtime(&rawtime)); \ 37 | FILE* logfile = fopen(BDD_LOG_FILE, "a+"); \ 38 | if (logfile != NULL) { /* fopen can fail; skip the file copy instead of crashing */ fprintf(logfile,"[%s] Info: " fmt " \n", stime, ##arg); \ 39 | fclose(logfile); } \ 40 | printf("[%s] Info: " fmt "\n", stime, ##arg); \ 41 | }while(0) 42 | #define LOG_TRACE(fmt, arg...) \ 43 | do { \ 44 | time_t rawtime = time(NULL); \ 45 | char stime[64]; \ 46 | strftime(stime, sizeof(stime), "%Y-%m-%d %H:%M:%S", localtime(&rawtime)); \ 47 | FILE* logfile = fopen(BDD_LOG_FILE, "a+"); \ 48 | if (logfile != NULL) { /* fopen can fail; skip the file copy instead of crashing */ fprintf(logfile, "[%s] Trace: " fmt "\n", stime, ##arg); \ 49 | fclose(logfile); } \ 50 | printf("[%s] Trace: " fmt "\n", stime, ##arg); \ 51 | }while(0) 52 | 53 | #else 54 | /* Without LOG2FILE every level prints the caller's format unchanged to stdout. */ 55 | #define LOG_ERROR(fmt, arg...) \ 56 | do { \ 57 | printf(fmt, ##arg); \ 58 | }while(0) 59 | #define LOG_WARN(fmt, arg...) \ 60 | do { \ 61 | printf(fmt, ##arg); \ 62 | }while(0) 63 | #define LOG_INFO(fmt, arg...) \ 64 | do { \ 65 | printf(fmt, ##arg); \ 66 | }while(0) 67 | #define LOG_TRACE(fmt, arg...) \ 68 | do { \ 69 | printf(fmt, ##arg); \ 70 | }while(0) 71 | #endif 72 | 73 | #endif //__BDD_LOG_H__ 74 | -------------------------------------------------------------------------------- /sld/include/s5ioctl.h: -------------------------------------------------------------------------------- 1 | /* s5ioctl.h - xxxx */ 2 | 3 | /* 4 | * Copyright (c) 2015 NetBric Systems, Inc.
5 | * 6 | * The right to copy, distribute, modify or otherwise make use 7 | * of this software may be licensed only pursuant to the terms 8 | * of an applicable NetBric license agreement. 9 | */ 10 | 11 | 12 | #ifndef __S5IOCTL_H__ 13 | #define __S5IOCTL_H__ 14 | 15 | /* misc device name*/ 16 | #define DEVICENAME "s5bd" 17 | 18 | #ifndef MAX_IP_LENGTH 19 | #define MAX_IP_LENGTH 16 20 | #endif 21 | 22 | //hard code in genhd.c:L246 kernel version:3.10 23 | #ifndef MAX_DEVICE_NAME_LEN 24 | #define MAX_DEVICE_NAME_LEN 16 25 | #endif 26 | 27 | #ifndef MAX_DEVICE_NUM 28 | #define MAX_DEVICE_NUM 64 29 | #endif 30 | #define MAX_CONDUCTOR_CNT 2 31 | 32 | #include 33 | #include "bdd_message.h" 34 | 35 | #define BDDBIN "/var/tmp/bdd.bin" //change directory to /etc/s5 later 36 | #define BDDBINBAK "/var/tmp/.bdd.bin.bak" //change directory to /etc/s5 later 37 | 38 | struct ioctlparam; 39 | 40 | /** 41 | *define ioctl codes for interfacing between kernel_module and user program 42 | */ 43 | #define S5BDMONITOR_CODE 0xcc 44 | 45 | #define MAP_DEVICE _IOWR (S5BDMONITOR_CODE, 0x0, struct ioctlparam) 46 | #define UNMAP_DEVICE _IOWR (S5BDMONITOR_CODE, 0x1, struct ioctlparam) 47 | #define LIST_DEVICE _IOWR (S5BDMONITOR_CODE, 0x2, struct ioctlparam) 48 | 49 | #define BDEVICE_INACTIVE 0 50 | #define BDEVICE_ACTIVE 1 51 | 52 | typedef struct bdevice 53 | { 54 | /* mgt_device_ctx index, same as s5bd%d index */ 55 | 56 | int bddev_id; 57 | 58 | /* get from s5bd command tool */ 59 | 60 | struct device_info dinfo; 61 | } bdevice_t; 62 | 63 | typedef struct ioctlparam 64 | { 65 | struct bdevice bdev; 66 | 67 | /* ioctrl return value */ 68 | int retval; 69 | 70 | } __attribute__((packed)) ioctlparam_t; 71 | 72 | #endif /* __S5IOCTL_H__ */ 73 | 74 | -------------------------------------------------------------------------------- /testing/create_vol.sh: -------------------------------------------------------------------------------- 1 | 2 | VM_PER_HOST=12 3 | DISK_PER_VM=1 4 | 5 | VOL_SIZE=128G 
6 | HOST_IP=($1 $2 $3) 7 | 8 | for ((i=0;i<${#HOST_IP[@]};i++)); do 9 | for ((j=0;j qemu.log & 16 | 17 | 18 | mkdir /$1 19 | cd $VMDIR 20 | qemu-img create -f qcow2 -F qcow2 -b /vm-centos8/centos8-with-iostat.qcow2 $OSDISK 21 | #qemu-img info $OSDISK 22 | cp /root/v2/qemu/pc-bios/efi-virtio.rom . 23 | cp /usr/share/qemu/keymaps/en-us . 24 | cp /vm-centos8/QEMU_EFI-pflash.raw . 25 | 26 | set -v 27 | nohup /root/v2/qemu-system-aarch64 -cpu host -M virt,gic-version=max -enable-kvm -smp 8 -m 8G \ 28 | -drive if=virtio,file=$VMDIR/$OSDISK \ 29 | -nic user,hostfwd=tcp::$2-:22 -vnc :$(($2-20022)) \ 30 | -device virtio-gpu-pci -bios $VMDIR/QEMU_EFI-pflash.raw \ 31 | -drive format=raw,file=pfbd:$3,if=virtio &> qemu.log & 32 | 33 | -------------------------------------------------------------------------------- /testing/test_1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | source $DIR/utils.sh 4 | 5 | VOL1=test_1 6 | COND_IP=$(pfcli get_pfc) 7 | read DB_IP DB_NAME DB_USER DB_PASS <<< $(assert pfcli get_conn_str) 8 | export DB_IP DB_NAME DB_USER DB_PASS 9 | 10 | curlex "http://$COND_IP:49180/s5c/?op=delete_volume&volume_name=$VOL1" 11 | sleep 1 12 | 13 | info "Creating volume $VOL1" 14 | NODE_CNT=$(pfcli list_store |grep OK |wc -l) 15 | if (( NODE_CNT < 3 )) ; then REP_CNT=1; else REP_CNT=3; fi #arithmetic test; "[ 0 ]" and "[ 1 ]" are both true, which forced REP_CNT=1 16 | 17 | assert curlex "http://$COND_IP:49180/s5c/?op=create_volume&volume_name=$VOL1&size=$((5<<30))&rep_cnt=$REP_CNT" 18 | 19 | count1=$( get_obj_count $VOL1 ) 20 | SNAP1_FILE=${VOL1}_snap1.dat 21 | SNAP1_OUT_FILE=${VOL1}_snap1_out.dat 22 | SNAP2_FILE=${VOL1}_snap2.dat 23 | SNAP2_OUT_FILE=${VOL1}_snap2_out.dat 24 | 25 | assert dd if=/dev/urandom bs=4K count=512 of=$SNAP1_FILE 26 | info "Writing to volume" 27 | assert pfdd --count 512 --rw write --bs 4k -v $VOL1 --if $SNAP1_FILE 28 | assert_equal $( get_obj_count $VOL1 ) $((count1 + REP_CNT)) 29 | 30 | info
"Create snapshot snap1" 31 | assert curlex "http://$COND_IP:49180/s5c/?op=create_snapshot&volume_name=$VOL1&snapshot_name=snap1" 32 | 33 | 34 | assert cp $SNAP1_FILE $SNAP2_FILE 35 | assert dd if=/dev/urandom bs=4k count=32 conv=nocreat,notrunc of=$SNAP2_FILE 36 | info "Writing to volume again" 37 | assert pfdd --count 32 --rw write --bs 4k -v $VOL1 --if $SNAP2_FILE 38 | assert_equal $( get_obj_count $VOL1 ) $((count1 + REP_CNT * 2)) 39 | 40 | info "Now compare snapshot data" 41 | SRC_MD5=$(dd if=$SNAP1_FILE bs=4k count=512 | md5sum -b) 42 | assert pfdd --count 512 --rw read --bs 4k -v $VOL1 --snapshot snap1 --of $SNAP1_OUT_FILE 43 | SNAP1_MD5=$(dd if=$SNAP1_OUT_FILE bs=4k count=512 | md5sum -b) 44 | assert_equal "$SRC_MD5" "$SNAP1_MD5" 45 | 46 | info "Now compare HEAD data" 47 | SRC2_MD5=$(dd if=$SNAP2_FILE bs=4k count=512 | md5sum -b) 48 | assert pfdd --count 512 --rw read --bs 4k -v $VOL1 --of $SNAP2_OUT_FILE 49 | SNAP2_MD5=$(dd if=$SNAP2_OUT_FILE bs=4k count=512 | md5sum -b) 50 | assert_equal "$SRC2_MD5" "$SNAP2_MD5" 51 | 52 | 53 | info "Test OK" 54 | -------------------------------------------------------------------------------- /testing/test_2_store_error.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | source $DIR/utils.sh 4 | 5 | VOL_NAME=test_2 6 | VOL_SIZE=$((5<<30)) #5G on my testing platform 7 | COND_IP=$(pfcli get_pfc) 8 | read DB_IP DB_NAME DB_USER DB_PASS <<< $(assert pfcli get_conn_str) 9 | export DB_IP DB_NAME DB_USER DB_PASS 10 | 11 | pfcli delete_volume -v $VOL_NAME 12 | assert pfcli create_volume -v $VOL_NAME -s $VOL_SIZE -r 3 13 | assert "fio --enghelp | grep pfbd " 14 | fio -name=test -ioengine=pfbd -volume=$VOL_NAME -iodepth=8 -rw=randwrite -size=$VOL_SIZE -bs=4k -direct=1 & 15 | FIO_PID=$! 
16 | sleep 10 #wait fio to start 17 | 18 | PRIMARY_IP=$(query_db "select mngt_ip from t_store where id in (select store_id from v_replica_ext where is_primary=1 and volume_name='$VOL_NAME') limit 1") 19 | info "Primary node is:$PRIMARY_IP" 20 | 21 | STORE_IP=$(query_db "select mngt_ip from t_store where id in (select store_id from v_replica_ext where is_primary=0 and volume_name='$VOL_NAME') limit 1") 22 | info "stop slave node $STORE_IP" 23 | 24 | ssh root@$STORE_IP supervisorctl stop pfs #stop pfs 25 | sleep 3 26 | 27 | assert_equal $(query_db "select status from t_volume where name='$VOL_NAME'") "DEGRADED" 28 | 29 | assert wait $FIO_PID 30 | 31 | info "Test OK" 32 | -------------------------------------------------------------------------------- /testing/test_3_recovery.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | source $DIR/utils.sh 4 | 5 | VOL_NAME=test_3 6 | VOL_SIZE=$((5<<30)) #5G on my testing platform 7 | COND_IP=$(pfcli get_pfc) 8 | read DB_IP DB_NAME DB_USER DB_PASS <<< $(assert pfcli get_conn_str) 9 | export DB_IP DB_NAME DB_USER DB_PASS 10 | REP_CNT=$(get_rep_count) 11 | 12 | pfcli delete_volume -v $VOL_NAME 13 | assert pfcli create_volume -v $VOL_NAME -s $VOL_SIZE -r $REP_CNT 14 | assert "fio --enghelp | grep pfbd " 15 | fio -name=test -ioengine=pfbd -volume=$VOL_NAME -iodepth=1 -rw=randwrite -size=$VOL_SIZE -bs=4k -direct=1 & 16 | FIO_PID=$! 
17 | sleep 10 #wait fio to start 18 | 19 | PRIMARY_IP=$(query_db "select mngt_ip from t_store where id in (select store_id from v_replica_ext where is_primary=1 and volume_name='$VOL_NAME') limit 1") 20 | info "Primary node is:$PRIMARY_IP" 21 | 22 | STORE_IP=$(query_db "select mngt_ip from t_store where id in (select store_id from v_replica_ext where is_primary=0 and volume_name='$VOL_NAME') limit 1") 23 | info "stop slave node $STORE_IP" 24 | stop_pfs $STORE_IP #stop pfs 25 | sleep 3 26 | 27 | info "check volume status should DEGRADED" 28 | assert_equal $(query_db "select status from t_volume where name='$VOL_NAME'") "DEGRADED" 29 | 30 | info "start slave node $STORE_IP" 31 | start_pfs $STORE_IP #start pfs 32 | sleep 3 33 | 34 | assert async_curl "http://$COND_IP:49180/s5c/?op=recovery_volume&volume_name=$VOL_NAME" 35 | 36 | assert_equal $(query_db "select status from t_volume where name='$VOL_NAME'") "OK" 37 | sleep 3 38 | #-w matches the whole PID, so e.g. 123 does not match another fio process with PID 1234 39 | if pidof fio | grep -w $FIO_PID ; then 40 | info "FIO still running, that's OK, kill it" 41 | kill -INT $FIO_PID 42 | else 43 | assert wait $FIO_PID 44 | fi 45 | info "Test OK" 46 | -------------------------------------------------------------------------------- /testing/test_4_move_replica.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -xv 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 4 | source $DIR/utils.sh 5 | 6 | VOL_NAME=test_r3 7 | VOL_SIZE=$((5<<30)) #5G on my testing platform 8 | COND_IP=$(pfcli get_pfc) 9 | read DB_IP DB_NAME DB_USER DB_PASS <<< $(assert pfcli get_conn_str) 10 | export DB_IP DB_NAME DB_USER DB_PASS 11 | 12 | pfcli delete_volume -v $VOL_NAME 13 | assert pfcli create_volume -v $VOL_NAME -s $VOL_SIZE -r 3 14 | query_db "select hex(replica_id), is_primary, store_id, tray_uuid from v_replica_ext where volume_name='$VOL_NAME'" 15 | 16 | #assert "fio --enghelp | grep pfbd " 17 | assert pfdd --rw write --if /dev/zero -v $VOL_NAME --bs 4k --count 10
18 | PRIMARY_IP=$(query_db "select mngt_ip from t_store where id in (select store_id from v_replica_ext where is_primary=1 and volume_name='$VOL_NAME') limit 1") 19 | read rep_id store_id tray_uuid store_ip <<< $(query_db "select replica_id , store_id , tray_uuid, s.mngt_ip from v_replica_ext, t_store s where volume_name='$VOL_NAME' and is_primary=0 and store_id=s.id limit 1") 20 | 21 | info "Primary node is:$PRIMARY_IP Slave node is:$store_ip" 22 | 23 | #choose a target node 24 | read target_uuid target_id <<< $(query_db "select uuid, store_id from t_tray where store_id not in (select store_id from v_replica_ext where volume_name='$VOL_NAME')") 25 | 26 | query_db "select replica_id , store_id , tray_uuid from v_replica_ext where volume_name='$VOL_NAME'" | grep $target_uuid 27 | assert_not_eq $? 0 28 | 29 | info "move replica $rep_id from store $store_id ssd $tray_uuid to store $target_id ssd $target_uuid" 30 | 31 | 32 | info "check volume status should OK" 33 | assert_equal $(query_db "select status from t_volume where name='$VOL_NAME'") "OK" 34 | 35 | 36 | assert async_curl "http://$COND_IP:49180/s5c/?op=move_volume&volume_name=$VOL_NAME&from_store=$store_id&from_ssd_uuid=$tray_uuid&target_store=$target_id&target_ssd_uuid=$target_uuid" 37 | 38 | #now, volume should not lay on original ssd 39 | query_db "select replica_id , store_id , tray_uuid from v_replica_ext where volume_name='$VOL_NAME'" | grep $tray_uuid 40 | assert_not_eq $? 
0 41 | 42 | 43 | info "check volume status should OK" 44 | assert_equal $(query_db "select status from t_volume where name='$VOL_NAME'") "OK" 45 | 46 | info "Test OK" 47 | -------------------------------------------------------------------------------- /testing/test_5_aof_io.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 4 | source $DIR/utils.sh 5 | #set -xv 6 | 7 | FIFO_IN=/tmp/pf_aof_in 8 | AOF_SRC_DAT=/tmp/aof_src.dat 9 | AOF_OUT_DAT=/tmp/aof_out.dat 10 | #print the length stored in the head of volume $1: dump the head to /tmp/pfhead, then decode the 8 bytes at offset 8 11 | function aof_len() 12 | { 13 | assert pfdd --rw read --of /tmp/pfhead -v $1 14 | read LO HI <<< $(hexdump -n 8 -s 8 -v -e '/4 "%d "' /tmp/pfhead) 15 | echo $(( (HI<<32) + LO)) #LO and HI are consecutive 32-bit words, so HI carries bits 32..63 16 | } 17 | function cleanup { 18 | pkill aof_helper 19 | pkill sleep 20 | # rm -f /tmp/pfhead $FIFO_IN $AOF_SRC_DAT $AOF_OUT_DAT 21 | } 22 | trap cleanup EXIT 23 | 24 | VOL_NAME=test_5_aof 25 | VOL_SIZE=$((5<<30)) #5G on my testing platform 26 | COND_IP=$(pfcli get_pfc) 27 | read DB_IP DB_NAME DB_USER DB_PASS <<< $(assert pfcli get_conn_str) 28 | export DB_IP DB_NAME DB_USER DB_PASS 29 | 30 | pfcli delete_volume -v $VOL_NAME 31 | 32 | 33 | dd if=/dev/urandom bs=1M count=10 of=$AOF_SRC_DAT 34 | rm -f $FIFO_IN 35 | assert mkfifo $FIFO_IN 36 | sleep 1000 > $FIFO_IN & 37 | SLP_PID=$! 38 | 39 | aof_helper $VOL_NAME /etc/pureflash/pf.conf < $FIFO_IN & 40 | HELPER_PID=$!
41 | sleep 2 42 | 43 | assert_equal "$(pidof aof_helper)" "$HELPER_PID" 44 | echo "a 1024 $AOF_SRC_DAT 0" > $FIFO_IN 45 | echo "s" > $FIFO_IN 46 | assert_equal "$(aof_len $VOL_NAME)" "1024" 47 | for (( i=0; i<5; i++ )); do 48 | echo "a $((1<<20)) $AOF_SRC_DAT $(( (i<<20) + 1024 ))" > $FIFO_IN 49 | done 50 | echo "s" > $FIFO_IN 51 | 52 | FILE_LEN=$(aof_len $VOL_NAME) 53 | 54 | assert_equal "$FILE_LEN" "$(( (5<<20) + 1024 ))" 55 | 56 | function read_check() 57 | { 58 | # sends "r len off dst_file dst_off" to the helper ($1=len, $2=off), then compares that byte range of output and source by md5 59 | info "read len:$1 from:$2" 60 | echo "r $1 $2 $AOF_OUT_DAT $2" > $FIFO_IN 61 | sleep 1 #wait command execution 62 | assert_equal "$(dd if=$AOF_OUT_DAT bs=1024 skip=$(($2>>10)) count=$(($1>>10)) | md5sum -b)" "$(dd if=$AOF_SRC_DAT bs=1024 skip=$(($2>>10)) count=$(($1>>10)) | md5sum -b)" 63 | } 64 | #to test 65 | read_check 8192 0 66 | #1. read unaligned data 67 | read_check 1024 $((7<<10)) 68 | read_check 1024 $((8<<10)) 69 | read_check 1024 $((6<<10)) 70 | #'-' binds tighter than '<<' in shell arithmetic, so the shift must be parenthesized 71 | read_check 4096 $(( (1<<20) - 1024 )) 72 | read_check $((3<<20)) $(( (1<<20) - 2048 )) 73 | 74 | 75 | #2. read data in write buffer 76 | echo "a $(( (1<<20)+4096)) $AOF_SRC_DAT $FILE_LEN" > $FIFO_IN 77 | sleep 1 78 | read_check $((8<<10)) $(( FILE_LEN - (5<<10) )) 79 | read_check $((8<<10)) $(( FILE_LEN + (5<<10) )) 80 | 81 | #3.
read exceed file length 82 | 83 | echo "q" > $FIFO_IN 84 | assert wait $HELPER_PID 85 | kill $SLP_PID 86 | 87 | info "========Test OK!==========" 88 | -------------------------------------------------------------------------------- /testing/test_7_restart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | source $DIR/utils.sh 4 | 5 | VOL_NAME=test_7 6 | COND_IP=$(pfcli get_pfc) 7 | read DB_IP DB_NAME DB_USER DB_PASS <<< $(assert pfcli get_conn_str) 8 | export DB_IP DB_NAME DB_USER DB_PASS 9 | 10 | curlex "http://$COND_IP:49180/s5c/?op=delete_volume&volume_name=$VOL_NAME" 11 | sleep 1 12 | 13 | info "Creating volume $VOL_NAME" 14 | NODE_CNT=$(pfcli list_store |grep OK |wc -l) 15 | REP_CNT=1 16 | 17 | assert curlex "http://$COND_IP:49180/s5c/?op=create_volume&volume_name=$VOL_NAME&size=$((5<<30))&rep_cnt=$REP_CNT" 18 | 19 | count1=$( get_obj_count $VOL_NAME ) 20 | SNAP1_FILE=${VOL_NAME}_snap1.dat 21 | #SNAP1_OUT_FILE=${VOL_NAME}_snap1_out.dat 22 | SNAP2_FILE=${VOL_NAME}_snap2.dat 23 | #SNAP2_OUT_FILE=${VOL_NAME}_snap2_out.dat 24 | 25 | assert dd if=/dev/urandom bs=4K count=512 of=$SNAP1_FILE 26 | info "Writing to volume" 27 | assert pfdd --count 512 --rw write --bs 4k -v $VOL_NAME --if $SNAP1_FILE 28 | assert_equal $( get_obj_count $VOL_NAME ) $((count1 + REP_CNT)) 29 | 30 | info "Create snapshot snap1" 31 | assert curlex "http://$COND_IP:49180/s5c/?op=create_snapshot&volume_name=$VOL_NAME&snapshot_name=snap1" 32 | 33 | 34 | assert cp $SNAP1_FILE $SNAP2_FILE 35 | assert dd if=/dev/urandom bs=4k count=32 conv=nocreat,notrunc of=$SNAP2_FILE 36 | info "Writing to volume again" 37 | assert pfdd --count 32 --rw write --bs 4k -v $VOL_NAME --if $SNAP2_FILE 38 | assert_equal $( get_obj_count $VOL_NAME ) $((count1 + REP_CNT * 2)) 39 | 40 | info "Now compare snapshot data" 41 | SRC_MD5=$(dd if=$SNAP1_FILE bs=4k count=512 | md5sum -b) 42 | #assert pfdd --count 512 
--rw read --bs 4k -v $VOL_NAME --snapshot snap1 --of $SNAP1_OUT_FILE 43 | SNAP1_MD5=$(assert pfdd --count 512 --rw read --bs 4k -v $VOL_NAME --snapshot snap1 --of /dev/stdout | md5sum -b) 44 | assert_equal "$SRC_MD5" "$SNAP1_MD5" 45 | 46 | info "Now compare HEAD data" 47 | SRC2_MD5=$(dd if=$SNAP2_FILE bs=4k count=512 | md5sum -b) 48 | #assert pfdd --count 512 --rw read --bs 4k -v $VOL_NAME --of $SNAP2_OUT_FILE 49 | SNAP2_MD5=$(assert pfdd --count 512 --rw read --bs 4k -v $VOL_NAME --of /dev/stdout | md5sum -b) 50 | assert_equal "$SRC2_MD5" "$SNAP2_MD5" 51 | 52 | PRIMARY_IP=$(query_db "select mngt_ip from t_store where id in (select store_id from v_replica_ext where is_primary=1 and volume_name='$VOL_NAME') limit 1") 53 | info "stop pfs on $PRIMARY_IP" 54 | stop_pfs $PRIMARY_IP 55 | sleep 5 56 | info "start pfs on $PRIMARY_IP" 57 | start_pfs $PRIMARY_IP 58 | sleep 3 59 | SNAP2_MD5=$(assert pfdd --count 512 --rw read --bs 4k -v $VOL_NAME --of /dev/stdout | md5sum -b) 60 | assert_equal "$SRC2_MD5" "$SNAP2_MD5" 61 | SNAP1_MD5=$(assert pfdd --count 512 --rw read --bs 4k -v $VOL_NAME --snapshot snap1 --of /dev/stdout | md5sum -b) 62 | assert_equal "$SRC_MD5" "$SNAP1_MD5" 63 | 64 | info "Test OK" 65 | -------------------------------------------------------------------------------- /testing/test_fio.sh: -------------------------------------------------------------------------------- 1 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 2 | source $DIR/utils.sh 3 | 4 | HOST_IP=($1 $2 $3) 5 | 6 | 7 | declare -A VDISKS 8 | declare -A FIOPIDS 9 | 10 | VM_PER_HOST=2 11 | DISK_PER_VM=1 12 | 13 | VOL_SIZE=128G 14 | 15 | 16 | VM_CNT=${#HOST_IP[@]} 17 | 18 | cleanup() 19 | { 20 | info "kill fio" 21 | for ((i=0;i vol_h${i}_v${j}_d0.log & 66 | FIOPIDS[$FIO_CNT]=$! 
67 | info "FIO[$FIO_CNT] pid is ${FIOPIDS[$FIO_CNT]} " 68 | FIO_CNT=$((FIO_CNT+1)) 69 | 70 | done 71 | done 72 | # rpc.py -s `pwd`/vhost-liu.sock framework_set_scheduler static 73 | 74 | info "Waiting fio jobs complete ..." 75 | for ((i=0;i