├── .build.yml ├── .github └── workflows │ └── build.yml ├── LICENSE ├── Makefile ├── README.md ├── RELEASE.md ├── analysis └── act_latency.py ├── build ├── os_version └── version ├── config ├── act_index.conf └── act_storage.conf ├── pkg ├── Makefile.deb ├── Makefile.rpm ├── deb │ ├── control │ ├── postinst │ └── prerm └── rpm │ └── act.spec └── src ├── common ├── cfg.c ├── cfg.h ├── clock.h ├── hardware.c ├── hardware.h ├── histogram.c ├── histogram.h ├── io.c ├── io.h ├── queue.c ├── queue.h ├── random.c ├── random.h ├── trace.c ├── trace.h └── version.h ├── index ├── act_index.c ├── cfg_index.c └── cfg_index.h ├── prep └── act_prep.c └── storage ├── act_storage.c ├── cfg_storage.c └── cfg_storage.h /.build.yml: -------------------------------------------------------------------------------- 1 | name: act 2 | 3 | container: 4 | - base: 5 | - docker.qe.aerospike.com/build/aerospike-server:arm-rhel-8 6 | - docker.qe.aerospike.com/build/aerospike-server:arm-rhel-9 7 | - docker.qe.aerospike.com/build/aerospike-server:arm-rhel-10 8 | - docker.qe.aerospike.com/build/aerospike-server:arm-debian-12 9 | - docker.qe.aerospike.com/build/aerospike-server:arm-debian-13 10 | - docker.qe.aerospike.com/build/aerospike-server:arm-ubuntu-20.04 11 | - docker.qe.aerospike.com/build/aerospike-server:arm-ubuntu-22.04 12 | - docker.qe.aerospike.com/build/aerospike-server:arm-ubuntu-24.04 13 | - docker.qe.aerospike.com/build/aerospike-server:arm-amazonlinux-2023 14 | 15 | - docker.qe.aerospike.com/build/aerospike-server:x86-rhel-8 16 | - docker.qe.aerospike.com/build/aerospike-server:x86-rhel-9 17 | - docker.qe.aerospike.com/build/aerospike-server:x86-rhel-10 18 | - docker.qe.aerospike.com/build/aerospike-server:x86-debian-12 19 | - docker.qe.aerospike.com/build/aerospike-server:x86-debian-13 20 | - docker.qe.aerospike.com/build/aerospike-server:x86-ubuntu-20.04 21 | - docker.qe.aerospike.com/build/aerospike-server:x86-ubuntu-22.04 22 | - 
docker.qe.aerospike.com/build/aerospike-server:x86-ubuntu-24.04 23 | - docker.qe.aerospike.com/build/aerospike-server:x86-amazonlinux-2023 24 | 25 | build: 26 | - name: default 27 | script: 28 | - make all 29 | - make $PKG 30 | artifact: 31 | - target/bin/* 32 | - target/packages/* 33 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build ACT 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | buils_debian_x86_64: 8 | runs-on: ubuntu-latest 9 | strategy: 10 | matrix: 11 | container: [ 'debian:10', 'debian:11' ] 12 | container: 13 | image: ${{ matrix.container }} 14 | options: "--entrypoint /bin/bash" 15 | steps: 16 | - name: Install Dependencies (Debian) 17 | run: | 18 | apt-get -y update 19 | apt-get -y install make gcc libc6-dev git build-essential 20 | 21 | - name: Clone Source 22 | run: git clone https://github.com/aerospike/act.git 23 | 24 | - name: build act (debian) 25 | working-directory: ./act 26 | run: | 27 | echo "REV=$(make -f pkg/Makefile.deb print-REV)" >> $GITHUB_ENV 28 | echo "OS=$(make -f pkg/Makefile.deb print-OS)" >> $GITHUB_ENV 29 | echo "ARCH=$(make -f pkg/Makefile.deb print-ARCH)" >> $GITHUB_ENV 30 | echo "GITHUB_SHA_SHORT=$(echo $GITHUB_SHA | cut -c 1-6)" >> $GITHUB_ENV 31 | make all deb 32 | 33 | - name: Upload Artifacts 34 | uses: actions/upload-artifact@v3 35 | with: 36 | name: act_${{ env.REV }}-1${{ env.OS }}_${{ env.ARCH }}.deb 37 | path: /__w/act/act/act/target/packages/act_* 38 | if-no-files-found: error 39 | 40 | build_ubuntu_x86_64: 41 | strategy: 42 | matrix: 43 | os: [ 'ubuntu-20.04', 'ubuntu-18.04' ] 44 | runs-on: ${{ matrix.os }} 45 | steps: 46 | - name: Install Dependencies 47 | run: sudo apt-get install make gcc libc6-dev 48 | 49 | - name: Get Sources 50 | uses: actions/checkout@v3 51 | with: 52 | fetch-depth: 0 53 | 54 | - name: Set env 55 | run: | 56 | echo "REV=$(make -f 
pkg/Makefile.deb print-REV)" >> $GITHUB_ENV 57 | echo "OS=$(make -f pkg/Makefile.deb print-OS)" >> $GITHUB_ENV 58 | echo "ARCH=$(make -f pkg/Makefile.deb print-ARCH)" >> $GITHUB_ENV 59 | echo "GITHUB_SHA_SHORT=$(echo $GITHUB_SHA | cut -c 1-6)" >> $GITHUB_ENV 60 | 61 | - name: build 62 | run: make all deb 63 | 64 | - name: Upload Artifact 65 | uses: actions/upload-artifact@v3 66 | with: 67 | name: act_${{ env.REV }}-1${{ env.OS }}_${{ env.ARCH }}.deb 68 | path: target/packages 69 | if-no-files-found: error 70 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | LICENSE 2 | 3 | Copyright (c) 2008-2012 Aerospike, Inc. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | of the Software, and to permit persons to whom the Software is furnished to do 10 | so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | 24 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Make all or any of: act_storage, act_index, act_prep. 2 | 3 | ARCH = $(shell uname -m) 4 | 5 | ifeq ($(ARCH), x86_64) 6 | CFLAGS = -march=nocona 7 | else ifeq ($(ARCH), aarch64) 8 | CFLAGS = -mcpu=neoverse-n1 9 | else 10 | $(error unhandled arch "$(ARCH)") 11 | endif 12 | 13 | DIR_TARGET = target 14 | DIR_OBJ = $(DIR_TARGET)/obj 15 | DIR_BIN = $(DIR_TARGET)/bin 16 | 17 | DIR_PKG = $(DIR_TARGET)/packages 18 | DIR_RPM = pkg/rpm/RPMS 19 | DIR_DEB = pkg/deb/DEBS 20 | 21 | SRC_DIRS = common index prep storage 22 | OBJ_DIRS = $(SRC_DIRS:%=$(DIR_OBJ)/src/%) 23 | 24 | COMMON_SRC = cfg.c hardware.c histogram.c io.c queue.c random.c trace.c 25 | INDEX_SRC = act_index.c cfg_index.c 26 | STORAGE_SRC = act_storage.c cfg_storage.c 27 | 28 | INDEX_SOURCES = $(COMMON_SRC:%=src/common/%) $(INDEX_SRC:%=src/index/%) 29 | PREP_SOURCES = $(COMMON_SRC:%=src/common/%) src/prep/act_prep.c 30 | STORAGE_SOURCES = $(COMMON_SRC:%=src/common/%) $(STORAGE_SRC:%=src/storage/%) 31 | 32 | INDEX_OBJECTS = $(INDEX_SOURCES:%.c=$(DIR_OBJ)/%.o) 33 | PREP_OBJECTS = $(PREP_SOURCES:%.c=$(DIR_OBJ)/%.o) 34 | STORAGE_OBJECTS = $(STORAGE_SOURCES:%.c=$(DIR_OBJ)/%.o) 35 | 36 | INDEX_BINARY = $(DIR_BIN)/act_index 37 | PREP_BINARY = $(DIR_BIN)/act_prep 38 | STORAGE_BINARY = $(DIR_BIN)/act_storage 39 | 40 | ALL_OBJECTS = $(INDEX_OBJECTS) $(PREP_OBJECTS) $(STORAGE_OBJECTS) 41 | ALL_DEPENDENCIES = $(ALL_OBJECTS:%.o=%.d) 42 | 43 | CFLAGS += -g -fno-common -std=gnu99 -Wall -D_REENTRANT -D_FILE_OFFSET_BITS=64 44 | CFLAGS += -D_GNU_SOURCE -MMD 45 | LDFLAGS = $(CFLAGS) 46 | INCLUDES = -Isrc -I/usr/include 47 | LIBRARIES = -lpthread -lrt 48 | 49 | default: all 50 | 51 | all: act_index act_prep act_storage 52 | 53 | target_dir: 54 | /bin/mkdir -p $(DIR_BIN) $(OBJ_DIRS) $(DIR_PKG) 55 | 56 | act_index: target_dir $(INDEX_OBJECTS) 57 | 
echo "Linking $@" 58 | $(CC) $(LDFLAGS) -o $(INDEX_BINARY) $(INDEX_OBJECTS) $(LIBRARIES) 59 | 60 | act_prep: target_dir $(PREP_OBJECTS) 61 | echo "Linking $@" 62 | $(CC) $(LDFLAGS) -o $(PREP_BINARY) $(PREP_OBJECTS) $(LIBRARIES) 63 | 64 | act_storage: target_dir $(STORAGE_OBJECTS) 65 | echo "Linking $@" 66 | $(CC) $(LDFLAGS) -o $(STORAGE_BINARY) $(STORAGE_OBJECTS) $(LIBRARIES) 67 | 68 | -include $(ALL_DEPENDENCIES) 69 | 70 | $(DIR_OBJ)/%.o: %.c 71 | echo "Building $@" 72 | $(CC) $(CFLAGS) -o $@ -c $(INCLUDES) $< 73 | 74 | .PHONY: rpm 75 | rpm: 76 | $(MAKE) -f pkg/Makefile.rpm 77 | 78 | .PHONY: deb 79 | deb: 80 | $(MAKE) -f pkg/Makefile.deb 81 | 82 | # For now we only clean everything. 83 | .PHONY: clean 84 | clean: 85 | /bin/rm -rf $(DIR_TARGET) 86 | /bin/rm -rf $(DIR_RPM) 87 | /bin/rm -rf $(DIR_DEB) 88 | /bin/rm -rf dist 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Aerospike Certification Tool (ACT) 2 | 3 | This project is maintained by [Aerospike](http://www.aerospike.com) 4 | 5 | ### Overview 6 | ------------ 7 | 8 | ACT provides a pair of programs for testing and certifying flash/SSD devices' 9 | performance for Aerospike Database data and index storage. ACT measures latency 10 | during a mixed load of read and write operations while modeling the Aerospike 11 | Database server's I/O pattern as closely as practical. 12 | 13 | ACT allows you to test a single device or multiple devices, using your actual 14 | connector/controller hardware. 15 | 16 | There are two programs: act_storage models Aerospike Database data storage I/O 17 | patterns, and act_index models Aerospike Database index storage I/O patterns for 18 | Aerospike Database's "All Flash" mode. 19 | 20 | The purpose of this certification is: 21 | 22 | 1. Determine if an SSD device(s) will stand up to the demands of a high-speed 23 | real-time database. 24 | 2.
Evaluate the upper limits of the throughput you can expect from a device(s). 25 | 26 | Not all SSDs can handle the high volume of transactions required by high 27 | performance real-time databases like Aerospike Database. Many SSDs are rated 28 | for 100K+ reads/writes per second, but in production the actual load they can 29 | withstand for sustained periods of time is generally much lower. In the process 30 | of testing many common SSDs in high-throughput tests, Aerospike developed this 31 | certification tool, ACT, that you can use to test/certify an SSD for yourself. 32 | 33 | We have found performance – especially latency – of SSDs to be highly dependent 34 | on the write load the SSD is subjected to. Over the first few hours of a test, 35 | performance can be excellent, but past the 4- to 12-hour mark (depending on the 36 | device), performance can suffer. 37 | 38 | The ACT tool allows you to test an SSD device(s) for yourself. In addition, 39 | Aerospike has tested a variety of SSDs and has specific recommendations. For 40 | more information, visit the Aerospike Database documentation at: 41 | http://www.aerospike.com/docs/operations/plan/ssd/ssd_certification.html. 42 | 43 | #### What ACT Does 44 | ------------------ 45 | 46 | By default, act_storage performs a combination of large (128K) block reads and 47 | writes and small (1.5K) block reads, simulating standard real-time Aerospike 48 | Database data read/write and defragmentation loads. 49 | 50 | By default, act_index performs a mixture of 4K reads and writes, simulating 51 | standard real-time Aerospike Database "All Flash" index device loads. 52 | 53 | Latencies are measured for a long enough period of time (typically 24 hours) to 54 | evaluate device stability and overall performance. 
55 | 56 | **Traffic/Loading** 57 | 58 | You can simulate: 59 | 60 | * "Nx" load - 1x load (2000 reads/sec and 1000 writes/sec per device) times N 61 | * any other stress load or high-performance load (custom configurable) 62 | 63 | **Latency Rate Analysis** 64 | 65 | ACT's output shows latency broken into intervals of 2^n ms: 1, 2, 4, 8 ... ms 66 | (analysis program's display intervals are configurable). 67 | 68 | For example, a test might indicate that 0.25% of requests failed to complete in 69 | 1 ms or less and 0.01% of requests failed to complete in 8 ms or less. 70 | 71 | **Methodology for act_storage** 72 | 73 | The small read operations model client read requests. Requests are done at the 74 | specified rate by a number of service threads. 75 | 76 | The large-block read and write operations model the Aerospike server's write 77 | requests and defragmentation process. The operations occur at a rate determined 78 | by the specified write request rate, and are executed from one dedicated 79 | large-block read thread and one dedicated large-block write thread per device. 80 | 81 | **Methodology for act_index** 82 | 83 | The 4K device reads model index element access that occurs during client read 84 | and write requests, and defragmentation. One device read is executed on service 85 | threads for each client read, and for each client write. In addition, more 86 | reads are executed in "cache threads" to model index element access during 87 | defragmentation. 88 | 89 | The "cache threads" also execute all the 4k device writes, which model index 90 | element changes due to client write requests and defragmentation. 91 | 92 | Unlike the Aerospike Database "All Flash" mode, act_index does not mmap files in 93 | mounted directories on the devices - it models the raw device I/O pattern, 94 | assuming no caching benefit from mmap. Therefore to configure act_index we 95 | simply specify the devices. 
96 | 97 | #### Process for Certifying Device(s) for 30x Performance 98 | --------------------------------------------------------- 99 | 100 | In general, we recommend that you certify a device for 30x performance. Many 101 | devices do not pass the 30x certification. If you do not have a high-volume 102 | application, you may find that a 10x or 20x certification will be sufficient. 103 | The instructions below describe the 30x certification process, but you may need 104 | to adjust the test based on your requirements. 105 | 106 | To certify a device(s) for 30x performance with Aerospike Database requires two 107 | stages: 108 | 109 | 1. Test a single device to determine performance using the hardware 110 | configuration and connectors. The single-device certification will help you 111 | determine individual device performance. 112 | 2. If you will be using multiple devices, you can then run ACT to test multiple 113 | devices to see how the results will be affected by the capacity of the bus or 114 | the throughput of the RAID controller that is managing your devices. 115 | 116 | The test process with ACT is the same for both stages, but in the first stage 117 | you are testing a device and in the second stage, you are testing the 118 | linearity/scalability of your connector with multiple devices installed. 119 | 120 | The single-device stage takes 48 hours. The multi-device stage takes an 121 | additional 48 hours. 122 | 123 | ##### The first stage is to certify a single device, to test the device itself and the connection. 124 | 125 | Begin by installing your SSD device. Our website has more details about 126 | installing SSDs in different environments and configurations at 127 | http://www.aerospike.com/docs/operations/plan/ssd/ssd_setup.html. 128 | 129 | **Test 1: Test under high loads** 130 | 131 | Run ACT for 24 hrs using the 30x test (60000 reads/sec and 30000 writes/sec). 
132 | The device passes this test if less than 5% of operations fail to complete in 133 | 1 ms or less. 134 | 135 | Many devices fail this test and are unsuitable for use with Aerospike Database. 136 | 137 | **Test 2: Stress test to ensure the device does not fail under excessive loads** 138 | 139 | Run a 60x test for 24 hrs (120000 reads/sec and 60000 writes/sec). The device 140 | passes this test if ACT runs to completion, regardless of the error rate. 141 | 142 | **If you are testing a single device, then the device is certified when it passes Test 1 and Test 2.** 143 | 144 | ##### The second stage is to certify multiple devices, to make sure that performance scales linearly when you add devices. 145 | 146 | Install the additional SSDs to be tested. Our website has more details about 147 | installing SSDs in different environments and configurations at 148 | http://www.aerospike.com/docs/operations/plan/ssd/ssd_setup.html. 149 | 150 | **Test 3: Repeat Test 1, with all devices installed: Test under high loads** 151 | 152 | Run ACT for 24 hrs using the 30x test (60000 reads/sec and 30000 writes/sec per 153 | device). The devices pass this test if less than 5% of operations fail to 154 | complete in 1 ms or less. 155 | 156 | **Test 4: Repeat Test 2, with all devices installed: Stress test to ensure the devices do not fail under excessive loads** 157 | 158 | Run a 60x test for 24 hrs (120000 reads/sec and 60000 writes/sec per device). 159 | The devices pass this test if ACT runs to completion, regardless of the error 160 | rate. 161 | 162 | **The devices are certified if they pass Test 3 and Test 4.** 163 | 164 | Once the device(s) has been certified, the device can be used with Aerospike 165 | Database. 
166 | 167 |   168 | 169 | #### Determining Expected Performance at Higher Throughput 170 | ---------------------------------------------------------- 171 | 172 | If your application is going to have high volumes of transactions and your 173 | device(s) passes the 30x certification, we recommend that you test your device 174 | to determine its upper limit on transaction processing latency. This will help 175 | you determine how many SSDs you will need to run your application when you are 176 | fully scaled up. 177 | 178 | To certify a device(s) at higher levels of performance, do the certification 179 | process as described above, but use higher loads (80x, 100x, etc.). Test the 180 | device(s) at progressively higher rates until more than 5% of operations fail in 181 | 1 ms. 182 | 183 | For example, if you test at 60x and less than 5% of operations fail to complete 184 | in 1 ms, re-run the test at 80x, etc. When the device completes the test at a 185 | particular speed with *more* than 5% of operations failing to complete in 1 ms 186 | (i.e., fails the test), then the device is certified at the next lower level 187 | where the device DOES have fewer than 5% of errors in under 1 ms. 188 | 189 | If your device is testing well at higher loads, you may want to shorten the test 190 | time. Running ACT for six hours will give you a good idea whether your device 191 | can pass ACT testing at a given traffic volume. Before certifying your device 192 | at a given traffic level, we recommend a full 24-hour test. 193 | 194 | As before, test a single device first, and then test with multiple devices to 195 | make sure that the performance scales linearly with your connector/controller. 196 | 197 | ### Getting Started 198 | -------------------- 199 | 200 | **Download the ACT package through git:** 201 | 202 | ``` 203 | $ git clone https://github.com/aerospike/act.git 204 | ``` 205 | This creates an /act directory. 
206 | 207 | Alternatively, you can download the ZIP or TAR file from the links at the left. 208 | When you unpack/untar the file, it creates an /aerospike-act-<version> 209 | directory. 210 | 211 | **Install the Required Libraries** 212 | 213 | Before you can build ACT, you need to install some libraries. 214 | 215 | For CentOS: 216 | ``` 217 | $ sudo yum install make gcc 218 | ``` 219 | 220 | For Debian or Ubuntu: 221 | ``` 222 | $ sudo apt-get install make gcc libc6-dev 223 | ``` 224 | 225 | **Build the package.** 226 | 227 | ``` 228 | $ cd act OR cd /aerospike-act-<version> 229 | $ make 230 | ``` 231 | 232 | This will create 3 binaries in a target/bin directory: 233 | 234 | * ***act_prep***: This executable prepares a device for ACT by writing zeroes 235 | on every sector of the disk and then filling it up with random data (salting). 236 | This simulates a normal production state. 237 | 238 | * ***act_storage***: The executable for modeling Aerospike Database data 239 | storage device I/O patterns. 240 | 241 | * ***act_index***: The executable for modeling Aerospike Database "All Flash" 242 | mode index device I/O patterns. 243 | 244 | ### Running the ACT Certification Process 245 | ----------------------------------------- 246 | 247 | To certify your device(s), first determine what certification test you will run, 248 | as described above in **Process for Certifying Device(s) for 30x Performance** 249 | or **Determining Expected Performance at Higher Throughput**. 250 | 251 | For each certification test with ACT, you must perform the following steps: 252 | 253 | 1. Prepare the device(s) with act_prep -- only the first time you test. 254 | 2. Create the config file for your test. 255 | 3. Run the test, sending the results to a log file. 256 | 4. Analyze log file output using the /analysis/act_latency.py script. 257 | 5. Determine pass/fail for the test. 258 | 259 | These steps are described in detail below.
260 | 261 | **The tests destroy all data on the devices being tested!** 262 | 263 | When preparing devices and running tests, make sure the devices are specified by 264 | name correctly. 265 | 266 | Make sure the test device is not mounted. 267 | 268 | #### 1. Prepare the Drives with act_prep - First Time Only 269 | ---------------------------------------------------------- 270 | 271 | The first time you test a device(s), you must prepare the device(s) by first 272 | cleaning them (writing zeros everywhere) and then "salting" them (writing random 273 | data everywhere) with act_prep. 274 | 275 | act_prep takes a device name as its only command-line parameter. For a typical 276 | 240GB SSD, act_prep takes 30-60+ minutes to run. The time varies depending on 277 | the device and the capacity. 278 | 279 | If you are testing multiple devices, you can run act_prep on all of the devices 280 | in parallel. Preparing multiple devices in parallel does not take a lot more 281 | time than preparing a single device, so this step should only take an hour or 282 | two. 283 | 284 | For example, to clean and salt the device /dev/sdc: 285 | (over-provisioned using hdparm) 286 | ``` 287 | $ sudo ./act_prep /dev/sdc & 288 | ``` 289 | If you are using a RAID controller / over-provisioned using fdisk, make sure you 290 | specify the partition and not the raw device. If the raw device is used then ACT 291 | will wipe out the partition table and this will invalidate the test. 292 | ``` 293 | $ sudo ./act_prep /dev/sdc1 & 294 | ``` 295 | 296 | #### 2. Create a Configuration File 297 | ----------------------------------- 298 | 299 | Create your config file by copying the appropriate example config file in the 300 | /config directory and modifying it, as described in the 301 | **ACT Configuration Reference** below. The example files are for the standard 302 | 1x load (2000 reads/sec and 1000 writes/sec per device). 
303 | 304 | Copy act_storage.conf to run the normal data storage modeling tests, or copy 305 | act_index.conf to run "All Flash" mode tests for index devices. 306 | 307 | #### 3. Run the test 308 | -------------------- 309 | 310 | From the ACT installation directory, run: 311 | ``` 312 | $ sudo ./target/bin/act_storage actconfig.txt > output.txt & 313 | ``` 314 | where: 315 | ``` 316 | * actconfig.txt - path/name of your config file 317 | * output.txt - path/name of your log file 318 | ``` 319 | If running ACT from a remote terminal, it is best to run it as a background 320 | process, or within a "screen". To verify that ACT is running, tail the output 321 | text file with the -f option. 322 | 323 | Note that if the device(s) being tested performs so badly that ACT cannot keep 324 | up with the specified load, ACT will halt before the configured test duration 325 | has elapsed. ACT may also halt prematurely if it encounters unexpected device 326 | I/O or system errors. 327 | 328 | #### 4. Analyze ACT Output 329 | -------------------------- 330 | 331 | Run /analysis/act_latency.py to process the ACT log file and tabulate data. 332 | Note that you can run the script when the test is not yet complete, and you will 333 | see the partial results. 
334 | 335 | For example: 336 | ``` 337 | $ ./analysis/act_latency.py -l output.txt 338 | ``` 339 | 340 | where: 341 | ``` 342 | -l - required parameter that specifies the path/name of the log file generated by ACT 343 | ``` 344 | 345 | and optionally: 346 | ``` 347 | -h - optional parameter specifying histogram name(s): defaults are small read latency histograms 348 | -t - optional parameter specifying slice length; default is 3600 sec (1 hour) 349 | -s - optional parameter specifying start threshold for display; default is 0 (1 ms/us) 350 | -n - optional parameter specifying number of thresholds to display; default is 7 351 | -e - optional parameter specifying display threshold frequency; default is (every) 1 352 | -x - optional parameter indicating that throughputs should also be displayed: default is no 353 | ``` 354 | 355 | The Python script analyzes the ACT output in time slices as specified, and 356 | displays latency data at various verification intervals for each slice. 357 | 358 | The example output below is for an **act_storage** 12-hour test (each slice is 359 | an hour), run with options -n 3 (display 3 thresholds) and -e 3 (display every 360 | 3rd threshold). The **reads** table shows read latencies accumulated over all 361 | devices. So for example, in the 5th hour, 1.68% of reads failed to complete in 362 | under 1 ms. 363 | 364 | ``` 365 | reads 366 | %>(ms) 367 | slice 1 8 64 368 | ----- ------ ------ ------ 369 | 1 1.67 0.00 0.00 370 | 2 1.38 0.00 0.00 371 | 3 1.80 0.14 0.00 372 | 4 1.43 0.00 0.00 373 | 5 1.68 0.00 0.00 374 | 6 1.37 0.00 0.00 375 | 7 1.44 0.00 0.00 376 | 8 1.41 0.00 0.00 377 | 9 2.70 0.73 0.00 378 | 10 1.54 0.00 0.00 379 | 11 1.53 0.00 0.00 380 | 12 1.47 0.00 0.00 381 | ----- ------ ------ ------ 382 | avg 1.62 0.07 0.00 383 | max 2.70 0.73 0.00 384 | ``` 385 | 386 | The script will also echo the configuration used to generate the log file, along 387 | with other basic information, above the latency tables. 
(We do not show this 388 | output in the example above.) 389 | 390 | #### 5. Evaluate Device(s) by the Standard Pass/Fail Criteria 391 | ------------------------------------------------------------- 392 | 393 | ##### Passing a Performance Test 394 | In any one-hour period of an ACT performance test, we expect that: 395 | 396 | - fewer than 5% of transactions fail to complete in 1 ms 397 | - fewer than 1% of transactions fail to complete in 8 ms 398 | - fewer than 0.1% of transactions fail to complete in 64 ms 399 | 400 | The **max** line of the output shows the highest values observed in any single 401 | slice (hour) of time, and the values on the max line should not exceed the 402 | allowable error values specified above. 403 | 404 | In the example output above, we show only 12 hours of results, and the device 405 | passes because the worst performance in any slice was 2.7% of transactions 406 | failing to complete within 1 ms, 0.73% of transactions failing to complete in 407 | less than 8 ms and no transactions failing to complete within 64 ms. 408 | 409 | A device(s) which does not exceed these error thresholds in 24 hours passes the 410 | load test. 411 | 412 | ##### Passing a Stress Test 413 | When doing stress testing at a level ABOVE where the device is certified, a 414 | device passes the test if ACT runs to completion, regardless of the number of 415 | errors. 416 | 417 | ## ACT Configuration Reference 418 | ------------------------------ 419 | 420 | #### Modifying the Config File 421 | ------------------------------ 422 | 423 | This package includes two example config files, one for act_storage 424 | (/config/act_storage.conf) and one for act_index (/config/act_index.conf). 425 | 426 | Choose the one appropriate for the test you wish to run. (This is usually 427 | act_storage. Run act_index only if you are testing devices for storing indexes 428 | when running Aerospike Database in "All Flash" mode.)
429 | 430 | First, you must be sure to set the correct device name(s). Then you should 431 | adjust the transaction request rates. 432 | 433 | Each example config file has the transaction request rates for a 1x load with a 434 | single device. To generate a config file for an Nx load, simply multiply those 435 | rates by N, and by the number of devices you are testing with, if using multiple 436 | devices. 437 | 438 | For example, to generate a config file for a single-device 60x load, change 439 | read-reqs-per-sec to 120000, and write-reqs-per-sec to 60000. 440 | 441 | Or, to generate a config file for a four-device 60x load, change 442 | read-reqs-per-sec to 480000, and write-reqs-per-sec to 240000. 443 | 444 | You may of course run customized loads, including read-only loads (set 445 | write-reqs-per-sec to 0) or write-only loads (set read-reqs-per-sec to 0). 446 | 447 | The other fields in the configuration files should generally not be changed, but 448 | you may do so to run highly customized tests. 449 | 450 | #### Format of Lines in the Config File 451 | --------------------------------------- 452 | 453 | All fields use a 454 | ``` 455 | name-token: value 456 | ``` 457 | format, and must be on a single line. Field order in the file is unimportant. 458 | To add comments, add a line(s) that begin with '#'. 459 | 460 | ### Fields that you Must Change: 461 | 462 | **device-names** 463 | Comma-separated list of device names (full path) to test. For example: 464 | ``` 465 | device-names: /dev/sdb,/dev/sdc 466 | ``` 467 | Make sure the devices named are entered correctly. 468 | 469 | ### Fields that you will Almost Always Change: 470 | 471 | **read-reqs-per-sec** 472 | Read transactions/second to simulate. Note that this is not per device. 473 | For 30 times (30x) the normal load for four devices, this value would be 474 | 30 x 4 x 2000 = 240000. Formula: n x number of devices x 2000. 475 | 476 | **write-reqs-per-sec** 477 | Write transactions/second to simulate. 
For act_storage, this value along with 478 | record-bytes, large-block-op-kbytes, defrag-lwm-pct, and others, determines the 479 | rate of large-block operations. Note that this is not per device. 480 | For 30 times (30x) the normal load for four devices, this value would be 481 | 30 x 4 x 1000 = 120000. Formula: n x number of devices x 1000. 482 | 483 | ### Fields that you may Sometimes Change: 484 | 485 | **test-duration-sec** 486 | Duration of the entire test, in seconds. Note that it has to be a single 487 | number, e.g. use 86400, not 60 x 60 x 24. The default is one day (24 hours). 488 | 489 | ### Fields that you will Rarely or Never Change: 490 | 491 | **service-threads** 492 | Total number of service threads on which requests are generated and done. If a 493 | test stops with a message like "... ACT can't do requested load ...", it doesn't 494 | mean the devices failed, it just means the transaction rates specified are too 495 | high to achieve with the configured number of service threads. Try testing 496 | again with more service threads. The default service-threads is 5x the number 497 | of CPUs, detected by ACT at runtime. 498 | 499 | **cache-threads (act_index ONLY)** 500 | Number of threads from which to execute all 4K writes, and 4K reads due to 501 | index access during defragmentation. These threads model the system threads 502 | that would do these device I/O operations behind mmap. The default 503 | cache-threads is 8. 504 | 505 | **report-interval-sec** 506 | Interval between generating observations, in seconds. This is the smallest 507 | granularity that you can analyze. Default is 1 sec. The 508 | /analysis/act_latency.py script aggregates these observations into slices, 509 | typically hour-long groups. 510 | 511 | **microsecond-histograms** 512 | Flag that specifies what time units the histogram buckets will use -- yes means 513 | use microseconds, no means use milliseconds. If this field is left out, the 514 | default is no. 
515 | 516 | **record-bytes (act_storage ONLY)** 517 | Size of a record in bytes. This determines the size of a read operation -- just 518 | record-bytes rounded up to a multiple of 512 bytes (or whatever the device's 519 | minimum direct op size is). Along with write-reqs-per-sec, large-block-op-kbytes, 520 | and others, this item determines the rate of large-block operations. 521 | record-bytes is rounded up to a multiple of 16 bytes to model Aerospike Database 522 | storage granularity. For example, if record-bytes is 1500, write-reqs-per-sec 523 | is 1000, and large-block-op-kbytes is 128, we write (1504 x 1000) bytes per 524 | second, or (1504 x 1000) / (128 x 1024) = 11.4746 large blocks per second. With 525 | defrag-lwm-pct 50, we double this to simulate defragmentation where blocks 526 | depleted to 50%-used are re-packed, yielding a large-block write (and read) rate 527 | of 22.949 blocks per second. May not be larger than 8Mbytes. 528 | 529 | **record-bytes-range-max (act_storage ONLY)** 530 | If set, simulate a range of record sizes from record-bytes up to 531 | record-bytes-range-max. Therefore if set, it must be larger than record-bytes. 532 | The simulation models a linear distribution of sizes within the range. The 533 | default record-bytes-range-max is 0, meaning no range -- model all records with 534 | size record-bytes. May not be larger than 8Mbytes. 535 | 536 | **large-block-op-kbytes (act_storage ONLY)** 537 | Size written and read in each large-block write and large-block read operation 538 | respectively, in Kbytes. May not be larger than 8192 (8Mbytes). 539 | 540 | **replication-factor** 541 | Simulate the device load you would see if this node was in a cluster with the 542 | specified replication-factor. Increasing replication-factor increases the write 543 | load, e.g. replication-factor 2 doubles the write load. For act_storage, this 544 | doubles the large-block read and write rates.
It can also affect the 545 | record-sized internal read rate if update-pct is non-zero. The default 546 | replication-factor is 1. 547 | 548 | **update-pct (act_storage ONLY)** 549 | Simulate the device load you would see if this percentage of write requests were 550 | updates, as opposed to replaces. Updates cause the current version of a record 551 | to be read before the modified version is written, while replaces do not need to 552 | read the current version. Therefore a non-zero update-pct will generate a 553 | bigger internal record-sized read rate. E.g. if read-reqs-per-sec is 2000 and 554 | write-reqs-per-sec is 1000, the internal read-req rate will be somewhere between 555 | 2000 (update-pct 0), and 2000 + 1000 = 3000 (update-pct 100). The default 556 | update-pct is 0. 557 | 558 | **defrag-lwm-pct** 559 | Simulate the device load you would see if this was the defrag threshold. The 560 | lower the threshold, the emptier large blocks are when we defragment them (pack 561 | the remaining records into new blocks), and the lower the "write amplification" 562 | caused by defragmentation. E.g. if defrag-lwm-pct is 50, the write 563 | amplification will be 2x, meaning defragmentation doubles the internal effective 564 | storage write rate, which (for act_storage) is manifest as the large-block read 565 | and write rates. For act_index, defragmentation generates an extra internal 566 | index device read and write load. The default defrag-lwm-pct is 50. 567 | 568 | **no-defrag-reads (act_storage ONLY)** 569 | Flag to model Aerospike 7.0+ storage-engine memory with device/file backing. 570 | This models defrag for a given write load as usual, but without the large-block 571 | reads. To simulate storage-engine memory with device backing, in addition to 572 | setting this flag, do not specify a read load (read-reqs-per-sec), and do not 573 | set the tomb-raider flag. The default no-defrag-reads is no. 
574 | 575 | **compress-pct (act_storage ONLY)** 576 | Generate compressible data when writing to devices. With compress-pct 100, the 577 | data is fully random (not compressible). Lower values cause runs of zeros to 578 | be interleaved with random data such that the data should be compressible to the 579 | specified percentage of original size. The compressibility of data may affect 580 | performance on some devices, especially those supporting in-line compression. 581 | The default compress-pct is 100. 582 | 583 | **disable-odsync** 584 | Option to not set O_DSYNC when opening file descriptors. Don't configure this 585 | true if configuring commit-to-device. The default disable-odsync is no (i.e. 586 | O_DSYNC is set by default). 587 | 588 | **commit-to-device (act_storage ONLY)** 589 | Flag to model the mode where Aerospike commits each record to device 590 | synchronously, instead of flushing large blocks full of records. This causes a 591 | device I/O load with many small, variable-sized writes. Large block writes (and 592 | reads) still occur to model defragmentation, but the rate of these is reduced. 593 | The default commit-to-device is no. 594 | 595 | **tomb-raider (act_storage ONLY)** 596 | Flag to model the Aerospike tomb raider. This simply spawns a thread per device 597 | in which the device is read from beginning to end, one large block at a time. 598 | The thread sleeps for tomb-raider-sleep-usec microseconds between each block. 599 | When the end of the device is reached, we repeat, reading from the beginning. 600 | (In other words, we don't model Aerospike's tomb-raider-period.) The default 601 | tomb-raider is no. 602 | 603 | **tomb-raider-sleep-usec (act_storage ONLY)** 604 | How long to sleep in each device's tomb raider thread between large-block reads. 605 | The default tomb-raider-sleep-usec is 1000, or 1 millisecond. 
606 | 607 | **max-lag-sec** 608 | How much the large-block operations (act_storage) or cache-thread operations 609 | (act_index) are allowed to lag behind their target rates before the ACT test 610 | fails. Also, how much the service threads that generate and do requests are 611 | allowed to lag behind their target rates before the ACT test is stopped. Note 612 | that this doesn't necessarily mean the devices failed the test - it means the 613 | transaction rates specified are too high to achieve with the configured number 614 | of service threads. Note - max-lag-sec 0 is a special value for which the test 615 | will not be stopped due to lag. The default max-lag-sec is 10. 616 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | ## Improvements 2 | * [TOOLS-3019] - (ACT) Add support for Debian 13. 3 | * [TOOLS-3129] - (ACT) Add support for Redhat 10. 4 | * [TOOLS-3134] - (ACT) Remove support for Debian 11. 5 | -------------------------------------------------------------------------------- /analysis/act_latency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # ------------------------------------------------ 4 | # act_latency.py 5 | # 6 | # Analyze an act_storage or act_index output file. 7 | # Typical usage: 8 | # $ ./act_latency.py -l act_out.txt 9 | # where act_out.txt is output generated by act_storage or act_index, and which 10 | # uses defaults: 11 | # (-h - depends on config found in act_out.txt) 12 | # -t 3600 13 | # -s 0 14 | # -n 7 15 | # -e 1 16 | # (-x - not set) 17 | # ------------------------------------------------ 18 | 19 | 20 | # ========================================================== 21 | # Imports.
#

from __future__ import print_function

import getopt
import re
import sys

# ==========================================================
# Compatibility.
#

# Python 3 has no separate 'long' type - alias it so the parsing code can call
# long() under either major version.
if sys.version_info[0] >= 3:
    long = int


# ==========================================================
# Constants.
#

# Two-digit bucket labels, as they appear in histogram output lines.
BUCKET_LABELS = ("00", "01", "02", "03", "04", "05", "06", "07", "08", "09",
                 "10", "11", "12", "13", "14", "15", "16")
ALL_BUCKETS = len(BUCKET_LABELS)

# One compiled pattern per bucket, capturing the value in "(NN: value)". Raw
# strings keep '\(' and '\)' as regex escapes rather than (invalid) string
# escapes.
BUCKET_PATTERNS = [re.compile(r'.*?\(' + b + r': (.*?)\).*?')
                   for b in BUCKET_LABELS]

# Separator placed before each histogram's group of columns.
GAP = " "


# Command line settings - class attributes used as globals, set by get_args().
class Args(object):
    log = None          # -l: log file name (mandatory)
    histograms = []     # -h: names of histograms to analyse
    slice = 3600        # -t: analysis slice interval in seconds
    start_bucket = 0    # -s: first bucket to display
    num_buckets = 7     # -n: number of buckets to display
    every_nth = 1       # -e: display stride, counted from start_bucket
    extra = False       # -x: also show the rate column


# Per-histogram state. The class attributes are settings shared by all
# histograms; the instance attributes hold one histogram's running values.
class Hist(object):
    scale_label = ""
    underline = ""
    max_bucket = 0
    bucket_range = None
    display_range = None
    slice_time = 0

    def __init__(self, name):
        self.name = name

        self.pre_pad = ""
        # Cumulative values from the previous chunk read, used to get deltas.
        self.old_total = 0
        self.old_values = [0] * Hist.max_bucket
        # Deltas for the current slice.
        self.slice_total = 0
        self.slice_values = [0] * Hist.max_bucket
        # Current, accumulated (later averaged) and maximum rates.
        self.rate = 0.0
        self.avg_rate = 0.0
        self.max_rate = 0.0
        # Current, accumulated (later averaged) and maximum percentages over
        # each bucket's threshold.
        self.overs = [0.0] * Hist.max_bucket
        self.avg_overs = [0.0] * Hist.max_bucket
        self.max_overs = [0.0] * Hist.max_bucket


# ==========================================================
# Main.
#

def main():
    get_args()
    file_id = open_log_file()

    # Close the log file however the analysis ends, including via sys.exit().
    try:
        find_max_bucket()
        hists = [Hist(name) for name in Args.histograms]

        print_table_header(hists)
        num_slices = print_latency_slices(hists, file_id)
        print_latency_aggregates(hists, num_slices)
    finally:
        file_id.close()


# ==========================================================
# Helper functions.
#

# ------------------------------------------------
# Get and sanity-check command line arguments.
#
# Results are stored on Args. Prints a message and exits with -1 on any
# unknown option, non-numeric value, or out-of-range value.
#
def get_args():
    # Echo the command line arguments.
    print("act_latency.py " + " ".join(sys.argv[1:]))

    # Read the input arguments:
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "l:h:t:s:n:e:x",
            ["log=", "histogram=", "slice=", "start_bucket=", "num_buckets=",
             "every_nth=", "extra"])
    except getopt.GetoptError as err:
        print(str(err))
        print_usage()
        sys.exit(-1)

    # Set the arguments:
    for o, a in opts:
        try:
            if o == "-l" or o == "--log":
                Args.log = a
            elif o == "-h" or o == "--histogram":
                Args.histograms.append(a)
            elif o == "-t" or o == "--slice":
                Args.slice = long(a)
            elif o == "-s" or o == "--start_bucket":
                Args.start_bucket = int(a)
            elif o == "-n" or o == "--num_buckets":
                Args.num_buckets = int(a)
            elif o == "-e" or o == "--every_nth":
                Args.every_nth = int(a)
            elif o == "-x" or o == "--extra":
                Args.extra = True
        except ValueError:
            # Non-numeric value for a numeric option - report it instead of
            # dying with a traceback.
            print("invalid value '" + a + "' for option " + o)
            print_usage()
            sys.exit(-1)

    # Sanity-check the arguments:
    if Args.log is None:
        print_usage()
        sys.exit(-1)

    if Args.slice < 1:
        print("slice must be more than 0")
        sys.exit(-1)

    if Args.start_bucket < 0 or Args.start_bucket >= ALL_BUCKETS:
        # ALL_BUCKETS is an int - convert it before concatenating.
        print("start_bucket must be non-negative and less than " +
              str(ALL_BUCKETS))
        sys.exit(-1)

    if Args.num_buckets < 1:
        print("num_buckets must be more than 0")
        sys.exit(-1)

    if Args.every_nth < 1:
        print("every_nth must be more than 0")
        sys.exit(-1)


# ------------------------------------------------
# Print usage.
#
def print_usage():
    print("Usage:")
    print(" -l act_storage or act_index output file")
    print(" MANDATORY - NO DEFAULT")
    print(" e.g. act_out.txt")
    print(" -h histogram to analyse")
    print(" default: depends on config read from output file")
    print(" -t analysis slice interval in seconds")
    print(" default: 3600")
    print(" -s start display from this bucket")
    print(" default: 0")
    print(" -n number of buckets to display")
    print(" default: 7")
    print(" -e show start bucket then every n-th bucket")
    print(" default: 1")
    print(" -x (show extra information for each slice)")
    print(" default: not set")


# ------------------------------------------------
# Open log file and validate header information.
#
# Echoes the version, configuration, derived configuration and histogram names
# found in the log, sets Hist.scale_label and Hist.slice_time, and defaults
# Args.histograms to ["reads"] if none were requested. Returns the open file,
# positioned after the histogram names section. Exits with -1 if a required
# section is missing or the version is outside [6.0, 7.0).
#
def open_log_file():
    # Open the log file:
    try:
        file_id = open(Args.log, "r")
    except IOError:
        print("log file " + Args.log + " not found")
        sys.exit(-1)

    # Find and echo the version:
    line = file_id.readline()

    while line and not line.startswith("ACT version"):
        line = file_id.readline()

    if not line:
        print(Args.log + " ACT version not found")
        sys.exit(-1)

    version = line.split(" ")[2].strip()
    print(Args.log + " is ACT version " + version + "\n")
    numeric_version = float(version)

    if numeric_version < 6.0 or numeric_version >= 7.0:
        print(Args.log + " ACT version not compatible")
        sys.exit(-1)

    # Find the reporting interval (searching on from the version line):
    line = file_id.readline()

    while line and not line.startswith("report-interval-sec"):
        line = file_id.readline()

    if not line:
        print("can't find report interval")
        sys.exit(-1)

    interval = long(line.split(" ")[1])

    if interval < 1:
        print("reporting interval must be more than 0")
        sys.exit(-1)

    # Find the histograms' scale:
    Hist.scale_label = " %>(ms)"
    file_id.seek(0, 0)
    line = file_id.readline()

    while line and not line.startswith("microsecond-histograms"):
        line = file_id.readline()

    if not line:
        print("can't find histograms' scale, assuming milliseconds")
        file_id.seek(0, 0)
    elif line.split(" ")[1].startswith("y"):
        Hist.scale_label = " %>(us)"

    # Adjust the slice time if necessary - round it up to a whole number of
    # reporting intervals:
    Hist.slice_time = ((Args.slice + interval - 1) // interval) * interval

    if Hist.slice_time != Args.slice:
        print("analyzing time slices of " + str(Hist.slice_time) + " seconds")

    # Echo the config from the log file:
    file_id.seek(0, 0)
    line = file_id.readline()

    while line and not line.endswith("CONFIGURATION\n"):
        line = file_id.readline()

    if not line:
        print("can't find configuration")
        sys.exit(-1)

    if line.startswith("ACT-STORAGE") or line.startswith("ACT-INDEX"):
        if not Args.histograms:
            Args.histograms = ["reads"]
    else:
        print("can't recognize configuration")
        sys.exit(-1)

    line = line.strip()

    # The section ends at the first blank line (or end of file).
    while line:
        print(line)
        line = file_id.readline().strip()

    print("")

    line = file_id.readline()

    while line and not line.startswith("DERIVED CONFIGURATION"):
        line = file_id.readline()

    if not line:
        print("can't find derived configuration")
        sys.exit(-1)

    line = line.strip()

    while line:
        print(line)
        line = file_id.readline().strip()

    print("")

    # Echo the histogram names from the log file:
    file_id.seek(0, 0)
    line = file_id.readline()

    while line and not line.startswith("HISTOGRAM NAMES\n"):
        line = file_id.readline()

    if not line:
        print("can't find histogram names")
        sys.exit(-1)

    line = line.strip()

    while line:
        print(line)
        line = file_id.readline().strip()

    print("")

    return file_id


# ------------------------------------------------
# Find index + 1 of last bucket to display.
#
# Sets Hist.max_bucket, Hist.bucket_range (every bucket below max_bucket) and
# Hist.display_range (the buckets actually shown).
#
def find_max_bucket():
    num_buckets = Args.num_buckets

    # Step through the displayable buckets until enough have been counted or
    # the buckets run out.
    for b in range(Args.start_bucket, ALL_BUCKETS, Args.every_nth):
        Hist.max_bucket = b + 1

        if num_buckets == 1:
            break
        else:
            num_buckets -= 1

    Hist.bucket_range = range(Hist.max_bucket)
    Hist.display_range = range(
        Args.start_bucket, Hist.max_bucket, Args.every_nth)


# ------------------------------------------------
# Print table header.
#
# Also sets Hist.underline, and each histogram's pre_pad where the previous
# histogram's name is wider than its columns.
#
def print_table_header(hists):
    prefix = "slice"
    threshold_labels = ""
    threshold_underline = ""

    for i in Hist.display_range:
        threshold_labels += "%7s" % (pow(2, i))
        threshold_underline += " ------"

    if Args.extra:
        # Same width as the 11-character underline and the "%11.1f" rate
        # values printed below it.
        threshold_labels += "%11s" % ("rate")
        threshold_underline += " ----------"

    len_table = len(threshold_labels)

    for i in range(1, len(hists)):
        prev_name_len = 1 + len(hists[i - 1].name)

        if prev_name_len > len_table:
            hists[i].pre_pad = " " * (prev_name_len - len_table)

    names_out = " " * len(prefix)
    units_out = " " * len(prefix)
    labels_out = prefix
    Hist.underline = "-" * len(prefix)

    for hist in hists:
        names_out += GAP + " " + hist.name.ljust(len_table - 1)
        units_out += GAP + hist.pre_pad + Hist.scale_label.ljust(len_table)
        labels_out += GAP + hist.pre_pad + threshold_labels
        Hist.underline += GAP + hist.pre_pad + threshold_underline

    print(names_out)
    print(units_out)
    print(labels_out)
    print(Hist.underline)


# ------------------------------------------------
# Generate latency lines.
#
# Prints one line per complete time slice. Returns the number of slices
# printed; exits with -1 if there was not even one.
#
def print_latency_slices(hists, file_id):
    slices_done = 0
    target_time = Hist.slice_time

    # Keep going while chunks are found at each successive slice boundary. A
    # (possible) incomplete slice at the end is ignored.
    while read_chunk(file_id, target_time, hists):
        slices_done += 1

        # Print this slice's percentages over thresholds:
        print_slice_line(slices_done, hists)

        # Prepare for next slice:
        target_time += Hist.slice_time

    if slices_done == 0:
        print("could not find " + str(Hist.slice_time) + " seconds of data")
        sys.exit(-1)

    return slices_done


# ------------------------------------------------
# Generate latency aggregate lines.
#
# Turns the per-histogram running sums into averages over num_slices, then
# prints the underline, the average line and the maximum line.
#
def print_latency_aggregates(hists, num_slices):
    for hist in hists:
        if Args.extra:
            hist.avg_rate = hist.avg_rate / num_slices

        for i in Hist.display_range:
            hist.avg_overs[i] = hist.avg_overs[i] / num_slices

    print(Hist.underline)
    print_avg_line(hists)
    print_max_line(hists)


# ------------------------------------------------
# Get the data chunk reported by act at the specified after_time.
#
# Returns True if at least one of the requested histograms was found in the
# chunk, False if the chunk was not found or held none of them.
#
def read_chunk(file_id, after_time, hists):
    marker = "after " + str(after_time) + " "

    # Skip ahead to the chunk's first line - give up at end of file.
    for line in iter(file_id.readline, ""):
        if line.startswith(marker):
            break
    else:
        return False

    found_any = False
    line = file_id.readline()

    # The chunk runs until a blank line or end of file.
    while line and line.strip():
        owner = next((h for h in hists if line.startswith(h.name)), None)

        if owner is None:
            # Not a histogram being analysed - move on.
            line = file_id.readline()
        else:
            # Consumes the histogram's lines, and returns the line after.
            line = read_bucket_values(line, file_id, owner)
            found_any = True

    return found_any


# ------------------------------------------------
# Print a latency data output line.
#
def print_slice_line(slice_tag, hists):
    print(_format_row(slice_tag, hists, "overs", "rate"))


# ------------------------------------------------
# Print a latency average data output line.
#
def print_avg_line(hists):
    print(_format_row("avg", hists, "avg_overs", "avg_rate"))


# ------------------------------------------------
# Print a latency maximum data output line.
#
def print_max_line(hists):
    print(_format_row("max", hists, "max_overs", "max_rate"))


# ------------------------------------------------
# Build one table row: a label, then each histogram's displayed bucket values
# and (with -x) its rate.
#
# The label is right-justified to 5 characters for every kind of row, so the
# "avg" and "max" rows line up with the numbered slice rows.
#
def _format_row(label, hists, overs_attr, rate_attr):
    output = "%5s" % (label)

    for hist in hists:
        output += GAP + hist.pre_pad
        overs = getattr(hist, overs_attr)

        for i in Hist.display_range:
            output += "%7.2f" % (overs[i])

        if Args.extra:
            output += "%11.1f" % (getattr(hist, rate_attr))

    return output


# ------------------------------------------------
# Get one set of bucket values.
#
# 'line' is the histogram's first line, holding its total. Bucket values are
# read from the lines after it. Updates hist's slice deltas, percentages and
# aggregates, and returns the first line that held no bucket values.
#
def read_bucket_values(line, file_id, hist):
    cumulative = [0] * Hist.max_bucket
    total, line = read_total_ops(line, file_id)
    first_unseen = 0

    while True:
        hits = 0

        # Only look for buckets not yet accounted for by earlier lines.
        for b in Hist.bucket_range[first_unseen:]:
            match = BUCKET_PATTERNS[b].search(line)

            if match:
                hits += 1
                cumulative[b] = long(match.group(1))

        # A line with no bucket values ends this histogram's data.
        if hits == 0:
            break

        line = file_id.readline()
        first_unseen += hits

    # The log holds running totals - subtract the previous chunk's values to
    # get this slice's.
    hist.slice_total = total - hist.old_total
    hist.slice_values = [
        new - old for new, old in zip(cumulative, hist.old_values)]
    hist.old_total = total
    hist.old_values = cumulative
    bucket_percentages_over(hist)
    bucket_aggregations(hist)

    return line


# ------------------------------------------------
# Parse a histogram total from an act output line.
#
# The total is the number between the first "(" and " total)". Returns it
# together with the next line read from the file.
#
def read_total_ops(line, file_id):
    begin = line.find("(") + 1
    end = line.find(" total)")
    total = long(line[begin:end])

    return total, file_id.readline()


# ------------------------------------------------
# Get the percentage excesses for every bucket.
#
# Sets hist.overs[b] to the percentage of this slice's total that is not in
# buckets 0 through b, rounded to 2 places. All zeros if the total is 0.
#
def bucket_percentages_over(hist):
    hist.overs = [0.0] * Hist.max_bucket
    slice_total = hist.slice_total

    if slice_total == 0:
        return

    at_or_under = 0

    for b in Hist.bucket_range:
        at_or_under += hist.slice_values[b]
        remainder = slice_total - at_or_under
        hist.overs[b] = round((remainder * 100.0) / slice_total, 2)


# ------------------------------------------------
# Track maximums and totals to calculate averages.
563 | # 564 | def bucket_aggregations(hist): 565 | hist.rate = round(float(hist.slice_total) / Hist.slice_time, 1) 566 | 567 | if Args.extra: 568 | hist.avg_rate += hist.rate 569 | 570 | if hist.rate > hist.max_rate: 571 | hist.max_rate = hist.rate 572 | 573 | for i in Hist.display_range: 574 | hist.avg_overs[i] += hist.overs[i] 575 | 576 | if hist.overs[i] > hist.max_overs[i]: 577 | hist.max_overs[i] = hist.overs[i] 578 | 579 | 580 | # ========================================================== 581 | # Execution. 582 | # 583 | 584 | if __name__ == "__main__": 585 | main() 586 | -------------------------------------------------------------------------------- /build/os_version: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # ------------------------------------------------------------------------------ 3 | # Copyright 2012-2015 Aerospike, Inc. 4 | # 5 | # Portions may be licensed to Aerospike, Inc. under one or more contributor 6 | # license agreements. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not 9 | # use this file except in compliance with the License. You may obtain a copy of 10 | # the License at http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 15 | # License for the specific language governing permissions and limitations under 16 | # the License. 
# ------------------------------------------------------------------------------

# Print a short OS tag (e.g. el9, debian12), or with -long the long form
# (e.g. centos9, rhel9).
OPT_LONG=0

if [ "$1" = "-long" ]
then
	OPT_LONG=1
fi

# Print an error message to stderr.
error() {
	echo 'error:' "$*" >&2
}

main() {

	local kernel=''
	local distro_id=''
	local distro_version=''
	local distro_long=''
	local distro_short=''

	# Make sure this script is running on Linux
	# The script is not designed to work on non-Linux
	# operating systems.
	kernel=$(uname -s | tr '[:upper:]' '[:lower:]')
	if [ "$kernel" != 'linux' ]
	then
		error "$kernel is not supported."
		exit 1
	fi

	# Prefer /etc/os-release; fall back to parsing /etc/issue.
	if [ -f /etc/os-release ]
	then
		. /etc/os-release
		distro_id=${ID,,}
		distro_version=${VERSION_ID}
	elif [ -f /etc/issue ]
	then
		issue=$(cat /etc/issue | tr '[:upper:]' '[:lower:]')
		case "$issue" in
			*'centos'* )
				distro_id='centos'
				;;
			*'redhat'* | *'rhel'* | *'red hat'* )
				distro_id='rhel'
				;;
			*'debian'* )
				distro_id='debian'
				;;
			* )
				error "/etc/issue contained an unsupported linux distribution: $issue"
				exit 1
				;;
		esac

		case "$distro_id" in
			'centos' | 'rhel' )
				local release=''
				if [ -f /etc/centos-release ]; then
					release=$(cat /etc/centos-release | tr '[:upper:]' '[:lower:]')
				elif [ -f /etc/redhat-release ]; then
					release=$(cat /etc/redhat-release | tr '[:upper:]' '[:lower:]')
				fi
				# Keep what follows the word "release", up to the first dot.
				release_version=${release##*release}
				distro_version=${release_version%%.*}
				;;
			'debian' )
				debian_version=$(cat /etc/debian_version | tr '[:upper:]' '[:lower:]')
				distro_version=${debian_version%%.*}
				;;
			* )
				error "/etc/issue contained an unsupported linux distribution: $issue"
				exit 1
				;;
		esac
	fi

	# Strip all whitespace.
	distro_id=${distro_id//[[:space:]]/}
	distro_version=${distro_version//[[:space:]]/}

	# Second chance for pre-release versions.
	if [ -z "$distro_version" ]
	then
		case "$distro_id" in
			'debian' )
				debian_version=$(cat /etc/debian_version | tr '[:upper:]' '[:lower:]')
				# Map a release codename to its version number.
				case "$debian_version" in
					buster* )
						debian_version=10
						;;
					bullseye* )
						debian_version=11
						;;
					bookworm* )
						debian_version=12
						;;
					trixie* )
						debian_version=13
						;;
				esac
				distro_version=${debian_version%%.*}

				;;
		esac
	fi

	case "$distro_id" in
		'centos' )
			distro_long="${distro_id}${distro_version%%.*}"
			distro_short="el${distro_version%%.*}"
			;;
		'rhel' | 'redhat' | 'red hat' )
			distro_long="${distro_id}${distro_version%%.*}"
			distro_short="el${distro_version%%.*}"
			;;
		'fedora' )
			# Fedora is reported as a centos/el version.
			if [ "$distro_version" -gt "15" ]
			then
				distro_version=7
			elif [ "$distro_version" -gt "10" ]
			then
				distro_version=6
			else
				error "Unsupported linux distribution: $distro_id $distro_version"
				exit 1
			fi
			distro_long="centos${distro_version}"
			distro_short="el${distro_version}"
			;;
		* )
			distro_long="${distro_id}${distro_version}"
			distro_short="${distro_id}${distro_version}"
			;;
	esac

	if [ "$OPT_LONG" = "1" ]
	then
		echo "${distro_long}"
	else
		echo "${distro_short}"
	fi
	exit 0
}

main

--------------------------------------------------------------------------------
/build/version:
--------------------------------------------------------------------------------
#!/bin/bash

rev=`git describe --always`
subbuild=`echo $rev | awk -F'-' '{print $2}'`
if [ "$subbuild" != "" ]
then
#	rev=`echo $rev | awk -F'-' '{printf("%s-%s\n",$1,$2)}'`
	rev=`echo $rev | awk -F'-' '{printf("%s\n",$1)}'`
fi
echo $rev

-------------------------------------------------------------------------------- /config/act_index.conf: -------------------------------------------------------------------------------- 1 | # 2 | # ACT-index config file. 3 | # 4 | 5 | # Mandatory device name(s) as comma-separated list: 6 | device-names: /dev/sdc? 7 | 8 | # Mandatory non-zero test duration: 9 | test-duration-sec: 86400 10 | 11 | #--------------------------------------- 12 | # Transaction request rates. 13 | # 14 | # The standard "1x" load is 1000 writes and 2000 reads per second. To generate 15 | # a standard "Nx" load, multiply these numbers by N. If testing with more than 16 | # one device, also multiply by the number of devices. (The configured rates are 17 | # spread across all devices in the test.) 18 | # 19 | 20 | read-reqs-per-sec: 2000 21 | write-reqs-per-sec: 1000 22 | 23 | #--------------------------------------- 24 | # Items with default values. 25 | # 26 | # All remaining configuration items are shown below with default values. To try 27 | # non-default values, just un-comment the relevant items and change the values. 28 | # See README.md for more information. 29 | # 30 | 31 | # service-threads: 40? # default is 5x detected number of CPUs 32 | # cache-threads: 8 33 | 34 | # report-interval-sec: 1 35 | # microsecond-histograms: no 36 | 37 | # replication-factor: 1 38 | # defrag-lwm-pct: 50 39 | 40 | # disable-odsync: no 41 | 42 | # max-lag-sec: 10 43 | -------------------------------------------------------------------------------- /config/act_storage.conf: -------------------------------------------------------------------------------- 1 | # 2 | # ACT-storage config file. 3 | # 4 | 5 | # Mandatory device name(s) as comma-separated list: 6 | device-names: /dev/sdc? 7 | 8 | # Mandatory non-zero test duration: 9 | test-duration-sec: 86400 10 | 11 | #--------------------------------------- 12 | # Transaction request rates. 
13 | # 14 | # The standard "1x" load is 1000 writes and 2000 reads per second. To generate 15 | # a standard "Nx" load, multiply these numbers by N. If testing with more than 16 | # one device, also multiply by the number of devices. (The configured rates are 17 | # spread across all devices in the test.) 18 | # 19 | 20 | read-reqs-per-sec: 2000 21 | write-reqs-per-sec: 1000 22 | 23 | #--------------------------------------- 24 | # Items with default values. 25 | # 26 | # All remaining configuration items are shown below with default values. To try 27 | # non-default values, just un-comment the relevant items and change the values. 28 | # See README.md for more information. 29 | # 30 | 31 | # service-threads: 40? # default is 5x detected number of CPUs 32 | 33 | # report-interval-sec: 1 34 | # microsecond-histograms: no 35 | 36 | # record-bytes: 1536 37 | # record-bytes-range-max: 0 38 | # large-block-op-kbytes: 128 39 | 40 | # replication-factor: 1 41 | # update-pct: 0 42 | # defrag-lwm-pct: 50 43 | # no-defrag-reads: no 44 | 45 | # compress-pct: 100 46 | # disable-odsync: no 47 | 48 | # commit-to-device: no 49 | 50 | # tomb-raider: no 51 | # tomb-raider-sleep-usec: 0 52 | 53 | # max-lag-sec: 10 54 | -------------------------------------------------------------------------------- /pkg/Makefile.deb: -------------------------------------------------------------------------------- 1 | # Build act distribution. 
2 | 3 | export DEB_SOURCE_ROOT = $(shell echo `pwd`/dist) 4 | export DEB_BUILD_ROOT = $(DEB_SOURCE_ROOT)/BUILD 5 | export CL_BASE = $(DEB_BUILD_ROOT)/opt/aerospike 6 | export ETC_BASE = $(DEB_BUILD_ROOT)/etc/aerospike 7 | 8 | 9 | DIR_PKG = target/packages 10 | REV = $(shell grep VERSION src/common/version.h | tr -s ' ' | cut -d' ' -f3 | tr -d '"') 11 | BLD_ID = $(shell git describe 2>/dev/null; if [ $${?} != 0 ]; then echo 'unknown'; fi) 12 | OS = $(shell build/os_version) 13 | ARCH=$(shell uname -m) 14 | MANIFEST_DIR = manifest/TEMP 15 | 16 | ifeq ($(ARCH), x86_64) 17 | ARCH=amd64 18 | endif 19 | 20 | ifeq ($(ARCH), aarch64) 21 | ARCH=arm64 22 | endif 23 | 24 | .PHONY: default 25 | default: dist 26 | 27 | .PHONY: dist 28 | dist: 29 | 30 | # Build act package. 31 | @echo $(REV) 32 | @echo $(OS) 33 | rm -rf $(DEB_BUILD_ROOT)/* 34 | mkdir -p $(DEB_BUILD_ROOT)/DEBIAN 35 | mkdir -p $(DEB_BUILD_ROOT)/usr/bin 36 | mkdir -p $(DIR_PKG) 37 | install -m 755 pkg/deb/postinst $(DEB_BUILD_ROOT)/DEBIAN/postinst 38 | install -m 755 pkg/deb/prerm $(DEB_BUILD_ROOT)/DEBIAN/prerm 39 | install -m 644 pkg/deb/control $(DEB_BUILD_ROOT)/DEBIAN/control 40 | 41 | mkdir -p $(CL_BASE) 42 | mkdir -p $(ETC_BASE) 43 | mkdir -p $(CL_BASE)/bin 44 | 45 | # act 46 | install -m 755 target/bin/act_* $(CL_BASE)/bin/ 47 | install -m 755 analysis/act_latency.py $(CL_BASE)/bin/ 48 | install -m 755 config/act_index.conf $(ETC_BASE)/ 49 | install -m 755 config/act_storage.conf $(ETC_BASE)/ 50 | 51 | # Create symlinks to /usr/bin 52 | mkdir -p $(DEB_BUILD_ROOT)/usr/bin 53 | ln -sf /opt/aerospike/bin/act_index $(DEB_BUILD_ROOT)/usr/bin/act_index 54 | ln -sf /opt/aerospike/bin/act_prep $(DEB_BUILD_ROOT)/usr/bin/act_prep 55 | ln -sf /opt/aerospike/bin/act_storage $(DEB_BUILD_ROOT)/usr/bin/act_storage 56 | ln -sf /opt/aerospike/bin/act_latency.py $(DEB_BUILD_ROOT)/usr/bin/act_latency.py 57 | 58 | 59 | sed 's/@VERSION@/'$(REV)'/g' $(DEB_BUILD_ROOT)/DEBIAN/control 60 | sed -i 's/@ARCH@/'$(ARCH)'/g' 
$(DEB_BUILD_ROOT)/DEBIAN/control 61 | fakeroot dpkg-deb -Z xz --build $(DEB_BUILD_ROOT) $(DIR_PKG)/act_$(REV)-1$(OS)_$(ARCH).deb 62 | rm -rf dist 63 | 64 | distclean: 65 | rm -rf $(DEB_SOURCE_ROOT) 66 | rm -rf target/packages 67 | 68 | print-% : ; @echo $($*) 69 | -------------------------------------------------------------------------------- /pkg/Makefile.rpm: -------------------------------------------------------------------------------- 1 | # Build ACT rpm distribution. 2 | 3 | export RPM_SOURCE_ROOT = $(shell echo `pwd`/dist) 4 | export RPM_BUILD_ROOT = $(RPM_SOURCE_ROOT)/BUILD 5 | export CL_BASE = $(RPM_BUILD_ROOT)/opt/aerospike 6 | export ETC_BASE = $(RPM_BUILD_ROOT)/etc/aerospike 7 | 8 | MANIFEST_DIR = manifest/TEMP 9 | DIR_PKG = target/packages 10 | REV = $(shell grep VERSION src/common/version.h | tr -s ' ' | cut -d' ' -f3 | tr -d '"') 11 | BLD_ID = $(shell git describe 2>/dev/null; if [ $${?} != 0 ]; then echo 'unknown'; fi) 12 | OS = $(shell build/os_version) 13 | ARCH=$(shell uname -m) 14 | 15 | .PHONY: default 16 | default: dist 17 | mkdir -p $(DIR_PKG) 18 | mkdir -p $(RPM_BUILD_ROOT) 19 | mkdir -p $(RPM_SOURCE_ROOT)/RPMS/x86_64 20 | mkdir -p $(RPM_BUILD_ROOT)/usr/bin 21 | 22 | sed 's/@VERSION@/'$(REV)'/g' <pkg/rpm/act.spec >pkg/act_v.spec 23 | sed -i 's/@RELEASE@/'$(OS)'/g' pkg/act_v.spec 24 | sed -i 's/@ARCH@/'$(ARCH)'/g' pkg/act_v.spec 25 | 26 | rpmbuild --noclean -bb -vv --define "dist .$(OS)" --buildroot $(RPM_BUILD_ROOT) pkg/act_v.spec 27 | find $(RPM_SOURCE_ROOT)/RPMS -type f -exec mv {} $(DIR_PKG) \; 28 | rm -rf pkg/act_v.spec dist 29 | 30 | distclean: 31 | rm -rf $(RPM_BUILD_ROOT) 32 | rm -rf target/packages 33 | 34 | .PHONY: dist 35 | dist: 36 | 37 | mkdir -p $(CL_BASE) 38 | mkdir -p $(ETC_BASE) 39 | mkdir -p $(CL_BASE)/bin 40 | 41 | # act 42 | install -m 755 target/bin/act_* $(CL_BASE)/bin/ 43 | install -m 755 analysis/act_latency.py $(CL_BASE)/bin/ 44 | install -m 755 config/act_index.conf $(ETC_BASE)/ 45 | install -m 755 config/act_storage.conf
$(ETC_BASE)/ 46 | 47 | print-% : ; @echo $($*) 48 | -------------------------------------------------------------------------------- /pkg/deb/control: -------------------------------------------------------------------------------- 1 | Package: act 2 | Version: @VERSION@ 3 | Section: Databases 4 | Priority: optional 5 | Architecture: @ARCH@ 6 | Depends: libc6 (>= 2.7) 7 | Maintainer: Aerospike, Inc. 8 | Description: Aerospike Certification Tool 9 | -------------------------------------------------------------------------------- /pkg/deb/postinst: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | case "$1" in 6 | configure) 7 | 8 | echo "Installing Aerospike Certification Tool" 9 | 10 | # create aerospike group if it isn't already there 11 | if ! getent group aerospike >/dev/null; then 12 | groupadd -r aerospike 13 | fi 14 | 15 | # create aerospike user if it isn't already there 16 | if ! getent passwd aerospike >/dev/null; then 17 | useradd -r -d /opt/aerospike -c 'Aerospike server' -g aerospike aerospike 18 | fi 19 | 20 | chown -R aerospike:aerospike /opt/aerospike 21 | 22 | ;; 23 | esac 24 | 25 | exit 0 26 | -------------------------------------------------------------------------------- /pkg/deb/prerm: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | case "$1" in 6 | remove) 7 | 8 | echo "Removing Aerospike Certification Tool" 9 | # rm -f /opt/aerospike/bin/act_* 10 | ;; 11 | esac 12 | 13 | exit 0 14 | -------------------------------------------------------------------------------- /pkg/rpm/act.spec: -------------------------------------------------------------------------------- 1 | Name: act 2 | Version: @VERSION@ 3 | Release: 1%{?dist} 4 | Summary: The Aerospike Certification Tool 5 | License: Apache 2.0 license 6 | Group: Application 7 | BuildArch: @ARCH@ 8 | %description 9 | ACT provides a pair of programs for testing and 
certifying flash/SSD devices' performance for Aerospike Database data and index storage. 10 | %define _topdir dist 11 | %define __spec_install_post /usr/lib/rpm/brp-compress 12 | 13 | %package tools 14 | Summary: The Aerospike Certification Tool 15 | Group: Applications 16 | %description tools 17 | Tools for use with the Aerospike database 18 | %files 19 | %defattr(-,aerospike,aerospike) 20 | /opt/aerospike/bin/act_index 21 | /opt/aerospike/bin/act_prep 22 | /opt/aerospike/bin/act_storage 23 | /opt/aerospike/bin/act_latency.py 24 | %defattr(-,root,root) 25 | /usr/bin/act_index 26 | /usr/bin/act_prep 27 | /usr/bin/act_storage 28 | /usr/bin/act_latency.py 29 | 30 | %config(noreplace) 31 | /etc/aerospike/act_storage.conf 32 | /etc/aerospike/act_index.conf 33 | 34 | %prep 35 | ln -sf /opt/aerospike/bin/act_index %{buildroot}/usr/bin/act_index 36 | ln -sf /opt/aerospike/bin/act_prep %{buildroot}/usr/bin/act_prep 37 | ln -sf /opt/aerospike/bin/act_storage %{buildroot}/usr/bin/act_storage 38 | ln -sf /opt/aerospike/bin/act_latency.py %{buildroot}/usr/bin/act_latency.py 39 | 40 | %pre tools 41 | echo Installing /opt/aerospike/act 42 | if ! id -g aerospike >/dev/null 2>&1; then 43 | echo "Adding group aerospike" 44 | /usr/sbin/groupadd -r aerospike 45 | fi 46 | if ! id -u aerospike >/dev/null 2>&1; then 47 | echo "Adding user aerospike" 48 | /usr/sbin/useradd -r -d /opt/aerospike -c 'Aerospike server' -g aerospike aerospike 49 | fi 50 | 51 | %preun tools 52 | if [ $1 -eq 0 ] 53 | then 54 | echo Removing /opt/aerospike/act 55 | fi 56 | -------------------------------------------------------------------------------- /src/common/cfg.c: -------------------------------------------------------------------------------- 1 | /* 2 | * cfg.c 3 | * 4 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | //========================================================== 26 | // Includes. 27 | // 28 | 29 | #include "cfg.h" 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | 39 | //========================================================== 40 | // Public API. 
41 | // 42 | 43 | void 44 | parse_device_names(size_t max_num_devices, char names[][MAX_DEVICE_NAME_SIZE], 45 | uint32_t* p_num_devices) 46 | { 47 | const char* val; 48 | 49 | while ((val = strtok(NULL, ",;" WHITE_SPACE)) != NULL) { 50 | if (*p_num_devices == max_num_devices) { 51 | printf("ERROR: too many device names\n"); 52 | *p_num_devices = 0; 53 | return; 54 | } 55 | 56 | size_t name_len = strlen(val); 57 | 58 | if (name_len == 0 || name_len >= MAX_DEVICE_NAME_SIZE) { 59 | printf("ERROR: bad device name '%s'\n", val); 60 | *p_num_devices = 0; 61 | return; 62 | } 63 | 64 | strcpy(names[*p_num_devices], val); 65 | (*p_num_devices)++; 66 | } 67 | } 68 | 69 | uint32_t 70 | parse_uint32() 71 | { 72 | const char* val = strtok(NULL, WHITE_SPACE); 73 | 74 | if (val == NULL) { 75 | printf("ERROR: missing integer config value\n"); 76 | return 0; 77 | } 78 | 79 | uint64_t u64_val = strtoul(val, NULL, 10); 80 | 81 | if (u64_val > UINT32_MAX) { 82 | printf("ERROR: %s overflows unsigned int\n", val); 83 | return 0; 84 | } 85 | 86 | return (uint32_t)u64_val; 87 | } 88 | 89 | bool 90 | parse_yes_no() 91 | { 92 | const char* val = strtok(NULL, WHITE_SPACE); 93 | 94 | return val != NULL && *val == 'y'; 95 | } 96 | -------------------------------------------------------------------------------- /src/common/cfg.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cfg.h 3 | * 4 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #pragma once 26 | 27 | //========================================================== 28 | // Includes. 29 | // 30 | 31 | #include <stdbool.h> 32 | #include <stddef.h> 33 | #include <stdint.h> 34 | #include <stdio.h> 35 | 36 | 37 | //========================================================== 38 | // Typedefs & constants. 39 | // 40 | 41 | #define WHITE_SPACE " \t\n\r" 42 | #define MAX_DEVICE_NAME_SIZE 128 43 | 44 | 45 | //========================================================== 46 | // Public API.
47 | // 48 | 49 | void parse_device_names(size_t max_num_devices, 50 | char names[][MAX_DEVICE_NAME_SIZE], uint32_t* p_num_devices); 51 | uint32_t parse_uint32(); 52 | bool parse_yes_no(); 53 | 54 | static inline void 55 | configuration_error(const char* tag) 56 | { 57 | printf("ERROR: invalid or missing configuration of '%s'\n", tag); 58 | } 59 | -------------------------------------------------------------------------------- /src/common/clock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * clock.h 3 | * 4 | * Copyright (c) 2011-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #pragma once 26 | 27 | //========================================================== 28 | // Includes. 
29 | // 30 | 31 | #include 32 | #include 33 | 34 | 35 | //========================================================== 36 | // Public API. 37 | // 38 | 39 | static inline uint64_t 40 | get_ms() 41 | { 42 | struct timespec ts; 43 | clock_gettime(CLOCK_MONOTONIC, &ts); 44 | return ((uint64_t)ts.tv_nsec / 1000000) + ((uint64_t)ts.tv_sec * 1000); 45 | } 46 | 47 | static inline uint64_t 48 | get_us() 49 | { 50 | struct timespec ts; 51 | clock_gettime(CLOCK_MONOTONIC, &ts); 52 | return ((uint64_t)ts.tv_nsec / 1000) + ((uint64_t)ts.tv_sec * 1000000); 53 | } 54 | 55 | static inline uint64_t 56 | get_ns() 57 | { 58 | struct timespec ts; 59 | clock_gettime(CLOCK_MONOTONIC, &ts); 60 | return (uint64_t)ts.tv_nsec + ((uint64_t)ts.tv_sec * 1000000000); 61 | } 62 | -------------------------------------------------------------------------------- /src/common/hardware.c: -------------------------------------------------------------------------------- 1 | /* 2 | * hardware.c 3 | * 4 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | //========================================================== 26 | // Includes. 27 | // 28 | 29 | #include "hardware.h" 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | 43 | #include "trace.h" 44 | 45 | 46 | //========================================================== 47 | // Typedefs & constants. 48 | // 49 | 50 | typedef enum { 51 | FILE_RES_OK, 52 | FILE_RES_NOT_FOUND, 53 | FILE_RES_ERROR 54 | } file_res; 55 | 56 | 57 | //========================================================== 58 | // Forward declarations. 59 | // 60 | 61 | static file_res read_list(const char* path, cpu_set_t* mask); 62 | static file_res read_index(const char* path, uint16_t* val); 63 | static file_res read_file(const char* path, void* buf, size_t* limit); 64 | 65 | 66 | //========================================================== 67 | // Public API. 
68 | // 69 | 70 | uint32_t 71 | num_cpus() 72 | { 73 | cpu_set_t os_cpus_online; 74 | 75 | if (read_list("/sys/devices/system/cpu/online", &os_cpus_online) != 76 | FILE_RES_OK) { 77 | printf("ERROR: couldn't read list of online CPUs\n"); 78 | return 0; 79 | } 80 | 81 | uint16_t n_cpus = 0; 82 | uint16_t n_os_cpus; 83 | 84 | for (n_os_cpus = 0; n_os_cpus < CPU_SETSIZE; n_os_cpus++) { 85 | char path[1000]; 86 | 87 | snprintf(path, sizeof(path), 88 | "/sys/devices/system/cpu/cpu%hu/topology/physical_package_id", 89 | n_os_cpus); 90 | 91 | uint16_t i_os_package; 92 | file_res res = read_index(path, &i_os_package); 93 | 94 | if (res == FILE_RES_NOT_FOUND) { 95 | break; // we've processed all available CPUs - done 96 | } 97 | 98 | if (res != FILE_RES_OK) { 99 | printf("ERROR: reading OS package index from %s\n", path); 100 | return 0; 101 | } 102 | 103 | // Only consider CPUs that are actually in use. 104 | if (CPU_ISSET(n_os_cpus, &os_cpus_online)) { 105 | n_cpus++; 106 | } 107 | } 108 | 109 | if (n_os_cpus == CPU_SETSIZE) { 110 | printf("ERROR: too many CPUs\n"); 111 | return 0; 112 | } 113 | 114 | printf("detected %" PRIu32 " CPUs\n\n", n_cpus); 115 | 116 | return n_cpus; 117 | } 118 | 119 | 120 | //========================================================== 121 | // Local helpers. 
122 | // 123 | 124 | static file_res 125 | read_list(const char* path, cpu_set_t* mask) 126 | { 127 | char buf[1000]; 128 | size_t limit = sizeof(buf); 129 | file_res res = read_file(path, buf, &limit); 130 | 131 | if (res != FILE_RES_OK) { 132 | return res; 133 | } 134 | 135 | buf[limit - 1] = '\0'; 136 | CPU_ZERO(mask); 137 | 138 | char* at = buf; 139 | 140 | while (true) { 141 | char* delim; 142 | uint64_t from = strtoul(at, &delim, 10); 143 | uint64_t thru; 144 | 145 | if (*delim == ',' || *delim == '\0'){ 146 | thru = from; 147 | } 148 | else if (*delim == '-') { 149 | at = delim + 1; 150 | thru = strtoul(at, &delim, 10); 151 | } 152 | else { 153 | printf("ERROR: invalid list '%s' in %s\n", buf, path); 154 | return FILE_RES_ERROR; 155 | } 156 | 157 | if (from >= CPU_SETSIZE || thru >= CPU_SETSIZE || from > thru) { 158 | printf("ERROR: invalid list '%s' in %s\n", buf, path); 159 | return FILE_RES_ERROR; 160 | } 161 | 162 | for (size_t i = from; i <= thru; ++i) { 163 | CPU_SET(i, mask); 164 | } 165 | 166 | if (*delim == '\0') { 167 | break; 168 | } 169 | 170 | at = delim + 1; 171 | } 172 | 173 | return FILE_RES_OK; 174 | } 175 | 176 | static file_res 177 | read_index(const char* path, uint16_t* val) 178 | { 179 | char buf[100]; 180 | size_t limit = sizeof(buf); 181 | file_res res = read_file(path, buf, &limit); 182 | 183 | if (res != FILE_RES_OK) { 184 | return res; 185 | } 186 | 187 | buf[limit - 1] = '\0'; 188 | 189 | char* end; 190 | uint64_t x = strtoul(buf, &end, 10); 191 | 192 | if (*end != '\0' || x >= CPU_SETSIZE) { 193 | printf("ERROR: invalid index '%s' in %s\n", buf, path); 194 | return FILE_RES_ERROR; 195 | } 196 | 197 | *val = (uint16_t)x; 198 | 199 | return FILE_RES_OK; 200 | } 201 | 202 | static file_res 203 | read_file(const char* path, void* buf, size_t* limit) 204 | { 205 | int32_t fd = open(path, O_RDONLY); 206 | 207 | if (fd < 0) { 208 | if (errno == ENOENT) { 209 | return FILE_RES_NOT_FOUND; 210 | } 211 | 212 | printf("ERROR: couldn't open 
file %s for reading: %d '%s'\n", path, 213 | errno, act_strerror(errno)); 214 | return FILE_RES_ERROR; 215 | } 216 | 217 | size_t total = 0; 218 | 219 | while (total < *limit) { 220 | ssize_t len = read(fd, (uint8_t*)buf + total, *limit - total); 221 | 222 | if (len < 0) { 223 | printf("ERROR: couldn't read file %s: %d '%s'\n", path, errno, 224 | act_strerror(errno)); 225 | close(fd); 226 | return FILE_RES_ERROR; 227 | } 228 | 229 | if (len == 0) { 230 | break; // EOF 231 | } 232 | 233 | total += (size_t)len; 234 | } 235 | 236 | close(fd); 237 | 238 | if (total == *limit) { 239 | printf("ERROR: read buffer too small for file %s\n", path); 240 | return FILE_RES_ERROR; 241 | } 242 | 243 | *limit = total; 244 | 245 | return FILE_RES_OK; 246 | } 247 | -------------------------------------------------------------------------------- /src/common/hardware.h: -------------------------------------------------------------------------------- 1 | /* 2 | * hardware.h 3 | * 4 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #pragma once 26 | 27 | //========================================================== 28 | // Includes. 29 | // 30 | 31 | #include <stdint.h> 32 | 33 | 34 | //========================================================== 35 | // Public API. 36 | // 37 | 38 | uint32_t num_cpus(); 39 | -------------------------------------------------------------------------------- /src/common/histogram.c: -------------------------------------------------------------------------------- 1 | /* 2 | * histogram.c 3 | * 4 | * Copyright (c) 2011-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | //========================================================== 26 | // Includes. 27 | // 28 | 29 | #include "histogram.h" 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | 39 | //========================================================== 40 | // Typedefs & constants. 41 | // 42 | 43 | //------------------------------------------------ 44 | // BYTE_MSB[n] returns the position of the most 45 | // significant bit. If no bits are set (n = 0) it 46 | // returns 0. Otherwise the positions are 1 ... 8 47 | // from low to high, so e.g. n = 13 returns 4: 48 | // 49 | // bits: 0 0 0 0 1 1 0 1 50 | // position: 8 7 6 5 [4] 3 2 1 51 | // 52 | static const char BYTE_MSB[] = { 53 | 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 54 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 55 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 56 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 57 | 58 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 59 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 60 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 61 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 62 | 63 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 64 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 65 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 66 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 67 | 68 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 69 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 70 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 71 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 72 | }; 73 | 74 | 75 | //========================================================== 76 | // Forward declarations. 
77 | // 78 | 79 | static int msb(uint64_t n); 80 | 81 | 82 | //========================================================== 83 | // Public API. 84 | // 85 | 86 | //------------------------------------------------ 87 | // Create a histogram. There's no destroy(), but 88 | // you can just free the histogram. 89 | // 90 | histogram* 91 | histogram_create(histogram_scale scale) 92 | { 93 | histogram* h = malloc(sizeof(histogram)); 94 | 95 | if (h == NULL) { 96 | printf("ERROR: creating histogram (malloc)\n"); 97 | return NULL; 98 | } 99 | 100 | memset((void*)h->counts, 0, sizeof(h->counts)); 101 | 102 | switch (scale) { 103 | case HIST_MILLISECONDS: 104 | h->time_div = 1000 * 1000; 105 | break; 106 | case HIST_MICROSECONDS: 107 | h->time_div = 1000; 108 | break; 109 | default: 110 | printf("ERROR: creating histogram (scale parameter)\n"); 111 | free(h); 112 | return NULL; 113 | } 114 | 115 | return h; 116 | } 117 | 118 | //------------------------------------------------ 119 | // Dump a histogram to stdout. 120 | // 121 | // Note - DO NOT change the output format in this 122 | // method - act_latency.py assumes this format. 
123 | // 124 | void 125 | histogram_dump(histogram* h, const char* tag) 126 | { 127 | uint64_t counts[N_BUCKETS]; 128 | uint32_t i = N_BUCKETS; 129 | uint32_t j = 0; 130 | uint64_t total = 0; 131 | 132 | for (uint32_t b = 0; b < N_BUCKETS; b++) { 133 | counts[b] = __atomic_load_n(&h->counts[b], __ATOMIC_RELAXED); 134 | 135 | if (counts[b] != 0) { 136 | if (i > b) { 137 | i = b; 138 | } 139 | 140 | j = b; 141 | total += counts[b]; 142 | } 143 | } 144 | 145 | char buf[200]; 146 | int pos = 0; 147 | uint32_t k = 0; 148 | 149 | buf[0] = '\0'; 150 | 151 | printf("%s (%" PRIu64 " total)\n", tag, total); 152 | 153 | for ( ; i <= j; i++) { 154 | if (counts[i] == 0) { // print only non-zero columns 155 | continue; 156 | } 157 | 158 | pos += sprintf(buf + pos, " (%02u: %010" PRIu64 ")", i, counts[i]); 159 | 160 | if ((k & 3) == 3) { // maximum of 4 printed columns per line 161 | printf("%s\n", buf); 162 | pos = 0; 163 | buf[0] = '\0'; 164 | } 165 | 166 | k++; 167 | } 168 | 169 | if (pos > 0) { 170 | printf("%s\n", buf); 171 | } 172 | } 173 | 174 | //------------------------------------------------ 175 | // Insert a time interval data point. The interval 176 | // is specified in nanoseconds, and converted to 177 | // milliseconds or microseconds as appropriate. 178 | // Generates a histogram with either: 179 | // 180 | // bucket millisecond range 181 | // ------ ----------------- 182 | // 0 0 to 1 (more exactly, 0.999999) 183 | // 1 1 to 2 (more exactly, 1.999999) 184 | // 2 2 to 4 (more exactly, 3.999999) 185 | // 3 4 to 8 (more exactly, 7.999999) 186 | // 4 8 to 16 (more exactly, 15.999999) 187 | // etc. 188 | // 189 | // or: 190 | // 191 | // bucket microsecond range 192 | // ------ ----------------- 193 | // 0 0 to 1 (more exactly, 0.999) 194 | // 1 1 to 2 (more exactly, 1.999) 195 | // 2 2 to 4 (more exactly, 3.999) 196 | // 3 4 to 8 (more exactly, 7.999) 197 | // 4 8 to 16 (more exactly, 15.999) 198 | // etc. 
199 | // 200 | void 201 | histogram_insert_data_point(histogram* h, uint64_t delta_ns) 202 | { 203 | uint64_t delta_t = delta_ns / h->time_div; 204 | int bucket = 0; 205 | 206 | if (delta_t != 0) { 207 | bucket = msb(delta_t); 208 | } 209 | 210 | __atomic_fetch_add(&h->counts[bucket], 1, __ATOMIC_RELAXED); 211 | } 212 | 213 | 214 | //========================================================== 215 | // Local helpers. 216 | // 217 | 218 | //------------------------------------------------ 219 | // Returns the position of the most significant 220 | // bit of n. Positions are 1 ... 64 from low to 221 | // high, so: 222 | // 223 | // n msb(n) 224 | // -------- ------ 225 | // 0 0 226 | // 1 1 227 | // 2 ... 3 2 228 | // 4 ... 7 3 229 | // 8 ... 15 4 230 | // etc. 231 | // 232 | static int 233 | msb(uint64_t n) 234 | { 235 | int shift = 0; 236 | 237 | while (true) { 238 | uint64_t n_div_256 = n >> 8; 239 | 240 | if (n_div_256 == 0) { 241 | return shift + (int)BYTE_MSB[n]; 242 | } 243 | 244 | n = n_div_256; 245 | shift += 8; 246 | } 247 | 248 | // Should never get here. 249 | printf("ERROR: msb calculation\n"); 250 | return -1; 251 | } 252 | -------------------------------------------------------------------------------- /src/common/histogram.h: -------------------------------------------------------------------------------- 1 | /* 2 | * histogram.h 3 | * 4 | * Copyright (c) 2011-2020 Aerospike, Inc. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #pragma once 26 | 27 | //========================================================== 28 | // Includes. 29 | // 30 | 31 | #include <stdint.h> 32 | 33 | 34 | //========================================================== 35 | // Typedefs & constants. 36 | // 37 | 38 | #define N_BUCKETS (1 + 64) 39 | 40 | typedef enum { 41 | HIST_MILLISECONDS, 42 | HIST_MICROSECONDS, 43 | HIST_SCALE_MAX_PLUS_1 44 | } histogram_scale; 45 | 46 | typedef struct histogram_s { 47 | uint32_t time_div; 48 | uint64_t counts[N_BUCKETS]; 49 | } histogram; 50 | 51 | 52 | //========================================================== 53 | // Public API.
54 | // 55 | 56 | histogram* histogram_create(histogram_scale scale); 57 | void histogram_dump(histogram* h, const char* tag); 58 | void histogram_insert_data_point(histogram* h, uint64_t delta_ns); 59 | -------------------------------------------------------------------------------- /src/common/io.c: -------------------------------------------------------------------------------- 1 | /* 2 | * io.c 3 | * 4 | * Copyright (c) 2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | //========================================================== 26 | // Includes. 27 | // 28 | 29 | #include "io.h" 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | 37 | //========================================================== 38 | // Public API. 
39 | // 40 | 41 | bool 42 | pread_all(int fd, void* buf, size_t size, off_t offset) 43 | { 44 | ssize_t result; 45 | 46 | while ((result = pread(fd, buf, size, offset)) != (ssize_t)size) { 47 | if (result < 0) { 48 | return false; // let the caller log errors 49 | } 50 | 51 | if (result == 0) { // should only happen if caller passed 0 size 52 | printf("ERROR: pread() returned 0\n"); 53 | return false; 54 | } 55 | 56 | buf += result; 57 | offset += result; 58 | size -= result; 59 | } 60 | 61 | return true; 62 | } 63 | 64 | bool 65 | pwrite_all(int fd, const void* buf, size_t size, off_t offset) 66 | { 67 | ssize_t result; 68 | 69 | while ((result = pwrite(fd, buf, size, offset)) != (ssize_t)size) { 70 | if (result < 0) { 71 | return false; // let the caller log errors 72 | } 73 | 74 | if (result == 0) { // should only happen if caller passed 0 size 75 | printf("ERROR: pwrite() returned 0\n"); 76 | return false; 77 | } 78 | 79 | buf += result; 80 | offset += result; 81 | size -= result; 82 | } 83 | 84 | return true; 85 | } 86 | 87 | bool 88 | write_all(int fd, const void* buf, size_t size) 89 | { 90 | ssize_t result; 91 | 92 | while ((result = write(fd, buf, size)) != (ssize_t)size) { 93 | if (result < 0) { 94 | return false; // let the caller log errors 95 | } 96 | 97 | if (result == 0) { // should only happen if caller passed 0 size 98 | printf("ERROR: write() returned 0\n"); 99 | return false; 100 | } 101 | 102 | buf += result; 103 | size -= result; 104 | } 105 | 106 | return true; 107 | } 108 | -------------------------------------------------------------------------------- /src/common/io.h: -------------------------------------------------------------------------------- 1 | /* 2 | * io.h 3 | * 4 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #pragma once 26 | 27 | //========================================================== 28 | // Includes. 29 | // 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | 36 | //========================================================== 37 | // Public API. 38 | // 39 | 40 | bool pread_all(int fd, void* buf, size_t size, off_t offset); 41 | bool pwrite_all(int fd, const void* buf, size_t size, off_t offset); 42 | bool write_all(int fd, const void* buf, size_t size); 43 | -------------------------------------------------------------------------------- /src/common/queue.c: -------------------------------------------------------------------------------- 1 | /* 2 | * queue.c 3 | * 4 | * Copyright (c) 2011-2020 Aerospike, Inc. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | //========================================================== 26 | // Includes. 27 | // 28 | 29 | #include "queue.h" 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | 40 | //========================================================== 41 | // Typedefs & constants. 42 | // 43 | 44 | #define Q_ALLOC_SZ (64 * 1024) 45 | 46 | 47 | //========================================================== 48 | // Forward Declarations 49 | // 50 | 51 | static bool q_resize(queue* q, uint32_t new_sz); 52 | static void q_unwrap(queue* q); 53 | 54 | 55 | //========================================================== 56 | // Inlines & macros. 
57 | // 58 | 59 | #define Q_SZ(_q) (_q->write_offset - _q->read_offset) 60 | #define Q_EMPTY(_q) (_q->write_offset == _q->read_offset) 61 | #define Q_ELE_PTR(_q, _i) (&_q->elements[(_i % _q->alloc_sz) * _q->ele_size]) 62 | 63 | 64 | //========================================================== 65 | // Public API. 66 | // 67 | 68 | //------------------------------------------------ 69 | // Create a queue. 70 | // 71 | queue* 72 | queue_create(size_t ele_size) 73 | { 74 | queue* q = malloc( sizeof(queue)); 75 | 76 | if (q == NULL) { 77 | printf("ERROR: creating queue (malloc)\n"); 78 | return NULL; 79 | } 80 | 81 | q->elements = malloc(Q_ALLOC_SZ * ele_size); 82 | 83 | if (q->elements == NULL) { 84 | printf("ERROR: creating queue (malloc)\n"); 85 | free(q); 86 | return NULL; 87 | } 88 | 89 | q->alloc_sz = Q_ALLOC_SZ; 90 | q->write_offset = q->read_offset = 0; 91 | q->ele_size = ele_size; 92 | 93 | if (pthread_mutex_init(&q->lock, NULL) != 0) { 94 | printf("ERROR: creating queue (mutex init)\n"); 95 | free(q->elements); 96 | free(q); 97 | return NULL; 98 | } 99 | 100 | return q; 101 | } 102 | 103 | //------------------------------------------------ 104 | // Destroy a queue. 105 | // 106 | void 107 | queue_destroy(queue* q) 108 | { 109 | pthread_mutex_destroy(&q->lock); 110 | 111 | free(q->elements); 112 | free(q); 113 | } 114 | 115 | //------------------------------------------------ 116 | // Get the number of elements in the queue. 117 | // 118 | uint32_t 119 | queue_sz(queue* q) 120 | { 121 | pthread_mutex_lock(&q->lock); 122 | 123 | uint32_t rv = Q_SZ(q); 124 | 125 | pthread_mutex_unlock(&q->lock); 126 | 127 | return rv; 128 | } 129 | 130 | //------------------------------------------------ 131 | // Push an element to the tail of the queue. 132 | // 133 | bool 134 | queue_push(queue* q, const void* ele_ptr) 135 | { 136 | pthread_mutex_lock(&q->lock); 137 | 138 | if (Q_SZ(q) == q->alloc_sz) { 139 | if (! 
q_resize(q, q->alloc_sz * 2)) { 140 | pthread_mutex_unlock(&q->lock); 141 | return false; 142 | } 143 | } 144 | 145 | memcpy(Q_ELE_PTR(q, q->write_offset), ele_ptr, q->ele_size); 146 | q->write_offset++; 147 | 148 | // We're at risk of overflowing the write offset if it's too big. 149 | if ((q->write_offset & 0xC0000000) != 0) { 150 | q_unwrap(q); 151 | } 152 | 153 | pthread_mutex_unlock(&q->lock); 154 | 155 | return true; 156 | } 157 | 158 | //------------------------------------------------ 159 | // Pop an element from the head of the queue. 160 | // 161 | bool 162 | queue_pop(queue* q, void* ele_ptr) 163 | { 164 | pthread_mutex_lock(&q->lock); 165 | 166 | if (Q_EMPTY(q)) { 167 | pthread_mutex_unlock(&q->lock); 168 | return false; 169 | } 170 | 171 | memcpy(ele_ptr, Q_ELE_PTR(q, q->read_offset), q->ele_size); 172 | q->read_offset++; 173 | 174 | if (q->read_offset == q->write_offset) { 175 | q->read_offset = q->write_offset = 0; 176 | } 177 | 178 | pthread_mutex_unlock(&q->lock); 179 | 180 | return true; 181 | } 182 | 183 | 184 | //========================================================== 185 | // Local helpers. 186 | // 187 | 188 | //------------------------------------------------ 189 | // Change allocated capacity - called under lock. 190 | // 191 | static bool 192 | q_resize(queue* q, uint32_t new_sz) 193 | { 194 | if (q->read_offset % q->alloc_sz == 0) { 195 | // Queue not fragmented - just realloc. 196 | q->elements = realloc(q->elements, new_sz * q->ele_size); 197 | 198 | if (q->elements == NULL) { 199 | printf("ERROR: resizing queue (realloc)\n"); 200 | return false; 201 | } 202 | 203 | q->read_offset = 0; 204 | q->write_offset = q->alloc_sz; 205 | } 206 | else { 207 | uint8_t* new_q = malloc(new_sz * q->ele_size); 208 | 209 | if (new_q == NULL) { 210 | printf("ERROR: resizing queue (malloc)\n"); 211 | return false; 212 | } 213 | 214 | // end_size is used bytes in old queue from insert point to end. 
215 | uint32_t end_size = 216 | (q->alloc_sz - (q->read_offset % q->alloc_sz)) * q->ele_size; 217 | 218 | memcpy(&new_q[0], Q_ELE_PTR(q, q->read_offset), end_size); 219 | memcpy(&new_q[end_size], &q->elements[0], 220 | (q->alloc_sz * q->ele_size) - end_size); 221 | 222 | free(q->elements); 223 | q->elements = new_q; 224 | 225 | q->write_offset = q->alloc_sz; 226 | q->read_offset = 0; 227 | } 228 | 229 | q->alloc_sz = new_sz; 230 | 231 | return true; 232 | } 233 | 234 | //------------------------------------------------ 235 | // Reset read & write offsets - called under lock. 236 | // 237 | static void 238 | q_unwrap(queue* q) 239 | { 240 | uint32_t sz = Q_SZ(q); 241 | 242 | q->read_offset %= q->alloc_sz; 243 | q->write_offset = q->read_offset + sz; 244 | } 245 | -------------------------------------------------------------------------------- /src/common/queue.h: -------------------------------------------------------------------------------- 1 | /* 2 | * queue.h 3 | * 4 | * Copyright (c) 2011-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #pragma once 26 | 27 | //========================================================== 28 | // Includes. 29 | // 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | 37 | //========================================================== 38 | // Typedefs & constants. 39 | // 40 | 41 | typedef struct queue_s { 42 | uint32_t alloc_sz; // number of elements currently allocated 43 | uint32_t read_offset; // head of queue 44 | uint32_t write_offset; // tail of queue - write is always >= read 45 | size_t ele_size; // size of (every) element in bytes 46 | pthread_mutex_t lock; // the lock - used in thread-safe mode 47 | uint8_t* elements; // the elements' bytes 48 | } queue; 49 | 50 | 51 | //========================================================== 52 | // Public API. 53 | // 54 | 55 | queue* queue_create(size_t ele_size); 56 | void queue_destroy(queue* q); 57 | uint32_t queue_sz(queue* q); 58 | bool queue_push(queue* q, const void* ele_ptr); 59 | bool queue_pop(queue* q, void* ele_ptr); 60 | -------------------------------------------------------------------------------- /src/common/random.c: -------------------------------------------------------------------------------- 1 | /* 2 | * random.c 3 | * 4 | * Copyright (c) 2011-2020 Aerospike, Inc. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | //========================================================== 26 | // Includes. 27 | // 28 | 29 | #include "random.h" 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | 38 | //========================================================== 39 | // Typedefs & constants. 40 | // 41 | 42 | #define INTERVAL_SIZE 512 43 | #define WRITES_PER_INTERVAL (INTERVAL_SIZE / sizeof(uint64_t)) 44 | 45 | 46 | //========================================================== 47 | // Forward declarations. 48 | // 49 | 50 | static inline uint64_t xorshift128plus(); 51 | 52 | 53 | //========================================================== 54 | // Globals. 
55 | // 56 | 57 | static __thread uint64_t tl_seed0; 58 | static __thread uint64_t tl_seed1; 59 | 60 | 61 | //========================================================== 62 | // Public API. 63 | // 64 | 65 | //------------------------------------------------ 66 | // Seed for system rand() call. 67 | // 68 | void 69 | rand_seed() 70 | { 71 | srand(time(NULL)); 72 | } 73 | 74 | //------------------------------------------------ 75 | // Seed a thread for generating a random sequence. 76 | // 77 | void 78 | rand_seed_thread() 79 | { 80 | tl_seed0 = ((uint64_t)rand() << 32) | (uint64_t)rand(); 81 | tl_seed1 = ((uint64_t)rand() << 32) | (uint64_t)rand(); 82 | } 83 | 84 | //------------------------------------------------ 85 | // Get a random uint32_t. 86 | // 87 | uint32_t 88 | rand_32() 89 | { 90 | return (uint32_t)xorshift128plus(); 91 | } 92 | 93 | //------------------------------------------------ 94 | // Get a random uint64_t. 95 | // 96 | uint64_t 97 | rand_64() 98 | { 99 | return xorshift128plus(); 100 | } 101 | 102 | //------------------------------------------------ 103 | // Fill a buffer with random bits. For buffers 104 | // larger than INTERVAL_SIZE, rand_pct specifies 105 | // how much to randomize. The rest is zeroed. 106 | // 107 | void 108 | rand_fill(uint8_t* p_buffer, uint32_t size, uint32_t rand_pct) 109 | { 110 | uint64_t* p_write = (uint64_t*)p_buffer; 111 | uint64_t* p_end = (uint64_t*)(p_buffer + size); 112 | // ... relies on size being a multiple of 8, which it will be. 113 | 114 | if (rand_pct < 100) { 115 | // Split writes per interval as specified by rand_pct. (Calculate 116 | // n_zeros first so rand_pct = 1 yields n_rands = 1 instead of 0.) 
117 | uint32_t n_zeros = (WRITES_PER_INTERVAL * (100 - rand_pct)) / 100; 118 | uint32_t n_rands = WRITES_PER_INTERVAL - n_zeros; 119 | 120 | for (uint32_t i = size / INTERVAL_SIZE; i != 0; i--) { 121 | for (uint32_t z = n_zeros; z != 0; z--) { 122 | *p_write++ = 0; 123 | } 124 | 125 | for (uint32_t r = n_rands; r != 0; r--) { 126 | *p_write++ = xorshift128plus(); 127 | } 128 | } 129 | } 130 | 131 | while (p_write < p_end) { 132 | *p_write++ = xorshift128plus(); 133 | } 134 | } 135 | 136 | 137 | //========================================================== 138 | // Local helpers. 139 | // 140 | 141 | //------------------------------------------------ 142 | // One step in generating a random sequence. 143 | // 144 | static inline uint64_t 145 | xorshift128plus() 146 | { 147 | uint64_t s1 = tl_seed0; 148 | uint64_t s0 = tl_seed1; 149 | 150 | tl_seed0 = s0; 151 | s1 ^= s1 << 23; 152 | tl_seed1 = s1 ^ s0 ^ (s1 >> 17) ^ (s0 >> 26); 153 | 154 | return tl_seed1 + s0; 155 | } 156 | -------------------------------------------------------------------------------- /src/common/random.h: -------------------------------------------------------------------------------- 1 | /* 2 | * random.h 3 | * 4 | * Copyright (c) 2011-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #pragma once 26 | 27 | //========================================================== 28 | // Includes. 29 | // 30 | 31 | #include 32 | 33 | 34 | //========================================================== 35 | // Public API. 36 | // 37 | 38 | void rand_seed(); 39 | void rand_seed_thread(); 40 | uint32_t rand_32(); 41 | uint64_t rand_64(); 42 | void rand_fill(uint8_t* p_buffer, uint32_t size, uint32_t rand_pct); 43 | -------------------------------------------------------------------------------- /src/common/trace.c: -------------------------------------------------------------------------------- 1 | /* 2 | * trace.c 3 | * 4 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | //========================================================== 26 | // Includes. 27 | // 28 | 29 | #include "trace.h" 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | 40 | //========================================================== 41 | // Typedefs & constants. 42 | // 43 | 44 | typedef void (*action_t)(int sig, siginfo_t* info, void* ctx); 45 | 46 | #define MAX_BACKTRACE_DEPTH 50 47 | 48 | 49 | //========================================================== 50 | // Forward declarations. 51 | // 52 | 53 | static void act_sig_handle_abort(int sig_num, siginfo_t* info, void* ctx); 54 | static void act_sig_handle_bus(int sig_num, siginfo_t* info, void* ctx); 55 | static void act_sig_handle_fpe(int sig_num, siginfo_t* info, void* ctx); 56 | static void act_sig_handle_ill(int sig_num, siginfo_t* info, void* ctx); 57 | static void act_sig_handle_segv(int sig_num, siginfo_t* info, void* ctx); 58 | 59 | static void reraise_signal(int sig_num); 60 | static void set_action(int sig_num, action_t act); 61 | static void set_handler(int sig_num, sighandler_t hand); 62 | 63 | extern char __executable_start; 64 | 65 | 66 | //========================================================== 67 | // Inlines & macros. 68 | // 69 | 70 | // Macro instead of inline just to be sure it can't affect the stack? 
71 | #define PRINT_BACKTRACE() \ 72 | do { \ 73 | void* bt[MAX_BACKTRACE_DEPTH]; \ 74 | int sz = backtrace(bt, MAX_BACKTRACE_DEPTH); \ 75 | \ 76 | char trace[MAX_BACKTRACE_DEPTH * 20]; \ 77 | int off = 0; \ 78 | \ 79 | for (int i = 0; i < sz; i++) { \ 80 | off += snprintf(trace + off, sizeof(trace) - off, " 0x%lx", \ 81 | (uint64_t)bt[i]); \ 82 | } \ 83 | \ 84 | printf("stacktrace: found %d frames:%s offset 0x%lx\n", sz, trace, \ 85 | (uint64_t)&__executable_start); \ 86 | \ 87 | char** syms = backtrace_symbols(bt, sz); \ 88 | \ 89 | if (syms != NULL) { \ 90 | for (int i = 0; i < sz; ++i) { \ 91 | printf("stacktrace: frame %d: %s\n", i, syms[i]); \ 92 | } \ 93 | } \ 94 | else { \ 95 | printf("stacktrace: found no symbols\n"); \ 96 | } \ 97 | \ 98 | fflush(stdout); \ 99 | } while (0); 100 | 101 | 102 | //========================================================== 103 | // Public API. 104 | // 105 | 106 | void 107 | signal_setup() 108 | { 109 | set_action(SIGABRT, act_sig_handle_abort); 110 | set_action(SIGBUS, act_sig_handle_bus); 111 | set_action(SIGFPE, act_sig_handle_fpe); 112 | set_action(SIGILL, act_sig_handle_ill); 113 | set_action(SIGSEGV, act_sig_handle_segv); 114 | } 115 | 116 | 117 | //========================================================== 118 | // Local helpers - signal handlers. 
119 | // 120 | 121 | static void 122 | act_sig_handle_abort(int sig_num, siginfo_t* info, void* ctx) 123 | { 124 | printf("SIGABRT received\n"); 125 | PRINT_BACKTRACE(); 126 | reraise_signal(sig_num); 127 | } 128 | 129 | static void 130 | act_sig_handle_bus(int sig_num, siginfo_t* info, void* ctx) 131 | { 132 | printf("SIGBUS received\n"); 133 | PRINT_BACKTRACE(); 134 | reraise_signal(sig_num); 135 | } 136 | 137 | static void 138 | act_sig_handle_fpe(int sig_num, siginfo_t* info, void* ctx) 139 | { 140 | printf("SIGFPE received\n"); 141 | PRINT_BACKTRACE(); 142 | reraise_signal(sig_num); 143 | } 144 | 145 | static void 146 | act_sig_handle_ill(int sig_num, siginfo_t* info, void* ctx) 147 | { 148 | printf("SIGILL received\n"); 149 | PRINT_BACKTRACE(); 150 | reraise_signal(sig_num); 151 | } 152 | 153 | static void 154 | act_sig_handle_segv(int sig_num, siginfo_t* info, void* ctx) 155 | { 156 | printf("SIGSEGV received\n"); 157 | PRINT_BACKTRACE(); 158 | reraise_signal(sig_num); 159 | } 160 | 161 | 162 | //========================================================== 163 | // Local helpers - signal handling. 164 | // 165 | 166 | static void 167 | reraise_signal(int sig_num) 168 | { 169 | set_handler(sig_num, SIG_DFL); 170 | raise(sig_num); 171 | } 172 | 173 | static void 174 | set_action(int sig_num, action_t act) 175 | { 176 | struct sigaction sa; 177 | memset(&sa, 0, sizeof(sa)); 178 | 179 | sa.sa_sigaction = act; 180 | sigemptyset(&sa.sa_mask); 181 | // SA_SIGINFO prefers sa_sigaction over sa_handler. 
182 | sa.sa_flags = SA_RESTART | SA_SIGINFO; 183 | 184 | if (sigaction(sig_num, &sa, NULL) < 0) { 185 | printf("ERROR: could not register signal handler for %d\n", sig_num); 186 | fflush(stdout); 187 | _exit(-1); 188 | } 189 | } 190 | 191 | static void 192 | set_handler(int sig_num, sighandler_t hand) 193 | { 194 | struct sigaction sa; 195 | memset(&sa, 0, sizeof(sa)); 196 | 197 | sa.sa_handler = hand; 198 | sigemptyset(&sa.sa_mask); 199 | // No SA_SIGINFO; use sa_handler. 200 | sa.sa_flags = SA_RESTART; 201 | 202 | if (sigaction(sig_num, &sa, NULL) < 0) { 203 | printf("ERROR: could not register signal handler for %d\n", sig_num); 204 | fflush(stdout); 205 | _exit(-1); 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /src/common/trace.h: -------------------------------------------------------------------------------- 1 | /* 2 | * trace.h 3 | * 4 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #pragma once 26 | 27 | //========================================================== 28 | // Includes. 29 | // 30 | 31 | #include 32 | #include 33 | 34 | 35 | //========================================================== 36 | // Public API. 37 | // 38 | 39 | void signal_setup(); 40 | 41 | // _GNU_SOURCE gives us strerror_r() which is thread-safe. 42 | #define act_strerror(err) strerror_r(err, (char*)alloca(200), 200) 43 | -------------------------------------------------------------------------------- /src/common/version.h: -------------------------------------------------------------------------------- 1 | #define VERSION "6.7" 2 | -------------------------------------------------------------------------------- /src/index/act_index.c: -------------------------------------------------------------------------------- 1 | /* 2 | * act_index.c 3 | * 4 | * Aerospike Index Certification Tool - Simulates and validates primary index 5 | * SSDs for real-time database use. 6 | * 7 | * Kevin Porter & Andrew Gooding, 2018. 8 | * 9 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved.
10 | * 11 | * Permission is hereby granted, free of charge, to any person obtaining a copy 12 | * of this software and associated documentation files (the "Software"), to deal 13 | * in the Software without restriction, including without limitation the rights 14 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 | * copies of the Software, and to permit persons to whom the Software is 16 | * furnished to do so, subject to the following conditions: 17 | * 18 | * The above copyright notice and this permission notice shall be included in 19 | * all copies or substantial portions of the Software. 20 | * 21 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 | * SOFTWARE. 28 | */ 29 | 30 | //========================================================== 31 | // Includes. 32 | // 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | 50 | #include "common/cfg.h" 51 | #include "common/clock.h" 52 | #include "common/hardware.h" 53 | #include "common/histogram.h" 54 | #include "common/io.h" 55 | #include "common/queue.h" 56 | #include "common/random.h" 57 | #include "common/trace.h" 58 | #include "common/version.h" 59 | 60 | #include "cfg_index.h" 61 | 62 | 63 | //========================================================== 64 | // Typedefs & constants. 
65 | // 66 | 67 | /* Per-device state: name, IO offset count, fd queue, read/write histograms and the tags they are dumped under. */ typedef struct device_s { 68 | const char* name; /* set in main() from g_icfg.device_names[] */ 69 | uint64_t n_io_offsets; /* modulus random_io_offset() applies before scaling by IO_SIZE */ 70 | queue* fd_q; /* queue of int elements - created in main() with queue_create(sizeof(int)) */ 71 | histogram* read_hist; 72 | histogram* write_hist; 73 | char read_hist_tag[MAX_DEVICE_NAME_SIZE + 1 + 5]; /* filled in main() as "<name>-reads" */ 74 | char write_hist_tag[MAX_DEVICE_NAME_SIZE + 1 + 6]; /* filled in main() as "<name>-writes" */ 75 | } device; 76 | 77 | /* A device plus an offset on it. NOTE(review): presumably a byte offset as produced by random_io_offset() - confirm. */ typedef struct trans_req_s { 78 | device* dev; 79 | uint64_t offset; 80 | } trans_req; 81 | 82 | /* IO_SIZE scales the offsets picked by random_io_offset() and sizes the cache thread's IO buffer. */ #define IO_SIZE 4096 83 | /* BUNDLE_SIZE is the number of read + write pairs run_cache_simulation() issues per pass of its inner loop. */ #define BUNDLE_SIZE 100 84 | 85 | 86 | //========================================================== 87 | // Forward declarations. 88 | // 89 | 90 | static void* run_cache_simulation(void* pv_unused); 91 | static void* run_service(void* pv_unused); 92 | 93 | static bool discover_device(device* dev); 94 | static void fd_close_all(device* dev); 95 | static int fd_get(device* dev); 96 | static void fd_put(device* dev, int fd); 97 | static void read_and_report(trans_req* read_req, uint8_t* buf); 98 | static void read_cache_and_report(uint8_t* buf); 99 | static uint64_t read_from_device(device* dev, uint64_t offset, uint8_t* buf); 100 | static void write_cache_and_report(uint8_t* buf); 101 | static uint64_t write_to_device(device* dev, uint64_t offset, 102 | const uint8_t* buf); 103 | 104 | 105 | //========================================================== 106 | // Globals. 107 | // 108 | 109 | /* Points at the devices[] array declared in main(). */ static device* g_devices; 110 | 111 | /* Set true by main() before it starts threads and false when the run ends; run_cache_simulation() loops while it is true. */ static volatile bool g_running; 112 | /* get_us() value taken by main() just before the run starts. */ static uint64_t g_run_start_us; 113 | 114 | /* Histograms main() dumps under the tags "reads" and "writes". */ static histogram* g_read_hist; 115 | static histogram* g_write_hist; 116 | 117 | 118 | //========================================================== 119 | // Inlines & macros.
120 | // 121 | 122 | static inline uint8_t* 123 | align_4096(const uint8_t* stack_buffer) 124 | { 125 | return (uint8_t*)(((uint64_t)stack_buffer + 4095) & ~4095ULL); 126 | } 127 | 128 | static inline uint64_t 129 | random_io_offset(const device* dev) 130 | { 131 | return (rand_64() % dev->n_io_offsets) * IO_SIZE; 132 | } 133 | 134 | static inline uint64_t 135 | safe_delta_ns(uint64_t start_ns, uint64_t stop_ns) 136 | { 137 | return start_ns > stop_ns ? 0 : stop_ns - start_ns; 138 | } 139 | 140 | 141 | //========================================================== 142 | // Main. 143 | // 144 | 145 | int 146 | main(int argc, char* argv[]) 147 | { 148 | signal_setup(); 149 | 150 | printf("\nACT version %s\n", VERSION); 151 | printf("Index device IO test\n"); 152 | printf("Copyright 2020 by Aerospike. All rights reserved.\n\n"); 153 | 154 | if (! index_configure(argc, argv)) { 155 | exit(-1); 156 | } 157 | 158 | device devices[g_icfg.num_devices]; 159 | 160 | g_devices = devices; 161 | 162 | histogram_scale scale = 163 | g_icfg.us_histograms ? HIST_MICROSECONDS : HIST_MILLISECONDS; 164 | 165 | if (! (g_read_hist = histogram_create(scale)) || 166 | ! (g_write_hist = histogram_create(scale))) { 167 | exit(-1); 168 | } 169 | 170 | for (uint32_t d = 0; d < g_icfg.num_devices; d++) { 171 | device* dev = &g_devices[d]; 172 | 173 | dev->name = (const char*)g_icfg.device_names[d]; 174 | 175 | if (! (dev->fd_q = queue_create(sizeof(int))) || 176 | ! discover_device(dev) || 177 | ! (dev->read_hist = histogram_create(scale)) || 178 | ! 
(dev->write_hist = histogram_create(scale))) { 179 | exit(-1); 180 | } 181 | 182 | sprintf(dev->read_hist_tag, "%s-reads", dev->name); 183 | sprintf(dev->write_hist_tag, "%s-writes", dev->name); 184 | } 185 | 186 | rand_seed(); 187 | 188 | g_run_start_us = get_us(); 189 | 190 | uint64_t run_stop_us = g_run_start_us + g_icfg.run_us; 191 | 192 | g_running = true; 193 | 194 | pthread_t cache_tids[g_icfg.cache_threads]; 195 | bool has_write_load = g_icfg.cache_thread_reads_and_writes_per_sec != 0; 196 | 197 | if (has_write_load) { 198 | for (uint32_t n = 0; n < g_icfg.cache_threads; n++) { 199 | if (pthread_create(&cache_tids[n], NULL, run_cache_simulation, 200 | NULL) != 0) { 201 | printf("ERROR: create cache thread\n"); 202 | exit(-1); 203 | } 204 | } 205 | } 206 | 207 | pthread_t svc_tids[g_icfg.service_threads]; 208 | 209 | for (uint32_t k = 0; k < g_icfg.service_threads; k++) { 210 | if (pthread_create(&svc_tids[k], NULL, run_service, NULL) != 0) { 211 | printf("ERROR: create service thread\n"); 212 | exit(-1); 213 | } 214 | } 215 | 216 | printf("\nHISTOGRAM NAMES\n"); 217 | 218 | printf("reads\n"); 219 | 220 | for (uint32_t d = 0; d < g_icfg.num_devices; d++) { 221 | printf("%s\n", g_devices[d].read_hist_tag); 222 | } 223 | 224 | if (has_write_load) { 225 | printf("writes\n"); 226 | 227 | for (uint32_t d = 0; d < g_icfg.num_devices; d++) { 228 | printf("%s\n", g_devices[d].write_hist_tag); 229 | } 230 | } 231 | 232 | printf("\n"); 233 | 234 | uint64_t now_us = 0; 235 | uint64_t count = 0; 236 | 237 | while (g_running && (now_us = get_us()) < run_stop_us) { 238 | count++; 239 | 240 | int64_t sleep_us = (int64_t) 241 | ((count * g_icfg.report_interval_us) - 242 | (now_us - g_run_start_us)); 243 | 244 | if (sleep_us > 0) { 245 | usleep((uint32_t)sleep_us); 246 | } 247 | 248 | printf("after %" PRIu64 " sec:\n", 249 | (count * g_icfg.report_interval_us) / 1000000); 250 | 251 | histogram_dump(g_read_hist, "reads"); 252 | 253 | for (uint32_t d = 0; d < 
g_icfg.num_devices; d++) { 254 | histogram_dump(g_devices[d].read_hist, 255 | g_devices[d].read_hist_tag); 256 | } 257 | 258 | if (has_write_load) { 259 | histogram_dump(g_write_hist, "writes"); 260 | 261 | for (uint32_t d = 0; d < g_icfg.num_devices; d++) { 262 | histogram_dump(g_devices[d].write_hist, 263 | g_devices[d].write_hist_tag); 264 | } 265 | } 266 | 267 | printf("\n"); 268 | fflush(stdout); 269 | } 270 | 271 | g_running = false; 272 | 273 | for (uint32_t k = 0; k < g_icfg.service_threads; k++) { 274 | pthread_join(svc_tids[k], NULL); 275 | } 276 | 277 | if (has_write_load) { 278 | for (uint32_t n = 0; n < g_icfg.cache_threads; n++) { 279 | pthread_join(cache_tids[n], NULL); 280 | } 281 | } 282 | 283 | for (uint32_t d = 0; d < g_icfg.num_devices; d++) { 284 | device* dev = &g_devices[d]; 285 | 286 | fd_close_all(dev); 287 | queue_destroy(dev->fd_q); 288 | free(dev->read_hist); 289 | free(dev->write_hist); 290 | } 291 | 292 | free(g_read_hist); 293 | free(g_write_hist); 294 | 295 | return 0; 296 | } 297 | 298 | 299 | //========================================================== 300 | // Local helpers - thread "run" functions. 301 | // 302 | 303 | //------------------------------------------------ 304 | // Runs in every (mmap) cache simulation thread, 305 | // does all writes, and reads that don't occur in 306 | // service threads, i.e. reads due to defrag. 307 | // 308 | static void* 309 | run_cache_simulation(void* pv_unused) 310 | { 311 | rand_seed_thread(); 312 | 313 | uint8_t stack_buffer[IO_SIZE + 4096]; 314 | uint8_t* buf = align_4096(stack_buffer); 315 | 316 | uint64_t target_factor = 317 | 1000000ull * g_icfg.num_devices * g_icfg.cache_threads; 318 | 319 | uint64_t count = 0; 320 | 321 | while (g_running) { 322 | for (uint32_t i = 0; i < BUNDLE_SIZE; i++) { 323 | read_cache_and_report(buf); 324 | write_cache_and_report(buf); 325 | } 326 | 327 | count += BUNDLE_SIZE; 328 | 329 | // TODO - someday (count * target_factor) may overflow a uint64_t. 
330 | uint64_t target_us = (count * target_factor) / 331 | g_icfg.cache_thread_reads_and_writes_per_sec; 332 | 333 | int64_t sleep_us = (int64_t)(target_us - (get_us() - g_run_start_us)); 334 | 335 | if (sleep_us > 0) { 336 | usleep((uint32_t)sleep_us); 337 | } 338 | else if (g_icfg.max_lag_usec != 0 && 339 | sleep_us < -(int64_t)g_icfg.max_lag_usec) { 340 | printf("ERROR: cache thread device IO can't keep up\n"); 341 | printf("drive(s) can't keep up - test stopped\n"); 342 | g_running = false; 343 | } 344 | } 345 | 346 | return NULL; 347 | } 348 | 349 | //------------------------------------------------ 350 | // Service threads - generate and do device reads 351 | // corresponding to read and write request index 352 | // lookups. 353 | // 354 | static void* 355 | run_service(void* pv_unused) 356 | { 357 | rand_seed_thread(); 358 | 359 | uint64_t count = 0; 360 | uint64_t reads_per_sec = 361 | g_icfg.service_thread_reads_per_sec / g_icfg.service_threads; 362 | 363 | while (g_running) { 364 | uint32_t random_dev_index = rand_32() % g_icfg.num_devices; 365 | device* random_dev = &g_devices[random_dev_index]; 366 | 367 | trans_req read_req = { 368 | .dev = random_dev, 369 | .offset = random_io_offset(random_dev) 370 | }; 371 | 372 | uint8_t stack_buffer[IO_SIZE + 4096]; 373 | uint8_t* buf = align_4096(stack_buffer); 374 | 375 | read_and_report(&read_req, buf); 376 | 377 | count++; 378 | 379 | int64_t sleep_us = (int64_t) 380 | (((count * 1000000) / reads_per_sec) - 381 | (get_us() - g_run_start_us)); 382 | 383 | if (sleep_us > 0) { 384 | usleep((uint32_t)sleep_us); 385 | } 386 | else if (g_icfg.max_lag_usec != 0 && 387 | sleep_us < -(int64_t)g_icfg.max_lag_usec) { 388 | printf("ERROR: read request generator can't keep up\n"); 389 | printf("ACT can't do requested load - test stopped\n"); 390 | printf("try configuring more 'service-threads'\n"); 391 | g_running = false; 392 | } 393 | } 394 | 395 | return NULL; 396 | } 397 | 398 | 399 | 
//========================================================== 400 | // Local helpers - generic. 401 | // 402 | 403 | //------------------------------------------------ 404 | // Discover device storage capacity, etc. 405 | // 406 | static bool 407 | discover_device(device* dev) 408 | { 409 | int fd = fd_get(dev); 410 | 411 | if (fd == -1) { 412 | return false; 413 | } 414 | 415 | uint64_t device_bytes = 0; 416 | 417 | if (g_icfg.file_size == 0) { 418 | ioctl(fd, BLKGETSIZE64, &device_bytes); 419 | } 420 | else { // undocumented file mode 421 | device_bytes = g_icfg.file_size; 422 | 423 | if (ftruncate(fd, (off_t)device_bytes) != 0) { 424 | printf("ERROR: ftruncate file %s errno %d '%s'\n", dev->name, errno, 425 | act_strerror(errno)); 426 | fd_put(dev, fd); 427 | return false; 428 | } 429 | } 430 | 431 | fd_put(dev, fd); 432 | 433 | if (device_bytes == 0) { 434 | printf("ERROR: %s ioctl to discover size\n", dev->name); 435 | return false; 436 | } 437 | 438 | printf("%s size = %" PRIu64 " bytes\n", dev->name, device_bytes); 439 | 440 | dev->n_io_offsets = device_bytes / IO_SIZE; 441 | 442 | return true; 443 | } 444 | 445 | //------------------------------------------------ 446 | // Close all file descriptors for a device. 447 | // 448 | static void 449 | fd_close_all(device* dev) 450 | { 451 | int fd; 452 | 453 | while (queue_pop(dev->fd_q, (void*)&fd)) { 454 | close(fd); 455 | } 456 | } 457 | 458 | //------------------------------------------------ 459 | // Get a safe file descriptor for a device. 460 | // 461 | static int 462 | fd_get(device* dev) 463 | { 464 | int fd = -1; 465 | 466 | if (! queue_pop(dev->fd_q, (void*)&fd)) { 467 | int direct_flags = O_DIRECT | (g_icfg.disable_odsync ? 0 : O_DSYNC); 468 | int flags = O_RDWR | (g_icfg.file_size == 0 ? 
direct_flags : O_CREAT); 469 | 470 | fd = open(dev->name, flags, S_IRUSR | S_IWUSR); 471 | 472 | if (fd == -1) { 473 | printf("ERROR: open device %s errno %d '%s'\n", dev->name, errno, 474 | act_strerror(errno)); 475 | } 476 | } 477 | 478 | return fd; 479 | } 480 | 481 | //------------------------------------------------ 482 | // Recycle a safe file descriptor for a device. 483 | // 484 | static void 485 | fd_put(device* dev, int fd) 486 | { 487 | queue_push(dev->fd_q, (void*)&fd); 488 | } 489 | 490 | //------------------------------------------------ 491 | // Do one transaction read operation and report. 492 | // 493 | static void 494 | read_and_report(trans_req* read_req, uint8_t* buf) 495 | { 496 | uint64_t start_time = get_ns(); 497 | uint64_t stop_time = read_from_device(read_req->dev, read_req->offset, buf); 498 | 499 | if (stop_time != -1) { 500 | histogram_insert_data_point(g_read_hist, 501 | safe_delta_ns(start_time, stop_time)); 502 | histogram_insert_data_point(read_req->dev->read_hist, 503 | safe_delta_ns(start_time, stop_time)); 504 | } 505 | } 506 | 507 | //------------------------------------------------ 508 | // Do one cache thread read operation and report. 509 | // 510 | static void 511 | read_cache_and_report(uint8_t* buf) 512 | { 513 | uint32_t random_device_index = rand_32() % g_icfg.num_devices; 514 | device* p_device = &g_devices[random_device_index]; 515 | uint64_t offset = random_io_offset(p_device); 516 | 517 | uint64_t start_time = get_ns(); 518 | uint64_t stop_time = read_from_device(p_device, offset, buf); 519 | 520 | if (stop_time != -1) { 521 | histogram_insert_data_point(g_read_hist, 522 | safe_delta_ns(start_time, stop_time)); 523 | histogram_insert_data_point(p_device->read_hist, 524 | safe_delta_ns(start_time, stop_time)); 525 | } 526 | } 527 | 528 | //------------------------------------------------ 529 | // Do one device read operation. 
530 | // 531 | static uint64_t 532 | read_from_device(device* dev, uint64_t offset, uint8_t* buf) 533 | { 534 | int fd = fd_get(dev); 535 | 536 | if (fd == -1) { 537 | return -1; 538 | } 539 | 540 | if (! pread_all(fd, buf, IO_SIZE, offset)) { 541 | close(fd); 542 | printf("ERROR: reading %s: %d '%s'\n", dev->name, errno, 543 | act_strerror(errno)); 544 | return -1; 545 | } 546 | 547 | uint64_t stop_ns = get_ns(); 548 | 549 | fd_put(dev, fd); 550 | 551 | return stop_ns; 552 | } 553 | 554 | //------------------------------------------------ 555 | // Do one cache thread write operation and report. 556 | // 557 | static void 558 | write_cache_and_report(uint8_t* buf) 559 | { 560 | // Salt the buffer each time. 561 | rand_fill(buf, IO_SIZE, 100); 562 | 563 | uint32_t random_device_index = rand_32() % g_icfg.num_devices; 564 | device* p_device = &g_devices[random_device_index]; 565 | uint64_t offset = random_io_offset(p_device); 566 | 567 | uint64_t start_time = get_ns(); 568 | uint64_t stop_time = write_to_device(p_device, offset, buf); 569 | 570 | if (stop_time != -1) { 571 | histogram_insert_data_point(g_write_hist, 572 | safe_delta_ns(start_time, stop_time)); 573 | histogram_insert_data_point(p_device->write_hist, 574 | safe_delta_ns(start_time, stop_time)); 575 | } 576 | } 577 | 578 | //------------------------------------------------ 579 | // Do one device write operation. 580 | // 581 | static uint64_t 582 | write_to_device(device* dev, uint64_t offset, const uint8_t* buf) 583 | { 584 | int fd = fd_get(dev); 585 | 586 | if (fd == -1) { 587 | return -1; 588 | } 589 | 590 | if (! 
pwrite_all(fd, buf, IO_SIZE, offset)) { 591 | close(fd); 592 | printf("ERROR: writing %s: %d '%s'\n", dev->name, errno, 593 | act_strerror(errno)); 594 | return -1; 595 | } 596 | 597 | uint64_t stop_ns = get_ns(); 598 | 599 | fd_put(dev, fd); 600 | 601 | return stop_ns; 602 | } 603 | -------------------------------------------------------------------------------- /src/index/cfg_index.c: -------------------------------------------------------------------------------- 1 | /* 2 | * cfg_index.c 3 | * 4 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | //========================================================== 26 | // Includes. 
27 | // 28 | 29 | #include "cfg_index.h" 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | #include "common/cfg.h" 40 | #include "common/hardware.h" 41 | #include "common/trace.h" 42 | 43 | 44 | //========================================================== 45 | // Typedefs & constants. 46 | // 47 | 48 | static const char TAG_DEVICE_NAMES[] = "device-names"; 49 | static const char TAG_FILE_SIZE_MBYTES[] = "file-size-mbytes"; 50 | static const char TAG_SERVICE_THREADS[] = "service-threads"; 51 | static const char TAG_CACHE_THREADS[] = "cache-threads"; 52 | static const char TAG_TEST_DURATION_SEC[] = "test-duration-sec"; 53 | static const char TAG_REPORT_INTERVAL_SEC[] = "report-interval-sec"; 54 | static const char TAG_MICROSECOND_HISTOGRAMS[] = "microsecond-histograms"; 55 | static const char TAG_READ_REQS_PER_SEC[] = "read-reqs-per-sec"; 56 | static const char TAG_WRITE_REQS_PER_SEC[] = "write-reqs-per-sec"; 57 | static const char TAG_REPLICATION_FACTOR[] = "replication-factor"; 58 | static const char TAG_DEFRAG_LWM_PCT[] = "defrag-lwm-pct"; 59 | static const char TAG_DISABLE_ODSYNC[] = "disable-odsync"; 60 | static const char TAG_MAX_LAG_SEC[] = "max-lag-sec"; 61 | 62 | 63 | //========================================================== 64 | // Forward declarations. 65 | // 66 | 67 | static bool check_configuration(); 68 | static bool derive_configuration(); 69 | static void echo_configuration(); 70 | 71 | 72 | //========================================================== 73 | // Globals. 74 | // 75 | 76 | // Configuration instance, showing non-zero defaults. 77 | index_cfg g_icfg = { 78 | .cache_threads = 8, 79 | .report_interval_us = 1000000, 80 | .replication_factor = 1, 81 | .defrag_lwm_pct = 50, 82 | .max_lag_usec = 1000000 * 10 83 | }; 84 | 85 | 86 | //========================================================== 87 | // Public API. 
88 | // 89 | 90 | bool 91 | index_configure(int argc, char* argv[]) 92 | { 93 | if (argc != 2) { 94 | printf("usage: act_index [config filename]\n"); 95 | return false; 96 | } 97 | 98 | FILE* config_file = fopen(argv[1], "r"); 99 | 100 | if (config_file == NULL) { 101 | printf("ERROR: couldn't open config file %s errno %d '%s'\n", argv[1], 102 | errno, act_strerror(errno)); 103 | return false; 104 | } 105 | 106 | char line[1024]; 107 | 108 | while (fgets(line, sizeof(line), config_file) != NULL) { 109 | char* comment = strchr(line, '#'); 110 | 111 | if (comment != NULL) { 112 | *comment = '\0'; 113 | } 114 | 115 | const char* tag = strtok(line, ":" WHITE_SPACE); 116 | 117 | if (tag == NULL) { 118 | continue; 119 | } 120 | 121 | if (strcmp(tag, TAG_DEVICE_NAMES) == 0) { 122 | parse_device_names(MAX_NUM_INDEX_DEVICES, g_icfg.device_names, 123 | &g_icfg.num_devices); 124 | } 125 | else if (strcmp(tag, TAG_FILE_SIZE_MBYTES) == 0) { 126 | g_icfg.file_size = (uint64_t)parse_uint32() << 20; 127 | } 128 | else if (strcmp(tag, TAG_SERVICE_THREADS) == 0) { 129 | g_icfg.service_threads = parse_uint32(); 130 | } 131 | else if (strcmp(tag, TAG_CACHE_THREADS) == 0) { 132 | g_icfg.cache_threads = parse_uint32(); 133 | } 134 | else if (strcmp(tag, TAG_TEST_DURATION_SEC) == 0) { 135 | g_icfg.run_us = (uint64_t)parse_uint32() * 1000000; 136 | } 137 | else if (strcmp(tag, TAG_REPORT_INTERVAL_SEC) == 0) { 138 | g_icfg.report_interval_us = (uint64_t)parse_uint32() * 1000000; 139 | } 140 | else if (strcmp(tag, TAG_MICROSECOND_HISTOGRAMS) == 0) { 141 | g_icfg.us_histograms = parse_yes_no(); 142 | } 143 | else if (strcmp(tag, TAG_READ_REQS_PER_SEC) == 0) { 144 | g_icfg.read_reqs_per_sec = parse_uint32(); 145 | } 146 | else if (strcmp(tag, TAG_WRITE_REQS_PER_SEC) == 0) { 147 | g_icfg.write_reqs_per_sec = parse_uint32(); 148 | } 149 | else if (strcmp(tag, TAG_REPLICATION_FACTOR) == 0) { 150 | g_icfg.replication_factor = parse_uint32(); 151 | } 152 | else if (strcmp(tag, TAG_DEFRAG_LWM_PCT) 
== 0) { 153 | g_icfg.defrag_lwm_pct = parse_uint32(); 154 | } 155 | else if (strcmp(tag, TAG_DISABLE_ODSYNC) == 0) { 156 | g_icfg.disable_odsync = parse_yes_no(); 157 | } 158 | else if (strcmp(tag, TAG_MAX_LAG_SEC) == 0) { 159 | g_icfg.max_lag_usec = (uint64_t)parse_uint32() * 1000000; 160 | } 161 | else { 162 | printf("ERROR: ignoring unknown config item '%s'\n", tag); 163 | return false; 164 | } 165 | } 166 | 167 | fclose(config_file); 168 | 169 | if (! check_configuration() || ! derive_configuration()) { 170 | return false; 171 | } 172 | 173 | echo_configuration(); 174 | 175 | return true; 176 | } 177 | 178 | 179 | //========================================================== 180 | // Local helpers. 181 | // 182 | 183 | static bool 184 | check_configuration() 185 | { 186 | if (g_icfg.num_devices == 0) { 187 | configuration_error(TAG_DEVICE_NAMES); 188 | return false; 189 | } 190 | 191 | if (g_icfg.service_threads == 0 && 192 | (g_icfg.service_threads = 5 * num_cpus()) == 0) { 193 | configuration_error(TAG_SERVICE_THREADS); 194 | return false; 195 | } 196 | 197 | if (g_icfg.cache_threads == 0) { 198 | configuration_error(TAG_CACHE_THREADS); 199 | return false; 200 | } 201 | 202 | if (g_icfg.run_us == 0) { 203 | configuration_error(TAG_TEST_DURATION_SEC); 204 | return false; 205 | } 206 | 207 | if (g_icfg.report_interval_us == 0) { 208 | configuration_error(TAG_REPORT_INTERVAL_SEC); 209 | return false; 210 | } 211 | 212 | if (g_icfg.replication_factor == 0) { 213 | configuration_error(TAG_REPLICATION_FACTOR); 214 | return false; 215 | } 216 | 217 | if (g_icfg.defrag_lwm_pct >= 100) { 218 | configuration_error(TAG_DEFRAG_LWM_PCT); 219 | return false; 220 | } 221 | 222 | return true; 223 | } 224 | 225 | static bool 226 | derive_configuration() 227 | { 228 | if (g_icfg.read_reqs_per_sec + g_icfg.write_reqs_per_sec == 0) { 229 | printf("ERROR: %s and %s can't both be zero\n", TAG_READ_REQS_PER_SEC, 230 | TAG_WRITE_REQS_PER_SEC); 231 | return false; 232 | } 233 | 234 | 
// 'replication-factor' > 1 causes replica writes. 235 | uint32_t effective_write_reqs_per_sec = 236 | g_icfg.replication_factor * g_icfg.write_reqs_per_sec; 237 | 238 | // On the service threads, we'll have 1 4K device read per read request, and 239 | // 1 4K device read per write request (including replica writes). 240 | g_icfg.service_thread_reads_per_sec = 241 | g_icfg.read_reqs_per_sec + effective_write_reqs_per_sec; 242 | 243 | // Load must be enough to calculate service thread rates safely. 244 | if (g_icfg.service_thread_reads_per_sec / g_icfg.service_threads == 0) { 245 | printf("ERROR: load config too small\n"); 246 | return false; 247 | } 248 | 249 | // On the cache threads, we'll have extra 4K device reads per write request 250 | // due to defrag. We'll also have 1 4K device write per write request, plus 251 | // extras due to defrag. The total 4K device writes is equal to the extra 252 | // 4K device reads (really!), so just keep one number for both. 253 | double cache_thread_reads_and_writes_per_write = 254 | 100.0 / (double)(100 - g_icfg.defrag_lwm_pct); 255 | // For example: 256 | // defrag-lwm-pct = 50: r/w-per-write = 100/(100 - 50) = 2.0 (default) 257 | // defrag-lwm-pct = 60: r/w-per-write = 100/(100 - 60) = 2.5 258 | // defrag-lwm-pct = 40: r/w-per-write = 100/(100 - 40) = 1.666... 
259 | 260 | g_icfg.cache_thread_reads_and_writes_per_sec = 261 | effective_write_reqs_per_sec * 262 | cache_thread_reads_and_writes_per_write; 263 | 264 | return true; 265 | } 266 | 267 | static void 268 | echo_configuration() 269 | { 270 | printf("ACT-INDEX CONFIGURATION\n"); 271 | 272 | printf("%s:", TAG_DEVICE_NAMES); 273 | 274 | for (uint32_t d = 0; d < g_icfg.num_devices; d++) { 275 | printf(" %s", g_icfg.device_names[d]); 276 | } 277 | 278 | printf("\nnum-devices: %" PRIu32 "\n", g_icfg.num_devices); 279 | 280 | if (g_icfg.file_size != 0) { // undocumented - don't always expose 281 | printf("%s: %" PRIu64 "\n", TAG_FILE_SIZE_MBYTES, 282 | g_icfg.file_size >> 20); 283 | } 284 | 285 | printf("%s: %" PRIu32 "\n", TAG_SERVICE_THREADS, 286 | g_icfg.service_threads); 287 | printf("%s: %" PRIu32 "\n", TAG_CACHE_THREADS, 288 | g_icfg.cache_threads); 289 | printf("%s: %" PRIu64 "\n", TAG_TEST_DURATION_SEC, 290 | g_icfg.run_us / 1000000); 291 | printf("%s: %" PRIu64 "\n", TAG_REPORT_INTERVAL_SEC, 292 | g_icfg.report_interval_us / 1000000); 293 | printf("%s: %s\n", TAG_MICROSECOND_HISTOGRAMS, 294 | g_icfg.us_histograms ? "yes" : "no"); 295 | printf("%s: %" PRIu32 "\n", TAG_READ_REQS_PER_SEC, 296 | g_icfg.read_reqs_per_sec); 297 | printf("%s: %" PRIu32 "\n", TAG_WRITE_REQS_PER_SEC, 298 | g_icfg.write_reqs_per_sec); 299 | printf("%s: %" PRIu32 "\n", TAG_REPLICATION_FACTOR, 300 | g_icfg.replication_factor); 301 | printf("%s: %" PRIu32 "\n", TAG_DEFRAG_LWM_PCT, 302 | g_icfg.defrag_lwm_pct); 303 | printf("%s: %s\n", TAG_DISABLE_ODSYNC, 304 | g_icfg.disable_odsync ? 
"yes" : "no"); 305 | printf("%s: %" PRIu64 "\n", TAG_MAX_LAG_SEC, 306 | g_icfg.max_lag_usec / 1000000); 307 | 308 | printf("\nDERIVED CONFIGURATION\n"); 309 | 310 | printf("service-thread-reads-per-sec: %" PRIu64 "\n", 311 | g_icfg.service_thread_reads_per_sec); 312 | printf("cache-thread-reads-and-writes-per-sec: %" PRIu64 "\n", 313 | g_icfg.cache_thread_reads_and_writes_per_sec); 314 | 315 | printf("\n"); 316 | } 317 | -------------------------------------------------------------------------------- /src/index/cfg_index.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cgf_index.h 3 | * 4 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | #pragma once 26 | 27 | //========================================================== 28 | // Includes. 
29 | // 30 | 31 | #include 32 | #include 33 | 34 | #include "common/cfg.h" 35 | 36 | 37 | //========================================================== 38 | // Typedefs & constants. 39 | // 40 | 41 | #define MAX_NUM_INDEX_DEVICES 16 42 | 43 | typedef struct index_cfg_s { 44 | char device_names[MAX_NUM_INDEX_DEVICES][MAX_DEVICE_NAME_SIZE]; 45 | uint32_t num_devices; // derived by counting device names 46 | uint64_t file_size; // undocumented feature - use files 47 | uint32_t service_threads; 48 | uint32_t cache_threads; 49 | uint64_t run_us; // converted from literal units in seconds 50 | uint64_t report_interval_us; // converted from literal units in seconds 51 | bool us_histograms; 52 | uint32_t read_reqs_per_sec; 53 | uint32_t write_reqs_per_sec; 54 | uint32_t replication_factor; 55 | uint32_t defrag_lwm_pct; 56 | bool disable_odsync; 57 | uint64_t max_lag_usec; // converted from literal units in seconds 58 | 59 | // Derived from literal configuration: 60 | uint64_t service_thread_reads_per_sec; 61 | uint64_t cache_thread_reads_and_writes_per_sec; 62 | } index_cfg; 63 | 64 | 65 | //========================================================== 66 | // Globals. 67 | // 68 | 69 | extern index_cfg g_icfg; 70 | 71 | 72 | //========================================================== 73 | // Public API. 74 | // 75 | 76 | bool index_configure(int argc, char* argv[]); 77 | -------------------------------------------------------------------------------- /src/prep/act_prep.c: -------------------------------------------------------------------------------- 1 | /* 2 | * act_prep.c 3 | * 4 | * Copyright (c) 2011-2020 Aerospike, Inc. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | //========================================================== 26 | // Includes. 27 | // 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | #include "common/hardware.h" 45 | #include "common/io.h" 46 | #include "common/random.h" 47 | #include "common/trace.h" 48 | 49 | 50 | //========================================================== 51 | // Typedefs & constants. 52 | // 53 | 54 | #define NUM_SALT_THREADS 8 55 | #define NUM_ZERO_THREADS 8 56 | #define LARGE_BLOCK_BYTES (1024 * 128) 57 | 58 | 59 | //========================================================== 60 | // Forward declarations. 
61 | // 62 | 63 | static void* run_salt(void* pv_n); 64 | static void* run_zero(void* pv_n); 65 | 66 | static uint8_t* act_valloc(size_t size); 67 | static bool create_zero_buffer(); 68 | static bool discover_num_blocks(); 69 | 70 | 71 | //========================================================== 72 | // Globals. 73 | // 74 | 75 | static char* g_device_name = NULL; 76 | static uint64_t g_num_large_blocks = 0; 77 | static uint8_t* g_p_zero_buffer = NULL; 78 | static uint64_t g_blocks_per_salt_thread = 0; 79 | static uint64_t g_blocks_per_zero_thread = 0; 80 | static uint64_t g_extra_blocks_to_zero = 0; 81 | static uint64_t g_extra_blocks_to_salt = 0; 82 | static uint64_t g_extra_bytes_to_zero = 0; 83 | 84 | 85 | //========================================================== 86 | // Inlines & macros. 87 | // 88 | 89 | static inline int 90 | fd_get() 91 | { 92 | // Note - not bothering to set O_DSYNC. Rigor is unnecessary for salting, 93 | // and we're not trying to measure performance here - just go fast. 94 | return open(g_device_name, O_DIRECT | O_RDWR, S_IRUSR | S_IWUSR); 95 | } 96 | 97 | 98 | //========================================================== 99 | // Main. 100 | // 101 | 102 | int 103 | main(int argc, char* argv[]) 104 | { 105 | signal_setup(); 106 | 107 | if (argc != 2) { 108 | printf("usage: act_prep [device name]\n"); 109 | exit(0); 110 | } 111 | 112 | char device_name[strlen(argv[1]) + 1]; 113 | 114 | strcpy(device_name, argv[1]); 115 | g_device_name = device_name; 116 | 117 | if (! discover_num_blocks()) { 118 | exit(-1); 119 | } 120 | 121 | //------------------------ 122 | // Begin zeroing. 123 | 124 | printf("cleaning device %s\n", g_device_name); 125 | 126 | if (! 
create_zero_buffer()) { 127 | exit(-1); 128 | } 129 | 130 | pthread_t zero_threads[NUM_ZERO_THREADS]; 131 | 132 | for (uint32_t n = 0; n < NUM_ZERO_THREADS; n++) { 133 | if (pthread_create(&zero_threads[n], NULL, run_zero, 134 | (void*)(uint64_t)n) != 0) { 135 | printf("ERROR: creating zero thread %" PRIu32 "\n", n); 136 | exit(-1); 137 | } 138 | } 139 | 140 | for (uint32_t n = 0; n < NUM_ZERO_THREADS; n++) { 141 | pthread_join(zero_threads[n], NULL); 142 | } 143 | 144 | free(g_p_zero_buffer); 145 | 146 | //------------------------ 147 | // Begin salting. 148 | 149 | printf("salting device %s\n", g_device_name); 150 | 151 | rand_seed(); 152 | 153 | pthread_t salt_threads[NUM_SALT_THREADS]; 154 | 155 | for (uint32_t n = 0; n < NUM_SALT_THREADS; n++) { 156 | if (pthread_create(&salt_threads[n], NULL, run_salt, 157 | (void*)(uint64_t)n) != 0) { 158 | printf("ERROR: creating salt thread %" PRIu32 "\n", n); 159 | exit(-1); 160 | } 161 | } 162 | 163 | for (uint32_t n = 0; n < NUM_SALT_THREADS; n++) { 164 | pthread_join(salt_threads[n], NULL); 165 | } 166 | 167 | return 0; 168 | } 169 | 170 | 171 | //========================================================== 172 | // Local helpers - thread "run" functions. 173 | // 174 | 175 | //------------------------------------------------ 176 | // Runs in all (NUM_SALT_THREADS) salt_threads, 177 | // salts a portion of the device. 178 | // 179 | static void* 180 | run_salt(void* pv_n) 181 | { 182 | rand_seed_thread(); 183 | 184 | uint32_t n = (uint32_t)(uint64_t)pv_n; 185 | 186 | uint64_t offset = n * g_blocks_per_salt_thread * LARGE_BLOCK_BYTES; 187 | uint64_t blocks_to_salt = g_blocks_per_salt_thread; 188 | uint64_t progress_blocks = 0; 189 | bool last_thread = n + 1 == NUM_SALT_THREADS; 190 | 191 | if (last_thread) { 192 | blocks_to_salt += g_extra_blocks_to_salt; 193 | progress_blocks = blocks_to_salt / 100; 194 | 195 | if (! 
progress_blocks) { 196 | progress_blocks = 1; 197 | } 198 | } 199 | 200 | // printf("thread %d: blks-to-salt = %" PRIu64 ", prg-blks = %" PRIu64 "\n", n, 201 | // blocks_to_salt, progress_blocks); 202 | 203 | uint8_t* buf = act_valloc(LARGE_BLOCK_BYTES); 204 | 205 | if (! buf) { 206 | printf("ERROR: valloc in salt thread %" PRIu32 "\n", n); 207 | return NULL; 208 | } 209 | 210 | int fd = fd_get(); 211 | 212 | if (fd == -1) { 213 | printf("ERROR: open in salt thread %" PRIu32 "\n", n); 214 | free(buf); 215 | return NULL; 216 | } 217 | 218 | if (lseek(fd, offset, SEEK_SET) != offset) { 219 | printf("ERROR: seek in salt thread %" PRIu32 "\n", n); 220 | close(fd); 221 | free(buf); 222 | return NULL; 223 | } 224 | 225 | for (uint64_t b = 0; b < blocks_to_salt; b++) { 226 | rand_fill(buf, LARGE_BLOCK_BYTES, 100); 227 | 228 | if (! write_all(fd, buf, LARGE_BLOCK_BYTES)) { 229 | printf("ERROR: write in salt thread %" PRIu32 "\n", n); 230 | break; 231 | } 232 | 233 | if (progress_blocks && ! (b % progress_blocks)) { 234 | printf("."); 235 | fflush(stdout); 236 | } 237 | } 238 | 239 | if (progress_blocks) { 240 | printf("\n"); 241 | } 242 | 243 | close(fd); 244 | free(buf); 245 | 246 | return NULL; 247 | } 248 | 249 | //------------------------------------------------ 250 | // Runs in all (NUM_ZERO_THREADS) zero_threads, 251 | // zeros a portion of the device. 252 | // 253 | static void* 254 | run_zero(void* pv_n) 255 | { 256 | uint32_t n = (uint32_t)(uint64_t)pv_n; 257 | 258 | uint64_t offset = n * g_blocks_per_zero_thread * LARGE_BLOCK_BYTES; 259 | uint64_t blocks_to_zero = g_blocks_per_zero_thread; 260 | uint64_t progress_blocks = 0; 261 | bool last_thread = n + 1 == NUM_ZERO_THREADS; 262 | 263 | if (last_thread) { 264 | blocks_to_zero += g_extra_blocks_to_zero; 265 | progress_blocks = blocks_to_zero / 100; 266 | 267 | if (! 
progress_blocks) { 268 | progress_blocks = 1; 269 | } 270 | } 271 | 272 | // printf("thread %d: blks-to-zero = %" PRIu64 ", prg-blks = %" PRIu64 "\n", n, 273 | // blocks_to_zero, progress_blocks); 274 | 275 | int fd = fd_get(); 276 | 277 | if (fd == -1) { 278 | printf("ERROR: open in zero thread %" PRIu32 "\n", n); 279 | return NULL; 280 | } 281 | 282 | if (lseek(fd, offset, SEEK_SET) != offset) { 283 | printf("ERROR: seek in zero thread %" PRIu32 "\n", n); 284 | close(fd); 285 | return NULL; 286 | } 287 | 288 | for (uint64_t b = 0; b < blocks_to_zero; b++) { 289 | if (! write_all(fd, g_p_zero_buffer, LARGE_BLOCK_BYTES)) { 290 | printf("ERROR: write in zero thread %" PRIu32 "\n", n); 291 | break; 292 | } 293 | 294 | if (progress_blocks && ! (b % progress_blocks)) { 295 | printf("."); 296 | fflush(stdout); 297 | } 298 | } 299 | 300 | if (progress_blocks) { 301 | printf("\n"); 302 | } 303 | 304 | if (last_thread) { 305 | if (! write_all(fd, g_p_zero_buffer, g_extra_bytes_to_zero)) { 306 | printf("ERROR: write in zero thread %" PRIu32 "\n", n); 307 | } 308 | } 309 | 310 | close(fd); 311 | 312 | return NULL; 313 | } 314 | 315 | 316 | //========================================================== 317 | // Local helpers - generic. 318 | // 319 | 320 | //------------------------------------------------ 321 | // Aligned memory allocation. 322 | // 323 | static uint8_t* 324 | act_valloc(size_t size) 325 | { 326 | void* pv; 327 | 328 | return posix_memalign(&pv, 4096, size) == 0 ? (uint8_t*)pv : NULL; 329 | } 330 | 331 | //------------------------------------------------ 332 | // Allocate and zero one large block sized buffer. 333 | // 334 | static bool 335 | create_zero_buffer() 336 | { 337 | g_p_zero_buffer = act_valloc(LARGE_BLOCK_BYTES); 338 | 339 | if (! 
g_p_zero_buffer) { 340 | printf("ERROR: zero buffer act_valloc()\n"); 341 | return false; 342 | } 343 | 344 | memset(g_p_zero_buffer, 0, LARGE_BLOCK_BYTES); 345 | 346 | return true; 347 | } 348 | 349 | //------------------------------------------------ 350 | // Discover device storage capacity. 351 | // 352 | static bool 353 | discover_num_blocks() 354 | { 355 | int fd = fd_get(); 356 | 357 | if (fd == -1) { 358 | printf("ERROR: opening device %s\n", g_device_name); 359 | return false; 360 | } 361 | 362 | uint64_t device_bytes = 0; 363 | 364 | ioctl(fd, BLKGETSIZE64, &device_bytes); 365 | close(fd); 366 | 367 | g_num_large_blocks = device_bytes / LARGE_BLOCK_BYTES; 368 | g_extra_bytes_to_zero = device_bytes % LARGE_BLOCK_BYTES; 369 | 370 | g_blocks_per_zero_thread = g_num_large_blocks / NUM_ZERO_THREADS; 371 | g_blocks_per_salt_thread = g_num_large_blocks / NUM_SALT_THREADS; 372 | 373 | g_extra_blocks_to_zero = g_num_large_blocks % NUM_ZERO_THREADS; 374 | g_extra_blocks_to_salt = g_num_large_blocks % NUM_SALT_THREADS; 375 | 376 | printf("%s size = %" PRIu64 " bytes, %" PRIu64 " large blocks\n", 377 | g_device_name, device_bytes, g_num_large_blocks); 378 | 379 | return true; 380 | } 381 | -------------------------------------------------------------------------------- /src/storage/act_storage.c: -------------------------------------------------------------------------------- 1 | /* 2 | * act_storage.c 3 | * 4 | * Aerospike Certification Tool - Simulates and validates SSDs for real-time 5 | * database use. 6 | * 7 | * Joey Shurtleff & Andrew Gooding, 2011. 8 | * 9 | * Copyright (c) 2011-2020 Aerospike, Inc. All rights reserved. 
10 | * 11 | * Permission is hereby granted, free of charge, to any person obtaining a copy 12 | * of this software and associated documentation files (the "Software"), to deal 13 | * in the Software without restriction, including without limitation the rights 14 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 | * copies of the Software, and to permit persons to whom the Software is 16 | * furnished to do so, subject to the following conditions: 17 | * 18 | * The above copyright notice and this permission notice shall be included in 19 | * all copies or substantial portions of the Software. 20 | * 21 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 | * SOFTWARE. 28 | */ 29 | 30 | //========================================================== 31 | // Includes. 32 | // 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | 50 | #include "common/cfg.h" 51 | #include "common/clock.h" 52 | #include "common/hardware.h" 53 | #include "common/histogram.h" 54 | #include "common/io.h" 55 | #include "common/queue.h" 56 | #include "common/random.h" 57 | #include "common/trace.h" 58 | #include "common/version.h" 59 | 60 | #include "cfg_storage.h" 61 | 62 | 63 | //========================================================== 64 | // Typedefs & constants. 
65 | // 66 | 67 | typedef struct device_s { 68 | const char* name; 69 | uint64_t n_large_blocks; 70 | uint64_t n_read_offsets; 71 | uint64_t n_write_offsets; 72 | uint32_t min_op_bytes; 73 | uint32_t min_commit_bytes; 74 | uint32_t read_bytes; 75 | uint32_t write_bytes; 76 | uint32_t n_read_sizes; 77 | uint32_t n_write_sizes; 78 | queue* fd_q; 79 | pthread_t large_block_read_thread; 80 | pthread_t large_block_write_thread; 81 | pthread_t tomb_raider_thread; 82 | histogram* read_hist; 83 | histogram* write_hist; 84 | char read_hist_tag[MAX_DEVICE_NAME_SIZE + 1 + 5]; 85 | char write_hist_tag[MAX_DEVICE_NAME_SIZE + 1 + 6]; 86 | } device; 87 | 88 | typedef struct trans_req_s { 89 | device* dev; 90 | uint64_t offset; 91 | uint32_t size; 92 | } trans_req; 93 | 94 | #define SPLIT_RESOLUTION (1024 * 1024) 95 | 96 | #define LO_IO_MIN_SIZE 512 97 | #define HI_IO_MIN_SIZE 4096 98 | 99 | 100 | //========================================================== 101 | // Forward declarations. 102 | // 103 | 104 | static void* run_service(void* pv_unused); 105 | static void* run_large_block_reads(void* pv_dev); 106 | static void* run_large_block_writes(void* pv_dev); 107 | static void* run_tomb_raider(void* pv_dev); 108 | 109 | static uint8_t* act_valloc(size_t size); 110 | static bool discover_device(device* dev); 111 | static uint64_t discover_min_op_bytes(int fd, const char* name); 112 | static void discover_read_pattern(device* dev); 113 | static void discover_write_pattern(device* dev); 114 | static void fd_close_all(device* dev); 115 | static int fd_get(device* dev); 116 | static void fd_put(device* dev, int fd); 117 | static void read_and_report(trans_req* read_req, uint8_t* buf); 118 | static void read_and_report_large_block(device* dev, uint8_t* buf); 119 | static uint64_t read_from_device(device* dev, uint64_t offset, uint32_t size, 120 | uint8_t* buf); 121 | static void write_and_report(trans_req* write_req, uint8_t* buf); 122 | static void 
write_and_report_large_block(device* dev, uint8_t* buf, 123 | uint64_t count); 124 | static uint64_t write_to_device(device* dev, uint64_t offset, uint32_t size, 125 | const uint8_t* buf); 126 | 127 | 128 | //========================================================== 129 | // Globals. 130 | // 131 | 132 | static device* g_devices; 133 | 134 | static volatile bool g_running; 135 | static uint64_t g_run_start_us; 136 | 137 | static histogram* g_large_block_read_hist; 138 | static histogram* g_large_block_write_hist; 139 | static histogram* g_read_hist; 140 | static histogram* g_write_hist; 141 | 142 | 143 | //========================================================== 144 | // Inlines & macros. 145 | // 146 | 147 | static inline uint8_t* 148 | align_4096(const uint8_t* stack_buffer) 149 | { 150 | return (uint8_t*)(((uint64_t)stack_buffer + 4095) & ~4095ULL); 151 | } 152 | 153 | static inline uint64_t 154 | random_large_block_offset(const device* dev) 155 | { 156 | return (rand_64() % dev->n_large_blocks) * g_scfg.large_block_ops_bytes; 157 | } 158 | 159 | static inline uint64_t 160 | random_read_offset(const device* dev) 161 | { 162 | return (rand_64() % dev->n_read_offsets) * dev->min_op_bytes; 163 | } 164 | 165 | static inline uint32_t 166 | random_read_size(const device* dev) 167 | { 168 | if (dev->n_read_sizes == 1) { 169 | return dev->read_bytes; 170 | } 171 | 172 | return dev->read_bytes + 173 | (dev->min_op_bytes * (rand_32() % dev->n_read_sizes)); 174 | } 175 | 176 | static inline uint64_t 177 | random_write_offset(const device* dev) 178 | { 179 | return (rand_64() % dev->n_write_offsets) * dev->min_commit_bytes; 180 | } 181 | 182 | static inline uint32_t 183 | random_write_size(const device* dev) 184 | { 185 | if (dev->n_write_sizes == 1) { 186 | return dev->write_bytes; 187 | } 188 | 189 | return dev->write_bytes + 190 | (dev->min_commit_bytes * (rand_32() % dev->n_write_sizes)); 191 | } 192 | 193 | static inline uint64_t 194 | safe_delta_ns(uint64_t 
start_ns, uint64_t stop_ns) 195 | { 196 | return start_ns > stop_ns ? 0 : stop_ns - start_ns; 197 | } 198 | 199 | 200 | //========================================================== 201 | // Main. 202 | // 203 | 204 | int 205 | main(int argc, char* argv[]) 206 | { 207 | signal_setup(); 208 | 209 | printf("\nACT version %s\n", VERSION); 210 | printf("Storage device IO test\n"); 211 | printf("Copyright 2020 by Aerospike. All rights reserved.\n\n"); 212 | 213 | if (! storage_configure(argc, argv)) { 214 | exit(-1); 215 | } 216 | 217 | device devices[g_scfg.num_devices]; 218 | 219 | g_devices = devices; 220 | 221 | histogram_scale scale = 222 | g_scfg.us_histograms ? HIST_MICROSECONDS : HIST_MILLISECONDS; 223 | 224 | if (! (g_large_block_read_hist = histogram_create(scale)) || 225 | ! (g_large_block_write_hist = histogram_create(scale)) || 226 | ! (g_read_hist = histogram_create(scale)) || 227 | ! (g_write_hist = histogram_create(scale))) { 228 | exit(-1); 229 | } 230 | 231 | for (uint32_t n = 0; n < g_scfg.num_devices; n++) { 232 | device* dev = &g_devices[n]; 233 | 234 | dev->name = (const char*)g_scfg.device_names[n]; 235 | 236 | if (! (dev->fd_q = queue_create(sizeof(int))) || 237 | ! discover_device(dev) || 238 | ! (dev->read_hist = histogram_create(scale)) || 239 | ! (dev->write_hist = histogram_create(scale))) { 240 | exit(-1); 241 | } 242 | 243 | sprintf(dev->read_hist_tag, "%s-reads", dev->name); 244 | sprintf(dev->write_hist_tag, "%s-writes", dev->name); 245 | } 246 | 247 | rand_seed(); 248 | 249 | g_run_start_us = get_us(); 250 | 251 | uint64_t run_stop_us = g_run_start_us + g_scfg.run_us; 252 | 253 | g_running = true; 254 | 255 | if (g_scfg.write_reqs_per_sec != 0) { 256 | for (uint32_t n = 0; n < g_scfg.num_devices; n++) { 257 | device* dev = &g_devices[n]; 258 | 259 | if (! 
g_scfg.no_defrag_reads && 260 | pthread_create(&dev->large_block_read_thread, NULL, 261 | run_large_block_reads, (void*)dev) != 0) { 262 | printf("ERROR: create large op read thread\n"); 263 | exit(-1); 264 | } 265 | 266 | if (pthread_create(&dev->large_block_write_thread, NULL, 267 | run_large_block_writes, (void*)dev) != 0) { 268 | printf("ERROR: create large op write thread\n"); 269 | exit(-1); 270 | } 271 | } 272 | } 273 | 274 | if (g_scfg.tomb_raider) { 275 | for (uint32_t n = 0; n < g_scfg.num_devices; n++) { 276 | device* dev = &g_devices[n]; 277 | 278 | if (pthread_create(&dev->tomb_raider_thread, NULL, 279 | run_tomb_raider, (void*)dev) != 0) { 280 | printf("ERROR: create tomb raider thread\n"); 281 | exit(-1); 282 | } 283 | } 284 | } 285 | 286 | // Yes, it's ok to run with only large-block operations. 287 | bool do_transactions = 288 | g_scfg.internal_read_reqs_per_sec + 289 | g_scfg.internal_write_reqs_per_sec != 0; 290 | 291 | pthread_t svc_tids[g_scfg.service_threads]; 292 | 293 | if (do_transactions) { 294 | for (uint32_t k = 0; k < g_scfg.service_threads; k++) { 295 | if (pthread_create(&svc_tids[k], NULL, run_service, NULL) != 0) { 296 | printf("ERROR: create service thread\n"); 297 | exit(-1); 298 | } 299 | } 300 | } 301 | 302 | // Equivalent: g_scfg.internal_read_reqs_per_sec != 0. 303 | bool do_reads = g_scfg.read_reqs_per_sec != 0; 304 | 305 | // Equivalent: g_scfg.internal_write_reqs_per_sec != 0. 
306 | bool do_commits = g_scfg.commit_to_device && g_scfg.write_reqs_per_sec != 0; 307 | 308 | printf("\nHISTOGRAM NAMES\n"); 309 | 310 | if (do_reads) { 311 | printf("reads\n"); 312 | 313 | for (uint32_t d = 0; d < g_scfg.num_devices; d++) { 314 | printf("%s\n", g_devices[d].read_hist_tag); 315 | } 316 | } 317 | 318 | if (g_scfg.write_reqs_per_sec != 0) { 319 | printf("large-block-reads\n"); 320 | printf("large-block-writes\n"); 321 | } 322 | 323 | if (do_commits) { 324 | printf("writes\n"); 325 | 326 | for (uint32_t d = 0; d < g_scfg.num_devices; d++) { 327 | printf("%s\n", g_devices[d].write_hist_tag); 328 | } 329 | } 330 | 331 | printf("\n"); 332 | 333 | uint64_t now_us = 0; 334 | uint64_t count = 0; 335 | 336 | while (g_running && (now_us = get_us()) < run_stop_us) { 337 | count++; 338 | 339 | int64_t sleep_us = (int64_t) 340 | ((count * g_scfg.report_interval_us) - 341 | (now_us - g_run_start_us)); 342 | 343 | if (sleep_us > 0) { 344 | usleep((uint32_t)sleep_us); 345 | } 346 | 347 | printf("after %" PRIu64 " sec:\n", 348 | (count * g_scfg.report_interval_us) / 1000000); 349 | 350 | if (do_reads) { 351 | histogram_dump(g_read_hist, "reads"); 352 | 353 | for (uint32_t d = 0; d < g_scfg.num_devices; d++) { 354 | device* dev = &g_devices[d]; 355 | 356 | histogram_dump(dev->read_hist, dev->read_hist_tag); 357 | } 358 | } 359 | 360 | if (g_scfg.write_reqs_per_sec != 0) { 361 | histogram_dump(g_large_block_read_hist, "large-block-reads"); 362 | histogram_dump(g_large_block_write_hist, "large-block-writes"); 363 | } 364 | 365 | if (do_commits) { 366 | histogram_dump(g_write_hist, "writes"); 367 | 368 | for (uint32_t d = 0; d < g_scfg.num_devices; d++) { 369 | device* dev = &g_devices[d]; 370 | 371 | histogram_dump(dev->write_hist, dev->write_hist_tag); 372 | } 373 | } 374 | 375 | printf("\n"); 376 | fflush(stdout); 377 | } 378 | 379 | g_running = false; 380 | 381 | if (do_transactions) { 382 | for (uint32_t k = 0; k < g_scfg.service_threads; k++) { 383 | 
pthread_join(svc_tids[k], NULL); 384 | } 385 | } 386 | 387 | for (uint32_t d = 0; d < g_scfg.num_devices; d++) { 388 | device* dev = &g_devices[d]; 389 | 390 | if (g_scfg.tomb_raider) { 391 | pthread_join(dev->tomb_raider_thread, NULL); 392 | } 393 | 394 | if (g_scfg.write_reqs_per_sec != 0) { 395 | pthread_join(dev->large_block_read_thread, NULL); 396 | pthread_join(dev->large_block_write_thread, NULL); 397 | } 398 | 399 | fd_close_all(dev); 400 | queue_destroy(dev->fd_q); 401 | free(dev->read_hist); 402 | free(dev->write_hist); 403 | } 404 | 405 | free(g_large_block_read_hist); 406 | free(g_large_block_write_hist); 407 | free(g_read_hist); 408 | free(g_write_hist); 409 | 410 | return 0; 411 | } 412 | 413 | 414 | //========================================================== 415 | // Local helpers - thread "run" functions. 416 | // 417 | 418 | //------------------------------------------------ 419 | // Service threads - generate and do reads, and if 420 | // commit-to-device, writes. 421 | // 422 | static void* 423 | run_service(void* pv_unused) 424 | { 425 | rand_seed_thread(); 426 | 427 | uint64_t count = 0; 428 | 429 | uint32_t total_reqs_per_sec = 430 | g_scfg.internal_read_reqs_per_sec + 431 | g_scfg.internal_write_reqs_per_sec; 432 | 433 | uint32_t reqs_per_sec = total_reqs_per_sec / g_scfg.service_threads; 434 | 435 | uint64_t read_split = (uint64_t)SPLIT_RESOLUTION * 436 | g_scfg.internal_read_reqs_per_sec / total_reqs_per_sec; 437 | 438 | while (g_running) { 439 | uint32_t random_dev_index = rand_32() % g_scfg.num_devices; 440 | device* random_dev = &g_devices[random_dev_index]; 441 | 442 | if (read_split > rand_64() % SPLIT_RESOLUTION) { 443 | trans_req read_req = { 444 | .dev = random_dev, 445 | .offset = random_read_offset(random_dev), 446 | .size = random_read_size(random_dev) 447 | }; 448 | 449 | uint8_t stack_buffer[read_req.size + 4096]; 450 | uint8_t* buf = align_4096(stack_buffer); 451 | 452 | read_and_report(&read_req, buf); 453 | } 454 | else { 
455 | trans_req write_req = { 456 | .dev = random_dev, 457 | .offset = random_write_offset(random_dev), 458 | .size = random_write_size(random_dev) 459 | }; 460 | 461 | uint8_t stack_buffer[write_req.size + 4096]; 462 | uint8_t* buf = align_4096(stack_buffer); 463 | 464 | write_and_report(&write_req, buf); 465 | } 466 | 467 | count++; 468 | 469 | int64_t sleep_us = (int64_t) 470 | (((count * 1000000) / reqs_per_sec) - 471 | (get_us() - g_run_start_us)); 472 | 473 | if (sleep_us > 0) { 474 | usleep((uint32_t)sleep_us); 475 | } 476 | else if (g_scfg.max_lag_usec != 0 && 477 | sleep_us < -(int64_t)g_scfg.max_lag_usec) { 478 | printf("ERROR: service thread can't keep up\n"); 479 | printf("ACT can't do requested load - test stopped\n"); 480 | printf("try configuring more 'service-threads'\n"); 481 | g_running = false; 482 | } 483 | } 484 | 485 | return NULL; 486 | } 487 | 488 | //------------------------------------------------ 489 | // Runs in every device large-block read thread, 490 | // executes large-block reads at a constant rate. 
491 | // 492 | static void* 493 | run_large_block_reads(void* pv_dev) 494 | { 495 | rand_seed_thread(); 496 | 497 | device* dev = (device*)pv_dev; 498 | 499 | uint8_t* buf = act_valloc(g_scfg.large_block_ops_bytes); 500 | 501 | if (buf == NULL) { 502 | printf("ERROR: large block read buffer act_valloc()\n"); 503 | g_running = false; 504 | return NULL; 505 | } 506 | 507 | uint64_t count = 0; 508 | 509 | while (g_running) { 510 | read_and_report_large_block(dev, buf); 511 | 512 | count++; 513 | 514 | uint64_t target_us = (uint64_t) 515 | ((double)(count * 1000000 * g_scfg.num_devices) / 516 | g_scfg.large_block_reads_per_sec); 517 | 518 | int64_t sleep_us = (int64_t)(target_us - (get_us() - g_run_start_us)); 519 | 520 | if (sleep_us > 0) { 521 | usleep((uint32_t)sleep_us); 522 | } 523 | else if (g_scfg.max_lag_usec != 0 && 524 | sleep_us < -(int64_t)g_scfg.max_lag_usec) { 525 | printf("ERROR: large block reads can't keep up\n"); 526 | printf("drive(s) can't keep up - test stopped\n"); 527 | g_running = false; 528 | } 529 | } 530 | 531 | free(buf); 532 | 533 | return NULL; 534 | } 535 | 536 | //------------------------------------------------ 537 | // Runs in every device large-block write thread, 538 | // executes large-block writes at a constant rate. 
539 | // 540 | static void* 541 | run_large_block_writes(void* pv_dev) 542 | { 543 | rand_seed_thread(); 544 | 545 | device* dev = (device*)pv_dev; 546 | 547 | uint8_t* buf = act_valloc(g_scfg.large_block_ops_bytes); 548 | 549 | if (buf == NULL) { 550 | printf("ERROR: large block write buffer act_valloc()\n"); 551 | g_running = false; 552 | return NULL; 553 | } 554 | 555 | uint64_t count = 0; 556 | 557 | while (g_running) { 558 | write_and_report_large_block(dev, buf, count); 559 | 560 | count++; 561 | 562 | uint64_t target_us = (uint64_t) 563 | ((double)(count * 1000000 * g_scfg.num_devices) / 564 | g_scfg.large_block_writes_per_sec); 565 | 566 | int64_t sleep_us = (int64_t)(target_us - (get_us() - g_run_start_us)); 567 | 568 | if (sleep_us > 0) { 569 | usleep((uint32_t)sleep_us); 570 | } 571 | else if (g_scfg.max_lag_usec != 0 && 572 | sleep_us < -(int64_t)g_scfg.max_lag_usec) { 573 | printf("ERROR: large block writes can't keep up\n"); 574 | printf("drive(s) can't keep up - test stopped\n"); 575 | g_running = false; 576 | } 577 | } 578 | 579 | free(buf); 580 | 581 | return NULL; 582 | } 583 | 584 | //------------------------------------------------ 585 | // Runs in every device tomb raider thread, 586 | // executes continuous large-block reads. 
587 | // 588 | static void* 589 | run_tomb_raider(void* pv_dev) 590 | { 591 | device* dev = (device*)pv_dev; 592 | 593 | uint8_t* buf = act_valloc(g_scfg.large_block_ops_bytes); 594 | 595 | if (buf == NULL) { 596 | printf("ERROR: tomb raider buffer act_valloc()\n"); 597 | g_running = false; 598 | return NULL; 599 | } 600 | 601 | uint64_t offset = 0; 602 | uint64_t end = dev->n_large_blocks * g_scfg.large_block_ops_bytes; 603 | 604 | while (g_running) { 605 | if (g_scfg.tomb_raider_sleep_us != 0) { 606 | usleep(g_scfg.tomb_raider_sleep_us); 607 | } 608 | 609 | read_from_device(dev, offset, g_scfg.large_block_ops_bytes, buf); 610 | 611 | offset += g_scfg.large_block_ops_bytes; 612 | 613 | if (offset == end) { 614 | offset = 0; 615 | } 616 | } 617 | 618 | free(buf); 619 | 620 | return NULL; 621 | } 622 | 623 | 624 | //========================================================== 625 | // Local helpers - generic. 626 | // 627 | 628 | //------------------------------------------------ 629 | // Aligned memory allocation. 630 | // 631 | static uint8_t* 632 | act_valloc(size_t size) 633 | { 634 | void* pv; 635 | 636 | return posix_memalign(&pv, 4096, size) == 0 ? (uint8_t*)pv : 0; 637 | } 638 | 639 | //------------------------------------------------ 640 | // Discover device storage capacity, etc. 
641 | // 642 | static bool 643 | discover_device(device* dev) 644 | { 645 | int fd = fd_get(dev); 646 | 647 | if (fd == -1) { 648 | return false; 649 | } 650 | 651 | uint64_t device_bytes = 0; 652 | 653 | if (g_scfg.file_size == 0) { 654 | ioctl(fd, BLKGETSIZE64, &device_bytes); 655 | } 656 | else { // undocumented file mode 657 | device_bytes = g_scfg.file_size; 658 | 659 | if (ftruncate(fd, (off_t)device_bytes) != 0) { 660 | printf("ERROR: ftruncate file %s errno %d '%s'\n", dev->name, errno, 661 | act_strerror(errno)); 662 | fd_put(dev, fd); 663 | return false; 664 | } 665 | } 666 | 667 | dev->n_large_blocks = device_bytes / g_scfg.large_block_ops_bytes; 668 | dev->min_op_bytes = discover_min_op_bytes(fd, dev->name); 669 | fd_put(dev, fd); 670 | 671 | if (dev->n_large_blocks == 0) { 672 | printf("ERROR: %s ioctl to discover size\n", dev->name); 673 | return false; 674 | } 675 | 676 | if (dev->min_op_bytes == 0) { 677 | return false; 678 | } 679 | 680 | printf("%s size = %" PRIu64 " bytes, %" PRIu64 " large blocks, " 681 | "minimum IO size = %" PRIu32 " bytes\n", 682 | dev->name, device_bytes, dev->n_large_blocks, 683 | dev->min_op_bytes); 684 | 685 | discover_read_pattern(dev); 686 | 687 | if (g_scfg.commit_to_device) { 688 | discover_write_pattern(dev); 689 | } 690 | // else - write load is all accounted for with large-block writes. 691 | 692 | return true; 693 | } 694 | 695 | //------------------------------------------------ 696 | // Discover device's minimum direct IO op size. 
697 | // 698 | static uint64_t 699 | discover_min_op_bytes(int fd, const char* name) 700 | { 701 | uint8_t* buf = act_valloc(HI_IO_MIN_SIZE); 702 | 703 | if (buf == NULL) { 704 | printf("ERROR: IO min size buffer act_valloc()\n"); 705 | return 0; 706 | } 707 | 708 | size_t read_sz = LO_IO_MIN_SIZE; 709 | 710 | while (read_sz <= HI_IO_MIN_SIZE) { 711 | if (pread_all(fd, (void*)buf, read_sz, 0)) { 712 | free(buf); 713 | return read_sz; 714 | } 715 | 716 | read_sz <<= 1; // LO_IO_MIN_SIZE and HI_IO_MIN_SIZE are powers of 2 717 | } 718 | 719 | printf("ERROR: %s read failed at all sizes from %u to %u bytes\n", name, 720 | LO_IO_MIN_SIZE, HI_IO_MIN_SIZE); 721 | 722 | free(buf); 723 | 724 | return 0; 725 | } 726 | 727 | //------------------------------------------------ 728 | // Discover device's read request pattern. 729 | // 730 | static void 731 | discover_read_pattern(device* dev) 732 | { 733 | // Total number of "min-op"-sized blocks on the device. (Excluding 734 | // fractional large block at end of device, if such.) 735 | uint64_t n_min_op_blocks = 736 | (dev->n_large_blocks * g_scfg.large_block_ops_bytes) / 737 | dev->min_op_bytes; 738 | 739 | // Number of "min-op"-sized blocks per (smallest) read request. 740 | uint32_t read_req_min_op_blocks = 741 | (g_scfg.record_stored_bytes + dev->min_op_bytes - 1) / 742 | dev->min_op_bytes; 743 | 744 | // Size in bytes per (smallest) read request. 745 | dev->read_bytes = read_req_min_op_blocks * dev->min_op_bytes; 746 | 747 | // Number of "min-op"-sized blocks per (largest) read request. 748 | uint32_t read_req_min_op_blocks_rmx = 749 | (g_scfg.record_stored_bytes_rmx + dev->min_op_bytes - 1) / 750 | dev->min_op_bytes; 751 | 752 | // Number of read request sizes in configured range. 753 | dev->n_read_sizes = 754 | read_req_min_op_blocks_rmx - read_req_min_op_blocks + 1; 755 | 756 | // Total number of sites on device to read from. (Make sure the last site 757 | // has room for largest possible read request.) 
758 | dev->n_read_offsets = n_min_op_blocks - read_req_min_op_blocks_rmx + 1; 759 | } 760 | 761 | //------------------------------------------------ 762 | // Discover device's write request pattern. 763 | // 764 | static void 765 | discover_write_pattern(device* dev) 766 | { 767 | // Total number of "min-op"-sized blocks on the device. (Excluding 768 | // fractional large block at end of device, if such.) 769 | uint64_t n_min_op_blocks = 770 | (dev->n_large_blocks * g_scfg.large_block_ops_bytes) / 771 | dev->min_op_bytes; 772 | 773 | // Number of "min-op"-sized blocks per (smallest) write request. 774 | uint32_t write_req_min_op_blocks = 775 | (g_scfg.record_stored_bytes + dev->min_op_bytes - 1) / 776 | dev->min_op_bytes; 777 | 778 | // Size in bytes per (smallest) write request. 779 | dev->write_bytes = write_req_min_op_blocks * dev->min_op_bytes; 780 | 781 | // Number of "min-op"-sized blocks per (largest) write request. 782 | uint32_t write_req_min_op_blocks_rmx = 783 | (g_scfg.record_stored_bytes_rmx + dev->min_op_bytes - 1) / 784 | dev->min_op_bytes; 785 | 786 | // Number of write request sizes in configured range. 787 | dev->n_write_sizes = 788 | write_req_min_op_blocks_rmx - write_req_min_op_blocks + 1; 789 | 790 | // Total number of sites on device to write to. (Make sure the last site 791 | // has room for largest possible write request.) 792 | dev->n_write_offsets = n_min_op_blocks - write_req_min_op_blocks_rmx + 1; 793 | } 794 | 795 | //------------------------------------------------ 796 | // Close all file descriptors for a device. 797 | // 798 | static void 799 | fd_close_all(device* dev) 800 | { 801 | int fd; 802 | 803 | while (queue_pop(dev->fd_q, (void*)&fd)) { 804 | close(fd); 805 | } 806 | } 807 | 808 | //------------------------------------------------ 809 | // Get a safe file descriptor for a device. 810 | // 811 | static int 812 | fd_get(device* dev) 813 | { 814 | int fd = -1; 815 | 816 | if (! 
queue_pop(dev->fd_q, (void*)&fd)) { 817 | int direct_flags = O_DIRECT | (g_scfg.disable_odsync ? 0 : O_DSYNC); 818 | int flags = O_RDWR | (g_scfg.file_size == 0 ? direct_flags : O_CREAT); 819 | 820 | fd = open(dev->name, flags, S_IRUSR | S_IWUSR); 821 | 822 | if (fd == -1) { 823 | printf("ERROR: open device %s errno %d '%s'\n", dev->name, errno, 824 | act_strerror(errno)); 825 | } 826 | } 827 | 828 | return fd; 829 | } 830 | 831 | //------------------------------------------------ 832 | // Recycle a safe file descriptor for a device. 833 | // 834 | static void 835 | fd_put(device* dev, int fd) 836 | { 837 | queue_push(dev->fd_q, (void*)&fd); 838 | } 839 | 840 | //------------------------------------------------ 841 | // Do one transaction read operation and report. 842 | // 843 | static void 844 | read_and_report(trans_req* read_req, uint8_t* buf) 845 | { 846 | uint64_t start_time = get_ns(); 847 | uint64_t stop_time = read_from_device(read_req->dev, read_req->offset, 848 | read_req->size, buf); 849 | 850 | if (stop_time != -1) { 851 | histogram_insert_data_point(g_read_hist, 852 | safe_delta_ns(start_time, stop_time)); 853 | histogram_insert_data_point(read_req->dev->read_hist, 854 | safe_delta_ns(start_time, stop_time)); 855 | } 856 | } 857 | 858 | //------------------------------------------------ 859 | // Do one large block read operation and report. 860 | // 861 | static void 862 | read_and_report_large_block(device* dev, uint8_t* buf) 863 | { 864 | uint64_t offset = random_large_block_offset(dev); 865 | uint64_t start_time = get_ns(); 866 | uint64_t stop_time = read_from_device(dev, offset, 867 | g_scfg.large_block_ops_bytes, buf); 868 | 869 | if (stop_time != -1) { 870 | histogram_insert_data_point(g_large_block_read_hist, 871 | safe_delta_ns(start_time, stop_time)); 872 | } 873 | } 874 | 875 | //------------------------------------------------ 876 | // Do one device read operation. 
877 | // 878 | static uint64_t 879 | read_from_device(device* dev, uint64_t offset, uint32_t size, uint8_t* buf) 880 | { 881 | int fd = fd_get(dev); 882 | 883 | if (fd == -1) { 884 | return -1; 885 | } 886 | 887 | if (! pread_all(fd, buf, size, offset)) { 888 | close(fd); 889 | printf("ERROR: reading %s: %d '%s'\n", dev->name, errno, 890 | act_strerror(errno)); 891 | return -1; 892 | } 893 | 894 | uint64_t stop_ns = get_ns(); 895 | 896 | fd_put(dev, fd); 897 | 898 | return stop_ns; 899 | } 900 | 901 | //------------------------------------------------ 902 | // Do one transaction write operation and report. 903 | // 904 | static void 905 | write_and_report(trans_req* write_req, uint8_t* buf) 906 | { 907 | // Salt each record. 908 | rand_fill(buf, write_req->size, g_scfg.compress_pct); 909 | 910 | uint64_t start_time = get_ns(); 911 | uint64_t stop_time = write_to_device(write_req->dev, write_req->offset, 912 | write_req->size, buf); 913 | 914 | if (stop_time != -1) { 915 | histogram_insert_data_point(g_write_hist, 916 | safe_delta_ns(start_time, stop_time)); 917 | histogram_insert_data_point(write_req->dev->write_hist, 918 | safe_delta_ns(start_time, stop_time)); 919 | } 920 | } 921 | 922 | //------------------------------------------------ 923 | // Do one large block write operation and report. 924 | // 925 | static void 926 | write_and_report_large_block(device* dev, uint8_t* buf, uint64_t count) 927 | { 928 | // Salt the block each time. 929 | rand_fill(buf, g_scfg.large_block_ops_bytes, g_scfg.compress_pct); 930 | 931 | uint64_t offset = random_large_block_offset(dev); 932 | uint64_t start_time = get_ns(); 933 | uint64_t stop_time = write_to_device(dev, offset, 934 | g_scfg.large_block_ops_bytes, buf); 935 | 936 | if (stop_time != -1) { 937 | histogram_insert_data_point(g_large_block_write_hist, 938 | safe_delta_ns(start_time, stop_time)); 939 | } 940 | } 941 | 942 | //------------------------------------------------ 943 | // Do one device write operation. 
944 | // 945 | static uint64_t 946 | write_to_device(device* dev, uint64_t offset, uint32_t size, const uint8_t* buf) 947 | { 948 | int fd = fd_get(dev); 949 | 950 | if (fd == -1) { 951 | return -1; 952 | } 953 | 954 | if (! pwrite_all(fd, buf, size, offset)) { 955 | close(fd); 956 | printf("ERROR: writing %s: %d '%s'\n", dev->name, errno, 957 | act_strerror(errno)); 958 | return -1; 959 | } 960 | 961 | uint64_t stop_ns = get_ns(); 962 | 963 | fd_put(dev, fd); 964 | 965 | return stop_ns; 966 | } 967 | -------------------------------------------------------------------------------- /src/storage/cfg_storage.c: -------------------------------------------------------------------------------- 1 | /* 2 | * cfg_storage.c 3 | * 4 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | //========================================================== 26 | // Includes. 27 | // 28 | 29 | #include "cfg_storage.h" 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | #include "common/cfg.h" 40 | #include "common/hardware.h" 41 | #include "common/trace.h" 42 | 43 | 44 | //========================================================== 45 | // Typedefs & constants. 46 | // 47 | 48 | static const char TAG_DEVICE_NAMES[] = "device-names"; 49 | static const char TAG_FILE_SIZE_MBYTES[] = "file-size-mbytes"; 50 | static const char TAG_SERVICE_THREADS[] = "service-threads"; 51 | static const char TAG_TEST_DURATION_SEC[] = "test-duration-sec"; 52 | static const char TAG_REPORT_INTERVAL_SEC[] = "report-interval-sec"; 53 | static const char TAG_MICROSECOND_HISTOGRAMS[] = "microsecond-histograms"; 54 | static const char TAG_READ_REQS_PER_SEC[] = "read-reqs-per-sec"; 55 | static const char TAG_WRITE_REQS_PER_SEC[] = "write-reqs-per-sec"; 56 | static const char TAG_RECORD_BYTES[] = "record-bytes"; 57 | static const char TAG_RECORD_BYTES_RANGE_MAX[] = "record-bytes-range-max"; 58 | static const char TAG_LARGE_BLOCK_OP_KBYTES[] = "large-block-op-kbytes"; 59 | static const char TAG_REPLICATION_FACTOR[] = "replication-factor"; 60 | static const char TAG_UPDATE_PCT[] = "update-pct"; 61 | static const char TAG_DEFRAG_LWM_PCT[] = "defrag-lwm-pct"; 62 | static const char TAG_NO_DEFRAG_READS[] = "no-defrag-reads"; 63 | static const char TAG_COMPRESS_PCT[] = "compress-pct"; 64 | static const char TAG_DISABLE_ODSYNC[] = "disable-odsync"; 65 | static const char TAG_COMMIT_TO_DEVICE[] = "commit-to-device"; 66 | static const char TAG_TOMB_RAIDER[] = "tomb-raider"; 67 | static const char TAG_TOMB_RAIDER_SLEEP_USEC[] = "tomb-raider-sleep-usec"; 68 | static const char TAG_MAX_LAG_SEC[] = "max-lag-sec"; 69 | 70 | // As in Aerospike server. 
71 | #define RBLOCK_SIZE 16 72 | #define WBLOCK_SIZE (8 * 1024 * 1024) 73 | 74 | 75 | //========================================================== 76 | // Forward declarations. 77 | // 78 | 79 | static bool check_configuration(); 80 | static bool derive_configuration(); 81 | static void echo_configuration(); 82 | 83 | 84 | //========================================================== 85 | // Globals. 86 | // 87 | 88 | // Configuration instance, showing non-zero defaults. 89 | storage_cfg g_scfg = { 90 | .report_interval_us = 1000000, 91 | .record_bytes = 1536, 92 | .large_block_ops_bytes = 1024 * 128, 93 | .replication_factor = 1, 94 | .defrag_lwm_pct = 50, 95 | .compress_pct = 100, 96 | .max_lag_usec = 1000000 * 10 97 | }; 98 | 99 | 100 | //========================================================== 101 | // Inlines & macros. 102 | // 103 | 104 | static inline bool 105 | is_power_of_2(uint32_t value) 106 | { 107 | return (value & (value - 1)) == 0; 108 | } 109 | 110 | static inline uint32_t 111 | round_up_to_rblock(uint32_t size) 112 | { 113 | return (size + (RBLOCK_SIZE - 1)) & -RBLOCK_SIZE; 114 | } 115 | 116 | 117 | //========================================================== 118 | // Public API. 
119 | // 120 | 121 | bool 122 | storage_configure(int argc, char* argv[]) 123 | { 124 | if (argc != 2) { 125 | printf("usage: act_storage [config filename]\n"); 126 | return false; 127 | } 128 | 129 | FILE* config_file = fopen(argv[1], "r"); 130 | 131 | if (config_file == NULL) { 132 | printf("ERROR: couldn't open config file %s errno %d '%s'\n", argv[1], 133 | errno, act_strerror(errno)); 134 | return false; 135 | } 136 | 137 | char line[4096]; 138 | 139 | while (fgets(line, sizeof(line), config_file) != NULL) { 140 | char* comment = strchr(line, '#'); 141 | 142 | if (comment != NULL) { 143 | *comment = '\0'; 144 | } 145 | 146 | const char* tag = strtok(line, ":" WHITE_SPACE); 147 | 148 | if (tag == NULL) { 149 | continue; 150 | } 151 | 152 | if (strcmp(tag, TAG_DEVICE_NAMES) == 0) { 153 | parse_device_names(MAX_NUM_STORAGE_DEVICES, g_scfg.device_names, 154 | &g_scfg.num_devices); 155 | } 156 | else if (strcmp(tag, TAG_FILE_SIZE_MBYTES) == 0) { 157 | g_scfg.file_size = (uint64_t)parse_uint32() << 20; 158 | } 159 | else if (strcmp(tag, TAG_SERVICE_THREADS) == 0) { 160 | g_scfg.service_threads = parse_uint32(); 161 | } 162 | else if (strcmp(tag, TAG_TEST_DURATION_SEC) == 0) { 163 | g_scfg.run_us = (uint64_t)parse_uint32() * 1000000; 164 | } 165 | else if (strcmp(tag, TAG_REPORT_INTERVAL_SEC) == 0) { 166 | g_scfg.report_interval_us = (uint64_t)parse_uint32() * 1000000; 167 | } 168 | else if (strcmp(tag, TAG_MICROSECOND_HISTOGRAMS) == 0) { 169 | g_scfg.us_histograms = parse_yes_no(); 170 | } 171 | else if (strcmp(tag, TAG_READ_REQS_PER_SEC) == 0) { 172 | g_scfg.read_reqs_per_sec = parse_uint32(); 173 | } 174 | else if (strcmp(tag, TAG_WRITE_REQS_PER_SEC) == 0) { 175 | g_scfg.write_reqs_per_sec = parse_uint32(); 176 | } 177 | else if (strcmp(tag, TAG_RECORD_BYTES) == 0) { 178 | g_scfg.record_bytes = parse_uint32(); 179 | } 180 | else if (strcmp(tag, TAG_RECORD_BYTES_RANGE_MAX) == 0) { 181 | g_scfg.record_bytes_rmx = parse_uint32(); 182 | } 183 | else if (strcmp(tag, 
TAG_LARGE_BLOCK_OP_KBYTES) == 0) { 184 | g_scfg.large_block_ops_bytes = parse_uint32() * 1024; 185 | } 186 | else if (strcmp(tag, TAG_REPLICATION_FACTOR) == 0) { 187 | g_scfg.replication_factor = parse_uint32(); 188 | } 189 | else if (strcmp(tag, TAG_UPDATE_PCT) == 0) { 190 | g_scfg.update_pct = parse_uint32(); 191 | } 192 | else if (strcmp(tag, TAG_DEFRAG_LWM_PCT) == 0) { 193 | g_scfg.defrag_lwm_pct = parse_uint32(); 194 | } 195 | else if (strcmp(tag, TAG_NO_DEFRAG_READS) == 0) { 196 | g_scfg.no_defrag_reads = parse_yes_no(); 197 | } 198 | else if (strcmp(tag, TAG_COMPRESS_PCT) == 0) { 199 | g_scfg.compress_pct = parse_uint32(); 200 | } 201 | else if (strcmp(tag, TAG_DISABLE_ODSYNC) == 0) { 202 | g_scfg.disable_odsync = parse_yes_no(); 203 | } 204 | else if (strcmp(tag, TAG_COMMIT_TO_DEVICE) == 0) { 205 | g_scfg.commit_to_device = parse_yes_no(); 206 | } 207 | else if (strcmp(tag, TAG_TOMB_RAIDER) == 0) { 208 | g_scfg.tomb_raider = parse_yes_no(); 209 | } 210 | else if (strcmp(tag, TAG_TOMB_RAIDER_SLEEP_USEC) == 0) { 211 | g_scfg.tomb_raider_sleep_us = parse_uint32(); 212 | } 213 | else if (strcmp(tag, TAG_MAX_LAG_SEC) == 0) { 214 | g_scfg.max_lag_usec = (uint64_t)parse_uint32() * 1000000; 215 | } 216 | else { 217 | printf("ERROR: ignoring unknown config item '%s'\n", tag); 218 | return false; 219 | } 220 | } 221 | 222 | fclose(config_file); 223 | 224 | if (! check_configuration() || ! derive_configuration()) { 225 | return false; 226 | } 227 | 228 | echo_configuration(); 229 | 230 | return true; 231 | } 232 | 233 | 234 | //========================================================== 235 | // Local helpers. 
236 | // 237 | 238 | static bool 239 | check_configuration() 240 | { 241 | if (g_scfg.num_devices == 0) { 242 | configuration_error(TAG_DEVICE_NAMES); 243 | return false; 244 | } 245 | 246 | if (g_scfg.service_threads == 0 && 247 | (g_scfg.service_threads = 5 * num_cpus()) == 0) { 248 | configuration_error(TAG_SERVICE_THREADS); 249 | return false; 250 | } 251 | 252 | if (g_scfg.run_us == 0) { 253 | configuration_error(TAG_TEST_DURATION_SEC); 254 | return false; 255 | } 256 | 257 | if (g_scfg.report_interval_us == 0) { 258 | configuration_error(TAG_REPORT_INTERVAL_SEC); 259 | return false; 260 | } 261 | 262 | if (g_scfg.record_bytes == 0 || g_scfg.record_bytes > WBLOCK_SIZE) { 263 | configuration_error(TAG_RECORD_BYTES); 264 | return false; 265 | } 266 | 267 | if (g_scfg.record_bytes_rmx != 0 && 268 | (g_scfg.record_bytes_rmx <= g_scfg.record_bytes || 269 | g_scfg.record_bytes_rmx > WBLOCK_SIZE)) { 270 | configuration_error(TAG_RECORD_BYTES_RANGE_MAX); 271 | return false; 272 | } 273 | 274 | if (g_scfg.large_block_ops_bytes > WBLOCK_SIZE || 275 | ! 
is_power_of_2(g_scfg.large_block_ops_bytes)) { 276 | configuration_error(TAG_LARGE_BLOCK_OP_KBYTES); 277 | return false; 278 | } 279 | 280 | if (g_scfg.replication_factor == 0) { 281 | configuration_error(TAG_REPLICATION_FACTOR); 282 | return false; 283 | } 284 | 285 | if (g_scfg.update_pct > 100) { 286 | configuration_error(TAG_UPDATE_PCT); 287 | return false; 288 | } 289 | 290 | if (g_scfg.defrag_lwm_pct >= 100) { 291 | configuration_error(TAG_DEFRAG_LWM_PCT); 292 | return false; 293 | } 294 | 295 | if (g_scfg.compress_pct > 100) { 296 | configuration_error(TAG_COMPRESS_PCT); 297 | return false; 298 | } 299 | 300 | if (g_scfg.disable_odsync && g_scfg.commit_to_device) { 301 | configuration_error(TAG_DISABLE_ODSYNC); 302 | return false; 303 | } 304 | 305 | return true; 306 | } 307 | 308 | static bool 309 | derive_configuration() 310 | { 311 | if (g_scfg.read_reqs_per_sec + g_scfg.write_reqs_per_sec == 0) { 312 | printf("ERROR: %s and %s can't both be zero\n", TAG_READ_REQS_PER_SEC, 313 | TAG_WRITE_REQS_PER_SEC); 314 | return false; 315 | } 316 | 317 | // Non-zero update-pct causes client writes to generate internal reads. 318 | g_scfg.internal_read_reqs_per_sec = g_scfg.read_reqs_per_sec + 319 | (g_scfg.write_reqs_per_sec * g_scfg.update_pct / 100); 320 | 321 | // 'replication-factor' > 1 causes replica writes (which are replaces). 322 | uint32_t internal_write_reqs_per_sec = 323 | g_scfg.replication_factor * g_scfg.write_reqs_per_sec; 324 | 325 | g_scfg.record_stored_bytes = round_up_to_rblock(g_scfg.record_bytes); 326 | 327 | g_scfg.record_stored_bytes_rmx = g_scfg.record_bytes_rmx == 0 ? 328 | g_scfg.record_stored_bytes : 329 | round_up_to_rblock(g_scfg.record_bytes_rmx); 330 | 331 | // Assumes linear probability distribution across size range. 332 | uint32_t avg_record_stored_bytes = 333 | (g_scfg.record_stored_bytes + g_scfg.record_stored_bytes_rmx) / 2; 334 | 335 | // "Original" means excluding write rate due to defrag. 
336 | double original_write_rate_in_large_blocks_per_sec = 337 | (double)internal_write_reqs_per_sec / 338 | (double)(g_scfg.large_block_ops_bytes / avg_record_stored_bytes); 339 | 340 | double defrag_write_amplification = 341 | 100.0 / (double)(100 - g_scfg.defrag_lwm_pct); 342 | // For example: 343 | // defrag-lwm-pct = 50: amplification = 100/(100 - 50) = 2.0 (default) 344 | // defrag-lwm-pct = 60: amplification = 100/(100 - 60) = 2.5 345 | // defrag-lwm-pct = 40: amplification = 100/(100 - 40) = 1.666... 346 | 347 | // Large block read rate always matches overall write rate. 348 | g_scfg.large_block_reads_per_sec = 349 | original_write_rate_in_large_blocks_per_sec * 350 | defrag_write_amplification; 351 | 352 | if (g_scfg.commit_to_device) { 353 | // In 'commit-to-device' mode, only write rate caused by defrag is done 354 | // via large block writes. 355 | g_scfg.large_block_writes_per_sec = 356 | original_write_rate_in_large_blocks_per_sec * 357 | (defrag_write_amplification - 1.0); 358 | 359 | // "Original" writes are done individually. 360 | g_scfg.internal_write_reqs_per_sec = internal_write_reqs_per_sec; 361 | } 362 | else { 363 | // Normally, overall write rate is all done via large block writes. 364 | g_scfg.large_block_writes_per_sec = g_scfg.large_block_reads_per_sec; 365 | } 366 | 367 | // To simulate the new storage-engine memory where defrag reads from RAM. 368 | if (g_scfg.no_defrag_reads) { 369 | g_scfg.large_block_reads_per_sec = 0; 370 | } 371 | 372 | // Non-zero load must be enough to calculate service thread rates safely. 
373 | uint32_t total_reqs_per_sec = 374 | g_scfg.internal_read_reqs_per_sec + 375 | g_scfg.internal_write_reqs_per_sec; 376 | 377 | if (total_reqs_per_sec != 0 && 378 | total_reqs_per_sec / g_scfg.service_threads == 0) { 379 | printf("ERROR: load config too small\n"); 380 | return false; 381 | } 382 | 383 | return true; 384 | } 385 | 386 | static void 387 | echo_configuration() 388 | { 389 | printf("ACT-STORAGE CONFIGURATION\n"); 390 | 391 | printf("%s:", TAG_DEVICE_NAMES); 392 | 393 | for (uint32_t d = 0; d < g_scfg.num_devices; d++) { 394 | printf(" %s", g_scfg.device_names[d]); 395 | } 396 | 397 | printf("\nnum-devices: %" PRIu32 "\n", g_scfg.num_devices); 398 | 399 | if (g_scfg.file_size != 0) { // undocumented - don't always expose 400 | printf("%s: %" PRIu64 "\n", TAG_FILE_SIZE_MBYTES, 401 | g_scfg.file_size >> 20); 402 | } 403 | 404 | printf("%s: %" PRIu32 "\n", TAG_SERVICE_THREADS, 405 | g_scfg.service_threads); 406 | printf("%s: %" PRIu64 "\n", TAG_TEST_DURATION_SEC, 407 | g_scfg.run_us / 1000000); 408 | printf("%s: %" PRIu64 "\n", TAG_REPORT_INTERVAL_SEC, 409 | g_scfg.report_interval_us / 1000000); 410 | printf("%s: %s\n", TAG_MICROSECOND_HISTOGRAMS, 411 | g_scfg.us_histograms ? 
"yes" : "no"); 412 | printf("%s: %" PRIu32 "\n", TAG_READ_REQS_PER_SEC, 413 | g_scfg.read_reqs_per_sec); 414 | printf("%s: %" PRIu32 "\n", TAG_WRITE_REQS_PER_SEC, 415 | g_scfg.write_reqs_per_sec); 416 | printf("%s: %" PRIu32 "\n", TAG_RECORD_BYTES, 417 | g_scfg.record_bytes); 418 | printf("%s: %" PRIu32 "\n", TAG_RECORD_BYTES_RANGE_MAX, 419 | g_scfg.record_bytes_rmx); 420 | printf("%s: %" PRIu32 "\n", TAG_LARGE_BLOCK_OP_KBYTES, 421 | g_scfg.large_block_ops_bytes / 1024); 422 | printf("%s: %" PRIu32 "\n", TAG_REPLICATION_FACTOR, 423 | g_scfg.replication_factor); 424 | printf("%s: %" PRIu32 "\n", TAG_UPDATE_PCT, 425 | g_scfg.update_pct); 426 | printf("%s: %" PRIu32 "\n", TAG_DEFRAG_LWM_PCT, 427 | g_scfg.defrag_lwm_pct); 428 | printf("%s: %s\n", TAG_NO_DEFRAG_READS, 429 | g_scfg.no_defrag_reads ? "yes" : "no"); 430 | printf("%s: %" PRIu32 "\n", TAG_COMPRESS_PCT, 431 | g_scfg.compress_pct); 432 | printf("%s: %s\n", TAG_DISABLE_ODSYNC, 433 | g_scfg.disable_odsync ? "yes" : "no"); 434 | printf("%s: %s\n", TAG_COMMIT_TO_DEVICE, 435 | g_scfg.commit_to_device ? "yes" : "no"); 436 | printf("%s: %s\n", TAG_TOMB_RAIDER, 437 | g_scfg.tomb_raider ? "yes" : "no"); 438 | printf("%s: %" PRIu32 "\n", TAG_TOMB_RAIDER_SLEEP_USEC, 439 | g_scfg.tomb_raider_sleep_us); 440 | printf("%s: %" PRIu64 "\n", TAG_MAX_LAG_SEC, 441 | g_scfg.max_lag_usec / 1000000); 442 | 443 | printf("\nDERIVED CONFIGURATION\n"); 444 | 445 | printf("record-stored-bytes: %" PRIu32 " ... 
%" PRIu32 "\n", 446 | g_scfg.record_stored_bytes, g_scfg.record_stored_bytes_rmx); 447 | printf("internal-read-reqs-per-sec: %" PRIu32 "\n", 448 | g_scfg.internal_read_reqs_per_sec); 449 | printf("internal-write-reqs-per-sec: %" PRIu32 "\n", 450 | g_scfg.internal_write_reqs_per_sec); 451 | printf("large-block-reads-per-sec: %.2lf\n", 452 | g_scfg.large_block_reads_per_sec); 453 | printf("large-block-writes-per-sec: %.2lf\n", 454 | g_scfg.large_block_writes_per_sec); 455 | 456 | printf("\n"); 457 | } 458 | -------------------------------------------------------------------------------- /src/storage/cfg_storage.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cfg_storage.h 3 | * 4 | * Copyright (c) 2018-2020 Aerospike, Inc. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #pragma once 26 | 27 | //========================================================== 28 | // Includes. 29 | // 30 | 31 | #include 32 | #include 33 | 34 | #include "common/cfg.h" 35 | 36 | 37 | //========================================================== 38 | // Typedefs & constants. 39 | // 40 | 41 | #define MAX_NUM_STORAGE_DEVICES 128 42 | 43 | typedef struct storage_cfg_s { 44 | char device_names[MAX_NUM_STORAGE_DEVICES][MAX_DEVICE_NAME_SIZE]; 45 | uint32_t num_devices; // derived by counting device names 46 | uint64_t file_size; // undocumented feature - use files 47 | uint32_t service_threads; 48 | uint64_t run_us; // converted from literal units in seconds 49 | uint64_t report_interval_us; // converted from literal units in seconds 50 | bool us_histograms; 51 | uint32_t read_reqs_per_sec; 52 | uint32_t write_reqs_per_sec; 53 | uint32_t record_bytes; 54 | uint32_t record_bytes_rmx; 55 | uint32_t large_block_ops_bytes; // converted from literal units in Kbytes 56 | uint32_t replication_factor; 57 | uint32_t update_pct; 58 | uint32_t defrag_lwm_pct; 59 | bool no_defrag_reads; 60 | uint32_t compress_pct; 61 | bool disable_odsync; 62 | bool commit_to_device; 63 | bool tomb_raider; 64 | uint32_t tomb_raider_sleep_us; 65 | uint64_t max_lag_usec; // converted from literal units in seconds 66 | 67 | // Derived from literal configuration: 68 | uint32_t record_stored_bytes; 69 | uint32_t record_stored_bytes_rmx; 70 | uint32_t internal_read_reqs_per_sec; 71 | uint32_t internal_write_reqs_per_sec; 72 | double large_block_reads_per_sec; 73 | double large_block_writes_per_sec; 74 | } storage_cfg; 75 | 76 | 77 | //========================================================== 78 | // Globals. 79 | // 80 | 81 | extern storage_cfg g_scfg; 82 | 83 | 84 | //========================================================== 85 | // Public API. 
86 | // 87 | 88 | bool storage_configure(int argc, char* argv[]); 89 | --------------------------------------------------------------------------------