├── .flake8
├── .github
    └── workflows
    │   ├── build.yaml
    │   └── code_check.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── drop_buffer_cache.py
├── examples
    └── docker
    │   ├── README.md
    │   ├── run-smallfile-client-tests.sh
    │   └── smallfile
    │       ├── Dockerfile
    │       └── launch.sh
├── fallocate.py
├── gen-fake-rsptimes.sh
├── invoke_process.py
├── launch_smf_host.py
├── launcher_thread.py
├── multi_thread_workload.py
├── output_results.py
├── parse.py
├── parse_slave.py
├── parser_data_types.py
├── poetry.lock
├── profile.sh
├── profile_workload.py
├── pyproject.toml
├── regtest.sh
├── smallfile.py
├── smallfile_cli.py
├── smallfile_remote.py
├── smallfile_rsptimes_stats.py
├── smf_test_params.py
├── ssh_thread.py
├── sync_files.py
└── yaml_parser.py


/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 160
3 | # E203: flake8 and black disagree on array slices see https://github.com/psf/black/issues/1859
4 | extend-ignore = E203
5 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yaml:
--------------------------------------------------------------------------------
 1 | name: Package Build
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - main
 6 |   pull_request:
 7 | jobs:
 8 |   build:
 9 |     name: Package building
10 |     runs-on: ubuntu-latest
11 |     strategy:
12 |       matrix:
13 |         python-version: ['3.9', 'pypy3.9']
14 | 
15 |     steps:
16 |       - name: Check out code
17 |         uses: actions/checkout@v3
18 |       - name: Set up Python
19 |         uses: actions/setup-python@v4
20 |         with:
21 |           python-version: 3.9
22 |       - name: Install poetry
23 |         run: pip install poetry
24 |       - name: Run build
25 |         run: poetry build
26 |       - name: Upload artifacts
27 |         uses: actions/upload-artifact@v3
28 |         with:
29 |           name: Artifacts
30 |           path: dist
31 |           if-no-files-found: error
32 | 


--------------------------------------------------------------------------------
/.github/workflows/code_check.yaml:
--------------------------------------------------------------------------------
 1 | name: Code Check
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - main
 6 |   pull_request:
 7 | jobs:
 8 |   linux_check:
 9 |     name: Code Check on Linux
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - name: Check out code
13 |         uses: actions/checkout@v3
14 |       - name: Set up Python
15 |         uses: actions/setup-python@v4
16 |         with:
17 |           python-version: 3.9
18 |       - name: Install black
19 |         run: pip install black
20 |       - name: Run black test
21 |         run: black --check .
22 |       - name: Install isort
23 |         run: pip install isort
24 |       - name: Run isort test
25 |         run: isort --profile=black --check *.py
26 |       - name: Install flake8
27 |         run: pip install flake8
28 |       - name: Run flake8 test
29 |         run: flake8 *.py
30 |       - name: Install coverage
31 |         run: pip install coverage html2text
32 |       - name: Install required dependencies
33 |         run: pip install PyYAML
34 |       - name: Run unit tests
35 |         run: python3 -m coverage run -m unittest yaml_parser.py invoke_process.py smallfile.py
36 |       - name: Collect coverage report
37 |         run: |
38 |           python3 -m coverage html
39 |       - name: Publish coverage report to job summary
40 |         run: html2text --ignore-images --ignore-links -b 0 htmlcov/index.html >> $GITHUB_STEP_SUMMARY
41 |       - name: Upload coverage results
42 |         uses: actions/upload-artifact@v3
43 |         with:
44 |           name: coverage
45 |           path: htmlcov
46 |           if-no-files-found: error
47 |   windows:
48 |     name: Code Check on Windows
49 |     runs-on: windows-latest
50 |     steps:
51 |       - name: Check out code
52 |         uses: actions/checkout@v3
53 |       - name: Set up Python
54 |         uses: actions/setup-python@v4
55 |         with:
56 |           python-version: 3.9
57 |       - name: Install coverage
58 |         run: pip install coverage html2text
59 |       - name: Install required dependencies
60 |         run: pip install PyYAML
61 |       - name: Run unit tests
62 |         run: python3 -m coverage run -m unittest yaml_parser.py invoke_process.py smallfile.py
63 |       - name: Collect coverage report
64 |         run: |
65 |           python3 -m coverage html
66 |       - name: Publish coverage report to job summary
67 |         run: html2text --ignore-images --ignore-links -b 0 htmlcov/index.html >> $env:GITHUB_STEP_SUMMARY
68 |       - name: Upload coverage results
69 |         uses: actions/upload-artifact@v3
70 |         with:
71 |           name: win-coverage
72 |           path: htmlcov
73 |           if-no-files-found: error
74 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.log
2 | *.pyc
3 | .pdbrc
4 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | repos:
 3 |   - repo: https://github.com/python/black.git
 4 |     rev: 22.10.0
 5 |     hooks:
 6 |       - id: black
 7 |         name: black (python3)
 8 |         language_version: python3
 9 |         args: ["--check"]
10 |   - repo: https://github.com/pycqa/isort
11 |     rev: 5.12.0
12 |     hooks:
13 |       - id: isort
14 |         name: isort (python3)
15 |         language_version: python3
16 |         args: ["--check", "--profile=black"]
17 |   - repo: https://github.com/pycqa/flake8
18 |     rev: '5.0.4'
19 |     hooks:
20 |     -   id: flake8
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | smallfile
  2 | =========
  3 | 
  4 | A distributed workload generator for POSIX-like filesystems.
  5 | 
  6 | New features:
  7 | * support for Kubernetes and benchmark-operator
  8 | * YAML input format for parameters
  9 | 
 10 | # Table of contents
 11 | 
 12 | [License](#license)
 13 | 
 14 | [Introduction](#introduction)
 15 | 
 16 | [What it can do](#what-it-can-do)
 17 | 
 18 | [Restrictions](#restrictions)
 19 | 
 20 | [How to specify test](#how-to-specify-test)
 21 | 
 22 | [Results](#results)
 23 | 
 24 | [Postprocessing of response time data](#postprocessing-of-response-time-data)
 25 | 
 26 | [How to run correctly](#how-to-run-correctly)
 27 | 
 28 | [Avoiding caching effects](#avoiding-caching-effects)
 29 | 
 30 | [Use of pause and auto-pause option](#use-of-pause-and-auto-pause-options)
 31 | 
 32 | [Use with distributed filesystems](#use-with-distributed-filesystems)
 33 | 
 34 | [The dreaded startup timeout error](#the-dreaded-startup-timeout-error)
 35 | 
 36 | [Use with local filesystems](#use-with-local-filesystems)
 37 | 
 38 | [Use of subdirectories](#use-of-subdirectories)
 39 | 
 40 | [Sharing directories across threads](#sharing-directories-across-threads)
 41 | 
 42 | [Hashing files into directory tree](#hashing-files-into-directory-tree)
 43 | 
 44 | [Random file size distribution option](#random-file-size-distribution-option)
 45 | 
 46 | [Asynchronous file copy performance](#asynchronous-file-copy-performance)
 47 | 
 48 | [Comparable Benchmarks](#comparable-benchmarks)
 49 | 
 50 | [Design principles](#design-principles)
 51 | 
 52 | [Synchronization](#synchronization)
 53 | 
 54 | [Test parameter transmission](#test-parameter-transmission)
 55 | 
 56 | [Launching remote worker threads](#launching-remote-worker-threads)
 57 | 
 58 | [Returning results](#returning-results)
 59 | 
 60 | 
 61 | License
 62 | =========
 63 | Copyright [2012] [Ben England]
 64 | 
 65 | Licensed under the Apache License, Version 2.0 (the "License");
 66 | you may not use files except in compliance with the License.
 67 | You may obtain a copy of the License at
 68 | 
 69 |   http://www.apache.org/licenses/LICENSE-2.0
 70 | 
 71 | Unless required by applicable law or agreed to in writing, software
 72 | distributed under the License is distributed on an "AS IS" BASIS,
 73 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 74 | See the License for the specific language governing permissions and
 75 | limitations under the License.
 76 | 
 77 | Introduction
 78 | =========
 79 | 
 80 | smallfile is a python-based distributed POSIX workload generator 
 81 | which can be used to quickly measure performance for a
 82 | variety of metadata-intensive workloads across an entire
 83 | cluster.  It has no dependencies on any specific filesystem or implementation 
 84 | It was written to complement use of fio and iozone benchmark for measuring performance 
 85 | of large-file workloads, and borrows some concepts from iozone.
 86 | and Ric Wheeler's fs_mark.  It was developed by Ben England starting in March 2009.
 87 | 
 88 | What it can do
 89 | --------
 90 | 
 91 | * multi-host - manages workload generators on multiple hosts
 92 | * containers - can run on sets of docker containers
 93 | * aggregates throughput - for entire set of hosts
 94 | * synchronizes workload generation - can start and stop workload generator threads at approximately same time
 95 | * pure workloads - only one kind of operation in each run (as opposed to mixed workloads)
 96 | * extensible - easy to extend to new workload types
 97 | * scriptable - provides CLI for scripted use, but workload generator is separate so a GUI is possible
 98 | * file size distributions - supports either fixed file size or random exponential file size
 99 | * traces response times - can capture response time data in .csv format, provides utility to reduce this data to statistics
100 | * Windows support - different launching method, see below
101 | * verification of read data -- writes unique data pattern in all files, can verify data read against this pattern
102 | * incompressibility - can write random data pattern that is incompressible
103 | * async replication support - can measure time required for files to appear in a directory tree
104 | * fs coherency test - in multi-host tests, can force all clients to read files written by different client
105 | 
106 | python 2.7, python 3, pypy and pypy3 are supported.   pypy3 can increase throughput by up to 100% where interpreter is the bottleneck -- however at present pypy and pypy3 do not support pyyaml, at least not in Fedora 31.
107 | 
108 | Restrictions
109 | -----------
110 | 
111 | * for a multi-host test, all workload generators and the test driver must provide access to the same shared directory
112 | * does not support mixed workloads (mixture of different operation types)
113 | * is not accurate on single-threaded tests in memory resident filesystem
114 | * requires all hosts to have the same DNS domain name (plan to remove this
115 |   restriction)
116 | * does not support HTTP access (use COSBench/ssbench for this)
117 | * does not support mixture of Windows and non-Windows clients
118 | * For POSIX-like operating systems, we have only tested with Linux, but there
119 |   is a high probability that it would work with Apple OS and other UNIXes.
120 | * Have only tested Windows XP and Windows 7, but any Win32-compatible Windows would probably work with this.
121 | 
122 | How to specify test
123 | ============
124 | 
125 | You must use a directory visible to all participating hosts to run a
126 | distributed test.
127 | 
128 | You can include multiple hosts in a test in 1 of 2 ways:
129 | * provide password-less ssh access to these hosts from the test driver
130 | * run the launcher daemon on each host (more about this below)
131 | 
132 | This latter method is particularly useful for containers where we may not want to have each container running sshd.  To see more about this, look for the --launch-by-daemon parameter below.
133 | 
134 | To see what parameters are supported by smallfile_cli.py, do 
135 | 
136 |     # python smallfile_cli.py --help
137 | 
138 | Boolean true/false parameters can be set to either Y
139 | (true) or N (false). Every command consists of a sequence of parameter
140 | name-value pairs with the format --name value .  To see what default values are,
141 | use --help option.
142 | 
143 | The parameters are (from most useful to least useful):
144 | 
145 |  * --yaml-input-file -- specify parameters in YAML instead of on command line
146 |  * --operation -- operation type (see list below for choices)
147 |  * --top -- top-level directory, all file accesses are done inside this
148 |   directory tree. If you wish to use multiple mountpoints,provide a list of
149 |   top-level directories separated by comma (no whitespace).
150 |  * --response-times – if Y then save response time for each file operation in a
151 |   rsptimes\*csv file in the shared network directory. Record format is
152 |   operation-type, start-time, response-time. The operation type is included so
153 |   that you can run different workloads at the same time and easily merge the
154 |   data from these runs. The start-time field is the time that the file
155 |   operation started, down to microsecond resolution. The response time field is
156 |   the file operation duration down to microsecond resolution.
157 |  * --output-json - if specified then write results in JSON format to the specified pathname for easier postprocessing.
158 |  * --host-set -- comma-separated set of hosts used for this test, or file containing list of hosts
159 |   names allowed. Default: non-distributed test.
160 |  * --launch-by-daemon - if specified, then ssh will not be used to launch test, see section titled "launching remote worker threads"
161 |  * --files -- how many files should each thread process? 
162 |  * --threads -- how many workload generator threads should each smallfile_cli.py process create? 
163 |  * --auto-pause -- if Y then smallfile will auto-adjust the pause time between files
164 |  * --file-size -- total amount of data accessed per file.   If zero then no
165 |   reads or writes are performed. 
166 |  * --file-size-distribution – only supported value today is exponential.
167 |  * --record-size -- record size in KB, how much data is transferred in a single
168 |   read or write system call.  If 0 then it is set to the minimum of the file
169 |   size and 1-MiB record size limit.
170 |  * --files-per-dir -- maximum number of files contained in any one directory.
171 |  * --dirs-per-dir -- maximum number of subdirectories contained in any one
172 |   directory.
173 |  * --fsync -- if Y then an fsync() call is inserted before closing a created/modified/appended file.
174 |  * --hash-into-dirs – if Y then assign next file to a directory using a hash
175 |   function, otherwise assign next –files-per-dir files to next directory.
176 |  * --same-dir -- if Y then threads will share a single directory.
177 |  * --network-sync-dir – don't need to specify unless you run a multi-host test
178 |   and the –top parameter points to a non-shared directory (see discussion
179 |   below). Default: network_shared subdirectory under –top dir.
180 |  * --permute-host-dirs – if Y then have each host process a different
181 |   subdirectory tree than it otherwise would (see below for directory tree
182 |   structure).
183 |  * --xattr-size -- size of extended attribute value in bytes (names begin with
184 |   'user.smallfile-') 
185 |  * --xattr-count -- number of extended attributes per file
186 |  * --cleanup-delay-usec-per-file -- insert a delay after "cleanup" 
187 |  * --prefix -- a string prefix to prepend to files (so they don't collide with
188 | previous runs for example)
189 |  * --suffix -- a string suffix to append to files (so they don't collide with
190 |   previous runs for example)
191 |  * --incompressible – if Y then generate a pure-random file that
192 |   will not be compressible (useful for tests where intermediate network or file
193 |   copy utility attempts to compress data
194 |  * --record-ctime-size -- if Y then label each created file with an
195 |   xattr containing a time of creation and a file size. This will be used by
196 |   –await-create operation to compute performance of asynchonous file
197 |   replication/copy.
198 |  * --finish -- if Y, thread will complete all requested file operations even if
199 |   measurement has finished.
200 |  * --stonewall -- if Y then thread will measure throughput as soon as it detects
201 |   that another thread has finished.
202 |  * --verify-read – if Y then smallfile will verify read data is correct.
203 |  * --remote-pgm-dir – don't need to specify this unless the smallfile software
204 |   lives in a different directory on the target hosts and the test-driver host. 
205 |  * --pause -- integer (microseconds) each thread will wait before starting next
206 |   file.
207 | 
208 | Operation types are:
209 | 
210 | * create -- create a file and write data to it
211 | * append -- open an existing file and append data to it 
212 | * delete -- delete a file 
213 | * rename -- rename a file 
214 | * delete_renamed -- delete a file that had previously been renamed
215 | * read -- read an existing file 
216 | * stat -- just read metadata from an existing file 
217 | * chmod -- change protection mask for file
218 | * setxattr -- set extended attribute values in each file 
219 | * getxattr -- read extended attribute values in each file 
220 | * symlink -- create a symlink pointing to each file (create must be run
221 | beforehand) 
222 | * mkdir -- create a subdirectory with 1 file in it 
223 | * rmdir -- remove a subdirectory and its 1 file
224 | * readdir -- scan directories only, don't read files or their metadata
225 | * ls-l -- scan directories and read basic file metadata
226 | * cleanup -- delete any pre-existing files from a previous run 
227 | * swift-put -- simulates OpenStack Swift behavior when doing PUT operation
228 | * swift-get -- simulates OpenStack Swift behavior for each GET operation.
229 | * overwrite -- overwrite existing files.
230 | * truncate-overwrite -- truncate existing file and then write data to it.
231 | 
232 | For example, if you want to run smallfile_cli.py on 1 host with 8 threads
233 | each creating 2 GB of 1-MiB files, you can use these options:
234 | 
235 |     # python smallfile_cli.py --operation create --threads 8 \  
236 |        --file-size 1024 --files 2048 --top /mnt/gfs/smf
237 | 
238 | To run a 4-host test doing same thing:
239 | 
240 |     # python smallfile_cli.py --operation create --threads 8 \  
241 |        --file-size 1024 --files 2048 --top /mnt/gfs/smf \  
242 |        --host-set host1,host2,host3,host4 
243 | 
244 | Note: You can only perform a read operation on files that were generated with smallfile (using same parameters).
245 | 
246 | Errors encountered by worker threads will be saved in /var/tmp/invoke-N.log where N is the thread number. After each test, a summary of thread results is displayed, and overall test results are aggregated for you, in three ways:
247 | 
248 |  * files/sec – most relevant for smaller file sizes
249 |  * IOPS -- application I/O operations per sec, rate of read()/write()
250 |  * MB/s -- megabytes/sec (really MiB/sec), data transfer rate
251 | 
252 | Users should never need to run smallfile.py -- this is the python class which
253 | implements the workload generator. Developers can run this module to invoke its
254 | unit test however:
255 | 
256 |     # python smallfile.py 
257 | 
258 | To run just one unit test module, for example:
259 | 
260 |     # python -m unittest smallfile.Test.test_c3_Symlink
261 | 
262 | How to specify parameters in YAML
263 | =============
264 | 
265 | Sometimes it's more convenient to specify inputs in a YAML file when using a CI system such as Jenkins.  Smallfile has a flat YAML file format where the parameter name in yaml is the same as on the CLI except that the leading "--" is removed and a colon is appended to the parameter name.  For example:
266 | ```
267 | top: /mnt/xfs1/smf
268 | host-set: host1,host2
269 | ```
270 | 
271 | 
272 | Results
273 | =======
274 | 
275 | All tests display a "files/sec" result.  If the test performs reads or writes,
276 | then a "MB/sec" data transfer rate and an "IOPS" result (i.e. total read or
277 | write calls/sec) are also displayed.  Each thread participating in the test
278 | keeps track of total number of files and I/O requests that it processes during
279 | the test measurement interval.  These results are rolled up per host if it is a
280 | single-host test.  For a multi-host test, the per-thread results for each host
281 | are saved in a file within the --top directory, and the test master then reads
282 | in all of the saved results from its slaves to compute the aggregate result
283 | across all client hosts.  The percentage of requested files which were
284 | processed in the measurement interval is also displayed, and if the number is
285 | lower than a threshold (default 70%) then an error is raised.
286 | 
287 | Postprocessing of response time data
288 | --------
289 | 
290 | If you specify **--response-times Y** in the command, smallfile will save response time of each operation in per-thread output files in the shared directory as rsptimes\*.csv.   For example, you can turn these into an X-Y scatterplot so that you can see how response time varies over time.   For example:
291 | 
292 |     # python smallfile_cli.py --response-times Y
293 |     # ls -ltr /var/tmp/smf/network_shared/rsptimes*.csv
294 | 
295 | You should see 1 .csv file per thread.  These files can be loaded into any
296 | spreadsheet application and graphed.  An x-y scatterplot can be useful to see
297 | changes over time in response time.
298 | 
299 | But if you just want statistics, you can generate these using the postprocessing command:
300 | 
301 |     # python smallfile_rsptimes_stats.py /var/tmp/smf/network_shared
302 | 
303 | This will generate statistics summary in ../rsptimes-summary.csv , in this example you would find it in /var/tmp/smf/.  The file is in a form suitable for loading into a spreadsheet and graphing.  A simple example is generated using the regression test **gen-fake-rsptimes.sh** .  The result of this test is output like this:
304 | 
305 | ```
306 | filtering out suffix .foo.com from hostnames
307 | rsp. time result summary at: /tmp/12573.tmp/../rsptime-summary.csv
308 | ```
309 | The first line illustrates that you can remove a common hostname suffix in the output so that it is easier to read and graph.  In this test we pass the optional parameter **--common-hostname-suffix foo.com** to smallfile_rsptimes_stats.py.  The inputs to smallfile_rsptimes_stats.py are contained in ```/tmp/12573.tmp/``` and the output looks like this:
310 | ```
311 | 
312 | $ more /tmp/12573.tmp/../rsptime-summary.csv
313 | host:thread, samples, min, max, mean, %dev, 50 %ile, 90 %ile, 95 %ile, 99 %ile, 
314 | all:all,320, 1.000000, 40.000000, 20.500000, 56.397441, 20.500000, 36.100000, 38.050000, 40.000000, 
315 | 
316 | host-21:all,160, 1.000000, 40.000000, 20.500000, 56.486046, 20.500000, 36.100000, 38.050000, 40.000000, 
317 | host-22:all,160, 1.000000, 40.000000, 20.500000, 56.486046, 20.500000, 36.100000, 38.050000, 40.000000, 
318 | 
319 | host-21:01,40, 1.000000, 40.000000, 20.500000, 57.026595, 20.500000, 36.100000, 38.050000, 39.610000, 
320 | host-21:02,40, 1.000000, 40.000000, 20.500000, 57.026595, 20.500000, 36.100000, 38.050000, 39.610000, 
321 | host-21:03,40, 1.000000, 40.000000, 20.500000, 57.026595, 20.500000, 36.100000, 38.050000, 39.610000, 
322 | host-21:04,40, 1.000000, 40.000000, 20.500000, 57.026595, 20.500000, 36.100000, 38.050000, 39.610000, 
323 | host-22:01,40, 1.000000, 40.000000, 20.500000, 57.026595, 20.500000, 36.100000, 38.050000, 39.610000, 
324 | host-22:02,40, 1.000000, 40.000000, 20.500000, 57.026595, 20.500000, 36.100000, 38.050000, 39.610000, 
325 | host-22:03,40, 1.000000, 40.000000, 20.500000, 57.026595, 20.500000, 36.100000, 38.050000, 39.610000, 
326 | host-22:04,40, 1.000000, 40.000000, 20.500000, 57.026595, 20.500000, 36.100000, 38.050000, 39.610000, 
327 | ```
328 | * record 1 - contains headers for each column
329 | * record 2 - contains aggregate response time statistics for the entire distributed system, if it consists of more than 1 host
330 | * record 4-5 - contains per-host aggregate statistics
331 | * record 7-end - contains per-thread stats, sorted by host then thread
332 | 
333 | You'll notice that even though all the threads have the same simulated response times, the 99th percentile values for each thread are different than the aggregate stats per host or for the entire test!  How can this be?  Percentiles are computed using the [numpy.percentiles](https://docs.scipy.org/doc/numpy/reference/generated/numpy.percentile.html) function, which linearly interpolates to obtain percentile values.  In the aggregate stats, the 99th percentile is linearly interpolated between two samples of 40 seconds, whereas in the per-thread results the 99th percentile is interpolated between samples of 40 and 39 seconds.  
334 | 
335 | 
336 | How to run correctly
337 | =============
338 | 
339 | Here are some things you need to know in order to get valid results - it is not
340 | enough to just specify the workload that you want.
341 | 
342 | Avoiding caching effects
343 | ==========
344 | 
345 | THere are two types of caching effects that we wish to avoid, data caching and
346 | metadata caching.  If the average object size is sufficiently large, we need
347 | only be concerned about data caching effects.  In order to avoid data caching
348 | effects during a large-object read test, the Linux buffer cache on all servers
349 | must be cleared. In part this is done using the command: "echo 1 > /proc/sys/vm/drop_caches" on all hosts.  However, some filesystems such as
350 | Gluster have their own internal caches - in that case you might even need to
351 | remount the filesystem or even restart the storage pool/volume.
352 | 
353 | Use of pause and auto pause options
354 | ==========
355 | 
356 | Normally, smallfile stops the throughput measurement for the test as soon as
357 | the first thread finishes processing all its files.  In some filesystems, the first thread that starts running will be operating at much higher speed (example: NFS writes) and can easily finish before other threads have a chance to get started.  This immediately invalidates the test.  To make this less likely, it is possible to insert a per-file delay into each
358 | thread with the **--pause** option so that the other threads have a chance to
359 | participate in the test during the measurement interval.    It is preferable to
360 | run a longer test instead, because in some cases you might otherwise restrict
361 | throughput unintentionally.  But if you know that your throughput upper bound
362 | is X files/sec and you have N threads running, then your per-thread throughput
363 | should be no more than N/X, so a reasonable pause would be something like 3X/N
364 | microseconds.  For  example, if you know that you cannot do better than 100000
365 | files/sec and you have 20 threads running,try a 60/100000 = 600 microsecond
366 | pause.  Verify that this isn't affecting throughput by reducing the pause and
367 | running a longer test.
368 | 
369 | However, this pause parameter is hard to use and requires you to run tests before you set it.
370 | To get all threads to run at a speed closer to each other, the auto-pause parameter has been added.
371 | This parameter is a boolean defaulting to False for now, so the same test doesn't start to give different results unexpectedly.
372 | If set to True, then smallfile will continually adjust the time between files based on the response time it measures during the run (for that thread).
373 | It does this by maintaining a record of the last N response times, taking the average, and then computing a pause time from that.
374 | Why should this work?   If we think of the cluster as a black box, all the smallfile filesystem calls have to pass through that black box
375 | and this means that threads exert backpressure on each other indirectly through the response time that they experience.  We want to keep the pause time low enough that the system stays busy, but not so low that one thread can finish before another one even gets started.  One problem with this approach is client-side caching, which can decouple response times of the threads on different hosts.   However, it is usually possible to drop cache on all hosts to prevent client-side caching.
376 | 
377 | Clearly the pause and auto-pause parameters are mutually exclusive - you only use 1 of the 2.
378 | 
379 | Use of cleanup-delay-usec-per-file option
380 | =========================================
381 | Some distributed filesystems do not actually recycle file space at the moment you delete the file. 
382 | They may wait some time and then do it asynchronously to enable the application to proceed more quickly.
383 | This can cause subsequent test performance to compete with the space-recycling activity, resulting in
384 | variable results.   The "cleanup-delay-usec-per-file" option gives you a way to work around this problem.
385 | If you set it to non-zero, then during the "cleanup" operation (and only this one), 
386 | a time delay will be computed by multiplying the number of files processed by this parameter, and 
387 | smallfile will sleep for this time duration before proceeding to subsequent operations.
388 | You can take advantage of this by structuring your tests so that each sample operation sequence,
389 | such as create,read,rename,delete-renamed , is followed by a "cleanup" op.   You can then cause smallfile to 
390 | pause for a while after each sample, before the next sample begins.
391 | 
392 | Use with distributed filesystems
393 | ---------
394 | 
395 | With distributed filesystems, it is necessary to have multiple hosts
396 | simultaneously applying workload to measure the performance of a distributed
397 | filesystem. The –host-set parameter lets you specify a comma-separated list of
398 | hosts to use, or you can just specify a filename containing a list of hosts, 1 host per record.  
399 | The latter is certainly the more convenient option for large clusters.
400 | 
401 | For any distributed filesystem test, there must be a single directory which is
402 | shared across all hosts, both test driver and worker hosts, that can be used to
403 | pass test parameters, pass back results, and coordinate activity across the
404 | hosts. This is referred to below as the “shared directory” in what follows. By
405 | default this is the network_shared/ subdirectory of the –top directory, but you
406 | can override this default by specifying the –network-sync-dir directory
407 | parameter, see the next section for why this is useful.
408 | 
409 | Some distributed filesystems, such as NFS, have relaxed,
410 | eventual-consistency caching of directories; this will cause problems for the
411 | smallfile benchmark. To work around this problem, you can use a separate NFS
412 | mountpoint exported from a Linux NFS server, mounted with the option actimeo=1
413 | (to limit duration of time NFS will cache directory entries and metadata). You
414 | then reference this mountpoint using the –network-sync-dir option of smallfile.
415 | For example:
416 | 
417 | ```
418 | # mount -t nfs -o actimeo=1 your-linux-server:/your/nfs/export /mnt/nfs
419 | # ./smallfile_cli.py –top /your/distributed/filesystem \
420 |     –network-sync-dir /mnt/nfs/smf-shared
421 | ```
422 | 
423 | For non-Windows tests, the user must set up password-less ssh between the test
424 | driver and the host. If security is an issue, a non-root username can be used
425 | throughout, since smallfile requires no special privileges. Edit the
426 | $HOME/.ssh/authorized_keys file to contain the public key of the account on the
427 | test driver. The test driver will bypass the .ssh/known_hosts file by using -o
428 | StrictHostKeyChecking=no option in the ssh command.
429 | 
430 | For Windows tests, each worker host must be running the launch_smf_host.py
431 | program that polls the shared network directory for a file that contains the
432 | command to launch smallfile_remote.py in the same way that would happen with
433 | ssh on non-Windows tests. The command-line parameters on each Windows host
434 | would be something like this:
435 | 
436 |     start python launch_smf_host.py –shared z:\smf\network_shared –as-host %hostname%
437 | 
438 | Then from the test driver, you could run specifying your hosts:
439 | 
440 |     python smallfile_cli.py –top z:\smf –host-set gprfc023,gprfc024
441 | 
442 | The dreaded startup timeout error
443 | ============
444 | 
445 | If you get the error "Exception: starting signal not seen within 11 seconds" when running a distributed test with a lot of subdirectories, the problem may be caused by insufficient time for the worker threads to get ready to run the test.   In some cases, this was caused by a flaw in smallfile's timeout calculation (which we believe is fixed).  However, before smallfile actually starts a test, each worker thread must prepare a directory tree to hold the files that will be used in the test.   This ensures that we are not measuring directory creation overhead when running a file create test, for example.  For some filesystems, directory creation can be more expensive at scale.  We take this into account with the --min-dirs-per-sec parameter, which defaults to a value more appropriate for local filesystems.   If we are doing a large distributed filesystem test, it may be necessary to lower this parameter somewhat, based on the filesystem's performance, which you can measure using --operation mkdir, and then use a value of about half what you see there.  This will result in a larger timeout value, which you can obtain using "--output-json your-test.json" -- look for the 'startup-timeout' and 'host-timeout' parameters in this file to see what timeout is being calculated.
446 | 
447 | 
448 | Use with local filesystems
449 | -----------
450 | 
451 | There are cases where you want to use a distributed filesystem test on
452 | host-local filesystems. One such example is virtualization, where the “local”
453 | filesystem is really layered on a virtual disk image which may be stored in a
454 | network filesystem. The benchmark needs to share certain files across hosts to
455 | return results and synchronize threads. In such a case, you specify the
456 | –network-sync-dir directory-pathname parameter to have the benchmark use a
457 | directory in some shared filesystem external to the test directory (specified
458 | with –top parameter). By default, if this parameter is not specified then the
459 | shared directory will be the subdirectory network-dir underneath the directory
460 | specified with the –top parameter.
461 | 
462 | Use of subdirectories
463 | ----------
464 | 
465 | Before a test even starts, the smallfile benchmark ensures that the
466 | directories needed by that test already exist (there is a specific operation
467 | type for testing performance of subdirectory creation and deletion). If the top
468 | directory (specified by –top parameter) is D, then the top per-thread directory
469 | is D/host/dTT where TT is a 2-digit thread number and “host” is the hostname.
470 | If the test is not a distributed test, then it's just whatever host the
471 | benchmark command was issued on, otherwise it is each of the hosts specified by
472 | the –host-set parameter. The first F files (where F is the value of the
473 | –files-per-dir) parameter are placed in this top per-thread directory. If the
474 | test uses more than F files/thread, then at least one subdirectory from the
475 | first level of subdirectories must be used; these subdirectories have the path
476 | T/host/dTT/dNNN where NNN is the subdirectory number. Suppose the value of the
477 | parameter –subdirs-per-dir is D. Then there are at most D subdirectories of the
478 | top per-thread directory. If the test requires more than D(F+1) files per
479 | thread, then a second level of subdirectories will have to be created, with
480 | pathnames like T/host/dTT/dNNN/dMMM . This process of adding subdirectories
481 | continues in this fashion until there are sufficient subdirectories to hold all
482 | the files. The purpose of this approach is to simulate a mixture of directories
483 | and files, and to not require the user to specify how many levels of
484 | directories are required.
485 | 
486 | The use of multiple mountpoints is supported. This features is useful for
487 | testing NFS, etc.
488 | 
489 | Note that the test harness does not have to scan the directories to figure out
490 | which files to read or write – it simply generates the filename sequence
491 | itself. If you want to test directory scanning speed, use readdir or ls-l
492 | operations. 
493 | 
494 | Sharing directories across threads
495 | ---------
496 | 
497 | Some applications require that many threads, possibly spread across many host
498 | machines, need to share a set of directories. The --same-dir parameter makes it
499 | possible for the benchmark to test this situation. By default this parameter is
500 | set to N, which means each thread has its own non-overlapping directory tree.
501 | This setting provides the best performance and scalability. However, if the
502 | user sets this parameter to Y, then the top per-thread directory for all
503 | threads will be T instead of T/host/dTT as described in preceding section.
504 | 
505 | Hashing files into directory tree
506 | ----------
507 | 
508 | For applications which create very large numbers of small files (millions for
509 | example), it is impossible or at the very least impractical to place them all
510 | in the same directory, whether or not the filesystem supports so many files in
511 | a single directory. There are two ways which applications can use to solve this
512 | problem:
513 | 
514 |  * insert files into 1 directory at a time – can create I/O and lock contention for the directory metadata
515 |  * insert files into many directories at the same time – relieves I/O and lock contention for directory metadata, but increases the amount of metadata caching needed to avoid cache misses
516 | 
517 | The –hash-into-dirs parameter is intended to enable simulation of this latter
518 | mode of operation. By default, the value of this parameter is N, and in this
519 | case a smallfile thread will sequentially access directories one at a time. In
520 | other words, the first D (where D = value of –files-per-dir parameter) files
521 | will be assigned to the top per-thread directory, then the next D files will be
522 | assigned to the next per-thread directory, and so on. However, if the
523 | –hash-into-dirs parameter is set to Y, then the number of the file being
524 | accessed by the thread will be hashed into the set of directories that are
525 | being used by this thread. 
526 | 
527 | Random file size distribution option
528 | -------------
529 | 
530 | In real life, users don't create files that all have the same size. Typically
531 | there is a file size distribution with a majority of small files and a lesser
532 | number of larger files. This benchmark supports use of the random exponential
533 | distribution to approximate that behavior. If you specify
534 | 
535 |      --file-size-distribution exponential --file-size S
536 | 
537 | The meaning of the –file-size parameter changes to the maximum file size (S
538 | KB), and the mean file size becomes S/8. All file sizes are rounded down to the
539 | nearest kilobyte boundary, and the smallest allowed file size is 1 KB. When
540 | this option is used, the smallfile benchmark saves the seed for each thread's
541 | random number generator object in a .seed file stored in the TMPDIR directory
542 | (typically /var/tmp). This allows the file reader to recreate the sequence of
543 | random numbers used by the file writer to generate file sizes, so that the
544 | reader knows exactly how big each file should be without asking the file system
545 | for this information. The append operation works in the same way. All other
546 | operations are metadata operations and do not require that the file size be
547 | known in advance.
548 | 
549 | 
550 | Asynchronous file copy performance
551 | ---------
552 | 
553 | When we want to measure performance of an asynchronous file copy (example:
554 | Gluster geo-replication), we can use smallfile to create the original directory
555 | tree, but then we can use the new await-create operation type to wait for files
556 | to appear at the file copy destination. To do this, we need to specify a
557 | separate network sync directory. So for example, to create the original
558 | directory tree, we could use a command like:
559 | 
560 |     # ./smallfile_cli.py --top /mnt/glusterfs-master/smf \  
561 |         --threads 16 --files 2000 --file-size 1024 \  
562 |         --operation create –incompressible Y --record-ctime-size Y
563 | 
564 | Suppose that this mountpoint is connected to a Gluster “master” volume which is
565 | being geo-replicated to a “slave” volume in a remote site asynchronously. We
566 | can measure the performance of this process using a command like this, where
567 | /mnt/glusterfs-slave is a read-only mountpoint accessing the slave volume.
568 | 
569 |     # ./smallfile_cli.py --top /mnt/glusterfs-slave/smf \  
570 |          --threads 16 --files 2000 --file-size 1024 \  
571 |          --operation await-create –incompressible Y \  
572 |          --network-sync-dir /tmp/other
573 | 
574 | Requirements:
575 | 
576 | * The parameters controlling file sizes, directory tree, and number of files must match in the two commands.
577 | * The --incompressible option must be set if you want to avoid situation where async copy software can compress data to exceed network bandwidth.
578 | * The first command must use the –record-ctime-size Y option so that the await-create operation knows when the original file was created and how big it was.
579 | 
580 | How does this work? The first command records information in a user-defined xattr for each file so that the second command, the await-create operation can calculate time required to copy the file, which is recorded as a “response time”, and so that it knows that the entire file reached the destination.
581 | 
582 | Comparable Benchmarks
583 | ==============
584 | 
585 | There are many existing performance test benchmarks. I have tried just about
586 | all the ones that I've heard of. Here are the ones I have looked at, I'm sure
587 | there are many more that I failed to include here.
588 | 
589 | * Bonnie++ -- works well for a single host, but you cannot generate load from multiple hosts because the benchmark will not synchronize its activities, so different phases of the benchmark will be running at the same time, whether you want them to or not.
590 | 
591 | * iozone -- this is a great tool for large-file testing, but it can only do 1 file/thread in its current form.
592 | 
593 | * postmark -- works fine for a single client, not as useful for multi-client tests
594 | 
595 | * grinder -- has not to date been useful for filesystem testing, though it works well for web services testing.
596 | 
597 | * JMeter – has been used successfully by others in the past.
598 | 
599 | * fs_mark -- Ric Wheeler's filesystem benchmark, is very good at creating files
600 | 
601 | * fio -- Linux test tool -- broader coverage of Linux system calls particularly around async. and direct I/O.  Now has multi-host capabilities
602 | 
603 | * diskperf – open-source tool that generates limited small-file workloads for a single host.
604 | 
605 | * dbench – developed by samba team
606 | 
607 | * SPECsfs – not open-source, but "netmist" component has some mixed-workload, multi-host workload generation capabilities, configured similarly to iozone, but with a wider range of workloads.
608 | 
609 | Design principles
610 | =============
611 | 
612 | A cluster-aware test tool ideally should:
613 | 
614 | * start threads on all hosts at same time
615 | * stop measurement of throughput for all threads at the same time
616 | * be easy to use in all file system environments
617 | * be highly portable and be trivial to install
618 | * have very low overhead
619 | * not require threads to synchronize (be embarrassingly parallel) 
620 | 
621 | Although there may be some useful tests that involve thread synchronization or contention, we don't want the tool to force thread synchronization or contention for resources. 
622 | 
623 | In order to run prolonged small-file tests (which is a requirement for scalability to very large clusters), each thread has to be able to use more than one directory.   Since some filesystems perform very differently as the files/directory ratio increases, and most applications and users do not rely on having huge file/directory ratios, this is also important for testing the filesystem with a realistic use case.  This benchmark does something similar to Ric Wheeler's fs_mark benchmark with multiple directory levels.   This benchmark imposes no hard limit on how many directories can be used and how deep the directory tree can go.  Instead, it creates directories according to these constraints:
624 | 
625 | * files (and directories) are placed as close to the root of the directory hierarchy as possible
626 | * no directory contains more than the number of files specified in the --files-per-dir test parameter
627 | * no directory contains more than number of subdirectories specified in the --dirs-per-dir test parameter
628 | 
629 | 
630 | Synchronization
631 | --------------
632 | 
633 | For non-kubernetes environments, 
634 | a single directory is used to synchronize the threads and hosts. This may seem
635 | problematic, but we assume here that the file system is not very busy when the
636 | test is run (otherwise why would you run a load test on it?). So if a file is
637 | created by one thread, it will quickly be visible on the others, as long as the
638 | filesystem is not heavily loaded.
639 | 
640 | If it's a single-host test, any directory is sharable amongst threads, but in a
641 | multi-host test only a directory shared by all participating hosts can be used.
642 | If the –top test directory is in a network-accessible file system (could be NFS
643 | or Gluster for example), then the synchronization directory is by default in
644 | the network_shared subdirectory by default and need not be specified. If the
645 | –top directory is in a host-local filesystem, then the –network-sync-dir option
646 | must be used to specify the synchronization directory. When a network directory
647 | is used, change propagation between hosts cannot be assumed to occur in under
648 | two seconds.
649 | 
650 | We use the concept of a "starting gate" -- each thread does all preparation for
651 | test, then waits for a special file, the "starting gate", to appear in the
652 | shared area. When a thread arrives at the starting gate, it announces its
653 | arrival by creating a filename with the host and thread ID embedded in it. When
654 | all threads have arrived, the controlling process will see all the expected
655 | "thread ready" files, and will then create the starting gate file. When the
656 | starting gate is seen, the thread pauses for a couple of seconds, then
657 | commences generating workload. This initial pause reduces time required for all
658 | threads to see the starting gate, thereby minimizing chance of some threads
659 | being unable to start on time. Synchronous thread startup reduces the "warmup
660 | time" of the system significantly.
661 | 
662 | We also need a checkered flag (borrowing from car racing metaphor). Once test
663 | starts, each thread looks for a stonewall file in the synchronization
664 | directory. If this file exists, then the thread stops measuring throughput at
665 | this time (but can (and does by default) optionally continue to perform
666 | requested number of operations). Consequently throughput measurements for each
667 | thread may be added to obtain an accurate aggregate throughput number. This
668 | practice is sometimes called "stonewalling" in the performance testing world.
669 | 
670 | Synchronization operations in theory do not require the worker threads to read
671 | the synchronization directory. For distributed tests, the test driver host has
672 | to check whether the various per-host synchronization files exist, but this
673 | does not require a readdir operation. The test driver does this check in such a
674 | way that the number of file lookups is only slightly more than the number of
675 | hosts, and this does not require reading the entire directory, only doing a set
676 | of lookup operations on individual files, so it's O(n) scalable as well.
677 | 
678 | The bad news is that some filesystems do not synchronize directories quickly
679 | without an explicit readdir() operation, so we are at present doing
680 | os.listdir() as a workaround -- this may have to be revisited for very large
681 | tests.
682 | 
683 | 
684 | Test parameter transmission
685 | --------
686 | 
687 | The results of the command line parse are saved in a smf_test_params object and
688 | stored in a python pickle file, which is a representation independent of CPU
689 | architecture or operating system. The file is placed in the shared network
690 | directory. Remote worker processes are invoked via the smallfile_remote.py
691 | command and read this file to discover test parameters.
692 | 
693 | Launching remote worker threads
694 | ----------
695 | 
696 | With Kubernetes, smallfile relies on Kubernetes to launch remote "pods" that each contain a smallfile_cli.py process.   In the case of the benchmark-operator
697 | implementation, the redis key-value store is used to synchronize these pods so that all pods start running smallfile_cli.py at the same time.
698 | 
699 | For multi-host non-Windows non-Kubernetes environments, the test driver launches worker threads using parallel ssh commands to invoke the smallfile_remote.py program, and when this program exits, that is how the test driver discovers that the remote threads on this host have completed.  This works both for bare metal hosts and for virtual machines.
700 | 
701 | For Windows environments, ssh usage is more problematic. In Windows, ssh daemon "sshd" requires installation of cygwin, a Windows app that emulates a Linux-like environment, but we really want to test with native win32 environment instead. For containers, sshd is not typically available as a way to get inside the container.  So a different launching method is used (and this method works on non-Windows environments as well). 
702 | 
703 | First you start launch_smf_host.py in each workload generator.    You must specify --top parameter for each remote host or container.  
704 | 
705 | For Windows workload generators, if you are running smallfile_cli.py from a non-Windows host you may need --substitute-top parameter followed by the Windows path to the top directory, usually not the same as in Linux/Unix.  For example:
706 | 
707 |     % start python launch_smf_host.py --top /mnt/cifs/smf --substitute-top z:\smf
708 | 
709 | For containers, You must specify each daemon's unique ID in the command line - for example, if this is running in a container, then the hostname may not be unique.  This unique ID will be used by the launch_smf_host.py daemon in the container to search for requests from the test driver to run a test.  For example:
710 | 
711 |     # ./launch_smf_host.py --top /mnt/sharedfs/smf --host-set container_2
712 | 
713 | Next, you run smallfile_cli.py with "--launch-by-daemon Y" option and pass --host-set followed by a list of the Daemon IDs that you want to participate in the test.  For example:
714 | 
715 |     # ./smallfile_cli.py --launch-by-daemon Y --host-set container_1,container_2
716 | 
717 | This second step will result in a set of files being created in the shared network directory, 1 per daemon, that provide the daemon with the test parameters that it is to use.  The existence of this file will tell the daemon to start a test.  Everything else works the same as with ssh method.
718 | 
719 | Returning results
720 | -----------------
721 | 
722 | For either single-host or multi-host tests, each test thread is implemented as
723 | a smf_invocation object and all thread state is kept there.  Results are
724 | returned by using python "pickle" files to serialize the state of these
725 | per-thread objects containing details of each thread's progress during the
726 | test.  The pickle files are stored in the shared synchronization directory.
727 | 
728 | smallfile_cli.py has the option to output all results in a JSON format for easy parsing.  In the case of benchmark-operator, this data is pushed to Elasticsearch as "documents" which can then be viewed or visualized with Kibana or Grafana, for example.
729 | 


--------------------------------------------------------------------------------
/drop_buffer_cache.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import ctypes
 3 | import ctypes.util
 4 | import os
 5 | import sys
 6 | 
 7 | 
 8 | class DropBufferCacheException(Exception):
 9 |     pass
10 | 
11 | 
12 | # Drop 'buffer' cache for the given range of the given file.
13 | 
14 | POSIX_FADV_DONTNEED = 4
15 | OK = 0
16 | 
17 | 
18 | # this function is used if we can't load the real libc function
19 | 
20 | 
21 | def noop_libc_function(*args):
22 |     return 0
23 | 
24 | 
25 | # I have no idea what this code really does, but strace says it works.
26 | # does this code work under Cygwin?
27 | 
28 | 
29 | def load_libc_function(func_name):
30 |     func = noop_libc_function
31 |     try:
32 |         libc = ctypes.CDLL(ctypes.util.find_library("c"))
33 |         func = getattr(libc, func_name)
34 |     except AttributeError:
35 |         # print("Unable to locate %s in libc.  Leaving as a no-op."% func_name)
36 |         pass
37 |     except Exception:
38 |         # libc not available
39 |         pass
40 |     return func
41 | 
42 | 
43 | # do this at module load time
44 | 
45 | _posix_fadvise = load_libc_function("posix_fadvise64")
46 | 
47 | 
48 | def drop_buffer_cache(fd, offset, length):
49 |     ret = _posix_fadvise(
50 |         fd, ctypes.c_uint64(offset), ctypes.c_uint64(length), POSIX_FADV_DONTNEED
51 |     )
52 |     if ret != OK:
53 |         raise DropBufferCacheException(
54 |             "posix_fadvise64(%s, %s, %s, 4) -> %s" % (fd, offset, length, ret)
55 |         )
56 | 
57 | 
58 | # unit test
59 | 
60 | if __name__ == "__main__":
61 |     fd = os.open("/tmp/foo", os.O_WRONLY | os.O_CREAT)
62 |     if sys.version.startswith("3"):
63 |         ret = os.write(fd, bytes("hi there", "UTF-8"))
64 |     elif sys.version.startswith("2"):
65 |         ret = os.write(fd, "hi there")
66 |     else:
67 |         raise DropBufferCacheException("unrecognized python version %s" % sys.version)
68 |     assert ret == 8
69 |     drop_buffer_cache(fd, 0, 8)
70 |     os.close(fd)
71 | 


--------------------------------------------------------------------------------
/examples/docker/README.md:
--------------------------------------------------------------------------------
 1 | To run this example, first build a container image using:
 2 | 
 3 |     # docker build smallfile
 4 | 
 5 | Tag it however you want.  This is the image consumed by the run script.  To run the containers, 
 6 | just edit the parameters at the top of **run-smallfile-client-tests.sh** if needed and then:
 7 | 
 8 |     # ./run-smallfile-client-tests <your-smallfile-dir> <your-image>
 9 | 
10 | This should put all its output in the **logs** subdir.  
11 | 
12 | You can set the **KEEP__OLD__CONTAINERS** environment variables to re-use existing containers,
13 | and you can set the **LEAVE__CONTAINERS__RUNNING** environment variable to leave the containers 
14 | running when the script exits so that you can run your own smallfile commands or debug any problems with the containers.
15 | 


--------------------------------------------------------------------------------
/examples/docker/run-smallfile-client-tests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # this script demonstrates how to run smallfile in docker containers
 3 | # and drive load from all of those containers 
 4 | # the same ideas should work for Kubernetes (or OpenShift) pods
 5 | 
 6 | # parameter 1 is directory where smallfile lives on the test driver host
 7 | # this is not the same as where it lives in the container (/)
 8 | topdir=$1
 9 | 
10 | # parameter 2 is test directory where smallfile should access files
11 | # this should be same for both container and this script
12 | smallfile_dir=$2
13 | 
14 | # parameter 3 is the docker image name for the container that 
15 | # you built with ./smallfile/Dockerfile
16 | image=$3
17 | 
18 | if [ "$3" = "" ] ; then
19 |   echo 'usage: run-smallfile-docker-test.sh top-dir smallfile-dir image'
20 |   exit 1
21 | fi
22 | 
23 | smallfile_cli=$smallfile_dir/smallfile_cli.py
24 | 
25 | # container counts should be powers of 2
26 | min_containers=1
27 | max_containers=4
28 | 
29 | # workload parameters
30 | total_files=100000
31 | oplist='create read delete cleanup'
32 | 
33 | # you should not have to edit below this line
34 | 
35 | d="sudo docker "
36 | 
37 | function shutdown_containers()
38 | {
39 |   echo "cleaning up any old containers..."
40 |   $d ps
41 |   c=$1
42 |   for n in `seq 1 $c` ; do 
43 |     $d stop --time=1 smf-svr$n
44 |     $d logs smf-svr$n
45 |     $d rm smf-svr$n
46 |   done
47 | }
48 | 
49 | # create filesystem directory shared by containers
50 | 
51 | sudo rm -rf $topdir
52 | mkdir -pv $topdir
53 | 
54 | # create top-level log directory
55 | 
56 | timestamp=`date "+%m-%d-%H-%M-%S" `
57 | logdir=`pwd`/logs/$timestamp
58 | mkdir -pv $logdir
59 | rm -f logs/latest.l
60 | ln -sv $logdir logs/latest.l
61 | cp $0 $logdir/
62 | 
63 | if [ -z "$KEEP_OLD_CONTAINERS" ] ; then
64 |   shutdown_containers $max_containers 
65 | 
66 |   echo "starting up new set of containers"
67 |   rm -fv $logdir/smf-servers.list
68 |   for n in `seq 1 $max_containers` ; do
69 |     cmd="$d run -v $topdir:$topdir:z -e topdir=$topdir -e smf_launch_id="container_$n" -d --name smf-svr$n $image"
70 |     echo "$cmd"
71 |     $cmd
72 |     echo "container_$n" >> $logdir/smf-servers.list
73 |   done
74 | fi
75 | 
76 | sleep 1
77 | 
78 | count=$min_containers
79 | env | grep ETA_
80 | while [ $count -le $max_containers ] ; do 
81 |   (( files_per_thread = $total_files / $count ))
82 |   for op in $oplist ; do
83 |     rundir=$logdir/count.$count.op.$op
84 |     mkdir -pv $rundir
85 |     head -n $count $logdir/smf-servers.list > $rundir/smf-servers.list
86 |     cmd="$smallfile_cli --top $topdir --output-json json.log "
87 |     cmd="$cmd --launch-by-daemon Y --host-set=$rundir/smf-servers.list --remote-pgm-dir /smallfile "
88 |     cmd="$cmd --threads 1 --files $files_per_thread --file-size 4 --operation $op"
89 |     echo "$cmd"
90 |     ( cd $rundir ; nice $cmd 2>&1 | tee run.log ) || break
91 |   done
92 |   if [ $? != 0 ] ; then break ; fi
93 |   (( count = $count * 2 ))
94 | done
95 | 
96 | if [ -z "$LEAVE_RUNNING" ] ; then
97 |   shutdown_containers $max_containers
98 | fi
99 | 


--------------------------------------------------------------------------------
/examples/docker/smallfile/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM docker.io/centos:7
2 | MAINTAINER Ben England <bengland@redhat.com>
3 | RUN yum install -y python git PyYAML
4 | RUN git clone https://github.com/distributed-system-analysis/smallfile
5 | RUN ln -sv /smallfile/smallfile_remote.py /usr/local/bin
6 | COPY launch.sh /
7 | CMD /launch.sh
8 | 


--------------------------------------------------------------------------------
/examples/docker/smallfile/launch.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -x
 2 | # this script is run by a container which should be launched 
 3 | # something like this:
 4 | #
 5 | #  #  d run -v /var/tmp/smfdocker:/var/tmp/smfdocker:z \
 6 | #         -e topdir=/var/tmp/smfdocker \
 7 | #         -e smf_launch_id=container_1 \
 8 | #          bengland/smallfile:20190115
 9 | #
10 | # specifically you have to pass 2 environment variables:
11 | #   topdir - points to container-local directory
12 | #   smf_launch_id - what container name should be
13 | #   the -v volume option just imports a directory from the
14 | #   host with SELinux set up to allow this (:z suffix)
15 | #
16 | launcher=/smallfile/launch_smf_host.py
17 | ls -l $launcher
18 | echo "topdir: $topdir"
19 | echo "container_id: $smf_launch_id"
20 | ls -l $topdir
21 | rpm -q python2
22 | LOGLEVEL_DEBUG=1 /usr/bin/python $launcher --top ${topdir} --as-host ${smf_launch_id}
23 | 


--------------------------------------------------------------------------------
/fallocate.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import ctypes
 3 | import ctypes.util
 4 | import os
 5 | import sys
 6 | 
 7 | # reserve space for the contents of file before you write it
 8 | # (hope to disable preallocation)
 9 | 
10 | OK = 0
11 | NOTOK = 0x01
12 | 
13 | # the mode argument to fallocate is defined in /usr/include/linux/falloc.h
14 | 
15 | FALLOC_FL_KEEP_SIZE = 0x01  # default is extend size
16 | FALLOC_FL_PUNCH_HOLE = 0x02  # de-allocates range
17 | 
18 | 
19 | # this function is used if we can't load the real libc function
20 | 
21 | 
22 | def noop_libc_function(*args):
23 |     return OK
24 | 
25 | 
26 | # I have no idea what this code really does, but strace says it works.
27 | # does this code work under Cygwin?
28 | 
29 | 
30 | def load_libc_function(func_name):
31 |     func = noop_libc_function
32 |     try:
33 |         libc = ctypes.CDLL(ctypes.util.find_library("c"))
34 |         func = getattr(libc, func_name)
35 |     except AttributeError:
36 |         # print("Unable to locate %s in libc.  Leaving as a no-op."% func_name)
37 |         pass
38 |     except Exception:
39 |         pass
40 |     return func
41 | 
42 | 
43 | # do this at module load time
44 | 
45 | _posix_fallocate = load_libc_function("fallocate64")
46 | 
47 | 
48 | # mode is one of FALLOC constants above
49 | 
50 | 
51 | def fallocate(fd, mode, offset, length):
52 |     return _posix_fallocate(fd, mode, ctypes.c_uint64(offset), ctypes.c_uint64(length))
53 | 
54 | 
55 | # unit test
56 | 
57 | if __name__ == "__main__":
58 |     fd = os.open("/tmp/foo", os.O_WRONLY | os.O_CREAT)
59 |     assert fd > 0x02
60 |     ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 8)
61 |     assert ret == OK
62 |     if sys.version.startswith("3"):
63 |         ret = os.write(fd, bytes("hi there", "UTF-8"))
64 |     elif sys.version.startswith("2"):
65 |         ret = os.write(fd, "hi there")
66 |     else:
67 |         print("unrecognized python version %s" % sys.version)
68 |         sys.exit(NOTOK)
69 |     assert ret == 8
70 |     os.close(fd)
71 |     print("SUCCESS")
72 | 


--------------------------------------------------------------------------------
/gen-fake-rsptimes.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # script to generate artificial response time files for regression test
 3 | 
 4 | dir=`dirname $0`
 5 | if [ -z "$dir" ] ; then
 6 | 	dir='.'
 7 | fi
 8 | RSPTIMES_POSTPROCESSOR=$dir/smallfile_rsptimes_stats.py
 9 | rspdir=/tmp/$$.tmp
10 | rm -rf $rspdir
11 | mkdir $rspdir
12 | start=0 
13 | for n in `seq 1 40` ; do 
14 | 	(( start = $start + $n )) 
15 | 	for t in `seq -f "%02g" 1 4` ; do 
16 | 		for h in host-21.foo.com host-22.foo.com ; do 
17 | 			echo some-operation,$start,$n >> \
18 | 			  $rspdir/rsptimes_${t}_${h}_op-name_`date +%s.00`.csv
19 | 		done
20 | 	done
21 | done
22 | $RSPTIMES_POSTPROCESSOR --common-hostname-suffix foo.com $rspdir
23 | 


--------------------------------------------------------------------------------
/invoke_process.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | invoke_process.py
  5 | launch multiple subprocesses running SmallfileWorkload instance
  6 | Copyright 2012 -- Ben England
  7 | Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0
  8 | See Appendix on this page for instructions pertaining to license.
  9 | """
 10 | 
 11 | import multiprocessing
 12 | import os
 13 | import shutil
 14 | import time
 15 | 
 16 | import smallfile
 17 | from smallfile import SMFRunException, unittest_module
 18 | from sync_files import touch
 19 | 
 20 | # this class launches multiple threads with SmallfileWorkload instances
 21 | # we do this because we can use > 1 core this way, with python threading,
 22 | # it doesn't really use > 1 core because of the GIL (global lock)
 23 | # occasional status reports could be sent back using pipe as well
 24 | 
 25 | 
 26 | class subprocess(multiprocessing.Process):
 27 |     def __init__(self, invocation):
 28 |         multiprocessing.Process.__init__(self)
 29 |         (conn1, conn2) = multiprocessing.Pipe(False)
 30 |         self.receiver = conn1  # master process receives test result data here
 31 |         self.sender = conn2  # slave process sends test result data here
 32 |         invocation.buf = None
 33 |         invocation.biggest_buf = None
 34 |         invocation.log = None
 35 |         self.invoke = invocation  # all workload generated by this object
 36 | 
 37 |     def run(self):
 38 |         try:
 39 |             self.invoke.do_workload()
 40 |             self.invoke.log.debug(
 41 |                 "exiting subprocess and returning invoke " + str(self.invoke)
 42 |             )
 43 |         except Exception as e:
 44 |             print(
 45 |                 "Exception seen in thread %s host %s (tail %s) "
 46 |                 % (self.invoke.tid, self.invoke.onhost, self.invoke.log_fn())
 47 |             )
 48 |             self.invoke.log.error(str(e))
 49 |             self.status = self.invoke.NOTOK
 50 |         finally:
 51 |             self.rsptimes = None  # response time array already saved to file
 52 |             # reduce amount of data returned from this thread
 53 |             # by eliminating references objects that are no longer needed
 54 |             self.invoke.log = None  # log objects cannot be serialized
 55 |             self.invoke.buf = None
 56 |             self.invoke.biggest_buf = None
 57 |             self.invoke.rsptimes = None
 58 |             self.invoke.loggers = None
 59 |             self.invoke.file_dirs = None
 60 |             self.sender.send(self.invoke)
 61 | 
 62 | 
 63 | # below are unit tests for SmallfileWorkload
 64 | # including multi-threaded test
 65 | # to run, just do "python invoke_process.py"
 66 | 
 67 | 
 68 | class Test(unittest_module.TestCase):
 69 |     def setUp(self):
 70 |         self.invok = smallfile.SmallfileWorkload()
 71 |         self.invok.debug = True
 72 |         self.invok.verbose = True
 73 |         self.invok.tid = "regtest"
 74 |         self.invok.start_log()
 75 |         shutil.rmtree(self.invok.src_dirs[0], ignore_errors=True)
 76 |         os.makedirs(self.invok.src_dirs[0])
 77 | 
 78 |     def test_multiproc_stonewall(self):
 79 |         self.invok.log.info("starting stonewall test")
 80 |         thread_ready_timeout = 4
 81 |         thread_count = 4
 82 |         for tree in self.invok.top_dirs:
 83 |             shutil.rmtree(tree)
 84 |             os.mkdir(tree)
 85 |         for dir in self.invok.src_dirs:
 86 |             os.mkdir(dir)
 87 |         for dir in self.invok.dest_dirs:
 88 |             os.mkdir(dir)
 89 |         os.mkdir(self.invok.network_dir)
 90 |         self.invok.starting_gate = os.path.join(self.invok.network_dir, "starting-gate")
 91 |         sgate_file = self.invok.starting_gate
 92 |         invokeList = []
 93 |         for j in range(0, thread_count):
 94 |             s = smallfile.SmallfileWorkload()
 95 | 
 96 |             # s.log_to_stderr = True
 97 | 
 98 |             s.verbose = True
 99 |             s.tid = str(j)
100 |             s.prefix = "thr_"
101 |             s.suffix = "foo"
102 |             s.iterations = 10
103 |             s.stonewall = False
104 |             s.starting_gate = sgate_file
105 |             invokeList.append(s)
106 |         threadList = []
107 |         for s in invokeList:
108 |             threadList.append(subprocess(s))
109 |         for t in threadList:
110 |             t.start()
111 |         threads_ready = True
112 |         for i in range(0, thread_ready_timeout):
113 |             threads_ready = True
114 |             for s in invokeList:
115 |                 thread_ready_file = s.gen_thread_ready_fname(s.tid)
116 |                 if not os.path.exists(thread_ready_file):
117 |                     threads_ready = False
118 |             if threads_ready:
119 |                 break
120 |             time.sleep(1)
121 |         if not threads_ready:
122 |             raise SMFRunException(
123 |                 "threads did not show up within %d seconds" % thread_ready_timeout
124 |             )
125 |         time.sleep(1)
126 |         touch(sgate_file)
127 |         for t in threadList:
128 |             rtnd_invok = t.receiver.recv()
129 |             t.join()
130 |             self.invok.log.info(str(rtnd_invok))
131 |             assert rtnd_invok.elapsed_time is not None
132 |             assert rtnd_invok.rq_final is not None
133 |             assert rtnd_invok.filenum_final is not None
134 |             if rtnd_invok.status != rtnd_invok.OK:
135 |                 raise SMFRunException(
136 |                     "subprocess failure for %s invocation %s: "
137 |                     % (str(t), str(rtnd_invok))
138 |                 )
139 | 
140 | 
141 | # so you can just do "python invoke_process.py" to test it
142 | 
143 | if __name__ == "__main__":
144 |     unittest_module.main()
145 | 


--------------------------------------------------------------------------------
/launch_smf_host.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # launch_smf_host.py
  3 | # background process which waits for smallfile_remote.py workload to run
  4 | # you must run this process using the python interpreter explicitly.
  5 | # This handles the case where we are running windows or other non-linux OS.
  6 | # it also handles containers, which typically don't come with sshd
  7 | #
  8 | # if you are doing all Linux with containers, then
  9 | # you need to specify container ID in Docker startup command
 10 | # if your mountpoint for the shared storage is /mnt/fs:
 11 | #   CMD: python launch_smf_host.py --top $top_dir --as-host container$container_id
 12 | # you could include this as the last line in your docker file
 13 | # and fill in top_dir and container_id as environment variables in
 14 | # your docker run command using the -e option
 15 | # # docker run -e top_dir=/mnt/fs/smf -e container_id="container-2"
 16 | #
 17 | # we substitute --top directory with --substitute_top directory
 18 | # so that Windows clients can run with Linux test drivers,
 19 | # which cannot have the same pathname for the shared directory
 20 | # as the Windows clients, so you don't need to specify
 21 | # --substitute_top in any other situation.
 22 | #
 23 | # Example for Windows:
 24 | # if mountpoint on Linux test driver is /mnt/cifs/testshare
 25 | # and mountpoint on Windows is z:\
 26 | # you run:
 27 | #   python launch_smf_host.py \
 28 | #              --top /mnt/cifs/testshare/smf
 29 | #              --substitute_top z:\smf
 30 | #
 31 | 
 32 | 
 33 | import errno
 34 | import logging
 35 | import os
 36 | import socket
 37 | import sys
 38 | import time
 39 | 
 40 | import smallfile
 41 | 
 42 | OK = 0
 43 | NOTOK = 1
 44 | 
 45 | 
 46 | def start_log(prefix=socket.gethostname()):
 47 |     log = logging.getLogger(prefix)
 48 |     if os.getenv("LOGLEVEL_DEBUG") is not None:
 49 |         log.setLevel(logging.DEBUG)
 50 |     else:
 51 |         log.setLevel(logging.INFO)
 52 |     log_format = prefix + "%(asctime)s - %(levelname)s - %(message)s"
 53 |     formatter = logging.Formatter(log_format)
 54 | 
 55 |     h = logging.StreamHandler()
 56 |     h.setFormatter(formatter)
 57 |     log.addHandler(h)
 58 | 
 59 |     h2 = logging.FileHandler("/var/tmp/launch_smf_host.%s.log" % prefix)
 60 |     h2.setFormatter(formatter)
 61 |     log.addHandler(h2)
 62 | 
 63 |     log.info("starting log")
 64 |     return log
 65 | 
 66 | 
 67 | def usage(msg):
 68 |     print(msg)
 69 |     print(
 70 |         "usage: python launch_smf_host.py"
 71 |         "--top top-directory "
 72 |         "[ --substitute-top synonym-directory ]"
 73 |         "[ --as-host as-host-name ] "
 74 |     )
 75 |     sys.exit(NOTOK)
 76 | 
 77 | 
 78 | # parse command line
 79 | 
 80 | if len(sys.argv) < 3:
 81 |     usage("required command line arguments missing")
 82 | 
 83 | substitute_dir = None
 84 | top_dir = None
 85 | as_host = smallfile.get_hostname(None)
 86 | j = 1
 87 | while j < len(sys.argv):
 88 |     if len(sys.argv) == j + 1:
 89 |         usage("every parameter name must have a value")
 90 |     nm = sys.argv[j]
 91 |     if len(nm) < 3:
 92 |         usage("parameter name must be > 3 characters long and start with --")
 93 |     nm = nm[2:]
 94 |     val = sys.argv[j + 1]
 95 |     j += 2
 96 |     if nm == "substitute-top":
 97 |         substitute_dir = val
 98 |     elif nm == "top":
 99 |         top_dir = val
100 |     elif nm == "as-host":
101 |         as_host = val
102 |     else:
103 |         usage("unrecognized parameter --%s" % nm)
104 | if not top_dir:
105 |     usage("you must specify --top directory")
106 | log = start_log(prefix=as_host)
107 | log.info(
108 |     "substitute-top %s, top directory %s, as-host %s"
109 |     % (substitute_dir, top_dir, as_host)
110 | )
111 | 
112 | # look for launch files, read smallfile_remote.py command from them,
113 | # and execute, substituting --shared directory for --top directory,
114 | # to allow samba to work with Linux test driver
115 | 
116 | network_shared_path = os.path.join(top_dir, "network_shared")
117 | 
118 | launch_fn = os.path.join(network_shared_path, as_host) + ".smf_launch"
119 | if os.path.exists(launch_fn):  # avoid left-over launch files
120 |     os.unlink(launch_fn)
121 | log.info("launch filename " + launch_fn)
122 | 
123 | shutdown_fn = os.path.join(network_shared_path, "shutdown_launchers.tmp")
124 | log.info("daemon shutdown filename " + shutdown_fn)
125 | while True:
126 |     try:
127 |         with open(launch_fn, "r") as f:
128 |             cmd = f.readline().strip()
129 |         os.unlink(launch_fn)
130 |         if substitute_dir is not None:
131 |             cmd = cmd.replace(substitute_dir, top_dir)
132 |         log.debug("spawning cmd: %s" % cmd)
133 |         rc = os.system(cmd)
134 |         if rc != OK:
135 |             log.debug("ERROR: return code %d for cmd %s" % (rc, cmd))
136 |     except IOError as e:
137 |         if e.errno != errno.ENOENT:
138 |             raise e
139 |     finally:
140 |         if os.path.exists(shutdown_fn):  # avoid left-over launch files
141 |             log.info("saw daemon shutdown file %s, exiting" % shutdown_fn)
142 |             sys.exit(0)
143 |         time.sleep(1)
144 | 


--------------------------------------------------------------------------------
/launcher_thread.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | launcher_thread.py
 5 | 
 6 | manages parallel execution of shell commands on remote hosts
 7 | it assumes there is a poller on each remote host, launch_smf_host.py,
 8 | it waits for files of form '*.smf_launch' in the shared directory
 9 | and when it finds one,
10 | it reads in the command to start the worker from it and launches it.
11 | This takes the place of an sshd thread launching it.
12 | Copyright 2012 -- Ben England
13 | Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0
14 | See Appendix on this page for instructions pertaining to license.
15 | """
16 | 
17 | import os
18 | import threading
19 | import time
20 | 
21 | from smallfile import ensure_deleted
22 | from sync_files import write_sync_file
23 | 
24 | # this class is just used to create a python thread
25 | # for each remote host that we want to use as a workload generator
26 | # the thread just saves a command in a shared directory
27 | # for the remote host or container to run,
28 | # then waits for the result to appear in the same shared directory
29 | 
30 | 
31 | class launcher_thread(threading.Thread):
32 |     def __init__(self, prm, remote_host, remote_cmd_in):
33 |         threading.Thread.__init__(self)
34 |         self.prm = prm  # test parameters
35 |         self.remote_host = remote_host
36 |         self.remote_cmd = remote_cmd_in
37 |         self.status = None
38 | 
39 |     def run(self):
40 |         master_invoke = self.prm.master_invoke
41 |         launch_fn = (
42 |             os.path.join(master_invoke.network_dir, self.remote_host) + ".smf_launch"
43 |         )
44 |         pickle_fn = master_invoke.host_result_filename(self.remote_host)
45 |         abortfn = master_invoke.abort_fn()
46 |         ensure_deleted(launch_fn)
47 |         ensure_deleted(pickle_fn)
48 |         if self.prm.master_invoke.verbose:
49 |             print("wrote command %s to launch file %s" % (self.remote_cmd, launch_fn))
50 |         write_sync_file(launch_fn, self.remote_cmd)
51 |         pickle_fn = master_invoke.host_result_filename(self.remote_host)
52 |         # print('waiting for pickle file %s'%pickle_fn)
53 |         self.status = master_invoke.NOTOK  # premature exit means failure
54 |         while not os.path.exists(pickle_fn):
55 |             # print('%s not seen'%pickle_fn)
56 |             if os.path.exists(abortfn):
57 |                 if master_invoke.verbose:
58 |                     print("test abort seen by host " + self.remote_host)
59 |                 return
60 |             time.sleep(1.0)
61 |         self.status = master_invoke.OK  # success!
62 | 


--------------------------------------------------------------------------------
/multi_thread_workload.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import copy
  3 | import os
  4 | import random
  5 | import sys
  6 | import time
  7 | 
  8 | import invoke_process
  9 | import output_results
 10 | import smallfile
 11 | from smallfile import NOTOK, OK, SMFResultException, SMFRunException, abort_test
 12 | from sync_files import (
 13 |     ensure_deleted,
 14 |     ensure_dir_exists,
 15 |     touch,
 16 |     write_pickle,
 17 |     write_sync_file,
 18 | )
 19 | 
 20 | 
 21 | def create_worker_list(prm):
 22 |     # for each thread set up SmallfileWorkload instance,
 23 |     # create a thread instance, and delete the thread-ready file
 24 | 
 25 |     thread_list = []
 26 |     for k in range(0, prm.thread_count):
 27 |         nextinv = copy.copy(prm.master_invoke)
 28 |         nextinv.tid = "%02d" % k
 29 |         if not prm.master_invoke.is_shared_dir:
 30 |             nextinv.src_dirs = [
 31 |                 d + os.sep + prm.master_invoke.onhost + os.sep + "thrd_" + nextinv.tid
 32 |                 for d in nextinv.src_dirs
 33 |             ]
 34 |             nextinv.dest_dirs = [
 35 |                 d + os.sep + prm.master_invoke.onhost + os.sep + "thrd_" + nextinv.tid
 36 |                 for d in nextinv.dest_dirs
 37 |             ]
 38 |         t = invoke_process.subprocess(nextinv)
 39 |         thread_list.append(t)
 40 |         ensure_deleted(nextinv.gen_thread_ready_fname(nextinv.tid))
 41 |     return thread_list
 42 | 
 43 | 
 44 | # what follows is code that gets done on each host
 45 | 
 46 | 
 47 | def run_multi_thread_workload(prm):
 48 |     master_invoke = prm.master_invoke
 49 |     prm_slave = prm.is_slave
 50 |     verbose = master_invoke.verbose
 51 |     host = master_invoke.onhost
 52 | 
 53 |     if not prm_slave:
 54 |         master_invoke.create_top_dirs(False)
 55 | 
 56 |     if prm_slave:
 57 |         time.sleep(1.1)
 58 |         for d in master_invoke.top_dirs:
 59 |             ensure_dir_exists(d)
 60 |         for dlist in [master_invoke.src_dirs, master_invoke.dest_dirs]:
 61 |             for d in dlist:
 62 |                 ensure_dir_exists(d)
 63 |                 if verbose:
 64 |                     print(host + " saw " + str(d))
 65 | 
 66 |     # for each thread set up SmallfileWorkload instance,
 67 |     # create a thread instance, and delete the thread-ready file
 68 | 
 69 |     thread_list = create_worker_list(prm)
 70 |     my_host_invoke = thread_list[0].invoke
 71 | 
 72 |     # start threads, wait for them to reach starting gate
 73 |     # to do this, look for thread-ready files
 74 | 
 75 |     for t in thread_list:
 76 |         ensure_deleted(t.invoke.gen_thread_ready_fname(t.invoke.tid))
 77 |     for t in thread_list:
 78 |         t.start()
 79 |     if verbose:
 80 |         print("started %d worker threads on host %s" % (len(thread_list), host))
 81 | 
 82 |     # wait for all threads to reach the starting gate
 83 |     # this makes it more likely that they will start simultaneously
 84 | 
 85 |     startup_timeout = prm.startup_timeout
 86 |     if smallfile.is_windows_os:
 87 |         print("adding time for Windows synchronization")
 88 |         startup_timeout += 30
 89 |     abort_fname = my_host_invoke.abort_fn()
 90 |     thread_count = len(thread_list)
 91 |     thread_to_wait_for = 0
 92 |     for sec in range(0, startup_timeout * 2):
 93 |         for k in range(thread_to_wait_for, thread_count):
 94 |             t = thread_list[k]
 95 |             fn = t.invoke.gen_thread_ready_fname(t.invoke.tid)
 96 |             if not os.path.exists(fn):
 97 |                 if verbose:
 98 |                     print("thread %d thread-ready file %s not found..." % (k, fn))
 99 |                 break
100 |             thread_to_wait_for = k + 1
101 |         if thread_to_wait_for == thread_count:
102 |             break
103 |         if os.path.exists(abort_fname):
104 |             break
105 |         time.sleep(0.5)
106 | 
107 |     # if all threads didn't make it to the starting gate
108 | 
109 |     if thread_to_wait_for < thread_count:
110 |         abort_test(abort_fname, thread_list)
111 |         raise SMFRunException(
112 |             "only %d threads reached starting gate within %d sec"
113 |             % (thread_to_wait_for, startup_timeout)
114 |         )
115 | 
116 |     # declare that this host is at the starting gate
117 | 
118 |     if prm_slave:
119 |         host_ready_fn = my_host_invoke.gen_host_ready_fname()
120 |         if my_host_invoke.verbose:
121 |             print(
122 |                 "host %s creating ready file %s"
123 |                 % (my_host_invoke.onhost, host_ready_fn)
124 |             )
125 |         touch(host_ready_fn)
126 | 
127 |     sg = my_host_invoke.starting_gate
128 |     if not prm_slave:  # special case of no --host-set parameter
129 |         try:
130 |             write_sync_file(sg, "hi there")
131 |             if verbose:
132 |                 print("wrote starting gate file")
133 |         except IOError as e:
134 |             print("error writing starting gate for threads: %s" % str(e))
135 |         prm.test_start_time = time.time()
136 | 
137 |     # wait for starting_gate file to be created by test driver
138 |     # every second we resume scan from last host file not found
139 | 
140 |     if verbose:
141 |         print("awaiting " + sg)
142 |     if prm_slave:
143 |         for sec in range(0, prm.host_startup_timeout + 10):
144 |             # hack to ensure that directory is up to date
145 |             #   ndlist = os.listdir(my_host_invoke.network_dir)
146 |             # if verbose: print(str(ndlist))
147 |             if os.path.exists(sg):
148 |                 break
149 |             time.sleep(0.5)
150 |         if not os.path.exists(sg):
151 |             abort_test(my_host_invoke.abort_fn(), thread_list)
152 |             raise SMFRunException(
153 |                 "starting signal not seen within %d seconds" % prm.host_startup_timeout
154 |             )
155 |     if verbose:
156 |         print("starting test on host " + host + " in 2 seconds")
157 |     time.sleep(2 + random.random())  # let other hosts see starting gate file
158 | 
159 |     # FIXME: don't timeout the test,
160 |     # instead check thread progress and abort if you see any of them stalled
161 |     # but if servers are heavily loaded you can't rely on filesystem
162 | 
163 |     # wait for all threads on this host to finish
164 | 
165 |     for t in thread_list:
166 |         if verbose:
167 |             print("waiting for thread %s" % t.invoke.tid)
168 |         t.invoke = t.receiver.recv()  # to get results from sub-process
169 |         t.join()
170 | 
171 |     # if not a slave of some other host, print results (for this host)
172 | 
173 |     exit_status = OK
174 |     if not prm_slave:
175 |         try:
176 |             invoke_list = [t.invoke for t in thread_list]
177 |             output_results.output_results(invoke_list, prm)
178 |         except SMFResultException as e:
179 |             print("ERROR: " + str(e))
180 |             exit_status = NOTOK
181 |     else:
182 |         # if we are participating in a multi-host test
183 |         # then write out this host's result in pickle format
184 |         # so test driver can pick up result
185 | 
186 |         result_filename = master_invoke.host_result_filename(prm.as_host)
187 |         if verbose:
188 |             print("writing invokes to: " + result_filename)
189 |         invok_list = [t.invoke for t in thread_list]
190 |         if verbose:
191 |             print("saving result to filename %s" % result_filename)
192 |         for ivk in invok_list:
193 |             ivk.buf = None
194 |             ivk.biggest_buf = None
195 |         write_pickle(result_filename, invok_list)
196 |         time.sleep(1.2)  # for benefit of NFS with actimeo=1
197 | 
198 |     sys.exit(exit_status)
199 | 


--------------------------------------------------------------------------------
/output_results.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import json
  4 | import os
  5 | import time
  6 | from copy import deepcopy
  7 | 
  8 | import smallfile
  9 | from smallfile import KB_PER_GB, OK, SMFResultException
 10 | 
 11 | BYTES_PER_KiB = 1024.0
 12 | KiB_PER_MiB = 1024.0
 13 | 
 14 | 
 15 | class result_stats:
 16 |     # start with zeroing because we'll add
 17 |     # other objects of this type to it
 18 | 
 19 |     def __init__(self):
 20 |         self.status = OK
 21 |         self.elapsed = 0.0
 22 |         self.files = 0
 23 |         self.records = 0
 24 |         self.files_per_sec = 0.0
 25 |         self.IOPS = 0.0
 26 |         self.MiBps = 0.0
 27 | 
 28 |     def get_from_invoke(self, invk, record_sz_kb):
 29 |         if invk.elapsed_time is None:
 30 |             print(
 31 |                 "WARNING: thread %s on host %s never completed"
 32 |                 % (invk.tid, invk.onhost)
 33 |             )
 34 |         self.status = invk.status
 35 |         self.elapsed = (
 36 |             invk.elapsed_time if invk.elapsed_time is not None else 100000000.0
 37 |         )
 38 |         self.files = invk.filenum_final if invk.filenum_final is not None else 0
 39 |         self.records = invk.rq_final if invk.rq_final is not None else 0
 40 |         if invk.elapsed_time is not None and invk.elapsed_time > 0.0:
 41 |             self.files_per_sec = invk.filenum_final / invk.elapsed_time
 42 |             if invk.rq_final > 0:
 43 |                 self.IOPS = invk.rq_final / invk.elapsed_time
 44 |                 self.MiBps = (
 45 |                     invk.rq_final * record_sz_kb / KiB_PER_MiB
 46 |                 ) / invk.elapsed_time
 47 | 
 48 |     # add component's fields to this object
 49 | 
 50 |     def add_to(self, component):
 51 |         # status is not ok if any component's status is not ok
 52 |         if self.status == OK:
 53 |             self.status = component.status
 54 |         # elapsed time is max of any component's elapsed time
 55 |         self.elapsed = max(self.elapsed, component.elapsed)
 56 |         self.files += component.files
 57 |         self.records += component.records
 58 |         if component.elapsed > 0.0:
 59 |             self.files_per_sec += component.files_per_sec
 60 |             try:
 61 |                 self.IOPS += component.IOPS
 62 |                 self.MiBps += component.MiBps
 63 |             except KeyError:
 64 |                 pass
 65 | 
 66 |     # insert values into dictionary
 67 | 
 68 |     def add_to_dict(self, target):
 69 |         if self.status != OK:
 70 |             target["status"] = os.strerror(self.status)
 71 |         target["elapsed"] = self.elapsed
 72 |         target["files"] = self.files
 73 |         target["records"] = self.records
 74 |         target["filesPerSec"] = self.files_per_sec
 75 |         if self.records > 0:
 76 |             target["IOPS"] = self.IOPS
 77 |             target["MiBps"] = self.MiBps
 78 | 
 79 | 
 80 | def output_results(invoke_list, test_params):
 81 |     if len(invoke_list) < 1:
 82 |         raise SMFResultException("no pickled invokes read, so no results")
 83 |     my_host_invoke = invoke_list[0]  # pick a representative one
 84 |     rszkb = my_host_invoke.record_sz_kb
 85 |     if rszkb == 0:
 86 |         rszkb = my_host_invoke.total_sz_kb
 87 |     if rszkb * my_host_invoke.BYTES_PER_KB > my_host_invoke.biggest_buf_size:
 88 |         rszkb = my_host_invoke.biggest_buf_size / my_host_invoke.BYTES_PER_KB
 89 | 
 90 |     rslt = {}
 91 |     rslt["host"] = {}
 92 |     stats_by_host = {}
 93 |     cluster = stats_by_host["stats"] = result_stats()
 94 | 
 95 |     for invk in invoke_list:  # for each parallel SmallfileWorkload
 96 |         # add up work that it did
 97 |         # and determine time interval over which test ran
 98 | 
 99 |         if not isinstance(invk, smallfile.SmallfileWorkload):
100 |             raise SMFResultException("invoke is of wrong type: %s" % str(invk))
101 |         if invk.status:
102 |             status = "ERR: " + os.strerror(invk.status)
103 |         else:
104 |             status = "ok"
105 |         fmt = "host = %s,thr = %s,elapsed = %s"
106 |         fmt += ",files = %s,records = %s,status = %s"
107 |         print(
108 |             fmt
109 |             % (
110 |                 invk.onhost,
111 |                 invk.tid,
112 |                 str(invk.elapsed_time),
113 |                 str(invk.filenum_final),
114 |                 str(invk.rq_final),
115 |                 status,
116 |             )
117 |         )
118 | 
119 |         per_thread = result_stats()
120 |         per_thread.get_from_invoke(invk, rszkb)
121 | 
122 |         # for JSON, show nesting of threads within hosts
123 | 
124 |         try:
125 |             per_host = stats_by_host[invk.onhost]
126 |         except KeyError:
127 |             # first time this host was seen
128 |             stats_by_host[invk.onhost] = per_host = {}
129 |             per_host["thread"] = {}
130 |             per_host["stats"] = result_stats()
131 |         per_host["thread"][invk.tid] = per_thread
132 |         per_host["stats"].add_to(per_thread)
133 |         cluster.add_to(per_thread)
134 | 
135 |     # now counters are all added up, generate JSON
136 | 
137 |     for invk in invoke_list:  # for each parallel SmallfileWorkload
138 |         per_host = stats_by_host[invk.onhost]
139 |         try:
140 |             per_host_json = rslt["host"][invk.onhost]
141 |         except KeyError:
142 |             rslt["host"][invk.onhost] = per_host_json = {}
143 |             per_host["stats"].add_to_dict(per_host_json)
144 |             per_host_json["thread"] = {}
145 |         per_host_json["thread"][invk.tid] = per_thread_json = {}
146 |         per_thread = per_host["thread"][invk.tid]
147 |         per_thread.add_to_dict(per_thread_json)
148 | 
149 |     cluster.add_to_dict(rslt)
150 | 
151 |     # if there is only 1 host in results,
152 |     # and no host was specified,
153 |     # then remove that level from
154 |     # result hierarchy, not needed
155 | 
156 |     if len(rslt["host"].keys()) == 1 and test_params.host_set is None:
157 |         hostkey = list(rslt["host"].keys())[0]
158 |         threads_in_host = rslt["host"][hostkey]["thread"]
159 |         rslt["thread"] = threads_in_host
160 |         del rslt["host"]
161 | 
162 |     print("total threads = %d" % len(invoke_list))
163 |     rslt["totalhreads"] = len(invoke_list)
164 | 
165 |     print("total files = %d" % cluster.files)
166 | 
167 |     if cluster.records > 0:
168 |         print("total IOPS = %d" % cluster.IOPS)
169 |         total_data_gb = cluster.records * rszkb * 1.0 / KB_PER_GB
170 |         print("total data = %9.3f GiB" % total_data_gb)
171 |         rslt["totalDataGB"] = total_data_gb
172 | 
173 |     if not test_params.host_set:
174 |         test_params.host_set = ["localhost"]
175 |     json_test_params = deepcopy(test_params)
176 |     json_test_params.host_set = ",".join(test_params.host_set)
177 | 
178 |     if len(invoke_list) < len(test_params.host_set) * test_params.thread_count:
179 |         print("WARNING: failed to get some responses from workload generators")
180 |     max_files = my_host_invoke.iterations * len(invoke_list)
181 |     pct_files = 100.0 * cluster.files / max_files
182 |     print(
183 |         "%6.2f%% of requested files processed, warning threshold is %6.2f%%"
184 |         % (pct_files, smallfile.pct_files_min)
185 |     )
186 |     rslt["pctFilesDone"] = pct_files
187 | 
188 |     print("elapsed time = %9.3f" % cluster.elapsed)
189 |     rslt["startTime"] = test_params.test_start_time
190 |     rslt["status"] = os.strerror(cluster.status)
191 | 
192 |     # output start time in elasticsearch-friendly format
193 | 
194 |     rslt["date"] = time.strftime(
195 |         "%Y-%m-%dT%H:%M:%S.000Z", time.gmtime(test_params.test_start_time)
196 |     )
197 | 
198 |     # don't output meaningless fields
199 | 
200 |     if cluster.elapsed < 0.001:  # can't compute rates if it ended too quickly
201 |         print("WARNING: test must run longer than a millisecond")
202 |     else:
203 |         print("files/sec = %f" % cluster.files_per_sec)
204 |         if cluster.records > 0:
205 |             print("IOPS = %f" % cluster.IOPS)
206 |             print("MiB/sec = %f" % cluster.MiBps)
207 | 
208 |     # if JSON output requested, generate it here
209 | 
210 |     if test_params.output_json:
211 |         json_obj = json_test_params.to_json()
212 |         json_obj["results"] = rslt
213 |         with open(test_params.output_json, "w") as jsonf:
214 |             json.dump(json_obj, jsonf, indent=4)
215 | 
216 |     # finally, throw exceptions if something bad happened
217 |     # wait until here to do it so we can see test results
218 | 
219 |     if cluster.status != OK:
220 |         print("WARNING: at least one thread encountered error, test may be incomplete")
221 |     elif pct_files < smallfile.pct_files_min:
222 |         print(
223 |             "WARNING: not enough total files processed before 1st thread finished, change test parameters"
224 |         )
225 | 


--------------------------------------------------------------------------------
/parse.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | parse.py -- parses CLI commands for smallfile_cli.py
  5 | 
  6 | Copyright 2012 -- Ben England
  7 | Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0
  8 | See Appendix on this page for instructions pertaining to license.
  9 | """
 10 | 
 11 | import argparse
 12 | import os
 13 | 
 14 | import smallfile
 15 | import smf_test_params
 16 | from parser_data_types import (
 17 |     SmfParseException,
 18 |     boolean,
 19 |     directory_list,
 20 |     file_size_distrib,
 21 |     host_set,
 22 |     non_negative_integer,
 23 |     positive_integer,
 24 | )
 25 | from smallfile import SmallfileWorkload
 26 | 
 27 | yaml_parser_installed = False
 28 | try:
 29 |     import yaml_parser
 30 | 
 31 |     yaml_parser_installed = True
 32 | except ImportError:
 33 |     pass
 34 | 
 35 | # parse command line
 36 | # return smf_test_params.smf_test_params instance
 37 | # defining all test parameters.
 38 | # default does short test in /var/tmp so you can see the program run
 39 | 
 40 | 
 41 | def parse():
 42 |     # store as much as you can in SmallfileWorkload object
 43 |     # so per-thread invocations inherit
 44 | 
 45 |     test_params = smf_test_params.smf_test_params()
 46 |     inv = test_params.master_invoke  # for convenience
 47 | 
 48 |     parser = argparse.ArgumentParser(description="parse smallfile CLI parameters")
 49 |     add = parser.add_argument
 50 |     add("--yaml-input-file", help="input YAML file containing all parameters below")
 51 |     add(
 52 |         "--output-json",
 53 |         default=test_params.output_json,
 54 |         help="if true then output JSON-format version of results",
 55 |     )
 56 |     add(
 57 |         "--response-times",
 58 |         type=boolean,
 59 |         default=inv.measure_rsptimes,
 60 |         help="if true then record response time of each file op",
 61 |     )
 62 |     add(
 63 |         "--network-sync-dir",
 64 |         help="if --top not shared filesystem, provide shared filesystem directory",
 65 |     )
 66 |     add(
 67 |         "--operation",
 68 |         default="cleanup",
 69 |         choices=SmallfileWorkload.all_op_names,
 70 |         help="type of operation to perform on each file",
 71 |     )
 72 |     add(
 73 |         "--top",
 74 |         type=directory_list,
 75 |         default=inv.top_dirs,
 76 |         help="top directory or directories used by smallfile",
 77 |     )
 78 |     add(
 79 |         "--host-set",
 80 |         type=host_set,
 81 |         default=test_params.host_set,
 82 |         help="list of workload generator hosts (or file containing it) ",
 83 |     )
 84 |     add(
 85 |         "--launch-by-daemon",
 86 |         type=boolean,
 87 |         default=test_params.launch_by_daemon,
 88 |         help="use non-ssh launcher to get test running",
 89 |     )
 90 |     add(
 91 |         "--files",
 92 |         type=positive_integer,
 93 |         default=inv.iterations,
 94 |         help="files processed per thread",
 95 |     )
 96 |     add(
 97 |         "--threads",
 98 |         type=positive_integer,
 99 |         default=test_params.thread_count,
100 |         help="threads per client",
101 |     )
102 |     add(
103 |         "--files-per-dir",
104 |         type=positive_integer,
105 |         default=inv.files_per_dir,
106 |         help="files per (sub)directory",
107 |     )
108 |     add(
109 |         "--dirs-per-dir",
110 |         type=positive_integer,
111 |         default=inv.dirs_per_dir,
112 |         help="subdirectories per directory",
113 |     )
114 |     add(
115 |         "--record-size",
116 |         type=positive_integer,
117 |         default=inv.record_sz_kb,
118 |         help="record size (KB)",
119 |     )
120 |     add(
121 |         "--file-size",
122 |         type=non_negative_integer,
123 |         default=inv.total_sz_kb,
124 |         help="subdirectories per directory",
125 |     )
126 |     add(
127 |         "--file-size-distribution",
128 |         type=file_size_distrib,
129 |         default=inv.filesize_distr,
130 |         help='file size can be constant ("fixed") or random ("exponential")',
131 |     )
132 |     add(
133 |         "--fsync",
134 |         type=boolean,
135 |         default=inv.fsync,
136 |         help="call fsync() after each file is written/modified",
137 |     )
138 |     add(
139 |         "--xattr-size",
140 |         type=non_negative_integer,
141 |         default=inv.xattr_size,
142 |         help="extended attribute size (bytes)",
143 |     )
144 |     add(
145 |         "--xattr-count",
146 |         type=non_negative_integer,
147 |         default=inv.xattr_count,
148 |         help="number of extended attributes per file",
149 |     )
150 |     add(
151 |         "--pause",
152 |         type=non_negative_integer,
153 |         default=inv.pause_between_files,
154 |         help="pause between each file (microsec)",
155 |     )
156 |     add(
157 |         "--auto-pause",
158 |         type=boolean,
159 |         default=inv.auto_pause,
160 |         help="adjust pause between files automatically based on response times",
161 |     )
162 |     add(
163 |         "--cleanup-delay-usec-per-file",
164 |         type=non_negative_integer,
165 |         default=inv.cleanup_delay_usec_per_file,
166 |         help="time to delay after cleanup per file (microsec)",
167 |     )
168 |     add(
169 |         "--stonewall",
170 |         type=boolean,
171 |         default=inv.stonewall,
172 |         help="stop measuring as soon as first thread is done",
173 |     )
174 |     add(
175 |         "--finish",
176 |         type=boolean,
177 |         default=inv.finish_all_rq,
178 |         help="stop processing files as soon as first thread is done",
179 |     )
180 |     add("--prefix", default=inv.prefix, help="filename prefix")
181 |     add("--suffix", default=inv.suffix, help="filename suffix")
182 |     add(
183 |         "--hash-into-dirs",
184 |         type=boolean,
185 |         default=inv.hash_to_dir,
186 |         help="if true then pseudo-randomly place files into directories",
187 |     )
188 |     add(
189 |         "--same-dir",
190 |         type=boolean,
191 |         default=inv.is_shared_dir,
192 |         help="if true then all threads share the same directories",
193 |     )
194 |     add(
195 |         "--verbose",
196 |         type=boolean,
197 |         default=inv.verbose,
198 |         help="if true then log extra messages about test",
199 |     )
200 |     add(
201 |         "--permute-host-dirs",
202 |         type=boolean,
203 |         default=test_params.permute_host_dirs,
204 |         help="if true then shift clients to different host directories",
205 |     )
206 |     add(
207 |         "--record-ctime-size",
208 |         type=boolean,
209 |         default=inv.record_ctime_size,
210 |         help="if true then update file xattr with ctime+size",
211 |     )
212 |     add(
213 |         "--verify-read",
214 |         type=boolean,
215 |         default=inv.verify_read,
216 |         help="if true then check that data read = data written",
217 |     )
218 |     add(
219 |         "--incompressible",
220 |         type=boolean,
221 |         default=inv.incompressible,
222 |         help="if true then non-compressible data written",
223 |     )
224 | 
225 |     # these parameters shouldn't be used by mere mortals
226 |     add(
227 |         "--min-dirs-per-sec",
228 |         type=positive_integer,
229 |         default=test_params.min_directories_per_sec,
230 |         help=argparse.SUPPRESS,
231 |     )
232 |     add(
233 |         "--log-to-stderr",
234 |         type=boolean,
235 |         default=inv.log_to_stderr,
236 |         help=argparse.SUPPRESS,
237 |     )
238 |     add("--remote-pgm-dir", default=test_params.remote_pgm_dir, help=argparse.SUPPRESS)
239 |     add("--slave", help=argparse.SUPPRESS)
240 |     add("--as-host", help=argparse.SUPPRESS)
241 |     add(
242 |         "--host-count",
243 |         type=positive_integer,
244 |         default=0,
245 |         help="total number of hosts/pods participating in smallfile test",
246 |     )
247 | 
248 |     args = parser.parse_args()
249 | 
250 |     inv.opname = args.operation
251 |     test_params.top_dirs = [os.path.abspath(p) for p in args.top]
252 |     test_params.launch_by_daemon = args.launch_by_daemon
253 |     inv.iterations = args.files
254 |     test_params.thread_count = inv.threads = args.threads
255 |     inv.files_per_dir = args.files_per_dir
256 |     inv.dirs_per_dir = args.dirs_per_dir
257 |     inv.record_sz_kb = args.record_size
258 |     inv.total_sz_kb = args.file_size
259 |     test_params.size_distribution = inv.filesize_distr = args.file_size_distribution
260 |     inv.xattr_size = args.xattr_size
261 |     inv.xattr_count = args.xattr_count
262 |     inv.prefix = args.prefix
263 |     inv.suffix = args.suffix
264 |     inv.hash_to_dir = args.hash_into_dirs
265 |     inv.pause_between_files = args.pause
266 |     inv.auto_pause = args.auto_pause
267 |     test_params.cleanup_delay_usec_per_file = (
268 |         inv.cleanup_delay_usec_per_file
269 |     ) = args.cleanup_delay_usec_per_file
270 |     inv.stonewall = args.stonewall
271 |     inv.finish_all_rq = args.finish
272 |     inv.measure_rsptimes = args.response_times
273 |     inv.fsync = args.fsync
274 |     inv.record_ctime_size = args.record_ctime_size
275 |     test_params.permute_host_dirs = args.permute_host_dirs
276 |     test_params.output_json = args.output_json
277 |     inv.incompressible = args.incompressible
278 |     inv.verify_read = args.verify_read
279 |     test_params.min_directories_per_sec = args.min_dirs_per_sec
280 |     inv.is_shared_dir = args.same_dir
281 |     inv.verbose = args.verbose
282 |     inv.log_to_stderr = args.log_to_stderr
283 |     test_params.remote_pgm_dir = args.remote_pgm_dir
284 |     test_params.network_sync_dir = args.network_sync_dir
285 |     test_params.is_slave = args.slave
286 |     inv.onhost = smallfile.get_hostname(args.as_host)
287 |     test_params.host_set = args.host_set
288 |     inv.total_hosts = args.host_count
289 | 
290 |     # if YAML input was used, update test_params object with this
291 |     # YAML parameters override CLI parameters
292 | 
293 |     if args.yaml_input_file:
294 |         if not yaml_parser_installed:
295 |             raise SmfParseException("python yaml module not available - is this PyPy?")
296 |         yaml_parser.parse_yaml(test_params, args.yaml_input_file)
297 | 
298 |     # total_hosts is a parameter that allows pod workloads to know
299 |     # how many other pods are doing the same thing
300 | 
301 |     if inv.total_hosts == 0:
302 |         if test_params.host_set is not None:
303 |             inv.total_hosts = len(test_params.host_set)
304 |         else:
305 |             inv.total_hosts = 1
306 | 
307 |     # network_sync_dir is where python processes share state
308 | 
309 |     if not test_params.network_sync_dir:
310 |         test_params.network_sync_dir = os.path.join(
311 |             test_params.top_dirs[0], "network_shared"
312 |         )
313 | 
314 |     # validate parameters further now that we know what they all are
315 | 
316 |     sdmsg = "directory %s containing network sync dir. must exist on all hosts (including this one)"
317 |     parentdir = os.path.dirname(test_params.network_sync_dir)
318 |     if not os.path.isdir(parentdir) and args.host_set is not None:
319 |         raise SmfParseException(sdmsg % parentdir)
320 | 
321 |     if inv.record_sz_kb > inv.total_sz_kb and inv.total_sz_kb != 0:
322 |         raise SmfParseException("record size cannot exceed file size")
323 | 
324 |     if inv.record_sz_kb == 0 and inv.verbose:
325 |         print(
326 |             "record size not specified,large files will default to record size %d KB"
327 |             % (SmallfileWorkload.biggest_buf_size / inv.BYTES_PER_KB)
328 |         )
329 | 
330 |     if test_params.top_dirs:
331 |         for d in test_params.top_dirs:
332 |             if len(d) < 6:
333 |                 raise SmfParseException(
334 |                     "directory less than 6 characters, cannot use top of filesystem, too dangerous"
335 |                 )
336 |             if not os.path.isdir(d) and test_params.network_sync_dir is not None:
337 |                 raise SmfParseException(
338 |                     "you must ensure that shared directory "
339 |                     + d
340 |                     + " is accessible from this host and every remote host in test"
341 |                 )
342 |     if test_params.top_dirs:
343 |         inv.set_top(test_params.top_dirs)
344 |     else:
345 |         test_params.top_dirs = inv.top_dirs
346 | 
347 |     if test_params.network_sync_dir:
348 |         inv.network_dir = test_params.network_sync_dir
349 |     else:
350 |         test_params.network_sync_dir = inv.network_dir
351 |     inv.starting_gate = os.path.join(inv.network_dir, "starting_gate.tmp")
352 | 
353 |     if inv.iterations < 10:
354 |         inv.stonewall = False
355 | 
356 |     if inv.opname == "cleanup" and (inv.auto_pause or (inv.pause_between_files > 0)):
357 |         inv.auto_pause = False
358 |         inv.pause_between_files = 0
359 |         print("do not need pause between files during cleanup")
360 |     if inv.total_hosts * inv.threads == 1:
361 |         inv.auto_pause = False
362 |         inv.pause_between_files = 0
363 |         print("do not need pause between files for single-threaded workload")
364 |     if inv.auto_pause and inv.pause_between_files > 0:
365 |         inv.pause_between_files = 0
366 |         print("pause parameter not needed with auto-pause Y, setting pause to 0")
367 | 
368 |     # create must finish all files so that subsequent ops have the files they need
369 |     # cleanup must finish all files so that all remnants of last test are removed
370 | 
371 |     if (
372 |         ["cleanup", "create", "mkdir"].__contains__(inv.opname)
373 |     ) and not inv.finish_all_rq:
374 |         print("changing --finish to true for op type %s" % inv.opname)
375 |         inv.finish_all_rq = True
376 | 
377 |     if not test_params.is_slave:
378 |         prm_list = test_params.human_readable()
379 |         for prm_name, prm_value in prm_list:
380 |             print("%40s : %s" % (prm_name, prm_value))
381 | 
382 |     inv.reset()
383 |     test_params.recalculate_timeouts()
384 |     return test_params
385 | 


--------------------------------------------------------------------------------
/parse_slave.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | parse_slave.py -- parses SSH cmd for invocation of smallfile_remote.py
 5 | 
 6 | Copyright 2012 -- Ben England
 7 | Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 8 | See Appendix on this page for instructions pertaining to license.
 9 | """
10 | 
11 | import argparse
12 | import os
13 | import pickle
14 | import time
15 | 
16 | import smallfile
17 | 
18 | 
19 | def parse():
20 |     """
21 |     parse command line and return unpickled test params
22 | 
23 |     pass via --network-sync-dir option
24 |     optionally pass host identity of this remote invocation
25 |     """
26 |     parser = argparse.ArgumentParser(description="parse remote smallfile parameters")
27 |     parser.add_argument(
28 |         "--network-sync-dir", help="directory used to synchronize with test driver"
29 |     )
30 |     parser.add_argument(
31 |         "--as-host",
32 |         default=smallfile.get_hostname(None),
33 |         help="directory used to synchronize with test driver",
34 |     )
35 |     args = parser.parse_args()
36 | 
37 |     param_pickle_fname = os.path.join(args.network_sync_dir, "param.pickle")
38 |     if not os.path.exists(param_pickle_fname):
39 |         time.sleep(1.1)
40 |     params = None
41 |     with open(param_pickle_fname, "rb") as pickled_params:
42 |         params = pickle.load(pickled_params)
43 |     params.is_slave = True
44 |     params.as_host = args.as_host
45 |     params.master_invoke.onhost = args.as_host
46 |     return params
47 | 


--------------------------------------------------------------------------------
/parser_data_types.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | 
 4 | from smallfile import SmallfileWorkload
 5 | 
 6 | TypeExc = argparse.ArgumentTypeError
 7 | 
 8 | # if we throw exceptions, do it with this
 9 | # so caller can specifically catch them
10 | 
11 | 
12 | class SmfParseException(Exception):
13 |     pass
14 | 
15 | 
16 | # the next few routines implement data types
17 | # of smallfile parameters
18 | 
19 | 
20 | def boolean(boolstr):
21 |     if isinstance(boolstr, bool):
22 |         return boolstr
23 |     b = boolstr.lower()
24 |     if b == "y" or b == "yes" or b == "t" or b == "true":
25 |         bval = True
26 |     elif b == "n" or b == "no" or b == "f" or b == "false":
27 |         bval = False
28 |     else:
29 |         raise TypeExc("boolean value must be y|yes|t|true|n|no|f|false")
30 |     return bval
31 | 
32 | 
33 | def positive_integer(posint_str):
34 |     intval = int(posint_str)
35 |     if intval <= 0:
36 |         raise TypeExc("integer value greater than zero expected")
37 |     return intval
38 | 
39 | 
40 | def non_negative_integer(nonneg_str):
41 |     intval = int(nonneg_str)
42 |     if intval < 0:
43 |         raise TypeExc("non-negative integer value expected")
44 |     return intval
45 | 
46 | 
47 | def host_set(hostname_list_str):
48 |     if os.path.isfile(hostname_list_str):
49 |         with open(hostname_list_str, "r") as f:
50 |             hostname_list = [record.strip() for record in f.readlines()]
51 |     else:
52 |         hostname_list = hostname_list_str.strip().split(",")
53 |         if len(hostname_list) < 2:
54 |             hostname_list = hostname_list_str.strip().split()
55 |         if len(hostname_list) == 0:
56 |             raise TypeExc("host list must be non-empty")
57 |     return hostname_list
58 | 
59 | 
60 | def directory_list(directory_list_str):
61 |     directory_list = directory_list_str.strip().split(",")
62 |     if len(directory_list) == 1:
63 |         directory_list = directory_list_str.strip().split()
64 |     if len(directory_list) == 0:
65 |         raise TypeExc("directory list must be non-empty")
66 |     return directory_list
67 | 
68 | 
69 | def file_size_distrib(fsdistrib_str):
70 |     # FIXME: should be a data type
71 |     if fsdistrib_str == "exponential":
72 |         return SmallfileWorkload.fsdistr_random_exponential
73 |     elif fsdistrib_str == "fixed":
74 |         return SmallfileWorkload.fsdistr_fixed
75 |     else:
76 |         # should never get here
77 |         raise TypeExc('file size distribution must be either "exponential" or "fixed"')
78 | 


--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
  1 | [[package]]
  2 | name = "cfgv"
  3 | version = "3.3.1"
  4 | description = "Validate configuration and produce human readable error messages."
  5 | category = "dev"
  6 | optional = false
  7 | python-versions = ">=3.6.1"
  8 | 
  9 | [[package]]
 10 | name = "distlib"
 11 | version = "0.3.6"
 12 | description = "Distribution utilities"
 13 | category = "dev"
 14 | optional = false
 15 | python-versions = "*"
 16 | 
 17 | [[package]]
 18 | name = "filelock"
 19 | version = "3.8.0"
 20 | description = "A platform independent file lock."
 21 | category = "dev"
 22 | optional = false
 23 | python-versions = ">=3.7"
 24 | 
 25 | [package.extras]
 26 | docs = ["furo (>=2022.6.21)", "sphinx (>=5.1.1)", "sphinx-autodoc-typehints (>=1.19.1)"]
 27 | testing = ["covdefaults (>=2.2)", "coverage (>=6.4.2)", "pytest (>=7.1.2)", "pytest-cov (>=3)", "pytest-timeout (>=2.1)"]
 28 | 
 29 | [[package]]
 30 | name = "identify"
 31 | version = "2.5.8"
 32 | description = "File identification library for Python"
 33 | category = "dev"
 34 | optional = false
 35 | python-versions = ">=3.7"
 36 | 
 37 | [package.extras]
 38 | license = ["ukkonen"]
 39 | 
 40 | [[package]]
 41 | name = "nodeenv"
 42 | version = "1.7.0"
 43 | description = "Node.js virtual environment builder"
 44 | category = "dev"
 45 | optional = false
 46 | python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
 47 | 
 48 | [package.dependencies]
 49 | setuptools = "*"
 50 | 
 51 | [[package]]
 52 | name = "numpy"
 53 | version = "1.23.4"
 54 | description = "NumPy is the fundamental package for array computing with Python."
 55 | category = "main"
 56 | optional = false
 57 | python-versions = ">=3.8"
 58 | 
 59 | [[package]]
 60 | name = "platformdirs"
 61 | version = "2.5.3"
 62 | description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
 63 | category = "dev"
 64 | optional = false
 65 | python-versions = ">=3.7"
 66 | 
 67 | [package.extras]
 68 | docs = ["furo (>=2022.9.29)", "proselint (>=0.13)", "sphinx (>=5.3)", "sphinx-autodoc-typehints (>=1.19.4)"]
 69 | test = ["appdirs (==1.4.4)", "pytest (>=7.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"]
 70 | 
 71 | [[package]]
 72 | name = "pre-commit"
 73 | version = "2.20.0"
 74 | description = "A framework for managing and maintaining multi-language pre-commit hooks."
 75 | category = "dev"
 76 | optional = false
 77 | python-versions = ">=3.7"
 78 | 
 79 | [package.dependencies]
 80 | cfgv = ">=2.0.0"
 81 | identify = ">=1.0.0"
 82 | nodeenv = ">=0.11.1"
 83 | pyyaml = ">=5.1"
 84 | toml = "*"
 85 | virtualenv = ">=20.0.8"
 86 | 
 87 | [[package]]
 88 | name = "pyyaml"
 89 | version = "6.0"
 90 | description = "YAML parser and emitter for Python"
 91 | category = "dev"
 92 | optional = false
 93 | python-versions = ">=3.6"
 94 | 
 95 | [[package]]
 96 | name = "scipy"
 97 | version = "1.9.3"
 98 | description = "Fundamental algorithms for scientific computing in Python"
 99 | category = "main"
100 | optional = false
101 | python-versions = ">=3.8"
102 | 
103 | [package.dependencies]
104 | numpy = ">=1.18.5,<1.26.0"
105 | 
106 | [package.extras]
107 | dev = ["flake8", "mypy", "pycodestyle", "typing_extensions"]
108 | doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-panels (>=0.5.2)", "sphinx-tabs"]
109 | test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
110 | 
111 | [[package]]
112 | name = "setuptools"
113 | version = "65.5.1"
114 | description = "Easily download, build, install, upgrade, and uninstall Python packages"
115 | category = "dev"
116 | optional = false
117 | python-versions = ">=3.7"
118 | 
119 | [package.extras]
120 | docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
121 | testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
122 | testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
123 | 
124 | [[package]]
125 | name = "toml"
126 | version = "0.10.2"
127 | description = "Python Library for Tom's Obvious, Minimal Language"
128 | category = "dev"
129 | optional = false
130 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
131 | 
132 | [[package]]
133 | name = "virtualenv"
134 | version = "20.16.6"
135 | description = "Virtual Python Environment builder"
136 | category = "dev"
137 | optional = false
138 | python-versions = ">=3.6"
139 | 
140 | [package.dependencies]
141 | distlib = ">=0.3.6,<1"
142 | filelock = ">=3.4.1,<4"
143 | platformdirs = ">=2.4,<3"
144 | 
145 | [package.extras]
146 | docs = ["proselint (>=0.13)", "sphinx (>=5.3)", "sphinx-argparse (>=0.3.2)", "sphinx-rtd-theme (>=1)", "towncrier (>=22.8)"]
147 | testing = ["coverage (>=6.2)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=21.3)", "pytest (>=7.0.1)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.2)", "pytest-mock (>=3.6.1)", "pytest-randomly (>=3.10.3)", "pytest-timeout (>=2.1)"]
148 | 
149 | [metadata]
150 | lock-version = "1.1"
151 | python-versions = "^3.9"
152 | content-hash = "a317a9659ebfc0c32916fee3f38d7d2d8608924ac84d5fadb9dab5c58deded19"
153 | 
154 | [metadata.files]
155 | cfgv = [
156 |     {file = "cfgv-3.3.1-py2.py3-none-any.whl", hash = "sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426"},
157 |     {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"},
158 | ]
159 | distlib = [
160 |     {file = "distlib-0.3.6-py2.py3-none-any.whl", hash = "sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e"},
161 |     {file = "distlib-0.3.6.tar.gz", hash = "sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46"},
162 | ]
163 | filelock = [
164 |     {file = "filelock-3.8.0-py3-none-any.whl", hash = "sha256:617eb4e5eedc82fc5f47b6d61e4d11cb837c56cb4544e39081099fa17ad109d4"},
165 |     {file = "filelock-3.8.0.tar.gz", hash = "sha256:55447caa666f2198c5b6b13a26d2084d26fa5b115c00d065664b2124680c4edc"},
166 | ]
167 | identify = [
168 |     {file = "identify-2.5.8-py2.py3-none-any.whl", hash = "sha256:48b7925fe122720088aeb7a6c34f17b27e706b72c61070f27fe3789094233440"},
169 |     {file = "identify-2.5.8.tar.gz", hash = "sha256:7a214a10313b9489a0d61467db2856ae8d0b8306fc923e03a9effa53d8aedc58"},
170 | ]
171 | nodeenv = [
172 |     {file = "nodeenv-1.7.0-py2.py3-none-any.whl", hash = "sha256:27083a7b96a25f2f5e1d8cb4b6317ee8aeda3bdd121394e5ac54e498028a042e"},
173 |     {file = "nodeenv-1.7.0.tar.gz", hash = "sha256:e0e7f7dfb85fc5394c6fe1e8fa98131a2473e04311a45afb6508f7cf1836fa2b"},
174 | ]
175 | numpy = [
176 |     {file = "numpy-1.23.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:95d79ada05005f6f4f337d3bb9de8a7774f259341c70bc88047a1f7b96a4bcb2"},
177 |     {file = "numpy-1.23.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:926db372bc4ac1edf81cfb6c59e2a881606b409ddc0d0920b988174b2e2a767f"},
178 |     {file = "numpy-1.23.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c237129f0e732885c9a6076a537e974160482eab8f10db6292e92154d4c67d71"},
179 |     {file = "numpy-1.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8365b942f9c1a7d0f0dc974747d99dd0a0cdfc5949a33119caf05cb314682d3"},
180 |     {file = "numpy-1.23.4-cp310-cp310-win32.whl", hash = "sha256:2341f4ab6dba0834b685cce16dad5f9b6606ea8a00e6da154f5dbded70fdc4dd"},
181 |     {file = "numpy-1.23.4-cp310-cp310-win_amd64.whl", hash = "sha256:d331afac87c92373826af83d2b2b435f57b17a5c74e6268b79355b970626e329"},
182 |     {file = "numpy-1.23.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:488a66cb667359534bc70028d653ba1cf307bae88eab5929cd707c761ff037db"},
183 |     {file = "numpy-1.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce03305dd694c4873b9429274fd41fc7eb4e0e4dea07e0af97a933b079a5814f"},
184 |     {file = "numpy-1.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8981d9b5619569899666170c7c9748920f4a5005bf79c72c07d08c8a035757b0"},
185 |     {file = "numpy-1.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a70a7d3ce4c0e9284e92285cba91a4a3f5214d87ee0e95928f3614a256a1488"},
186 |     {file = "numpy-1.23.4-cp311-cp311-win32.whl", hash = "sha256:5e13030f8793e9ee42f9c7d5777465a560eb78fa7e11b1c053427f2ccab90c79"},
187 |     {file = "numpy-1.23.4-cp311-cp311-win_amd64.whl", hash = "sha256:7607b598217745cc40f751da38ffd03512d33ec06f3523fb0b5f82e09f6f676d"},
188 |     {file = "numpy-1.23.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7ab46e4e7ec63c8a5e6dbf5c1b9e1c92ba23a7ebecc86c336cb7bf3bd2fb10e5"},
189 |     {file = "numpy-1.23.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a8aae2fb3180940011b4862b2dd3756616841c53db9734b27bb93813cd79fce6"},
190 |     {file = "numpy-1.23.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c053d7557a8f022ec823196d242464b6955a7e7e5015b719e76003f63f82d0f"},
191 |     {file = "numpy-1.23.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0882323e0ca4245eb0a3d0a74f88ce581cc33aedcfa396e415e5bba7bf05f68"},
192 |     {file = "numpy-1.23.4-cp38-cp38-win32.whl", hash = "sha256:dada341ebb79619fe00a291185bba370c9803b1e1d7051610e01ed809ef3a4ba"},
193 |     {file = "numpy-1.23.4-cp38-cp38-win_amd64.whl", hash = "sha256:0fe563fc8ed9dc4474cbf70742673fc4391d70f4363f917599a7fa99f042d5a8"},
194 |     {file = "numpy-1.23.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c67b833dbccefe97cdd3f52798d430b9d3430396af7cdb2a0c32954c3ef73894"},
195 |     {file = "numpy-1.23.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f76025acc8e2114bb664294a07ede0727aa75d63a06d2fae96bf29a81747e4a7"},
196 |     {file = "numpy-1.23.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12ac457b63ec8ded85d85c1e17d85efd3c2b0967ca39560b307a35a6703a4735"},
197 |     {file = "numpy-1.23.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95de7dc7dc47a312f6feddd3da2500826defdccbc41608d0031276a24181a2c0"},
198 |     {file = "numpy-1.23.4-cp39-cp39-win32.whl", hash = "sha256:f2f390aa4da44454db40a1f0201401f9036e8d578a25f01a6e237cea238337ef"},
199 |     {file = "numpy-1.23.4-cp39-cp39-win_amd64.whl", hash = "sha256:f260da502d7441a45695199b4e7fd8ca87db659ba1c78f2bbf31f934fe76ae0e"},
200 |     {file = "numpy-1.23.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:61be02e3bf810b60ab74e81d6d0d36246dbfb644a462458bb53b595791251911"},
201 |     {file = "numpy-1.23.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:296d17aed51161dbad3c67ed6d164e51fcd18dbcd5dd4f9d0a9c6055dce30810"},
202 |     {file = "numpy-1.23.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4d52914c88b4930dafb6c48ba5115a96cbab40f45740239d9f4159c4ba779962"},
203 |     {file = "numpy-1.23.4.tar.gz", hash = "sha256:ed2cc92af0efad20198638c69bb0fc2870a58dabfba6eb722c933b48556c686c"},
204 | ]
205 | platformdirs = [
206 |     {file = "platformdirs-2.5.3-py3-none-any.whl", hash = "sha256:0cb405749187a194f444c25c82ef7225232f11564721eabffc6ec70df83b11cb"},
207 |     {file = "platformdirs-2.5.3.tar.gz", hash = "sha256:6e52c21afff35cb659c6e52d8b4d61b9bd544557180440538f255d9382c8cbe0"},
208 | ]
209 | pre-commit = [
210 |     {file = "pre_commit-2.20.0-py2.py3-none-any.whl", hash = "sha256:51a5ba7c480ae8072ecdb6933df22d2f812dc897d5fe848778116129a681aac7"},
211 |     {file = "pre_commit-2.20.0.tar.gz", hash = "sha256:a978dac7bc9ec0bcee55c18a277d553b0f419d259dadb4b9418ff2d00eb43959"},
212 | ]
213 | pyyaml = [
214 |     {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
215 |     {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
216 |     {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"},
217 |     {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"},
218 |     {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
219 |     {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
220 |     {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
221 |     {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
222 |     {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
223 |     {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
224 |     {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
225 |     {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
226 |     {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
227 |     {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
228 |     {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
229 |     {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
230 |     {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
231 |     {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"},
232 |     {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"},
233 |     {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"},
234 |     {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"},
235 |     {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"},
236 |     {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"},
237 |     {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"},
238 |     {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"},
239 |     {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"},
240 |     {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"},
241 |     {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"},
242 |     {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"},
243 |     {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"},
244 |     {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"},
245 |     {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"},
246 |     {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"},
247 |     {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"},
248 |     {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"},
249 |     {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"},
250 |     {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"},
251 |     {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"},
252 |     {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
253 |     {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
254 | ]
255 | scipy = [
256 |     {file = "scipy-1.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1884b66a54887e21addf9c16fb588720a8309a57b2e258ae1c7986d4444d3bc0"},
257 |     {file = "scipy-1.9.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:83b89e9586c62e787f5012e8475fbb12185bafb996a03257e9675cd73d3736dd"},
258 |     {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a72d885fa44247f92743fc20732ae55564ff2a519e8302fb7e18717c5355a8b"},
259 |     {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d01e1dd7b15bd2449c8bfc6b7cc67d630700ed655654f0dfcf121600bad205c9"},
260 |     {file = "scipy-1.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:68239b6aa6f9c593da8be1509a05cb7f9efe98b80f43a5861cd24c7557e98523"},
261 |     {file = "scipy-1.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b41bc822679ad1c9a5f023bc93f6d0543129ca0f37c1ce294dd9d386f0a21096"},
262 |     {file = "scipy-1.9.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:90453d2b93ea82a9f434e4e1cba043e779ff67b92f7a0e85d05d286a3625df3c"},
263 |     {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83c06e62a390a9167da60bedd4575a14c1f58ca9dfde59830fc42e5197283dab"},
264 |     {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abaf921531b5aeaafced90157db505e10345e45038c39e5d9b6c7922d68085cb"},
265 |     {file = "scipy-1.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:06d2e1b4c491dc7d8eacea139a1b0b295f74e1a1a0f704c375028f8320d16e31"},
266 |     {file = "scipy-1.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5a04cd7d0d3eff6ea4719371cbc44df31411862b9646db617c99718ff68d4840"},
267 |     {file = "scipy-1.9.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:545c83ffb518094d8c9d83cce216c0c32f8c04aaf28b92cc8283eda0685162d5"},
268 |     {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d54222d7a3ba6022fdf5773931b5d7c56efe41ede7f7128c7b1637700409108"},
269 |     {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff3a5295234037e39500d35316a4c5794739433528310e117b8a9a0c76d20fc"},
270 |     {file = "scipy-1.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:2318bef588acc7a574f5bfdff9c172d0b1bf2c8143d9582e05f878e580a3781e"},
271 |     {file = "scipy-1.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d644a64e174c16cb4b2e41dfea6af722053e83d066da7343f333a54dae9bc31c"},
272 |     {file = "scipy-1.9.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:da8245491d73ed0a994ed9c2e380fd058ce2fa8a18da204681f2fe1f57f98f95"},
273 |     {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4db5b30849606a95dcf519763dd3ab6fe9bd91df49eba517359e450a7d80ce2e"},
274 |     {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c68db6b290cbd4049012990d7fe71a2abd9ffbe82c0056ebe0f01df8be5436b0"},
275 |     {file = "scipy-1.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:5b88e6d91ad9d59478fafe92a7c757d00c59e3bdc3331be8ada76a4f8d683f58"},
276 |     {file = "scipy-1.9.3.tar.gz", hash = "sha256:fbc5c05c85c1a02be77b1ff591087c83bc44579c6d2bd9fb798bb64ea5e1a027"},
277 | ]
278 | setuptools = [
279 |     {file = "setuptools-65.5.1-py3-none-any.whl", hash = "sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31"},
280 |     {file = "setuptools-65.5.1.tar.gz", hash = "sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f"},
281 | ]
282 | toml = [
283 |     {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
284 |     {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
285 | ]
286 | virtualenv = [
287 |     {file = "virtualenv-20.16.6-py3-none-any.whl", hash = "sha256:186ca84254abcbde98180fd17092f9628c5fe742273c02724972a1d8a2035108"},
288 |     {file = "virtualenv-20.16.6.tar.gz", hash = "sha256:530b850b523c6449406dfba859d6345e48ef19b8439606c5d74d7d3c9e14d76e"},
289 | ]
290 | 


--------------------------------------------------------------------------------
/profile.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -x
 2 | #
 3 | # script to run python profile module to profile some smallfile workloads
 4 | #
 5 | top=/run/ben/smfprofile
 6 | if [ ! -d $top ] ; then
 7 |   python ./smallfile_cli.py --top $top --threads 1 --files 100000 --file-size 1 --operation cleanup
 8 |   python ./smallfile_cli.py --top $top --threads 1 --files 100000 --file-size 1 --operation create
 9 | fi
10 | touch $top/network_shared/starting_gate
11 | OPNAME=read COUNT=100000 TOP=$top python <<EOF > read-profile.log
12 | import profile
13 | profile.run('import profile_workload', 'profile.tmp')
14 | import pstats
15 | p = pstats.Stats('profile.tmp')
16 | p.sort_stats('cumulative').print_stats()
17 | EOF
18 | OPNAME=append COUNT=100000 TOP=$top python <<EOF > append-profile.log
19 | import profile
20 | profile.run('import profile_workload', 'profile.tmp')
21 | import pstats
22 | p = pstats.Stats('profile.tmp')
23 | p.sort_stats('cumulative').print_stats()
24 | EOF
25 | 


--------------------------------------------------------------------------------
/profile_workload.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # python program used by profile.sh to generate profile of smallfile workloads
 4 | #
 5 | 
 6 | import os
 7 | import socket
 8 | 
 9 | import smallfile
10 | 
11 | top = os.getenv("TOP")
12 | count = int(os.getenv("COUNT"))
13 | invk = smallfile.SmallfileWorkload()
14 | invk.tid = "00"
15 | invk.src_dirs = [
16 |     os.path.join(top, "file_srcdir", socket.gethostname(), "thrd_" + invk.tid)
17 | ]
18 | invk.dest_dirs = [
19 |     os.path.join(top, "file_dstdir", socket.gethostname(), "thrd_" + invk.tid)
20 | ]
21 | invk.network_dir = top + os.sep + "network_shared"
22 | invk.record_sz_kb = 0
23 | invk.total_sz_kb = 1
24 | invk.starting_gate = os.path.join(invk.network_dir, "starting_gate")
25 | invk.stonewall = True
26 | invk.finish_all_rq = True
27 | invk.opname = os.getenv("OPNAME")
28 | invk.iterations = count
29 | print(invk)
30 | invk.do_workload()
31 | print(invk)
32 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "smallfile"
 3 | version = "1.0.2"
 4 | description = "A distributed workload generator for POSIX-like filesystems"
 5 | authors = ["distributed-system-analysis authors"]
 6 | license = "Apache-2.0"
 7 | readme = "README.md"
 8 | homepage = "https://github.com/distributed-system-analysis/smallfile"
 9 | packages = [
10 |    { include="*.py", from="."  },
11 | ]
12 | include = [
13 |    { path="*.sh", format="sdist" },
14 |    { path="examples", format="sdist" },
15 |    { path="poetry.lock", format="sdist" },
16 |    { path="Dockerfile", format="sdist" },
17 | ]
18 | [tool.poetry.dependencies]
19 | python = "^3.9"
20 | numpy = "^1.23.4"
21 | scipy = "^1.9.3"
22 | 
23 | [tool.poetry.group.dev.dependencies]
24 | pre-commit = "^2.20.0"
25 | 
26 | [build-system]
27 | requires = ["poetry-core"]
28 | build-backend = "poetry.core.masonry.api"
29 | 


--------------------------------------------------------------------------------
/regtest.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | # smallfile regression test
  3 | #
  4 | # you can set the environment variable PYTHON_PROG 
  5 | # to switch between python3 and python2
  6 | # for example: # PYTHON_PROG=python3 bash regtest.sh
  7 | # python3 at present doesn't seem to support xattr module 
  8 | # so some smallfile operations are not yet supported under python3, 
  9 | # the regression test knows how to deal with that.
 10 | #
 11 | # you can have it use a directory in a tmpfs mountpoint, 
 12 | # this is recommended so as not to wear out laptop drive.
 13 | # by default, newer distros have /run tmpfs mountpoint with sufficient space 
 14 | # so this is default, but TMPDIR environment variable overrides
 15 | # xattrs won't be tested if you use tmpfs for $testdir
 16 | #
 17 | # for really long runs you can't fit in tmpfs mountpoint so
 18 | # $bigtmp defaults to /var/tmp, but you can override with BIGTMP
 19 | # environment variable.  Recommend you use SSD for $bigtmp
 20 | #
 21 | # ext4 doesn't support xattrs by default.  
 22 | # To run a test on xattr-related operation types,  
 23 | # set TMPDIR to an XFS filesystem.  
 24 | # You can create an XFS filesystem by using a loopback device, for example:
 25 | #
 26 | #   dd if=/dev/zero of=/var/tmp/xfs-fs.img bs=1024k count=1k
 27 | #   losetup /dev/loop4 /var/tmp/xfs-fs.img
 28 | #   mkfs -t xfs /dev/loop4
 29 | #   mkdir -p /mnt/xattrtest
 30 | #   mount -t xfs -o noatime,inode64 /dev/loop4 /mnt/xattrtest
 31 | #   export TMPDIR=/mnt/xattrtest/smf
 32 | #   mkdir /mnt/xattrtest/smf
 33 | #   -- run test ---
 34 | #   unset TMPDIR
 35 | #   umount /mnt/xattrtest
 36 | #   losetup -e /dev/loop4
 37 | #   rm -fv /var/tmp/xfs-fs.img
 38 | #
 39 | # we don't use "tee" program to display results as they are happening 
 40 | # because this erases any failure status code returned by smallfile, 
 41 | # and this status code is vital to regression test.  
 42 | # Instead we log all smallfile output to smfregtest.log 
 43 | # where it can be analyzed later  if failure occurs
 44 | #
 45 | 
 46 | localhost_name="$1"
 47 | if [ -z "$localhost_name" ] ; then localhost_name="localhost" ; fi
 48 | 
 49 | nfs_svc="nfs"
 50 | (find /usr/lib/systemd/system | grep nfs-server) && nfs_svc="nfs-server"
 51 | 
 52 | # xattrs must be set to zero if using tmpfs, since tmpfs doesn't support xattrs
 53 | 
 54 | testdir="${TMPDIR:-/var/tmp}/smf"
 55 | xattrs=0
 56 | if [ -d $testdir ] ; then
 57 | 	(df $testdir | grep -q tmpfs) || xattrs=1
 58 | fi
 59 | bigtmp="${BIGTMP:-/var/tmp}/smf"
 60 | nfsdir=/var/tmp/smfnfs
 61 | OK=0
 62 | NOTOK=1
 63 | GREP="grep -q "
 64 | PYTHON=${PYTHON_PROG:-python3}
 65 | iam=$USER
 66 | if [ "$USER" != "root" ] ; then
 67 | 	SUDO=sudo
 68 | fi
 69 | logf=/tmp/onetest.log
 70 | 
 71 | assertok() {
 72 |   status=$1
 73 |   if [ $status != $OK ] ; then
 74 |     echo "ERROR: unexpected failure status $status"
 75 |     exit $NOTOK
 76 |   fi
 77 | }
 78 | 
 79 | assertfail() {
 80 |   status=$1
 81 |   if [ $status == $OK ] ; then
 82 |     echo "ERROR: unexpected success status $status"
 83 |     exit $NOTOK
 84 |   fi
 85 | }
 86 | 
 87 | # test assertion mechanism
 88 | 
 89 | cp -r /foo/bar/no-such-dir /tmp/ 
 90 | assertfail $?
 91 | rm -rf /tmp/junk && cp -r . /tmp/junk
 92 | assertok $?
 93 | 
 94 | # also echo smallfile command to the log so you can see what was last attempt
 95 | # if you expect it to fail, pass "true" (anything really) in param 2
 96 | 
 97 | runsmf() {
 98 |   smfcmd="$1"
 99 |   expectfail="$2"
100 |   echo
101 |   echo "$smfcmd" 
102 |   s=$OK
103 |   $smfcmd > $logf || s=$? || echo -n
104 |   echo status $s
105 |   cat $logf
106 |   if [ -n "$expectfail" ] ; then assertfail $s ; fi
107 |   if [ -z "$expectfail" ] ; then assertok $s ; fi
108 | }
109 | 
110 | 
111 | # idempotent routine to unmount NFS mountpoint, clean up old files,
112 | # recreate directory and NFS mountpoint, and remount NFS mountpoint
113 | 
114 | cleanup() {
115 |   if [ `grep $nfsdir /proc/mounts | wc -l` -gt 0 ] ; then $SUDO umount $nfsdir ; fi
116 |   assertok $?
117 |   $SUDO exportfs -ua
118 |   assertok $?
119 |   rm -rf /var/tmp/invoke*.log
120 |   assertok $?
121 |   mkdir -pv $testdir
122 |   assertok $?
123 |   chown -v $iam:$iam $testdir 
124 |   assertok $?
125 |   chown -v $iam:$iam $testdir/.. || \
126 |     echo $iam cannot own parent directory of $testdir
127 |   chmod -v 777 $testdir
128 |   assertok $?
129 |   touch $testdir/letmein
130 |   assertok $?
131 |   $SUDO exportfs -v -o rw,no_root_squash,sync,fsid=15 localhost:$testdir
132 |   assertok $?
133 |   $SUDO rm -rf $nfsdir
134 |   assertok $?
135 |   mkdir -p $nfsdir
136 |   assertok $?
137 |   chown -v $iam:$iam $nfsdir
138 |   assertok $?
139 |   chmod -v 777 $nfsdir
140 |   assertok $?
141 |   sleep 1
142 |   $SUDO mount -t nfs -o nfsvers=3,tcp,actimeo=1 $localhost_name:$testdir $nfsdir
143 |   assertok $?
144 |   $SUDO chmod -v 777 $nfsdir
145 |   assertok $?
146 |   $SUDO exportfs -v | grep -q $testdir 2>/tmp/ee
147 |   assertok $?
148 |   df $nfsdir | grep $nfsdir
149 |   assertok $?
150 |   touch $nfsdir/letmein
151 |   assertok $?
152 | }
153 | 
154 | 
155 | # if distro is REALLY old or hates systemd, systemctl might not be in use
156 | 
157 | is_systemctl=1
158 | $SUDO systemctl > /tmp/junk
159 | if [ $? != $OK ] ; then  # chances are it's pre-systemctl Linux distro, use "service" instead
160 |   is_systemctl=0
161 | fi
162 | 
163 | 
164 | # start service in distro-independent way
165 | 
166 | start_service()
167 | {
168 | svcname=$1
169 | echo "attempting to start service $svcname"
170 | if [ $is_systemctl == 1 ] ; then
171 |   $SUDO systemctl restart $svcname
172 | else
173 |   $SUDO service $svcname restart
174 | fi
175 | assertok $?
176 | return $s
177 | }
178 | 
179 | 
180 | # since we do not know if this is python2 or 3, just
181 | # try all the packages in the list before giving up
182 | # in some cases, package install may not be necessary
183 | # because the module is built into python in later versions
184 | # parameter 1 is the name used to import the package in python
185 | # parameters 2-N are the package names that might be needed to get it
186 | 
187 | install_python_module_from_package()
188 | {
189 | 	pkgname=$1
190 | 	shift
191 | 	pkglist=$*
192 | 	(echo "import $pkgname" | $PYTHON) || \
193 | 	 ((for p in $pkglist ; do \
194 | 	    $SUDO yum install -y $p || echo "attempted install of $p" ; \
195 |            done) ; \
196 | 	   (echo "import $pkgname" | $PYTHON) )
197 | 	assertok $?
198 | }
199 | 
200 | ps awux | grep sshd || sshd_was_off=1
201 | start_service sshd
202 | start_service $nfs_svc
203 | 
204 | $SUDO yum install -y python2 python3 || \
205 |   echo 'best-effort python2 and python3 install'
206 | 
207 | install_python_module_from_package \
208 | 	unittest \
209 | 	python-unittest python-unittest2 python3-unittest
210 | 
211 | install_python_module_from_package \
212 | 	numpy \
213 | 	python2-numpy python3-numpy
214 | 
215 | install_python_module_from_package \
216 | 	scipy \
217 | 	python2-scipy python3-scipy
218 | 
219 | install_python_module_from_package \
220 | 	yaml \
221 | 	python2-pyyaml python3-pyyaml
222 | 
223 | # to get xattr, in RHEL8 you need to enable this repo:
224 | #  subscription-manager repos --enable=codeready-builder-for-rhel-8-x86_64-source-rpms
225 | 
226 | $SUDO yum install -y gcc python2-pip python3-pip || \
227 | 	echo "best-effort pip install"
228 | 
229 | install_python_module_from_package xattr python3-pyxattr || \
230 | 	(pip install pyxattr || pip2 install pyxattr || pip3 install pyxattr ; \
231 | 	(echo 'import xattr' | $PYTHON) )
232 | assertok $?
233 | 
234 | # run the smallfile.py module's unit test
235 | 
236 | echo "running smallfile.py unit test"
237 | $PYTHON smallfile.py
238 | assertok $?
239 | 
240 | # run the invoke_process.py unit test
241 | 
242 | echo "running invoke_process.py unit test"
243 | $PYTHON invoke_process.py
244 | assertok $?
245 | 
246 | # run drop_buffer_cache.py unit test
247 | 
248 | echo "running drop_buffer_cache.py unit test"
249 | $PYTHON drop_buffer_cache.py
250 | assertok $?
251 | 
252 | # run yaml parser unit test
253 | 
254 | echo "running YAML parser unit test"
255 | $PYTHON yaml_parser.py
256 | assertok $?
257 | 
258 | # set up NFS mountpoint
259 | 
260 | cleanup
261 | 
262 | # test simplest smallfile_cli commands, using non-default dirs
263 | 
264 | echo "simplest smallfile_cli.py commands"
265 | 
266 | scmd="$PYTHON smallfile_cli.py "
267 | cleanup
268 | rm -fv $testdir/{starting_gate,stonewall}.tmp 2>/tmp/e
269 | runsmf "$scmd"
270 | ls -l $testdir/network_shared/{starting_gate,stonewall}.tmp
271 | assertok $?
272 | 
273 | non_dflt_dir=/var/tmp/foo
274 | scmd="$PYTHON smallfile_cli.py --top $non_dflt_dir "
275 | cleanup
276 | rm -rf $non_dflt_dir
277 | mkdir $non_dflt_dir
278 | runsmf "$scmd"
279 | (cd $non_dflt_dir/network_shared ; ls -l {starting_gate,stonewall}.tmp)
280 | assertok $?
281 | 
282 | scmd="$scmd --host-set localhost"
283 | cleanup
284 | rm -rf $non_dflt_dir
285 | mkdir $non_dflt_dir
286 | runsmf "$scmd"
287 | (cd $non_dflt_dir/network_shared ; \
288 |  ls -l {starting_gate,stonewall}.tmp param.pickle host_ready.localhost.tmp) 2>/tmp/e
289 | assertok $?
290 | 
291 | # test parsing
292 | 
293 | nonnegmsg="integer value greater than zero expected"
294 | echo "testing parsing"
295 | scmd="$PYTHON smallfile_cli.py --top $testdir "
296 | cleanup
297 | runsmf "$scmd --files 0" assertfail
298 | $GREP "$nonnegmsg" $logf
299 | 
300 | cleanup
301 | runsmf "$scmd --threads 0" assertfail
302 | $GREP "$nonnegmsg" $logf
303 | 
304 | runsmf "$scmd --files -1" 		assertfail
305 | runsmf "$scmd --record-size -1" 	assertfail
306 | runsmf "$scmd --file-size -1" 		assertfail
307 | runsmf "$scmd --files-per-dir 0" 	assertfail
308 | runsmf "$scmd --dirs-per-dir 0" 	assertfail
309 | runsmf "$scmd --record-size -1" 	assertfail
310 | runsmf "$scmd --record-size a" 		assertfail
311 | runsmf "$scmd --top /" 			assertfail
312 | runsmf "$scmd --response-times foo"	assertfail
313 | runsmf "$scmd --stonewall foo" 		assertfail
314 | runsmf "$scmd --finish foo"		assertfail
315 | runsmf "$scmd --host-count -5" 		assertfail
316 | runsmf "$scmd --auto-pause foo"		assertfail
317 | 
318 | cat > $nfsdir/bad.yaml <<EOF
319 | --file-size 30
320 | EOF
321 | runsmf "$PYTHON smallfile_cli.py --yaml-input $nfsdir/bad.yaml" assertfail
322 | 
323 | # run a command with all CLI options and verify that they were successfully parsed
324 | 
325 | cleanup
326 | rm -rf $nfsdir/smf
327 | mkdir -p $nfsdir/smf
328 | scmd="$PYTHON smallfile_cli.py --top $nfsdir/smf "
329 | scmd="$scmd --verify-read N --response-times Y --finish N --stonewall N --permute-host-dirs Y --verbose Y"
330 | scmd="$scmd --same-dir Y --operation create --threads 5 --files 20 --files-per-dir 5 --dirs-per-dir 3"
331 | scmd="$scmd --record-size 6 --file-size 30 --file-size-distribution exponential --prefix a --suffix b"
332 | scmd="$scmd --hash-into-dirs Y --pause 5 --auto-pause Y --host-set $localhost_name --output-json $nfsdir/smf.json"
333 | runsmf "$scmd"
334 | expect_strs=( 'verify read? : N' \
335 |         "hosts in test : \['$localhost_name'\]" \
336 |         'file size distribution : random exponential'\
337 |         'filename prefix : a' \
338 |         'filename suffix : b' \
339 |         'hash file number into dir.? : Y' \
340 |         'pause between files (microsec) : 5' \
341 |         'finish all requests? : N' \
342 |         'stonewall? : N' \
343 |         'measure response times? : Y' \
344 |         'log to stderr? : False' \
345 |         'permute host directories? : Y' \
346 |         'verbose? : True' \
347 |         'response times? : Y' \
348 |         'finish all requests? : N' \
349 |         'threads share directories? : Y' \
350 |         'pause between files (microsec) : 5' \
351 | 	'auto-pause? : Y' \
352 |         "top test directory(s) : \['$nfsdir/smf'\]" \
353 |         'operation : create' \
354 |         'threads : 5' \
355 |         'files/thread : 20' \
356 |         'files per dir : 5' \
357 |         'dirs per dir : 3' \
358 |         'record size (KB, 0 = maximum) : 6' \
359 |         'file size (KB) : 30' )
360 | expect_ct=${#expect_strs[*]}
361 | for j in $(seq 0 $expect_ct) ; do 
362 |   expected_str="${expect_strs[$j]}"
363 |   $GREP "$expected_str" $logf || \
364 | 	  (echo "expecting: $expected_str" ; exit $NOTOK)
365 | done
366 | 
367 | # now do same thing in YAML to verify 
368 | 
369 | cleanup
370 | rm -rf $nfsdir/smf
371 | mkdir -p $nfsdir/smf
372 | yamlfile=$testdir/regtest.yaml
373 | 
374 | cat > $yamlfile <<EOF
375 | top: $nfsdir/smf
376 | verify-read: N
377 | response-times: Y
378 | finish: n
379 | stonewall: false
380 | permute-host-dirs: y
381 | verbose: yes
382 | same-dir: Y
383 | operation: create
384 | threads: 5
385 | files: 20
386 | files-per-dir: 5
387 | dirs-per-dir: 3
388 | record-size: 6
389 | file-size: 30
390 | file-size-distribution: exponential
391 | prefix: a
392 | suffix: b
393 | hash-into-dirs:   yes
394 | pause: 5
395 | auto-pause: Y
396 | cleanup-delay-usec-per-file: 500
397 | host-set: $localhost_name
398 | output-json: $nfsdir/smf.json
399 | EOF
400 | 
401 | # argparse recognizes unambiguous abbreviations of param. names
402 | scmd="$PYTHON smallfile_cli.py --yaml-input $yamlfile"
403 | runsmf "$scmd"
404 | for k in `seq 0 $expect_ct` ; do 
405 |   expected_str="${expect_strs[$k]}"
406 |   $GREP "$expected_str" $logf || \
407 | 	  (echo "expecting: $expected_str" ; exit $NOTOK)
408 | done
409 | 
410 | 
411 | echo "parsing JSON output"
412 | smfpretty=/var/tmp/smfpretty.json
413 | $PYTHON -m json.tool < $nfsdir/smf.json > $smfpretty
414 | json_strs=( 'params' 'file-size' 'file-size-distr' 'files-per-dir' \
415 | 	    'files-per-thread' 'finish-all-requests' 'fname-prefix' \
416 | 	    'fname-suffix' 'fsync-after-modify' 'hash-to-dir' 'host-set' \
417 | 	    'network-sync-dir' 'operation' 'pause-between-files' \
418 | 	    'permute-host-dirs' 'share-dir' 'stonewall' 'threads' \
419 | 	    'top' 'verify-read' 'xattr-count' 'xattr-size' \
420 | 	    'files-per-sec' 'pct-files-done' \
421 | 	    'per-thread' '00' 'elapsed' \
422 | 	    'in-thread' 'files' 'records' 'status' 'IOPS' 'MiBps' )
423 | 	    
424 | expect_ct=${#json_strs[*]}
425 | for j in `seq 1 $expect_ct` ; do
426 |   (( k = $j + 1 ))
427 |   expected_str="${json_strs[$k]}"
428 |   $GREP "$expected_str" $smfpretty ||
429 | 	  (echo "expecting: $expected_str" ; exit $NOTOK)
430 | done
431 | 
432 | supported_ops()
433 | {
434 |         multitop=''
435 |         if [ "$2" = 'multitop' ] ; then multitop='multiple-topdirs' ; fi
436 |  
437 |         # python3 does not support xattr-related ops yet, I forget why
438 |         xattr_ops="setxattr getxattr swift-put swift-get"
439 |         if [ "$PYTHON" = "python3" -o "$PYTHON" = "pypy" ] ; then xattr_ops='' ; fi
440 |         if [ $xattrs -eq 0 ] ; then xattr_ops='' ; fi
441 | 
442 |         # directory-reading ops do not work with multiple top-level directories at present
443 |         single_topdir_ops='readdir ls-l'
444 |         if [ -n "$multitop" ] ; then single_topdir_ops='' ; fi
445 |         
446 |         # for debug only: ops="create cleanup"
447 |         ops="cleanup create append overwrite truncate-overwrite read $single_topdir_ops chmod stat $xattr_ops symlink mkdir rmdir rename delete-renamed"
448 |         echo $ops
449 | }
450 | 
451 | common_params=\
452 | "$PYTHON smallfile_cli.py --files 1000 --files-per-dir 5 --dirs-per-dir 2 --threads 4 --file-size 4 --record-size 16 --file-size 32  --verify-read Y --response-times N --xattr-count 9 --xattr-size 253 --stonewall N"
453 | 
454 | # also test response time percentile analysis
455 | 
456 | echo "*** run one long cleanup test with huge directory and 1 thread ***"
457 | 
458 | cleanup_test_params="$common_params --threads 1 --files 1000000 --files-per-dir 1000000 --file-size 0"
459 | rm -fv /tmp/smf.json $testdir/*rsptime*csv
460 | runsmf "$cleanup_test_params --top $testdir --operation create --response-times Y --output-json /tmp/smf.json"
461 | 
462 | start_time=$(tr '",' '  ' < /tmp/smf.json | awk '/startTime/{print $NF}')
463 | echo "start time was $start_time"
464 | $PYTHON smallfile_rsptimes_stats.py --start-time $start_time --time-interval 1 $testdir/network_shared
465 | int_start_time=$(echo $start_time | awk -F. '{ print $1 }')
466 | echo "rounded-down start time was $int_start_time"
467 | grep $int_start_time $testdir/network_shared/stats-rsptimes.csv || exit $NOTOK
468 | runsmf "$cleanup_test_params --top $testdir --operation cleanup"
469 | 
470 | echo "*** run one test with many threads ***"
471 | 
472 | many_thread_params="$common_params --threads 30 --files 10000 --files-per-dir 10 --file-size 0"
473 | runsmf "$many_thread_params --top $testdir --operation create"
474 | runsmf "$many_thread_params --top $testdir --operation cleanup"
475 | 
476 | echo "******** testing non-distributed operations" | tee -a $f
477 | 
478 | cleanup
479 | for op in `supported_ops $xattrs ''` ; do
480 |   echo
481 |   echo "testing local op $op"
482 |   runsmf "$common_params --operation $op"
483 | done
484 | 
485 | echo "******** simulating distributed operations with launch-by-daemon" | tee -a $f
486 | 
487 | cleanup
488 | rm -fv $testdir/shutdown_launchers.tmp
489 | $PYTHON launch_smf_host.py --top $testdir --as-host foo &
490 | worker_pids="$!"
491 | $PYTHON launch_smf_host.py --top $testdir --as-host bar &
492 | worker_pids="$worker_pids $!"
493 | sleep 2
494 | daemon_params=\
495 | "$PYTHON smallfile_cli.py --launch-by-daemon Y --host-set foo,bar --top $testdir \
496 | --verify-read Y --response-times N --remote-pgm-dir `pwd` --cleanup-delay-usec-per-file 200 \
497 | --files 1000 --files-per-dir 5 --dirs-per-dir 2 --threads 4 --file-size 4"
498 | 
499 | for op in `supported_ops $xattrs ''` ; do
500 |   echo
501 |   echo "testing local op $op"
502 |   runsmf "$daemon_params --operation $op"
503 | done
504 | touch $testdir/network_shared/shutdown_launchers.tmp
505 | echo "waiting for launcher daemons to shut down..."
506 | for p in $worker_pids ; do
507 |   wait $p || exit $NOTOK
508 | done
509 | echo "launchers shut down"
510 | rm -fv $testdir/network_shared/shutdown_launchers.tmp
511 | 
512 | # we do these kinds of tests to support non-distributed filesystems and NFS exports of them
513 | 
514 | echo "******** testing non-distributed ops with multiple top-level directories" | tee -a $f
515 | 
516 | topdirlist="${testdir}1,${testdir}2,${testdir}3,${testdir}4"
517 | scmd="$PYTHON smallfile_cli.py --top $topdirlist "
518 | topdirlist_nocomma=`echo $topdirlist | sed 's/,/ /g'`
519 | for d in $topdirlist_nocomma ; do
520 |   $SUDO mkdir -pv $d
521 |   $SUDO chown -v $iam:$iam $d
522 |   $SUDO chmod -v 777 $d
523 | done
524 | cleanup
525 | for op in `supported_ops $xattrs 'multitop'` ; do
526 |   echo
527 |   echo "testing local op $op"
528 |   runsmf "$common_params --top $topdirlist --operation $op"
529 | done
530 | for d in $topdirlist_nocomma ; do $SUDO rm -rf $d ; done
531 | 
532 | # these kinds of tests are needed for distributed filesystems or NFS/SMB exports
533 | 
534 | echo "******** testing distributed operations" | tee -a $f
535 | 
536 | # as long as we use NFS for regression tests, NFS does not support xattrs at present
537 | save_xattrs=$xattrs
538 | xattrs=0
539 | 
540 | cleanup
541 | for op in `supported_ops $xattrs ''` ; do
542 |   echo
543 |   echo "testing distributed op $op"
544 |   runsmf "$common_params --host-set $localhost_name --stonewall Y --pause 500 --operation $op"
545 | done
546 | 
547 | # we do these tests for virtualization (many KVM guests or containers, shared storage but no shared fs)
548 | 
549 | echo "******* testing distributed operation with a host-local fs" | tee -a $f
550 | 
551 | cleanup
552 | for op in `supported_ops $xattrs ''` ; do
553 |   rm -rf $nfsdir/sync
554 |   mkdir $nfsdir/sync
555 |   echo
556 |   echo "testing remote-but-local op $op"
557 |   runsmf "$common_params --top $testdir --network-sync-dir $nfsdir/sync --host-set $localhost_name --operation $op"
558 | done
559 | 
560 | 
561 | echo "*** run one long test of creates and reads ***" | tee -a $f
562 | 
563 | cleanup
564 | xattrs=$save_xattrs
565 | rm -rf $bigtmp
566 | mkdir -pv $bigtmp
567 | chown -v $iam:$iam $bigtmp
568 | chmod -v 777 $bigtmp
569 | bigtest_params="$common_params --top $bigtmp --files 20000 --file-size 0 --record-size 4 "
570 | bigtest_params="$bigtest_params --files-per-dir 3 --dirs-per-dir 2 --threads 10 --stonewall Y --pause 10"
571 | for op in create read ; do
572 |   echo "big test with op $op"
573 |   runsmf "$bigtest_params --operation $op "
574 | done
575 | # could be out of space on root filesystem, so cleanup
576 | rm -rf /var/tmp/invoke*.log
577 | runsmf "$bigtest_params --operation cleanup "
578 | 
579 | $GREP $nfsdir /proc/mounts
580 | if [ $? == $OK ] ; then
581 |   $SUDO umount -v $nfsdir
582 |   $SUDO rm -rf $nfsdir
583 | fi
584 | $SUDO exportfs -uav
585 | $SUDO rm -rf $testdir $bigtmp
586 | $SUDO systemctl stop $nfs_svc
587 | if [ -n "$sshd_was_off" ] ; then
588 | 	$SUDO systemctl stop sshd
589 | fi
590 | echo 'SUCCESS!'
591 | 


--------------------------------------------------------------------------------
/smallfile_cli.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # because it uses the "multiprocessing" python module instead of "threading"
  5 | # module, it can scale to many cores
  6 | # all the heavy lifting is done in "invocation" module,
  7 | # this script just adds code to run multi-process tests
  8 | # this script parses CLI commands, sets up test, runs it and prints results
  9 | #
 10 | # how to run:
 11 | #
 12 | # ./smallfile_cli.py
 13 | #
 14 | 
 15 | """
 16 | smallfile_cli.py
 17 | 
 18 | CLI user interface for generating metadata-intensive workloads
 19 | Copyright 2012 -- Ben England
 20 | Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 21 | See Appendix on this page for instructions pertaining to license.
 22 | """
 23 | 
 24 | import errno
 25 | import os
 26 | import os.path
 27 | import pickle
 28 | import sys
 29 | import time
 30 | 
 31 | import launcher_thread
 32 | import multi_thread_workload
 33 | import output_results
 34 | import parse
 35 | import smallfile
 36 | import ssh_thread
 37 | import sync_files
 38 | from smallfile import NOTOK, OK, SMFRunException, ensure_deleted
 39 | 
 40 | # FIXME: should be monitoring progress, not total elapsed time
 41 | 
 42 | min_files_per_sec = 15
 43 | pct_files_min = 70  # minimum percentage of files for valid test
 44 | 
 45 | 
 46 | # run a multi-host test
 47 | 
 48 | 
 49 | def run_multi_host_workload(prm):
 50 |     prm_host_set = prm.host_set
 51 |     prm_permute_host_dirs = prm.permute_host_dirs
 52 |     master_invoke = prm.master_invoke
 53 | 
 54 |     starting_gate = master_invoke.starting_gate
 55 |     verbose = master_invoke.verbose
 56 | 
 57 |     if os.getenv("PYPY"):
 58 |         python_prog = os.getenv("PYPY")
 59 |     elif sys.version.startswith("2"):
 60 |         python_prog = "python"
 61 |     elif sys.version.startswith("3"):
 62 |         python_prog = "python3"
 63 |     else:
 64 |         raise SMFRunException("unrecognized python version %s" % sys.version)
 65 | 
 66 |     # construct list of ssh threads to invoke in parallel
 67 | 
 68 |     master_invoke.create_top_dirs(True)
 69 |     pickle_fn = os.path.join(prm.master_invoke.network_dir, "param.pickle")
 70 | 
 71 |     # if verbose: print('writing ' + pickle_fn))
 72 | 
 73 |     sync_files.write_pickle(pickle_fn, prm)
 74 | 
 75 |     # print('python_prog = %s'%python_prog)
 76 | 
 77 |     remote_thread_list = []
 78 |     host_ct = len(prm_host_set)
 79 |     for j in range(0, len(prm_host_set)):
 80 |         remote_host = prm_host_set[j]
 81 |         smf_remote_pgm = os.path.join(prm.remote_pgm_dir, "smallfile_remote.py")
 82 |         this_remote_cmd = "%s %s --network-sync-dir %s " % (
 83 |             python_prog,
 84 |             smf_remote_pgm,
 85 |             prm.master_invoke.network_dir,
 86 |         )
 87 | 
 88 |         # this_remote_cmd = remote_cmd
 89 | 
 90 |         if prm_permute_host_dirs:
 91 |             this_remote_cmd += " --as-host %s" % prm_host_set[(j + 1) % host_ct]
 92 |         else:
 93 |             this_remote_cmd += " --as-host %s" % remote_host
 94 |         if verbose:
 95 |             print(this_remote_cmd)
 96 |         if smallfile.is_windows_os or prm.launch_by_daemon:
 97 |             remote_thread_list.append(
 98 |                 launcher_thread.launcher_thread(prm, remote_host, this_remote_cmd)
 99 |             )
100 |         else:
101 |             remote_thread_list.append(
102 |                 ssh_thread.ssh_thread(remote_host, this_remote_cmd)
103 |             )
104 | 
105 |     # start them
106 | 
107 |     for t in remote_thread_list:
108 |         if not prm.launch_by_daemon:
109 |             # pace starts so that we don't get ssh errors
110 |             time.sleep(0.1)
111 |         t.start()
112 | 
113 |     # wait for hosts to arrive at starting gate
114 |     # if only one host, then no wait will occur
115 |     # as starting gate file is already present
116 |     # every second we resume scan from last host file not found
117 |     # FIXME: for very large host sets,
118 |     # timeout only if no host responds within X seconds
119 | 
120 |     exception_seen = None
121 |     hosts_ready = False  # set scope outside while loop
122 |     abortfn = master_invoke.abort_fn()
123 |     last_host_seen = -1
124 |     sec = 0.0
125 |     sec_delta = 0.5
126 |     host_timeout = prm.host_startup_timeout
127 |     if smallfile.is_windows_os:
128 |         host_timeout += 20
129 |     h = None
130 | 
131 |     try:
132 |         while sec < host_timeout:
133 |             # HACK to force directory entry coherency for Gluster
134 |             ndirlist = os.listdir(master_invoke.network_dir)
135 |             if master_invoke.verbose:
136 |                 print("shared dir list: " + str(ndirlist))
137 |             hosts_ready = True
138 |             if os.path.exists(abortfn):
139 |                 raise SMFRunException("worker host signaled abort")
140 |             for j in range(last_host_seen + 1, len(prm_host_set)):
141 |                 h = prm_host_set[j]
142 |                 fn = master_invoke.gen_host_ready_fname(h.strip())
143 |                 if verbose:
144 |                     print("checking for host filename " + fn)
145 |                 if not os.path.exists(fn):
146 |                     hosts_ready = False
147 |                     break
148 |                 last_host_seen = j  # saw this host's ready file
149 |                 # we exit while loop only if no hosts in host_timeout seconds
150 |                 sec = 0.0
151 |             if hosts_ready:
152 |                 break
153 | 
154 |             # if one of ssh threads has died, no reason to continue
155 | 
156 |             kill_remaining_threads = False
157 |             for t in remote_thread_list:
158 |                 if not smallfile.thrd_is_alive(t):
159 |                     print("thread %s on host %s has died" % (t, str(h)))
160 |                     kill_remaining_threads = True
161 |                     break
162 |             if kill_remaining_threads:
163 |                 break
164 | 
165 |             # be patient for large tests
166 |             # give user some feedback about
167 |             # how many hosts have arrived at the starting gate
168 | 
169 |             time.sleep(sec_delta)
170 |             sec += sec_delta
171 |             sec_delta += 1
172 |             if verbose:
173 |                 print("last_host_seen=%d sec=%d" % (last_host_seen, sec))
174 |     except KeyboardInterrupt as e:
175 |         print("saw SIGINT signal, aborting test")
176 |         exception_seen = e
177 |         hosts_ready = False
178 |     except Exception as e:
179 |         exception_seen = e
180 |         hosts_ready = False
181 |         print("saw exception %s, aborting test" % str(e))
182 |     if not hosts_ready:
183 |         smallfile.abort_test(abortfn, [])
184 |         if h is not None:
185 |             print("ERROR: host %s did not reach starting gate" % h)
186 |         else:
187 |             print("no host reached starting gate")
188 |         if not exception_seen:
189 |             raise SMFRunException(
190 |                 "hosts did not reach starting gate within %d seconds" % host_timeout
191 |             )
192 |         else:
193 |             print("saw exception %s, aborting test" % str(exception_seen))
194 |         sys.exit(NOTOK)
195 |     else:
196 |         # ask all hosts to start the test
197 |         # this is like firing the gun at the track meet
198 | 
199 |         try:
200 |             sync_files.write_sync_file(starting_gate, "hi")
201 |             prm.test_start_time = time.time()
202 |             print(
203 |                 "starting all threads by creating starting gate file %s" % starting_gate
204 |             )
205 |         except IOError as e:
206 |             print("error writing starting gate: %s" % os.strerror(e.errno))
207 | 
208 |     # wait for them to finish
209 | 
210 |     for t in remote_thread_list:
211 |         t.join()
212 |         if t.status != OK:
213 |             print(
214 |                 "ERROR: ssh thread for host %s completed with status %d"
215 |                 % (t.remote_host, t.status)
216 |             )
217 | 
218 |     # attempt to aggregate results by reading pickle files
219 |     # containing SmallfileWorkload instances
220 |     # with counters and times that we need
221 | 
222 |     try:
223 |         all_ok = NOTOK
224 |         invoke_list = []
225 |         one_shot_delay = True
226 |         for h in prm_host_set:  # for each host in test
227 |             # read results for each thread run in that host
228 |             # from python pickle of the list of SmallfileWorkload objects
229 | 
230 |             pickle_fn = master_invoke.host_result_filename(h)
231 |             if verbose:
232 |                 print("reading pickle file: %s" % pickle_fn)
233 |             host_invoke_list = []
234 |             try:
235 |                 if one_shot_delay and not os.path.exists(pickle_fn):
236 |                     # all threads have joined already, they are done
237 |                     # we allow > 1 sec
238 |                     # for this (NFS) client to see other clients' files
239 | 
240 |                     time.sleep(1.2)
241 |                     one_shot_delay = False
242 |                 with open(pickle_fn, "rb") as pickle_file:
243 |                     host_invoke_list = pickle.load(pickle_file)
244 |                 if verbose:
245 |                     print(" read %d invoke objects" % len(host_invoke_list))
246 |                 invoke_list.extend(host_invoke_list)
247 |                 ensure_deleted(pickle_fn)
248 |             except IOError as e:
249 |                 if e.errno != errno.ENOENT:
250 |                     raise e
251 |                 print("  pickle file %s not found" % pickle_fn)
252 | 
253 |         output_results.output_results(invoke_list, prm)
254 |         all_ok = OK
255 |     except IOError as e:
256 |         print("host %s filename %s: %s" % (h, pickle_fn, str(e)))
257 |     except KeyboardInterrupt:
258 |         print("control-C signal seen (SIGINT)")
259 | 
260 |     sys.exit(all_ok)
261 | 
262 | 
263 | # main routine that does everything for this workload
264 | 
265 | 
266 | def run_workload():
267 |     # if a --host-set parameter was passed,
268 |     # it's a multi-host workload
269 |     # each remote instance will wait
270 |     # until all instances have reached starting gate
271 | 
272 |     try:
273 |         params = parse.parse()
274 |     except parse.SmfParseException as e:
275 |         print("ERROR: " + str(e))
276 |         print("use --help option to get CLI syntax")
277 |         sys.exit(NOTOK)
278 | 
279 |     # for multi-host test
280 | 
281 |     if params.host_set and not params.is_slave:
282 |         return run_multi_host_workload(params)
283 |     return multi_thread_workload.run_multi_thread_workload(params)
284 | 
285 | 
286 | # for future windows compatibility,
287 | # all global code (not contained in a class or subroutine)
288 | # must be moved to within a routine unless it's trivial (like constants)
289 | # because windows doesn't support fork().
290 | 
291 | if __name__ == "__main__":
292 |     run_workload()
293 | 


--------------------------------------------------------------------------------
/smallfile_remote.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # because it uses the "multiprocessing" python module instead of "threading"
 4 | # module, it can scale to many cores
 5 | # all the heavy lifting is done in "invocation" module,
 6 | # this script just adds code to run multi-process tests
 7 | # this script parses CLI commands, sets up test, runs it and prints results
 8 | #
 9 | # how to run:
10 | #
11 | # ./smallfile_cli.py
12 | #
13 | 
14 | # smallfile modules
15 | 
16 | """
17 | smallfile_cli.py
18 | CLI user interface for generating metadata-intensive workloads
19 | Copyright 2012 -- Ben England
20 | Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0
21 | See Appendix on this page for instructions pertaining to license.
22 | """
23 | 
24 | import multi_thread_workload
25 | import parse_slave
26 | 
27 | # main routine that does everything for this workload
28 | 
29 | 
30 | def run_workload():
31 |     # if a --host-set parameter was passed, it's a multi-host workload
32 |     # each remote instance will wait until all instances reach starting gate
33 | 
34 |     params = parse_slave.parse()
35 |     if params.master_invoke.verbose:
36 |         print("slave params: %s" % str(params))
37 |     return multi_thread_workload.run_multi_thread_workload(params)
38 | 
39 | 
40 | # for windows compatibility,
41 | # all global code (not contained in a class or subroutine)
42 | # must be moved to within a routine unless it's trivial (like constants)
43 | # because windows doesn't support fork().
44 | 
45 | if __name__ == "__main__":
46 |     run_workload()
47 | 


--------------------------------------------------------------------------------
/smallfile_rsptimes_stats.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | #
  3 | # smallfile_rsptimes_stats.py -- python program to reduce response time sample data from smallfile benchmark to
  4 | # statistics.
  5 | #
  6 | # in addition to stats for individual thread, it shows per-client and cluster-wide stats
  7 | # smallfile at present produces response time data in the /var/tmp/ directory
  8 | # within each workload generator
  9 | # it is the user's responsibility to copy the data back
 10 | # to a directory (on the test driver perhaps).
 11 | # this means that the files from each workload generator have to have
 12 | # the workload generator hostname embedded in them
 13 | # so that they can all be co-located in a single directory.
 14 | # since there is no standard method for this yet,
 15 | # this program has to be adjusted to parse the filenames
 16 | # and extract 2 fields, thread number and short hostname
 17 | #
 18 | # the start-time parameter is optional but if it is specified
 19 | # the percentiles-vs-time time column will have this added to it
 20 | # this could be useful for ingesting data into a repository like
 21 | # elastic search and displaying it side-by-side with other performance
 22 | # data collected during a test run.  The default of 0 just outputs
 23 | # time since start of test (like before).  The start time as
 24 | # seconds since the epoch (1970) can be obtained from the JSON
 25 | # output in the 'start-time' field.
 26 | 
 27 | 
 28 | import bisect
 29 | import os
 30 | import re
 31 | import sys
 32 | from sys import argv
 33 | 
 34 | import numpy
 35 | import scipy
 36 | import scipy.stats
 37 | 
 38 | time_infinity = 1 << 62
 39 | 
 40 | # edit this list if you want additional percentiles
 41 | 
 42 | percentiles = [50, 90, 95, 99]
 43 | min_rsptime_samples = 5
 44 | start_time = 0.0
 45 | 
 46 | 
 47 | def usage(msg):
 48 |     print("ERROR: %s" % msg)
 49 |     print("usage: python smallfile_rsptimes_stats.py ")
 50 |     print("           [--common-hostname-suffix my.suffix] ")
 51 |     print("           [--time-interval positive-integer-seconds] ")
 52 |     print("           [--start-time seconds-since-1970] ")
 53 |     print("           directory")
 54 |     sys.exit(1)
 55 | 
 56 | 
 57 | # parse files once, we assume here that we can hold them in RAM
 58 | # so we don't have to keep reading them
 59 | # by keeping them in RAM we allow binary search for starting
 60 | # time since we want to isolate set of samples in a time interval
 61 | 
 62 | 
 63 | def parse_rsptime_file(result_dir, csv_pathname):
 64 |     samples = []
 65 |     with open(os.path.join(result_dir, csv_pathname), "r") as f:
 66 |         records = [line.strip() for line in f.readlines()]
 67 |         for sample in records:
 68 |             components = sample.split(",")
 69 |             op = components[0]
 70 |             at_time = float(components[1])
 71 |             if start_time > 0:
 72 |                 at_time += start_time
 73 |             rsp_time = float(components[2])
 74 |             samples.append((op, at_time, rsp_time))
 75 |     return samples
 76 | 
 77 | 
 78 | # to be used for sorting based on tuple components
 79 | 
 80 | 
 81 | def get_at_time(rsptime_tuple):
 82 |     (_, at_time, _) = rsptime_tuple
 83 |     return at_time
 84 | 
 85 | 
 86 | def get_rsp_time(rsptime_tuple):
 87 |     (_, _, rsp_time) = rsptime_tuple
 88 |     return rsp_time
 89 | 
 90 | 
 91 | def do_sorting(sample_set, already_sorted=False):
 92 |     """
 93 |     this function avoids duplication of sorting
 94 |     """
 95 |     if not already_sorted:
 96 |         sorted_samples = sorted(sample_set, key=get_at_time)
 97 |     else:
 98 |         sorted_samples = sample_set
 99 |     sorted_keys = list(map(get_at_time, sorted_samples))
100 |     sorted_rsptimes = sorted(list(map(get_rsp_time, sample_set)))
101 |     return (sorted_samples, sorted_keys, sorted_rsptimes)
102 | 
103 | 
104 | # leverage python binary search module "bisect"
105 | # obtained from https://docs.python.org/2/library/bisect.html#searching-sorted-lists
106 | 
107 | 
108 | def find_le(a, x):
109 |     # find highest index with value < x
110 |     i = bisect.bisect_right(a, x)
111 |     return i
112 | 
113 | 
114 | def find_gt(a, x):
115 |     # find lowest index with value >= x
116 |     i = bisect.bisect_left(a, x)
117 |     if i < len(a):
118 |         return i
119 |     # since the only thing we are doing with this result
120 |     # is to extract a slice of an array,
121 |     # returning len(a) is a valid thing
122 |     # raise ValueError
123 | 
124 | 
125 | # if you want this to calculate stats for a time_interval
126 | # t specify from_time and to_time
127 | 
128 | 
129 | def reduce_thread_set(sorted_samples_tuple, from_time=0, to_time=time_infinity):
130 |     # FIXME: need binary search to
131 |     # efficiently find beginning of time interval
132 |     (sorted_samples, sorted_keys, sorted_times) = sorted_samples_tuple
133 |     if to_time < time_infinity:
134 |         start_index = find_le(sorted_keys, from_time)
135 |         end_index = find_gt(sorted_keys, to_time)
136 |         # replace sorted_times with just the response times in time interval
137 |         sorted_times = sorted(map(get_rsp_time, sorted_samples[start_index:end_index]))
138 |     sample_count = len(sorted_times)
139 |     if sample_count < min_rsptime_samples:
140 |         return None
141 |     mintime = sorted_times[0]
142 |     maxtime = sorted_times[-1]
143 |     mean = scipy.stats.tmean(sorted_times)
144 |     stdev = scipy.stats.tstd(sorted_times)
145 |     pctdev = 100.0 * stdev / mean
146 |     pctiles = []
147 |     for p in percentiles:
148 |         pctiles.append(numpy.percentile(sorted_times, float(p), overwrite_input=True))
149 |     return (sample_count, mintime, maxtime, mean, pctdev, pctiles)
150 | 
151 | 
152 | # format the stats for output to a csv file
153 | 
154 | 
155 | def format_stats(all_stats):
156 |     if all_stats is None:
157 |         return " 0,,,,," + ",,,,,,,,,,,,,,,,"[0 : len(percentiles) - 1]
158 |     (sample_count, mintime, maxtime, mean, pctdev, pctiles) = all_stats
159 |     partial_record = "%d, %f, %f, %f, %f, " % (
160 |         sample_count,
161 |         mintime,
162 |         maxtime,
163 |         mean,
164 |         pctdev,
165 |     )
166 |     for p in pctiles:
167 |         partial_record += "%f, " % p
168 |     return partial_record
169 | 
170 | 
171 | # FIXME: convert to argparse module, more compact and standard
172 | # define default parameter values
173 | 
174 | hosts = {}
175 | suffix = ""
176 | argindex = 1
177 | argcount = len(argv)
178 | time_interval = 10
179 | 
180 | # parse any optional parameters
181 | 
182 | while argindex < argcount:
183 |     pname = argv[argindex]
184 |     if not pname.startswith("--"):
185 |         break
186 |     if argindex == argcount - 1:
187 |         usage("every parameter consists of a --name and a value")
188 |     pval = argv[argindex + 1]
189 |     argindex += 2
190 |     pname = pname[2:]
191 |     if pname == "common-hostname-suffix":
192 |         suffix = pval
193 |         if not suffix.startswith("."):
194 |             suffix = "." + pval
195 |     elif pname == "time-interval":
196 |         time_interval = int(pval)
197 |     elif pname == "start-time":
198 |         start_time = float(pval)
199 |     else:
200 |         usage("--%s: no such optional parameter defined" % pname)
201 | 
202 | if suffix != "":
203 |     print("filtering out suffix %s from hostnames" % suffix)
204 | print("time interval is %d seconds" % time_interval)
205 | 
206 | # this regex plucks out a tuple of 2 values:
207 | #
208 | # thread number
209 | # hostname
210 | 
211 | regex = r"rsptimes_([0-9]{2})_([0-9,a-z,\-,\.]*)%s_[-,a-z]*_[.,0-9]*.csv"
212 | 
213 | # filter out redundant suffix, if any, in hostname
214 | 
215 | new_regex = regex % suffix
216 | 
217 | # now parse hostnames and files
218 | 
219 | if argindex != argcount - 1:
220 |     usage("need directory where response time files are")
221 | 
222 | directory = argv[argindex]
223 | if not os.path.isdir(directory):
224 |     usage("%s: directory containing result csv files was not provided" % directory)
225 | 
226 | # process the results
227 | # we show individual threads, per-host groupings and all threads together
228 | 
229 | samples_by_thread = {}
230 | hosts = {}
231 | 
232 | pathnames = filter(
233 |     lambda path: path.startswith("rsptimes") and path.endswith(".csv"),
234 |     os.listdir(directory),
235 | )
236 | max_thread = 0
237 | for p in pathnames:
238 |     m = re.match(new_regex, p)
239 |     if not m:
240 |         sys.stderr.write("warning: pathname could not be matched by regex: %s\n" % p)
241 |         continue
242 |     (threadstr, host) = m.group(1, 2)
243 |     thread = int(threadstr)
244 |     if max_thread < thread:
245 |         max_thread = thread
246 |     try:
247 |         perhost_dict = hosts[host]
248 |     except KeyError:
249 |         perhost_dict = {}
250 |         hosts[host] = perhost_dict
251 |     # load response times for this file into memory
252 |     # save what file it came from too
253 |     samples = parse_rsptime_file(directory, p)
254 |     perhost_dict[threadstr] = (p, samples)
255 | 
256 | hostcount = len(hosts.keys())
257 | if hostcount == 0:
258 |     usage("%s: no .csv response time log files were found" % directory)
259 | 
260 | summary_pathname = os.path.join(directory, "stats-rsptimes.csv")
261 | header = "host:thread, samples, min, max, mean, %dev, "
262 | for p in percentiles:
263 |     header += "%d%%ile, " % p
264 | 
265 | with open(summary_pathname, "w") as outf:
266 |     outf.write(header + "\n")
267 | 
268 |     # aggregate response times across all threads and whole test duration
269 |     # if there is only 1 host, no need for cluster-wide stats
270 | 
271 |     cluster_sample_set = None
272 |     if len(hosts.keys()) > 1:
273 |         outf.write("cluster-wide stats:\n")
274 |         cluster_sample_set = []
275 |         for per_host_dict in hosts.values():
276 |             for _, samples in per_host_dict.values():
277 |                 cluster_sample_set.extend(samples)
278 |         sorted_cluster_tuple = do_sorting(cluster_sample_set)
279 |         cluster_results = reduce_thread_set(sorted_cluster_tuple)
280 |         outf.write("all-hosts:all-thrd," + format_stats(cluster_results) + "\n")
281 |         outf.write("\n")
282 | 
283 |     # show them if there is variation amongst clients (could be network)
284 |     # if there is only 1 thread per host, no need for per-host stats
285 |     # assumption: all hosts have 1 thread/host or all hosts have > 1 thread/host
286 | 
287 |     host_keys = list(hosts.keys())
288 |     first_host = host_keys[0]
289 |     if len(first_host) > 1:
290 |         outf.write("per-host stats:\n")
291 |         for h in sorted(hosts.keys()):
292 |             sample_set = []
293 |             for _, samples in hosts[h].values():
294 |                 sample_set.extend(samples)
295 |             sorted_host_tuple = do_sorting(sample_set)
296 |             host_results = reduce_thread_set(sorted_host_tuple)
297 |             outf.write(h + ":" + "all-thrd" + "," + format_stats(host_results) + "\n")
298 |         outf.write("\n")
299 | 
300 |     # show per-thread results so we can see if client Cephfs mountpoint is fair
301 | 
302 |     outf.write("per-thread stats:\n")
303 |     for h in sorted(hosts.keys()):
304 |         threadset = hosts[h]
305 |         for t in sorted(threadset.keys()):
306 |             (_, samples) = threadset[t]
307 |             sorted_thrd_tuple = do_sorting(samples, already_sorted=True)
308 |             thrd_results = reduce_thread_set(sorted_thrd_tuple)
309 |             outf.write(h + ":" + t + "," + format_stats(thrd_results) + "\n")
310 |     outf.write("\n")
311 | 
312 |     # generate cluster-wide percentiles over time
313 |     # to show if latency spikes occur
314 |     # first get max end time of any request,
315 |     # round that down to quantized time interval
316 | 
317 |     end_time = -1
318 |     for h in hosts.keys():
319 |         threadset = hosts[h]
320 |         for t in threadset.keys():
321 |             (_, samples) = threadset[t]
322 |             if len(samples) > 0:
323 |                 (_, max_at_time, max_rsp_time) = samples[-1]
324 |             else:
325 |                 max_at_time = 0.0
326 |                 max_rsp_time = 0.0
327 |             end_time = max(end_time, max_at_time + max_rsp_time)
328 |     quantized_end_time = (int(end_time) // time_interval) * time_interval
329 | 
330 |     # if there is only 1 interval, cannot do percentiles vs time
331 |     # else for each time interval calculate percentiles of samples
332 |     # in that time interval
333 | 
334 |     if quantized_end_time > 0:
335 |         outf.write("cluster-wide response time stats over time:\n")
336 |         outf.write("time-since-start(sec), " + header + "\n")
337 | 
338 |         # avoid re-sorting all response time samples
339 |         # if possible (and it often is)
340 | 
341 |         if cluster_sample_set is None:
342 |             cluster_sample_set = []
343 |             for per_host_dict in hosts.values():
344 |                 for _, samples in per_host_dict.values():
345 |                     cluster_sample_set.extend(samples)
346 |             sorted_cluster_tuple = do_sorting(cluster_sample_set)
347 |         for from_t in range(int(start_time), quantized_end_time, time_interval):
348 |             to_t = from_t + time_interval
349 |             results_in_interval = reduce_thread_set(
350 |                 sorted_cluster_tuple, from_time=from_t, to_time=to_t
351 |             )
352 |             outf.write("%-8d, all-hosts:all-thrd, " % from_t)
353 |             outf.write(format_stats(results_in_interval) + "\n")
354 |         outf.write("\n")
355 | 
356 | 
357 | print("rsp. time result summary at: %s" % summary_pathname)
358 | 


--------------------------------------------------------------------------------
/smf_test_params.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # this class represents the entire set of test parameters
  3 | 
  4 | # calculate timeouts to allow for initialization delays
  5 | # while directory tree is created
  6 | 
  7 | import os
  8 | import sys
  9 | 
 10 | import smallfile
 11 | 
 12 | # convert boolean value into 'Y' or 'N'
 13 | 
 14 | 
 15 | def bool2YN(boolval):
 16 |     if boolval:
 17 |         return "Y"
 18 |     return "N"
 19 | 
 20 | 
 21 | class smf_test_params:
 22 |     def __init__(
 23 |         self,
 24 |         host_set=None,
 25 |         thread_count=2,
 26 |         remote_pgm_dir=os.path.abspath(os.path.dirname(sys.argv[0])),
 27 |         top_dirs=None,
 28 |         network_sync_dir=None,
 29 |         slave=False,
 30 |         permute_host_dirs=False,
 31 |         output_json=None,
 32 |     ):
 33 |         # this field used to calculate timeouts
 34 |         self.min_directories_per_sec = 50
 35 |         self.cleanup_delay_usec_per_sec = 0
 36 |         self.output_json = output_json
 37 |         self.version = "3.2"
 38 |         self.as_host = None
 39 |         self.host_set = host_set
 40 |         self.thread_count = thread_count
 41 |         self.master_invoke = smallfile.SmallfileWorkload()
 42 |         self.remote_pgm_dir = remote_pgm_dir
 43 |         self.top_dirs = top_dirs
 44 |         if top_dirs:
 45 |             self.master_invoke.set_top(top_dirs)
 46 |         self.network_sync_dir = network_sync_dir
 47 |         if network_sync_dir:
 48 |             self.master_invoke.network_dir = network_sync_dir
 49 |         self.launch_by_daemon = False
 50 |         self.is_slave = slave
 51 |         self.permute_host_dirs = permute_host_dirs
 52 |         self.startup_timeout = 0
 53 |         self.host_startup_timeout = 0
 54 |         self.test_start_time = None
 55 | 
 56 |     # calculate timeouts assuming 2 directories per second
 57 | 
 58 |     def recalculate_timeouts(self):
 59 |         total_files = self.master_invoke.iterations * self.thread_count
 60 |         # ignore subdirs per dir, this is a good estimate
 61 |         if self.host_set is not None:
 62 |             total_files *= len(self.host_set)
 63 |         dirs = total_files // self.master_invoke.files_per_dir
 64 | 
 65 |         # we have to create both src_dir and dst_dir trees so times 2
 66 |         # allow some time for thread synchronization
 67 |         dir_creation_overhead = (self.thread_count // 30) + (
 68 |             (dirs * 2) // self.min_directories_per_sec
 69 |         )
 70 | 
 71 |         # allow for creating list of pathnames if millions of files per dir
 72 |         file_creation_overhead = max(1, self.master_invoke.files_per_dir // 300000)
 73 | 
 74 |         # allow no less than 2 seconds to account for NTP inaccuracy
 75 |         self.startup_timeout = 2 + file_creation_overhead + dir_creation_overhead
 76 | 
 77 |         self.host_startup_timeout = self.startup_timeout
 78 |         if self.host_set is not None:
 79 |             # allow extra time for inter-host synchronization
 80 |             self.host_startup_timeout += 5 + (len(self.host_set) // 2)
 81 | 
 82 |     def __str__(self):
 83 |         fmt = "smf_test_params: version=%s json=%s as_host=%s host_set=%s "
 84 |         fmt += "launch_by_daemon=%s "
 85 |         fmt += "thread_count=%d remote_pgm_dir=%s "
 86 |         fmt += "slave=%s permute_host_dirs=%s startup_timeout=%d "
 87 |         fmt += "host_timeout=%d smf_invoke=%s "
 88 |         return fmt % (
 89 |             str(self.version),
 90 |             str(self.output_json),
 91 |             str(self.as_host),
 92 |             str(self.host_set),
 93 |             str(self.launch_by_daemon),
 94 |             self.thread_count,
 95 |             self.remote_pgm_dir,
 96 |             str(self.is_slave),
 97 |             str(self.permute_host_dirs),
 98 |             self.startup_timeout,
 99 |             self.host_startup_timeout,
100 |             str(self.master_invoke),
101 |         )
102 | 
103 |     # display results of parse so user knows what default values are
104 |     # most important parameters come first
105 |     # display host set first because this can be very long,
106 |     # this way the rest of the parameters appear together on the screen
107 |     # this function returns a list of (name, value) pairs for each param.
108 | 
109 |     def human_readable(self):
110 |         inv = self.master_invoke
111 |         if inv.filesize_distr == smallfile.SmallfileWorkload.fsdistr_fixed:
112 |             fsdistr_str = "fixed"
113 |         else:
114 |             fsdistr_str = "random exponential"
115 |         prm_list = [
116 |             ("version", self.version),
117 |             ("hosts in test", "%s" % self.host_set),
118 |             ("launch by daemon", "%s" % str(self.launch_by_daemon)),
119 |             ("top test directory(s)", str(self.top_dirs)),
120 |             ("operation", inv.opname),
121 |             ("files/thread", "%d" % inv.iterations),
122 |             ("threads", "%d" % self.thread_count),
123 |             ("record size (KB, 0 = maximum)", "%d" % inv.record_sz_kb),
124 |             ("file size (KB)", "%d" % inv.total_sz_kb),
125 |             ("file size distribution", fsdistr_str),
126 |             ("files per dir", "%d" % inv.files_per_dir),
127 |             ("dirs per dir", "%d" % inv.dirs_per_dir),
128 |             ("threads share directories?", "%s" % bool2YN(inv.is_shared_dir)),
129 |             ("filename prefix", inv.prefix),
130 |             ("filename suffix", inv.suffix),
131 |             ("hash file number into dir.?", bool2YN(inv.hash_to_dir)),
132 |             ("fsync after modify?", bool2YN(inv.fsync)),
133 |             ("incompressible?", bool2YN(inv.incompressible)),
134 |             ("pause between files (microsec)", "%d" % inv.pause_between_files),
135 |             ("auto-pause?", bool2YN(inv.auto_pause)),
136 |             (
137 |                 "delay after cleanup per file (microsec)",
138 |                 "%d" % inv.cleanup_delay_usec_per_file,
139 |             ),
140 |             ("minimum directories per sec", "%d" % int(self.min_directories_per_sec)),
141 |             ("total hosts", "%d" % inv.total_hosts),
142 |             ("finish all requests?", "%s" % bool2YN(inv.finish_all_rq)),
143 |             ("stonewall?", "%s" % bool2YN(inv.stonewall)),
144 |             ("measure response times?", "%s" % bool2YN(inv.measure_rsptimes)),
145 |             ("verify read?", "%s" % bool2YN(inv.verify_read)),
146 |             ("verbose?", bool2YN(inv.verbose)),
147 |             ("log to stderr?", bool2YN(inv.log_to_stderr)),
148 |         ]
149 |         if smallfile.xattr_installed:
150 |             prm_list.extend(
151 |                 [
152 |                     ("ext.attr.size", "%d" % inv.xattr_size),
153 |                     ("ext.attr.count", "%d" % inv.xattr_count),
154 |                 ]
155 |             )
156 |         if self.host_set:
157 |             prm_list.extend(
158 |                 [("permute host directories?", "%s" % bool2YN(self.permute_host_dirs))]
159 |             )
160 |             if self.remote_pgm_dir:
161 |                 prm_list.append(("remote program directory", self.remote_pgm_dir))
162 |             if self.network_sync_dir:
163 |                 prm_list.append(("network thread sync. dir.", self.network_sync_dir))
164 |         return prm_list
165 | 
166 |     # add any parameters that might be relevant to
167 |     # data analysis here, can skip parameters that
168 |     # don't affect test results
169 |     # don't convert to JSON here, so that caller
170 |     # can insert test results before conversion
171 | 
172 |     def to_json(self):
173 |         # put params a level down so results can be
174 |         # inserted at same level
175 | 
176 |         json_dictionary = {}
177 |         p = {}
178 |         json_dictionary["params"] = p
179 | 
180 |         inv = self.master_invoke
181 | 
182 |         # put host-set at top because it can be very long
183 |         # and we want rest of parameters to be grouped together
184 | 
185 |         p["host_set"] = self.host_set
186 |         p["launch_by_daemon"] = self.launch_by_daemon
187 |         p["version"] = self.version
188 |         p["top"] = ",".join(self.top_dirs)
189 |         p["operation"] = inv.opname
190 |         p["files_per_thread"] = inv.iterations
191 |         p["threads"] = self.thread_count
192 |         p["file_size"] = inv.total_sz_kb
193 |         p["file_size_distr"] = self.size_distribution
194 |         p["files_per_dir"] = inv.files_per_dir
195 |         p["share_dir"] = bool2YN(inv.is_shared_dir)
196 |         p["fname_prefix"] = inv.prefix
197 |         p["fname_suffix"] = inv.suffix
198 |         p["hash_to_dir"] = bool2YN(inv.hash_to_dir)
199 |         p["fsync_after_modify"] = bool2YN(inv.fsync)
200 |         p["pause_between_files"] = str(inv.pause_between_files)
201 |         p["auto_pause"] = str(inv.auto_pause)
202 |         p["cleanup_delay_usec_per_file"] = str(inv.cleanup_delay_usec_per_file)
203 |         p["finish_all_requests"] = bool2YN(inv.finish_all_rq)
204 |         p["stonewall"] = bool2YN(inv.stonewall)
205 |         p["verify_read"] = bool2YN(inv.verify_read)
206 |         p["xattr_size"] = str(inv.xattr_size)
207 |         p["xattr_count"] = str(inv.xattr_count)
208 |         p["permute_host_dirs"] = bool2YN(self.permute_host_dirs)
209 |         p["network_sync_dir"] = self.network_sync_dir
210 |         p["min_directories_per_sec"] = self.min_directories_per_sec
211 |         p["total_hosts"] = inv.total_hosts
212 | 
213 |         # include startup-timeout and host-timeout to make possible
214 |         # diagnosis of timeout problems, but we don't normally need them
215 |         # so don't include in human-readable output
216 | 
217 |         p["startup_timeout"] = self.startup_timeout
218 |         p["host_timeout"] = self.host_startup_timeout
219 | 
220 |         return json_dictionary
221 | 


--------------------------------------------------------------------------------
/ssh_thread.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | ssh_thread.py -- manages parallel execution of shell commands on remote hosts
 5 | Copyright 2012 -- Ben England
 6 | Licensed under the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 7 | See Appendix on this page for instructions pertaining to license.
 8 | """
 9 | 
10 | import os
11 | import threading
12 | 
13 | # this class is just used to create a python thread
14 | # for each remote host that we want to use as a workload generator
15 | # the thread just executes an ssh command to run this program on a remote host
16 | 
17 | 
18 | class ssh_thread(threading.Thread):
19 |     ssh_prefix = "ssh -x -o StrictHostKeyChecking=no "
20 | 
21 |     def __str__(self):
22 |         return "ssh-thread:%s:%s:%s" % (
23 |             self.remote_host,
24 |             str(self.status),
25 |             self.remote_cmd,
26 |         )
27 | 
28 |     def __init__(self, remote_host, remote_cmd_in):
29 |         threading.Thread.__init__(self)
30 |         self.remote_host = remote_host
31 |         self.remote_cmd = '%s %s "%s"' % (
32 |             self.ssh_prefix,
33 |             self.remote_host,
34 |             remote_cmd_in,
35 |         )
36 |         # print('thread cmd %s'%self.remote_cmd)
37 |         self.status = None
38 | 
39 |     def run(self):
40 |         self.status = os.system(self.remote_cmd)
41 | 


--------------------------------------------------------------------------------
/sync_files.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import errno
 3 | import os
 4 | import pickle
 5 | 
 6 | 
 7 | class SMFSyncFileException(Exception):
 8 |     pass
 9 | 
10 | 
11 | notyet = ".notyet"
12 | 
13 | 
14 | def touch(fpath):
15 |     try:
16 |         with open(fpath, "w") as sgf:
17 |             sgf.write("hi")
18 |             sgf.flush()
19 |             os.fsync(sgf.fileno())
20 |     except OSError as e:
21 |         if e.errno != errno.EEXIST:
22 |             raise e
23 | 
24 | 
25 | def write_sync_file(fpath, contents):
26 |     with open(fpath + notyet, "w") as sgf:
27 |         sgf.write(contents)
28 |         sgf.flush()
29 |         os.fsync(sgf.fileno())  # file should close when you exit block
30 |     os.rename(fpath + notyet, fpath)
31 | 
32 | 
33 | def write_pickle(fpath, obj):
34 |     with open(fpath + notyet, "wb") as result_file:
35 |         pickle.dump(obj, result_file)
36 |         result_file.flush()
37 |         os.fsync(result_file.fileno())  # or else reader may not see data
38 |     os.rename(fpath + notyet, fpath)
39 | 
40 | 
41 | # create directory if it's not already there
42 | 
43 | 
44 | def ensure_dir_exists(dirpath):
45 |     if not os.path.exists(dirpath):
46 |         parent_path = os.path.dirname(dirpath)
47 |         if parent_path == dirpath:
48 |             raise SMFSyncFileException(
49 |                 "ensure_dir_exists: cannot obtain parent path of non-existent path: "
50 |                 + dirpath
51 |             )
52 |         ensure_dir_exists(parent_path)
53 |         try:
54 |             os.mkdir(dirpath)
55 |         except os.error as e:
56 |             if e.errno != errno.EEXIST:  # workaround for filesystem bug
57 |                 raise e
58 |     else:
59 |         if not os.path.isdir(dirpath):
60 |             raise SMFSyncFileException(
61 |                 "%s already exists and is not a directory!" % dirpath
62 |             )
63 | 
64 | 
65 | # avoid exception if file we wish to delete is not there
66 | 
67 | 
68 | def ensure_deleted(fn):
69 |     try:
70 |         if os.path.lexists(fn):
71 |             os.unlink(fn)
72 |     except Exception as e:
73 |         # could be race condition with other client processes/hosts
74 |         # if was race condition, file will no longer be there
75 |         if os.path.exists(fn):
76 |             raise SMFSyncFileException(
77 |                 "exception while ensuring %s deleted: %s" % (fn, str(e))
78 |             )
79 | 


--------------------------------------------------------------------------------
/yaml_parser.py:
--------------------------------------------------------------------------------
  1 | """
  2 | module to parse YAML input file containing smallfile parameters
  3 | YAML parameter names are identical to CLI parameter names
  4 | except that the leading "--" is removed
  5 | modifies test_params object with contents of YAML file
  6 | """
  7 | 
  8 | import os
  9 | import tempfile
 10 | 
 11 | import yaml
 12 | 
 13 | import smallfile
 14 | import smf_test_params
 15 | from parser_data_types import (
 16 |     SmfParseException,
 17 |     TypeExc,
 18 |     boolean,
 19 |     file_size_distrib,
 20 |     host_set,
 21 |     non_negative_integer,
 22 |     positive_integer,
 23 | )
 24 | from smallfile import unittest_module
 25 | 
 26 | 
 27 | def parse_yaml(test_params, input_yaml_file):
 28 |     inv = test_params.master_invoke
 29 |     y = {}
 30 |     with open(input_yaml_file, "r") as f:
 31 |         try:
 32 |             y = yaml.safe_load(f)
 33 |             if y is None:
 34 |                 y = {}
 35 |             if type(y) is not dict:
 36 |                 raise SmfParseException(
 37 |                     "yaml.safe_load did not return dictionary - check input file format"
 38 |                 )
 39 |         except yaml.YAMLError as e:
 40 |             raise SmfParseException("YAML parse error: %s" % e)
 41 | 
 42 |     try:
 43 |         for k in y.keys():
 44 |             v = y[k]
 45 |             if k == "yaml-input-file":
 46 |                 raise SmfParseException(
 47 |                     "cannot specify YAML input file from within itself!"
 48 |                 )
 49 |             elif k == "output-json":
 50 |                 test_params.output_json = v
 51 |             elif k == "response-times":
 52 |                 inv.measure_rsptimes = boolean(v)
 53 |             elif k == "network-sync-dir":
 54 |                 inv.network_dir = boolean(v)
 55 |             elif k == "operation":
 56 |                 if not smallfile.SmallfileWorkload.all_op_names.__contains__(v):
 57 |                     raise SmfParseException('operation "%s" not recognized')
 58 |                 inv.opname = v
 59 |             elif k == "top":
 60 |                 test_params.top_dirs = [os.path.abspath(p) for p in y["top"].split(",")]
 61 |             elif k == "host-set":
 62 |                 test_params.host_set = host_set(v)
 63 |             elif k == "total-hosts":
 64 |                 inv.total_hosts = positive_integer(v)
 65 |             elif k == "files":
 66 |                 inv.iterations = positive_integer(v)
 67 |             elif k == "threads":
 68 |                 test_params.thread_count = positive_integer(v)
 69 |             elif k == "files-per-dir":
 70 |                 inv.files_per_dir = positive_integer(v)
 71 |             elif k == "dirs-per-dir":
 72 |                 inv.dirs_per_dir = positive_integer(v)
 73 |             elif k == "record-size":
 74 |                 inv.record_sz_kb = positive_integer(v)
 75 |             elif k == "file-size":
 76 |                 inv.total_sz_kb = non_negative_integer(v)
 77 |             elif k == "file-size-distribution":
 78 |                 test_params.size_distribution = inv.filesize_distr = file_size_distrib(
 79 |                     v
 80 |                 )
 81 |             elif k == "fsync":
 82 |                 inv.fsync = boolean(v)
 83 |             elif k == "xattr-size":
 84 |                 inv.xattr_size = positive_integer(v)
 85 |             elif k == "xattr-count":
 86 |                 inv.xattr_count = positive_integer(v)
 87 |             elif k == "pause":
 88 |                 inv.pause_between_files = non_negative_integer(v)
 89 |             elif k == "auto-pause":
 90 |                 inv.auto_pause = boolean(v)
 91 |             elif k == "cleanup-delay-usec-per-file":
 92 |                 inv.cleanup_delay_usec_per_file = (
 93 |                     test_params.cleanup_delay_usec_per_file
 94 |                 ) = non_negative_integer(v)
 95 |             elif k == "stonewall":
 96 |                 inv.stonewall = boolean(v)
 97 |             elif k == "finish":
 98 |                 inv.finish_all_rq = boolean(v)
 99 |             elif k == "prefix":
100 |                 inv.prefix = v
101 |             elif k == "suffix":
102 |                 inv.suffix = v
103 |             elif k == "hash-into-dirs":
104 |                 inv.hash_to_dir = boolean(v)
105 |             elif k == "same-dir":
106 |                 inv.is_shared_dir = boolean(v)
107 |             elif k == "verbose":
108 |                 inv.verbose = boolean(v)
109 |             elif k == "permute-host-dirs":
110 |                 test_params.permute_host_dirs = boolean(v)
111 |             elif k == "record-time-size":
112 |                 inv.record_ctime_size = boolean(v)
113 |             elif k == "verify-read":
114 |                 inv.verify_read = boolean(v)
115 |             elif k == "incompressible":
116 |                 inv.incompressible = boolean(v)
117 |             elif k == "min-dirs-per-sec":
118 |                 test_params.min_directories_per_sec = positive_integer(v)
119 |             elif k == "log-to-stderr":
120 |                 raise SmfParseException("%s: not allowed in YAML input" % k)
121 |             elif k == "remote-pgm-dir":
122 |                 raise SmfParseException("%s: not allowed in YAML input" % k)
123 |             else:
124 |                 raise SmfParseException("%s: unrecognized input parameter name" % k)
125 |     except TypeExc as e:
126 |         emsg = 'YAML parse error for key "%s" : %s' % (k, str(e))
127 |         raise SmfParseException(emsg)
128 | 
129 | 
130 | class TestYamlParse(unittest_module.TestCase):
131 |     def setUp(self):
132 |         self.params = smf_test_params.smf_test_params()
133 | 
134 |     def tearDown(self):
135 |         self.params = None
136 | 
137 |     def test_parse_empty(self):
138 |         fn = os.path.join(tempfile.gettempdir(), "sample_parse_empty.yaml")
139 |         with open(fn, "w") as f:
140 |             f.write("\n")
141 |         parse_yaml(self.params, fn)
142 |         # just looking for no exception here
143 | 
144 |     def test_parse_all(self):
145 |         fn = os.path.join(tempfile.gettempdir(), "sample_parse.yaml")
146 |         with open(fn, "w") as f:
147 |             f.write("operation: create\n")
148 |         parse_yaml(self.params, fn)
149 |         assert self.params.master_invoke.opname == "create"
150 | 
151 |     def test_parse_negint(self):
152 |         fn = os.path.join(tempfile.gettempdir(), "sample_parse_negint.yaml")
153 |         with open(fn, "w") as f:
154 |             f.write("files: -3\n")
155 |         try:
156 |             parse_yaml(self.params, fn)
157 |         except SmfParseException as e:
158 |             msg = str(e)
159 |             if not msg.__contains__("greater than zero"):
160 |                 raise e
161 | 
162 |     def test_parse_hostset(self):
163 |         fn = os.path.join(tempfile.gettempdir(), "sample_parse_hostset.yaml")
164 |         with open(fn, "w") as f:
165 |             f.write("host-set: host-foo,host-bar\n")
166 |         parse_yaml(self.params, fn)
167 |         assert self.params.host_set == ["host-foo", "host-bar"]
168 | 
169 |     def test_parse_fsdistr_exponential(self):
170 |         fn = os.path.join(
171 |             tempfile.gettempdir(), "sample_parse_fsdistr_exponential.yaml"
172 |         )
173 |         with open(fn, "w") as f:
174 |             f.write("file-size-distribution: exponential\n")
175 |         parse_yaml(self.params, fn)
176 |         assert (
177 |             self.params.master_invoke.filesize_distr
178 |             == smallfile.SmallfileWorkload.fsdistr_random_exponential
179 |         )
180 | 
181 |     def test_parse_dir_list(self):
182 |         fn = os.path.join(tempfile.gettempdir(), "sample_parse_dirlist.yaml")
183 |         with open(fn, "w") as f:
184 |             f.write("top: foo,bar \n")
185 |         parse_yaml(self.params, fn)
186 |         mydir = os.getcwd()
187 |         topdirs = [os.path.join(mydir, d) for d in ["foo", "bar"]]
188 |         assert self.params.top_dirs == topdirs
189 | 
190 | 
191 | if __name__ == "__main__":
192 |     unittest_module.main()
193 | 


--------------------------------------------------------------------------------