├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── onionperf ├── __init__.py ├── analysis.py ├── filtering.py ├── measurement.py ├── model.py ├── monitor.py ├── onionperf ├── reprocessing.py ├── tests │ ├── data │ │ ├── bin │ │ │ ├── script │ │ │ └── script_non_exe │ │ ├── config │ │ ├── dirs │ │ │ └── abcdefg.txt │ │ ├── logs │ │ │ ├── onionperf.tgen.log │ │ │ ├── onionperf.torctl.log │ │ │ ├── onionperf_2019-01-10_23:59:59.tgen.log │ │ │ └── onionperf_2019-01-10_23:59:59.torctl.log │ │ ├── parse_error │ │ ├── simplefile │ │ └── simplefile.xz │ ├── test_analysis.py │ ├── test_measurement.py │ ├── test_reprocessing.py │ └── test_utils.py ├── util.py └── visualization.py ├── requirements.txt ├── schema ├── onionperf-3.0.json └── onionperf-4.0.json └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | onionperf-data 2 | onionperf-private 3 | venv 4 | *.json.xz 5 | *.pdf 6 | *.csv 7 | *.pyc 8 | .coverage 9 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changes in version 0.8 - 2020-09-16 2 | 3 | - Add a new `onionperf filter` mode that takes an OnionPerf analysis 4 | results file or directory as input, applies filters, and produces 5 | new OnionPerf analysis results file(s) as output. Bump the analysis 6 | version number to 4.0 to include additional filter metadata defined 7 | in a 'filters' field and an optional 'filtered\_out' field per Tor 8 | circuit. Implements #33260. 9 | 10 | # Changes in version 0.7 - 2020-09-01 11 | 12 | - Add `onionperf measure --drop-guards` parameter to use and drop 13 | guards and circuit build timeouts every given number of hours, if 14 | supported by the Tor version. Implements #33399. 15 | - Remove the `onionperf measure --oneshot` switch and replace it with 16 | new switches `--tgen-pause-initial`, `--tgen-pause-between`, 17 | `--tgen-transfer-size`, and `--tgen-num-transfers ` to further 18 | configure the generated TGen model. Implemets #33432. 19 | 20 | # Changes in version 0.6 - 2020-08-08 21 | 22 | - Update to TGen 1.0.0, use TGenTools for parsing TGen log files, and 23 | update analysis results file version to 3.0. Implements #33974. 24 | - Remove summaries from analysis results files, and remove the 25 | `onionperf analyze -s/--do-simple-parse` switch. Implements #40005. 26 | - Add JSON schema for analysis results file format 3.0. Implements 27 | #40003. 28 | - Correctly compute the start time of failed streams as part of the 29 | update to TGen and TGenTools 1.0.0. Fixes #30362. 30 | - Refine error codes shown in visualizations into TOR or TGEN errors. 31 | Implements #34218. 32 | 33 | # Changes in version 0.5 - 2020-07-02 34 | 35 | - Add new graph showing the cumulative distribution function of 36 | throughput in Mbps. Implements #33257. 37 | - Improve `README.md` to make it more useful to developers and 38 | researchers. Implements #40001. 39 | - Always include the `error_code` column in visualization CSV output, 40 | regardless of whether data contains measurements with an error code 41 | or not. Fixes #40004. 42 | - Write generated torrc files to disk for debugging purposes. 43 | Implements #40002. 44 | 45 | # Changes in version 0.4 - 2020-06-16 46 | 47 | - Include all measurements when analyzing log files at midnight as 48 | part of `onionperf measure`, not just the ones from the day before. 49 | Also add `onionperf analyze -x/--date-prefix` switch to prepend a 50 | given date string to an analysis results file. Fixes #29369. 51 | - Add `size`, `last_modified`, and `sha256` fields to index.xml. 52 | Implements #29365. 53 | - Add support for single onion services using the switch `onionperf 54 | measure -s/--single-onion`. Implements #29368. 55 | - Remove unused `onionperf measure --traffic-model` switch. 56 | Implements #29370. 57 | - Make `onionperf measure -o/--onion-only` and `onionperf measure 58 | -i/--inet-only` switches mutually exclusive. Fixes #34316. 59 | - Accept one or more paths to analysis results files or directories 60 | of such files per dataset in `onionperf visualize -d/--data` to 61 | include all contained measurements in a dataset. Implements #34191. 62 | 63 | # Changes in version 0.3 - 2020-05-30 64 | 65 | - Automatically compress logs when rotating them. Fixes #33396. 66 | - Update to Python 3. Implements #29367. 67 | - Integrate reprocessing mode into analysis mode. Implements #34142. 68 | - Record download times of smaller file sizes from partial completion 69 | times. Implements #26673. 70 | - Stop generating .tpf files. Implements #34141. 71 | - Update analysis results file version to 2.0. Implements #34224. 72 | - Export visualized data to a CSV file. Implements #33258. 73 | - Remove version 2 onion service support. Implements #33434. 74 | - Reduce timeout and stallout values. Implements #34024. 75 | - Remove 50 KiB and 1 MiB downloads. Implements #34023. 76 | - Remove existing Tor control log visualizations. Implements #34214. 77 | - Update to Networkx version 2.4. Fixes #34298. 78 | - Update time to first/last byte definitions to include the time 79 | between starting a measurement and receiving the first/last byte. 80 | Implements #34215. 81 | - Update `requirements.txt` to actual requirements, and switch from 82 | distutils to setuptools. Fixes #30586. 83 | - Split visualizations into public and onion service measurements. 84 | Fixes #34216. 85 | 86 | # Changes from before 2020-04 87 | 88 | - Changes made before 2020-04 are not listed here. See `git log` for 89 | details. 90 | 91 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | To the extent that a federal employee is an author of a portion of 2 | this software or a derivative work thereof, no copyright is claimed by 3 | the United States Government, as represented by the Secretary of the 4 | Navy ("GOVERNMENT") under Title 17, U.S. Code. All Other Rights 5 | Reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are 9 | met: 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following disclaimer 14 | in the documentation and/or other materials provided with the 15 | distribution. 16 | * Neither the names of the copyright owners nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | GOVERNMENT ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION 32 | AND DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER 33 | RESULTING FROM THE USE OF THIS SOFTWARE. 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OnionPerf 2 | 3 | * [Overview](#overview) 4 | + [What does OnionPerf do?](#what-does-onionperf-do-) 5 | + [What does OnionPerf *not* do?](#what-does-onionperf--not--do-) 6 | * [Installation](#installation) 7 | + [Tor](#tor) 8 | + [TGen](#tgen) 9 | + [OnionPerf](#onionperf-1) 10 | * [Measurement](#measurement) 11 | + [Starting and stopping measurements](#starting-and-stopping-measurements) 12 | + [Output directories and files](#output-directories-and-files) 13 | + [Changing Tor configurations](#changing-tor-configurations) 14 | + [Changing the TGen traffic model](#changing-the-tgen-traffic-model) 15 | + [Sharing measurement results](#sharing-measurement-results) 16 | + [Troubleshooting](#troubleshooting) 17 | * [Analysis](#analysis) 18 | + [Analyzing measurement results](#analyzing-measurement-results) 19 | + [Filtering measurement results](#filtering-measurement-results) 20 | + [Visualizing measurement results](#visualizing-measurement-results) 21 | + [Interpreting the PDF output format](#interpreting-the-pdf-output-format) 22 | + [Interpreting the CSV output format](#interpreting-the-csv-output-format) 23 | + [Visualizations on Tor Metrics](#visualizations-on-tor-metrics) 24 | * [Contributing](#contributing) 25 | 26 | ## Overview 27 | 28 | ### What does OnionPerf do? 29 | 30 | OnionPerf measures performance of bulk file downloads over Tor. Together with its predecessor, Torperf, OnionPerf has been used to measure long-term performance trends in the Tor network since 2009. It is also being used to perform short-term performance experiments to compare different Tor configurations or implementations. 31 | 32 | OnionPerf uses multiple processes and threads to download random data through Tor while tracking the performance of those downloads. The data is served and fetched on localhost using two TGen (traffic generator) processes, and is transferred through Tor using Tor client processes and an ephemeral Tor onion service. Tor control information and TGen performance statistics are logged to disk and analyzed once per day to produce a JSON analysis file that can later be used to visualize changes in Tor client performance over time. 33 | 34 | ### What does OnionPerf *not* do? 35 | 36 | OnionPerf does not attempt to simulate complex traffic patterns like a web-browsing user or a voice-chatting user. It measures a very specific user model: a bulk 5 MiB file download over Tor. 37 | 38 | OnionPerf does not interfere with how Tor selects paths and builds circuits, other than setting configuration values as specified by the user. As a result it cannot be used to measure specific relays nor to scan the entire Tor network. 39 | 40 | ## Installation 41 | 42 | OnionPerf has several dependencies in order to perform measurements or analyze and visualize measurement results. These dependencies include Tor, TGen (traffic generator), and a few Python packages. 43 | 44 | The following description was written with a Debian system in mind but should be transferable to other Linux distributions and possibly even other operating systems. 45 | 46 | ### Tor 47 | 48 | OnionPerf relies on the `tor` binary to start a Tor process on the client side to make client requests and another Tor process on the server side to host onion services. 49 | 50 | The easiest way to satisfy this dependency is to install the `tor` package, which puts the `tor` binary into the `PATH` where OnionPerf will find it. Optionally, systemd can be instructed to make sure that `tor` is never started as a service: 51 | 52 | ```shell 53 | sudo apt install tor 54 | sudo systemctl stop tor.service 55 | sudo systemctl mask tor.service 56 | ``` 57 | 58 | Alternatively, Tor can be built from source: 59 | 60 | ```shell 61 | sudo apt install automake build-essential libevent-dev libssl-dev zlib1g-dev 62 | cd ~/ 63 | git clone https://git.torproject.org/tor.git 64 | cd tor/ 65 | ./autogen.sh 66 | ./configure --disable-asciidoc 67 | make 68 | ``` 69 | 70 | In this case the resulting `tor` binary can be found in `~/tor/src/app/tor` and needs to be passed to OnionPerf's `--tor` parameter when doing measurements. 71 | 72 | ### TGen 73 | 74 | OnionPerf uses TGen to generate traffic on client and server side for its measurements. Installing dependencies, cloning TGen to a subdirectory in the user's home directory, and building TGen is done as follows: 75 | 76 | ```shell 77 | sudo apt install cmake libglib2.0-dev libigraph0-dev make 78 | cd ~/ 79 | git clone https://github.com/shadow/tgen.git 80 | cd tgen/ 81 | mkdir build 82 | cd build/ 83 | cmake .. 84 | make 85 | ``` 86 | 87 | The TGen binary will be contained in `~/tgen/build/src/tgen`, which is also the path that needs to be passed to OnionPerf's `--tgen` parameter when doing measurements. 88 | 89 | ### OnionPerf 90 | 91 | OnionPerf is written in Python 3. The following instructions assume that a Python virtual environment is being used, even though installation is also possible without that. 92 | 93 | The virtual environment is created, activated, and tested using: 94 | 95 | ```shell 96 | sudo apt install python3-venv 97 | cd ~/ 98 | python3 -m venv venv 99 | source venv/bin/activate 100 | which python3 101 | ``` 102 | 103 | The last command should output something like `~/venv/bin/python3` as the path to the `python3` binary used in the virtual environment. 104 | 105 | The next step is to clone the OnionPerf repository and install its requirements: 106 | 107 | ```shell 108 | git clone https://git.torproject.org/onionperf.git 109 | pip3 install --no-cache -r onionperf/requirements.txt 110 | ``` 111 | 112 | The final step is to install OnionPerf and print out the usage information to see if the installation was successful: 113 | 114 | ```shell 115 | cd onionperf/ 116 | python3 setup.py install 117 | cd ~/ 118 | onionperf --help 119 | ``` 120 | 121 | The virtual environment is deactivated with the following command: 122 | 123 | ```shell 124 | deactivate 125 | ``` 126 | 127 | However, in order to perform measurements or analyses, the virtual environment needs to be activated first. This will ensure all the paths are found. 128 | 129 | If needed, unit tests are run with the following command: 130 | 131 | ```shell 132 | cd ~/onionperf/ 133 | python3 -m nose --with-coverage --cover-package=onionperf 134 | ``` 135 | 136 | ## Measurement 137 | 138 | Performing measurements with OnionPerf is done by starting an `onionperf` process that itself starts several other processes and keeps running until it is interrupted by the user. During this time it performs new measurements every 5 minutes and logs measurement results to files. 139 | 140 | Ideally, OnionPerf is run detached from the terminal session using tmux, systemd, or similar, except for the most simple test runs. The specifics for using these tools are not covered in this document. 141 | 142 | ### Starting and stopping measurements 143 | 144 | The most trivial configuration is to measure onion services only. In that case, OnionPerf runs without needing any additional configuration. For direct measurements via exit nodes, firewall rules or port forwarding may be required to allow inbound connections to the TGen server. 145 | 146 | Starting these measurements is as simple as: 147 | 148 | ```shell 149 | cd ~/ 150 | onionperf measure --onion-only --tgen ~/tgen/build/tgen --tor ~/tor/src/app/tor 151 | ``` 152 | 153 | OnionPerf logs its main output on the console and then waits indefinitely until the user presses `CTRL-C` for graceful shutdown. It does not, however, print out measurement results or progress on the console, just a heartbeat message every hour. 154 | 155 | OnionPerf's `measure` mode has several command-line parameters for customizing measurements. See the following command for usage information: 156 | 157 | ```shell 158 | onionperf measure --help 159 | ``` 160 | 161 | ### Output directories and files 162 | 163 | OnionPerf writes several files to two subdirectories in the current working directory while doing measurements: 164 | 165 | - `onionperf-data/` is the main directory containing measurement results. 166 | - `htdocs/` is created at the first UTC midnight after starting and contains measurement analysis result files that can be shared via a local web server. 167 | - `$date.onionperf.analysis.json.xz` contains extracted metrics in OnionPerf's analysis JSON format. 168 | - `index.xml` contains a directory index with file names, sizes, last-modified times, and SHA-256 digests. 169 | - `tgen-client/` is the working directory of the client-side `tgen` process. 170 | - `log_archive/` is created at the first UTC midnight after starting and contains compressed log files from previous UTC days. 171 | - `onionperf.tgen.log` is the current log file. 172 | - `tgen.graphml.xml` is the traffic model file generated by OnionPerf and used by TGen. 173 | - `tgen-server/` is the working directory of the server-side `tgen` process with the same structure as `tgen-client/`. 174 | - `tor-client/` is the working directory of the client-side `tor` process. 175 | - `log_archive/` is created at the first UTC midnight after starting and contains compressed log files from previous UTC days. 176 | - `onionperf.tor.log` is the current log file containing log messages by the client-side `tor` process. 177 | - `onionperf.torctl.log` is the current log file containing controller events obtained by OnionPerf connecting to the control port of the client-side `tor` process. 178 | - `[...]` (several other files written by the client-side `tor` process to its data directory) 179 | - `tor-server/` is the working directory of the server-side `tor` process with the same structure as `tor-client/`. 180 | - `onionperf-private/` contains private keys of the onion services used for measurements and potentially other files that are not meant to be published together with measurement results. 181 | 182 | ### Changing Tor configurations 183 | 184 | OnionPerf generates Tor configurations for both client-side and server-side `tor` processes. There are a few ways to add Tor configuration lines: 185 | 186 | - If the `BASETORRC` environment variable is set, OnionPerf appends its own configuration options to the contents of that variable. Example: 187 | 188 | ```shell 189 | BASETORRC=$'Option1 Foo\nOption2 Bar\n' onionperf ... 190 | ``` 191 | 192 | - If the `--torclient-conf-file` and/or `--torserver-conf-file` command-line arguments are given, the contents of those files are appended to the configurations of client-side and/or server-side `tor` process. 193 | - If the `--additional-client-conf` command-line argument is given, its content is appended to the configuration of the client-side `tor` process. 194 | 195 | These options can be used, for example, to change the default measurement setup use bridges (or pluggable transports) by passing bridge addresses as additional client configuration lines as follows: 196 | 197 | ```shell 198 | onionperf measure --additional-client-conf="UseBridges 1\nBridge 72.14.177.231:9001 AC0AD4107545D4AF2A595BC586255DEA70AF119D\nBridge 195.91.239.8:9001 BA83F62551545655BBEBBFF353A45438D73FD45A\nBridge 148.63.111.136:35577 768C8F8313FF9FF8BBC915898343BC8B238F3770" 199 | ``` 200 | 201 | ### Changing the TGen traffic model 202 | 203 | OnionPerf is a relatively simple tool that can be adapted to do more complex measurements beyond what can be configured on the command line. 204 | 205 | For example, the hard-coded traffic model generated by OnionPerf and executed by the TGen processes is to send a small request from client to server and receive a relatively large response of 5 MiB of random data back. This model can be changed by editing `~/onionperf/onionperf/model.py`, rebuilding, and restarting measurements. For specifics, see the [TGen 206 | documentation](https://github.com/shadow/tgen/blob/master/doc/TGen-Overview.md) 207 | and [TGen traffic model examples](https://github.com/shadow/tgen/blob/master/tools/scripts/generate_tgen_config.py). 208 | 209 | ### Sharing measurement results 210 | 211 | Measurement results can be further analyzed and visualized on the measuring host. But in many cases it's more convenient to do analysis and visualization on another host, also to compare measurements from different hosts to each other. 212 | 213 | There are at least two common ways of sharing measurement results: 214 | 215 | 1. Creating a tarball of the `onionperf-data/` directory; and 216 | 2. Using a local web server to serve the contents of the `onionperf-data/` directory. 217 | 218 | The details of doing either of these two methods are not covered in this document. 219 | 220 | ### Troubleshooting 221 | 222 | If anything goes wrong while doing measurements, OnionPerf typically informs the user in its console output. This is also the first place to look for investigating any issues. 223 | 224 | The second place would be to check the log files in `~/onionperf-data/tgen-client/` or `~/onionperf-data/tor-client/`. 225 | 226 | The most common configuration problems are probably related to firewall and port forwarding for doing direct (non onion-service) measurements. The specifics for setting up the firewall are out of scope for this document. 227 | 228 | Another class of common issues of long-running measurements is that one of the `tgen` or `tor` processes dies for reasons or hints (hopefully) to be found in their respective log files. 229 | 230 | In order to avoid extended downtimes it is recommended to deploy monitoring tools that check whether measurement results produced by OnionPerf are fresh. The specifics are, again, out of scope for this document. 231 | 232 | ## Analysis 233 | 234 | The next steps after performing measurements are to analyze and optionally visualize measurement results. 235 | 236 | ### Analyzing measurement results 237 | 238 | While performing measurements, OnionPerf writes quite verbose log files to disk. The first step in the analysis is to parse these log files, extract key metrics, and write smaller and more structured measurement results to disk. This is done with OnionPerf's `analyze` mode. 239 | 240 | For example, the following command analyzes current log files of a running (or stopped) OnionPerf instance (as opposed to log-rotated, compressed files from previous days): 241 | 242 | ```shell 243 | onionperf analyze --tgen ~/onionperf-data/tgen-client/onionperf.tgen.log --torctl ~/onionperf-data/tor-client/onionperf.torctl.log 244 | ``` 245 | 246 | The output analysis file is written to `onionperf.analysis.json.xz` in the current working directory. The file format is described in more detail in `schema/onionperf-3.0.json`. 247 | 248 | The same analysis files are written automatically as part of ongoing measurements once per day at UTC midnight and can be found in `onionperf-data/htdocs/`. 249 | 250 | OnionPerf's `analyze` mode has several command-line parameters for customizing the analysis step: 251 | 252 | ```shell 253 | onionperf analyze --help 254 | ``` 255 | 256 | ### Filtering measurement results 257 | 258 | The `filter` subcommand can be used to filter out measurement results based on given criteria. This subcommand is typically used in combination with the `visualize` subcommand. The workflow is to apply one or more filters and then visualize only those measurements with an existing mapping between TGen transfers/streams and Tor streams/circuits. 259 | 260 | Currently, OnionPerf measurement results can be filtered based on Tor relay fingerprints found in Tor circuits, although support for filtering based on Tor streams and/or TGen transfers/streams may be added in the future. 261 | 262 | The `filter` mode takes a list of fingerprints and one or more existing analysis files as inputs and outputs new analysis files with the same contents as the input analysis files plus annotations on those Tor circuits that have been filtered out. If a directory of analysis files is given to '-i', the structure and filenames of that directory are preserved under the path specified with '-o'. 263 | 264 | For example, the analysis file produced above can be filtered with the following command, which retains only those Tor circuits with fingerprints contained in the file 'fingerprints.txt': 265 | 266 | ```shell 267 | onionperf filter -i onionperf.analysis.json.xz -o filtered.onionperf.analysis.json.xz --include-fingerprints fingerprints.txt 268 | ``` 269 | 270 | OnionPerf's `filter` command usage can be inspected with: 271 | 272 | ```shell 273 | onionperf filter --help 274 | ``` 275 | 276 | ### Visualizing measurement results 277 | 278 | Step two in the analysis is to process analysis files with OnionPerf's `visualize` mode which produces CSV and PDF files as output. 279 | 280 | For example, the analysis file produced above can be visualized with the following command, using "Test Measurements" as label for the data set: 281 | 282 | ```shell 283 | onionperf visualize --data onionperf.analysis.json.xz "Test Measurements" 284 | ``` 285 | 286 | As a result, two files are written to the current working directory: 287 | 288 | - `onionperf.viz.$datetime.csv` contains visualized data in a CSV file format; and 289 | - `onionperf.viz.$datetime.pdf` contains visualizations in a PDF file format. 290 | 291 | For analysis files containing tor circuit filters, only measurements with an existing mapping between TGen transfers/streams Tor streams/circuits which have not been marked as 'filtered\_out' are visualized. 292 | 293 | Similar to the other modes, OnionPerf's `visualize` mode has command-line parameters for customizing the visualization step: 294 | 295 | ```shell 296 | onionperf visualize --help 297 | ``` 298 | 299 | ### Interpreting the PDF output format 300 | 301 | The PDF output file contains visualizations of the following metrics: 302 | 303 | - Time to download first (last) byte, which is defined as elapsed time between starting a measurement and receiving the first (last) byte of the HTTP response. 304 | - Throughput, which is computed from the elapsed time between receiving 0.5 and 1 MiB of the response. 305 | - Number of downloads. 306 | - Number and type of failures. 307 | 308 | ### Interpreting the CSV output format 309 | 310 | The CSV output file contains the same data that is visualized in the PDF file. It contains the following columns: 311 | 312 | - `id` is the identifier used in the TGen client logs which may be useful to look up more details about a specific measurement. 313 | - `error_code` is an optional error code if a measurement did not succeed. 314 | - `filesize_bytes` is the requested file size in bytes. 315 | - `label` is the data set label as given in the `--data/-d` parameter to the `visualize` mode. 316 | - `server` is set to either `onion` for onion service measurements or `public` for direct measurements. 317 | - `start` is the measurement start time. 318 | - `time_to_first_byte` is the time in seconds (with microsecond precision) to download the first byte. 319 | - `time_to_last_byte` is the time in seconds (with microsecond precision) to download the last byte. 320 | 321 | ### Visualizations on Tor Metrics 322 | 323 | The analysis and visualization steps above can all be done by using the OnionPerf tool. In addition to that it's possible to visualize OnionPerf analysis files using other tools. 324 | 325 | For example, the [Tor Metrics website](https://metrics.torproject.org/torperf.html) contains various graphs based OnionPerf data. 326 | 327 | ## Contributing 328 | 329 | The OnionPerf code is developed at https://gitlab.torproject.org/tpo/metrics/onionperf. 330 | 331 | Contributions to OnionPerf are welcome and encouraged! 332 | 333 | -------------------------------------------------------------------------------- /onionperf/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | OnionPerf 3 | Authored by Rob Jansen, 2015 4 | Copyright 2015-2020 The Tor Project 5 | See LICENSE for licensing information 6 | ''' 7 | 8 | __all__ = [ 9 | 'analysis', 10 | 'measurement', 11 | 'model', 12 | 'monitor', 13 | 'util', 14 | 'visualization', 15 | ] 16 | -------------------------------------------------------------------------------- /onionperf/analysis.py: -------------------------------------------------------------------------------- 1 | ''' 2 | OnionPerf 3 | Authored by Rob Jansen, 2015 4 | Copyright 2015-2020 The Tor Project 5 | See LICENSE for licensing information 6 | ''' 7 | 8 | import os, re, json, datetime, logging 9 | 10 | from abc import ABCMeta, abstractmethod 11 | 12 | # stem imports 13 | from stem import CircEvent, CircStatus, CircPurpose, StreamStatus 14 | from stem.response.events import CircuitEvent, CircMinorEvent, StreamEvent, BuildTimeoutSetEvent 15 | from stem.response import ControlMessage, convert 16 | 17 | # tgentools imports 18 | from tgentools.analysis import Analysis, TGenParser 19 | 20 | # onionperf imports 21 | from . import util 22 | 23 | class OPAnalysis(Analysis): 24 | 25 | def __init__(self, nickname=None, ip_address=None): 26 | super().__init__(nickname, ip_address) 27 | self.json_db = {'type': 'onionperf', 'version': '4.0', 'data': {}} 28 | self.torctl_filepaths = [] 29 | 30 | def add_torctl_file(self, filepath): 31 | self.torctl_filepaths.append(filepath) 32 | 33 | def analyze(self, date_filter=None): 34 | if self.did_analysis: 35 | return 36 | 37 | self.date_filter = date_filter 38 | super().analyze(do_complete=True, date_filter=self.date_filter) 39 | torctl_parser = TorCtlParser(date_filter=self.date_filter) 40 | 41 | for (filepaths, parser, json_db_key) in [(self.torctl_filepaths, torctl_parser, 'tor')]: 42 | if len(filepaths) > 0: 43 | for filepath in filepaths: 44 | logging.info("parsing log file at {0}".format(filepath)) 45 | parser.parse(util.DataSource(filepath)) 46 | 47 | if self.nickname is None: 48 | parsed_name = parser.get_name() 49 | if parsed_name is not None: 50 | self.nickname = parsed_name 51 | elif self.hostname is not None: 52 | self.nickname = self.hostname 53 | else: 54 | self.nickname = "unknown" 55 | 56 | if self.measurement_ip is None: 57 | self.measurement_ip = "unknown" 58 | 59 | self.json_db['data'].setdefault(self.nickname, {'measurement_ip': self.measurement_ip}).setdefault(json_db_key, parser.get_data()) 60 | self.json_db['data'][self.nickname]["tgen"].pop("heartbeats") 61 | self.json_db['data'][self.nickname]["tgen"].pop("init_ts") 62 | self.json_db['data'][self.nickname]["tgen"].pop("stream_summary") 63 | self.did_analysis = True 64 | 65 | def save(self, filename=None, output_prefix=os.getcwd(), do_compress=True, date_prefix=None, sort_keys=True): 66 | if filename is None: 67 | base_filename = "onionperf.analysis.json.xz" 68 | if date_prefix is not None: 69 | filename = "{0}.{1}".format(util.date_to_string(date_prefix), base_filename) 70 | elif self.date_filter is not None: 71 | filename = "{0}.{1}".format(util.date_to_string(self.date_filter), base_filename) 72 | else: 73 | filename = base_filename 74 | 75 | filepath = os.path.abspath(os.path.expanduser("{0}/{1}".format(output_prefix, filename))) 76 | if not os.path.exists(output_prefix): 77 | os.makedirs(output_prefix) 78 | 79 | logging.info("saving analysis results to {0}".format(filepath)) 80 | 81 | outf = util.FileWritable(filepath, do_compress=do_compress) 82 | json.dump(self.json_db, outf, sort_keys=sort_keys, separators=(',', ': '), indent=2) 83 | outf.close() 84 | 85 | logging.info("done!") 86 | 87 | 88 | def get_tgen_streams(self, node): 89 | try: 90 | return self.json_db['data'][node]['tgen']['streams'] 91 | except: 92 | return None 93 | 94 | def get_tgen_transfers(self, node): 95 | try: 96 | return self.json_db['data'][node]['tgen']['transfers'] 97 | except: 98 | return None 99 | 100 | def get_tor_circuits(self, node): 101 | try: 102 | return self.json_db['data'][node]['tor']['circuits'] 103 | except: 104 | return None 105 | 106 | def set_tor_circuits(self, node, tor_circuits): 107 | self.json_db['data'][node]['tor']['circuits'] = tor_circuits 108 | 109 | def get_tor_streams(self, node): 110 | try: 111 | return self.json_db['data'][node]['tor']['streams'] 112 | except: 113 | return None 114 | 115 | @classmethod 116 | def load(cls, filename="onionperf.analysis.json.xz", input_prefix=os.getcwd()): 117 | filepath = os.path.abspath(os.path.expanduser("{0}".format(filename))) 118 | if not os.path.exists(filepath): 119 | filepath = os.path.abspath(os.path.expanduser("{0}/{1}".format(input_prefix, filename))) 120 | if not os.path.exists(filepath): 121 | logging.warning("file does not exist at '{0}'".format(filepath)) 122 | return None 123 | 124 | logging.info("loading analysis results from {0}".format(filepath)) 125 | 126 | inf = util.DataSource(filepath) 127 | inf.open() 128 | db = json.load(inf.get_file_handle()) 129 | inf.close() 130 | 131 | logging.info("done!") 132 | 133 | if 'type' not in db or 'version' not in db: 134 | logging.warning("'type' or 'version' not present in database") 135 | return None 136 | elif db['type'] != 'onionperf' or str(db['version']) >= '5.': 137 | logging.warning("type or version not supported (type={0}, version={1})".format(db['type'], db['version'])) 138 | return None 139 | else: 140 | analysis_instance = cls() 141 | analysis_instance.json_db = db 142 | return analysis_instance 143 | 144 | class Parser(object, metaclass=ABCMeta): 145 | @abstractmethod 146 | def parse(self, source): 147 | pass 148 | @abstractmethod 149 | def get_data(self): 150 | pass 151 | @abstractmethod 152 | def get_name(self): 153 | pass 154 | 155 | 156 | class TorStream(object): 157 | def __init__(self, sid): 158 | self.stream_id = sid 159 | self.circuit_id = None 160 | self.unix_ts_start = None 161 | self.unix_ts_end = None 162 | self.failure_reason_local = None 163 | self.failure_reason_remote = None 164 | self.source = None 165 | self.target = None 166 | self.elapsed_seconds = [] 167 | self.last_purpose = None 168 | 169 | def add_event(self, purpose, status, arrived_at): 170 | if purpose is not None: 171 | self.last_purpose = purpose 172 | key = "{0}:{1}".format(self.last_purpose, status) 173 | self.elapsed_seconds.append([key, arrived_at]) 174 | 175 | def set_circ_id(self, circ_id): 176 | if circ_id is not None: 177 | self.circuit_id = circ_id 178 | 179 | def set_start_time(self, unix_ts): 180 | if self.unix_ts_start is None: 181 | self.unix_ts_start = unix_ts 182 | 183 | def set_end_time(self, unix_ts): 184 | self.unix_ts_end = unix_ts 185 | 186 | def set_local_failure(self, reason): 187 | self.failure_reason_local = reason 188 | 189 | def set_remote_failure(self, reason): 190 | self.failure_reason_remote = reason 191 | 192 | def set_target(self, target): 193 | self.target = target 194 | 195 | def set_source(self, source): 196 | self.source = source 197 | 198 | def get_data(self): 199 | if self.unix_ts_start is None or self.unix_ts_end is None: 200 | return None 201 | d = self.__dict__ 202 | for item in d['elapsed_seconds']: 203 | item[1] = item[1] - self.unix_ts_start 204 | del(d['last_purpose']) 205 | if d['failure_reason_local'] is None: del(d['failure_reason_local']) 206 | if d['failure_reason_remote'] is None: del(d['failure_reason_remote']) 207 | if d['source'] is None: del(d['source']) 208 | if d['target'] is None: del(d['target']) 209 | return d 210 | 211 | def __str__(self): 212 | return('stream id=%d circ_id=%s %s' % (self.id, self.circ_id, 213 | ' '.join(['%s=%s' % (event, arrived_at) 214 | for (event, arrived_at) in sorted(self.elapsed_seconds, key=lambda item: item[1])]))) 215 | 216 | class TorCircuit(object): 217 | def __init__(self, cid): 218 | self.circuit_id = cid 219 | self.unix_ts_start = None 220 | self.unix_ts_end = None 221 | self.failure_reason_local = None 222 | self.failure_reason_remote = None 223 | self.buildtime_seconds = None 224 | self.build_timeout = None 225 | self.build_quantile = None 226 | self.elapsed_seconds = [] 227 | self.path = [] 228 | 229 | def add_event(self, event, arrived_at): 230 | self.elapsed_seconds.append([str(event), arrived_at]) 231 | 232 | def add_hop(self, hop, arrived_at): 233 | self.path.append(["${0}~{1}".format(hop[0], hop[1]), arrived_at]) 234 | 235 | def set_launched(self, unix_ts, build_timeout, build_quantile): 236 | if self.unix_ts_start is None: 237 | self.unix_ts_start = unix_ts 238 | self.build_timeout = build_timeout 239 | self.build_quantile = build_quantile 240 | 241 | def set_end_time(self, unix_ts): 242 | self.unix_ts_end = unix_ts 243 | 244 | def set_local_failure(self, reason): 245 | self.failure_reason_local = reason 246 | 247 | def set_remote_failure(self, reason): 248 | self.failure_reason_remote = reason 249 | 250 | def set_build_time(self, unix_ts): 251 | if self.buildtime_seconds is None: 252 | self.buildtime_seconds = unix_ts 253 | 254 | def get_data(self): 255 | if self.unix_ts_start is None or self.unix_ts_end is None: 256 | return None 257 | d = self.__dict__ 258 | for item in d['elapsed_seconds']: 259 | item[1] = item[1] - self.unix_ts_start 260 | for item in d['path']: 261 | item[1] = item[1] - self.unix_ts_start 262 | if d['buildtime_seconds'] is None: 263 | del(d['buildtime_seconds']) 264 | else: 265 | d['buildtime_seconds'] = self.buildtime_seconds - self.unix_ts_start 266 | if len(d['path']) == 0: del(d['path']) 267 | if d['failure_reason_local'] is None: del(d['failure_reason_local']) 268 | if d['failure_reason_remote'] is None: del(d['failure_reason_remote']) 269 | if d['build_timeout'] is None: del(d['build_timeout']) 270 | if d['build_quantile'] is None: del(d['build_quantile']) 271 | return d 272 | 273 | def __str__(self): 274 | return('circuit id=%d %s' % (self.id, ' '.join(['%s=%s' % 275 | (event, arrived_at) for (event, arrived_at) in 276 | sorted(self.elapsed_seconds, key=lambda item: item[1])]))) 277 | 278 | class TorCtlParser(Parser): 279 | 280 | def __init__(self, date_filter=None): 281 | ''' date_filter should be given in UTC ''' 282 | self.circuits_state = {} 283 | self.circuits = {} 284 | self.streams_state = {} 285 | self.streams = {} 286 | self.name = None 287 | self.boot_succeeded = False 288 | self.build_timeout_last = None 289 | self.build_quantile_last = None 290 | self.date_filter = date_filter 291 | 292 | def __handle_circuit(self, event, arrival_dt): 293 | # first make sure we have a circuit object 294 | cid = int(event.id) 295 | circ = self.circuits_state.setdefault(cid, TorCircuit(cid)) 296 | is_hs_circ = True if event.purpose in (CircPurpose.HS_CLIENT_INTRO, CircPurpose.HS_CLIENT_REND, \ 297 | CircPurpose.HS_SERVICE_INTRO, CircPurpose.HS_SERVICE_REND) else False 298 | 299 | # now figure out what status we want to track 300 | key = None 301 | if isinstance(event, CircuitEvent): 302 | if event.status == CircStatus.LAUNCHED: 303 | circ.set_launched(arrival_dt, self.build_timeout_last, self.build_quantile_last) 304 | 305 | key = "{0}:{1}".format(event.purpose, event.status) 306 | circ.add_event(key, arrival_dt) 307 | 308 | if event.status == CircStatus.EXTENDED: 309 | circ.add_hop(event.path[-1], arrival_dt) 310 | elif event.status == CircStatus.FAILED: 311 | circ.set_local_failure(event.reason) 312 | if event.remote_reason is not None and event.remote_reason != '': 313 | circ.set_remote_failure(event.remote_reason) 314 | elif event.status == CircStatus.BUILT: 315 | circ.set_build_time(arrival_dt) 316 | if is_hs_circ: 317 | key = event.hs_state 318 | if event.rend_query is not None and event.rend_query != '': 319 | key = "{0}:{1}".format(key, event.rend_query) 320 | circ.add_event(key, arrival_dt) 321 | 322 | if event.status == CircStatus.CLOSED or event.status == CircStatus.FAILED: 323 | circ.set_end_time(arrival_dt) 324 | started, built, ended = circ.unix_ts_start, circ.buildtime_seconds, circ.unix_ts_end 325 | 326 | data = circ.get_data() 327 | if data is not None: 328 | self.circuits[cid] = data 329 | self.circuits_state.pop(cid) 330 | 331 | elif isinstance(event, CircMinorEvent): 332 | if event.purpose != event.old_purpose or event.event != CircEvent.PURPOSE_CHANGED: 333 | key = "{0}:{1}".format(event.event, event.purpose) 334 | circ.add_event(key, arrival_dt) 335 | 336 | if is_hs_circ: 337 | key = event.hs_state 338 | if event.rend_query is not None and event.rend_query != '': 339 | key = "{0}:{1}".format(key, event.rend_query) 340 | circ.add_event(key, arrival_dt) 341 | 342 | def __handle_stream(self, event, arrival_dt): 343 | sid = int(event.id) 344 | strm = self.streams_state.setdefault(sid, TorStream(sid)) 345 | 346 | if event.circ_id is not None: 347 | strm.set_circ_id(event.circ_id) 348 | 349 | strm.add_event(event.purpose, event.status, arrival_dt) 350 | strm.set_target(event.target) 351 | 352 | if event.status == StreamStatus.NEW or event.status == StreamStatus.NEWRESOLVE: 353 | strm.set_start_time(arrival_dt) 354 | strm.set_source(event.source_addr) 355 | elif event.status == StreamStatus.FAILED: 356 | strm.set_local_failure(event.reason) 357 | if event.remote_reason is not None and event.remote_reason != '': 358 | strm.set_remote_failure(event.remote_reason) 359 | 360 | if event.status == StreamStatus.CLOSED or event.status == StreamStatus.FAILED: 361 | strm.set_end_time(arrival_dt) 362 | stream_type = strm.last_purpose 363 | started, ended = strm.unix_ts_start, strm.unix_ts_end 364 | 365 | data = strm.get_data() 366 | if data is not None: 367 | self.streams[sid] = data 368 | self.streams_state.pop(sid) 369 | 370 | def __handle_buildtimeout(self, event, arrival_dt): 371 | self.build_timeout_last = event.timeout 372 | self.build_quantile_last = event.quantile 373 | 374 | def __handle_event(self, event, arrival_dt): 375 | if isinstance(event, (CircuitEvent, CircMinorEvent)): 376 | self.__handle_circuit(event, arrival_dt) 377 | elif isinstance(event, StreamEvent): 378 | self.__handle_stream(event, arrival_dt) 379 | elif isinstance(event, BuildTimeoutSetEvent): 380 | self.__handle_buildtimeout(event, arrival_dt) 381 | 382 | def __is_date_valid(self, date_to_check): 383 | if self.date_filter is None: 384 | # we are not asked to filter, so every date is valid 385 | return True 386 | else: 387 | # we are asked to filter, so the line is only valid if the date matches the filter 388 | # both the filter and the unix timestamp should be in UTC at this point 389 | return util.do_dates_match(self.date_filter, date_to_check) 390 | 391 | def __parse_line(self, line): 392 | if not self.boot_succeeded: 393 | if re.search("Starting\storctl\sprogram\son\shost", line) is not None: 394 | parts = line.strip().split() 395 | if len(parts) < 11: 396 | return True 397 | self.name = parts[10] 398 | if re.search("Bootstrapped\s100", line) is not None: 399 | self.boot_succeeded = True 400 | elif re.search("BOOTSTRAP", line) is not None and re.search("PROGRESS=100", line) is not None: 401 | self.boot_succeeded = True 402 | 403 | # parse with stem 404 | timestamps, sep, raw_event_str = line.partition(" 650 ") 405 | if sep == '': 406 | return True 407 | 408 | # event.arrived_at is also available but at worse granularity 409 | unix_ts = float(timestamps.strip().split()[2]) 410 | 411 | # check if we should ignore the line 412 | line_date = datetime.datetime.utcfromtimestamp(unix_ts).date() 413 | if not self.__is_date_valid(line_date): 414 | return True 415 | 416 | event = ControlMessage.from_str("{0} {1}".format(sep.strip(), raw_event_str)) 417 | convert('EVENT', event) 418 | self.__handle_event(event, unix_ts) 419 | 420 | return True 421 | 422 | def parse(self, source): 423 | source.open(newline='\r\n') 424 | for line in source: 425 | # ignore line parsing errors 426 | try: 427 | if self.__parse_line(line): 428 | continue 429 | else: 430 | break 431 | except: 432 | continue 433 | source.close() 434 | 435 | def get_data(self): 436 | return {'circuits': self.circuits, 'streams': self.streams} 437 | 438 | def get_name(self): 439 | return self.name 440 | -------------------------------------------------------------------------------- /onionperf/filtering.py: -------------------------------------------------------------------------------- 1 | ''' 2 | OnionPerf 3 | Authored by Rob Jansen, 2015 4 | Copyright 2015-2020 The Tor Project 5 | See LICENSE for licensing information 6 | ''' 7 | 8 | import re 9 | from onionperf.analysis import OPAnalysis 10 | 11 | class Filtering(object): 12 | 13 | def __init__(self): 14 | self.fingerprints_to_include = None 15 | self.fingerprints_to_exclude = None 16 | self.fingerprint_pattern = re.compile("\$?([0-9a-fA-F]{40})") 17 | 18 | def include_fingerprints(self, path): 19 | self.fingerprints_to_include = [] 20 | self.fingerprints_to_include_path = path 21 | with open(path, 'rt') as f: 22 | for line in f: 23 | fingerprint_match = self.fingerprint_pattern.match(line) 24 | if fingerprint_match: 25 | fingerprint = fingerprint_match.group(1).upper() 26 | self.fingerprints_to_include.append(fingerprint) 27 | 28 | def exclude_fingerprints(self, path): 29 | self.fingerprints_to_exclude = [] 30 | self.fingerprints_to_exclude_path = path 31 | with open(path, 'rt') as f: 32 | for line in f: 33 | fingerprint_match = self.fingerprint_pattern.match(line) 34 | if fingerprint_match: 35 | fingerprint = fingerprint_match.group(1).upper() 36 | self.fingerprints_to_exclude.append(fingerprint) 37 | 38 | def filter_tor_circuits(self, analysis): 39 | if self.fingerprints_to_include is None and self.fingerprints_to_exclude is None: 40 | return 41 | filters = analysis.json_db.setdefault("filters", {}) 42 | tor_circuits_filters = filters.setdefault("tor/circuits", []) 43 | if self.fingerprints_to_include: 44 | tor_circuits_filters.append({"name": "include_fingerprints", "filepath": self.fingerprints_to_include_path }) 45 | if self.fingerprints_to_exclude: 46 | tor_circuits_filters.append({"name": "exclude_fingerprints", "filepath": self.fingerprints_to_exclude_path }) 47 | for source in analysis.get_nodes(): 48 | tor_circuits = analysis.get_tor_circuits(source) 49 | filtered_circuit_ids = [] 50 | for circuit_id, tor_circuit in tor_circuits.items(): 51 | keep = False 52 | if "path" in tor_circuit: 53 | path = tor_circuit["path"] 54 | keep = True 55 | for long_name, _ in path: 56 | fingerprint_match = self.fingerprint_pattern.match(long_name) 57 | if fingerprint_match: 58 | fingerprint = fingerprint_match.group(1).upper() 59 | if self.fingerprints_to_include is not None and fingerprint not in self.fingerprints_to_include: 60 | keep = False 61 | break 62 | if self.fingerprints_to_exclude is not None and fingerprint in self.fingerprints_to_exclude: 63 | keep = False 64 | break 65 | if not keep: 66 | tor_circuits[circuit_id]["filtered_out"] = True 67 | tor_circuits[circuit_id] = dict(sorted(tor_circuit.items())) 68 | 69 | def apply_filters(self, input_path, output_dir, output_file): 70 | analysis = OPAnalysis.load(filename=input_path) 71 | self.filter_tor_circuits(analysis) 72 | analysis.json_db["version"] = '4.0' 73 | analysis.json_db = dict(sorted(analysis.json_db.items())) 74 | analysis.save(filename=output_file, output_prefix=output_dir, sort_keys=False) 75 | 76 | -------------------------------------------------------------------------------- /onionperf/measurement.py: -------------------------------------------------------------------------------- 1 | ''' 2 | OnionPerf 3 | Authored by Rob Jansen, 2015 4 | Copyright 2015-2020 The Tor Project 5 | See LICENSE for licensing information 6 | ''' 7 | 8 | import binascii, hashlib 9 | import os, traceback, subprocess, threading, queue, logging, time, datetime, re, shlex 10 | from lxml import etree 11 | 12 | # stem imports 13 | from stem.util import str_tools 14 | from stem.control import Controller 15 | from stem.version import Version, Requirement, get_system_tor_version 16 | from stem import __version__ as stem_version 17 | 18 | class TGenConf(object): 19 | """Represents a TGen configuration, for both client and server.""" 20 | def __init__(self, listen_port=None, connect_ip=None, connect_port=None, tor_ctl_port=None, tor_socks_port=None): 21 | self.listen_port = str(listen_port) 22 | self.tor_ctl_port = tor_ctl_port 23 | self.tor_socks_port = tor_socks_port 24 | # TGen clients use connect_ip and connect_port. 25 | self.connect_ip = connect_ip 26 | self.connect_port = connect_port 27 | 28 | # onionperf imports 29 | from . import analysis, monitor, model, util 30 | 31 | def generate_docroot_index(docroot_path): 32 | root = etree.Element("files") 33 | with os.scandir(docroot_path) as files: 34 | for entry in files: 35 | if not entry.name == 'index.xml' and entry.is_file(): 36 | e = etree.SubElement(root, "file") 37 | e.set("name", entry.name) 38 | stat_result = entry.stat() 39 | e.set("size", str(stat_result.st_size)) 40 | mtime = datetime.datetime.fromtimestamp(stat_result.st_mtime) 41 | e.set("last_modified", mtime.replace(microsecond=0).isoformat(sep=' ')) 42 | with open(entry, 'rb') as f: 43 | fbytes = f.read() 44 | e.set("sha256", binascii.b2a_base64(hashlib.sha256(fbytes).digest(), newline=False)) 45 | with open("{0}/index.xml".format(docroot_path), 'wb') as f: 46 | et = etree.ElementTree(root) 47 | et.write(f, pretty_print=True, xml_declaration=True) 48 | 49 | def readline_thread_task(instream, q): 50 | # wait for lines from stdout until the EOF 51 | for line in iter(instream.readline, b''): q.put(line) 52 | 53 | def watchdog_thread_task(cmd, cwd, writable, done_ev, send_stdin, ready_search_str, ready_ev, no_relaunch): 54 | 55 | # launch or re-launch (or don't re-launch, if no_relaunch is set) our sub 56 | # process until we are told to stop if we fail too many times in too short 57 | # of time, give up and exit 58 | failure_times = [] 59 | pause_time_seconds = 0 60 | while done_ev.is_set() is False: 61 | if pause_time_seconds > 0: 62 | time.sleep(pause_time_seconds) 63 | 64 | stdin_handle = subprocess.PIPE if send_stdin is not None else None 65 | subp = subprocess.Popen(shlex.split(cmd), cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=stdin_handle) 66 | 67 | # send some data to stdin if requested 68 | if send_stdin is not None: 69 | subp.stdin.write(send_stdin) 70 | subp.stdin.close() 71 | 72 | # wait for a string to appear in stdout if requested 73 | if ready_search_str is not None: 74 | boot_re = re.compile(ready_search_str) 75 | for bytes in iter(subp.stdout.readline, b''): 76 | line = bytes.decode('utf-8') 77 | writable.write(line) 78 | if boot_re.search(line): 79 | break # got it! 80 | 81 | # now the process is running *and* 'ready' 82 | if ready_ev is not None: 83 | ready_ev.set() 84 | 85 | # a helper will block on stdout and return lines back to us in a queue 86 | stdout_q = queue.Queue() 87 | t = threading.Thread(target=readline_thread_task, args=(subp.stdout, stdout_q)) 88 | t.start() 89 | 90 | # collect output from the helper and write it, continuously checking to make 91 | # sure that the subprocess is still alive and the master doesn't want us to quit 92 | while subp.poll() is None and done_ev.is_set() is False: 93 | try: 94 | bytes = stdout_q.get(True, 1) 95 | writable.write(bytes.decode('utf-8')) 96 | except queue.Empty: 97 | # the queue is empty and the get() timed out, recheck loop conditions 98 | continue 99 | 100 | # either the process died, or we should shut down gracefully 101 | 102 | # if the process is still running, stop it 103 | if subp.poll() is None: 104 | # we collected no exit code, so it is still running 105 | subp.terminate() 106 | subp.wait() 107 | elif done_ev.is_set(): 108 | logging.info("command '{}' finished as expected".format(cmd)) 109 | elif no_relaunch: 110 | logging.info("command '{}' finished on its own".format(cmd)) 111 | # our command finished on its own. time to terminate. 112 | done_ev.set() 113 | else: 114 | logging.warning("command '{}' finished before expected".format(cmd)) 115 | now = time.time() 116 | # remove failures that happened more than an hour ago 117 | while len(failure_times) > 0 and failure_times[0] < (now-3600.0): 118 | failure_times.pop(0) 119 | # add a new failure that just occurred 120 | failure_times.append(now) 121 | pause_time_seconds = 30 122 | 123 | # the subp should be stopped now, flush any remaining lines 124 | #subp.stdout.close() # results in concurrent write error 125 | 126 | # the helper should stop since stdout was closed 127 | t.join() 128 | 129 | # helper thread is done, make sure we drain the remaining lines from the stdout queue 130 | while not stdout_q.empty(): 131 | bytes = stdout_q.get_nowait() 132 | writable.write(bytes.decode('utf-8')) 133 | # if we have too many failures, exit the watchdog to propogate the error up 134 | if len(failure_times) > 10: 135 | break 136 | # now loop around: either the master asked us to stop, or the subp died and we relaunch it 137 | 138 | # too many failures, or master asked us to stop, close the writable before exiting thread 139 | writable.close() 140 | 141 | def logrotate_thread_task(writables, tgen_writable, torctl_writable, docroot, nickname, done_ev): 142 | next_midnight = None 143 | 144 | while not done_ev.wait(1): 145 | # get time 146 | utcnow = datetime.datetime.utcnow() 147 | 148 | # setup the next expiration time (midnight tonight) 149 | if next_midnight is None: 150 | next_midnight = datetime.datetime(utcnow.year, utcnow.month, utcnow.day, 23, 59, 59) 151 | # make sure we are not already past the above time today 152 | if (next_midnight - utcnow).total_seconds() < 0: 153 | next_midnight -= datetime.timedelta(1) # subtract 1 day 154 | 155 | # if we are past midnight, launch the rotate task 156 | if (next_midnight - utcnow).total_seconds() < 0: 157 | # handle the general writables we are watching 158 | for w in writables: 159 | w.rotate_file(filename_datetime=next_midnight) 160 | 161 | # handle tgen and tor writables specially, and do analysis 162 | if tgen_writable is not None or torctl_writable is not None: 163 | try: 164 | 165 | # get our public ip address, do this every night in case it changes 166 | public_measurement_ip_guess = util.get_ip_address() 167 | 168 | # set up the analysis object with our log files 169 | anal = analysis.OPAnalysis(nickname=nickname, ip_address=public_measurement_ip_guess) 170 | if tgen_writable is not None: 171 | anal.add_tgen_file(tgen_writable.rotate_file(filename_datetime=next_midnight)) 172 | if torctl_writable is not None: 173 | anal.add_torctl_file(torctl_writable.rotate_file(filename_datetime=next_midnight)) 174 | 175 | # run the analysis, i.e. parse the files 176 | anal.analyze() 177 | 178 | # save the results in onionperf json format in the www docroot 179 | anal.save(output_prefix=docroot, do_compress=True, date_prefix=next_midnight.date()) 180 | 181 | # update the xml index in docroot 182 | generate_docroot_index(docroot) 183 | except Exception as e: 184 | logging.warning("Caught and ignored exception in TorPerf log parser: {0}".format(repr(e))) 185 | logging.warning("Formatted traceback: {0}".format(traceback.format_exc())) 186 | # reset our timer 187 | next_midnight = None 188 | 189 | class Measurement(object): 190 | 191 | def __init__(self, tor_bin_path, tgen_bin_path, datadir_path, privatedir_path, nickname, additional_client_conf=None, torclient_conf_file=None, torserver_conf_file=None, single_onion=False, drop_guards_interval_hours=0): 192 | self.tor_bin_path = tor_bin_path 193 | self.tgen_bin_path = tgen_bin_path 194 | self.datadir_path = datadir_path 195 | self.privatedir_path = privatedir_path 196 | self.nickname = nickname 197 | self.threads = None 198 | self.done_event = None 199 | self.hs_v3_service_id = None 200 | self.www_docroot = "{0}/htdocs".format(self.datadir_path) 201 | self.base_config = os.environ['BASETORRC'] if "BASETORRC" in os.environ else "" 202 | self.additional_client_conf = additional_client_conf 203 | self.torclient_conf_file = torclient_conf_file 204 | self.torserver_conf_file = torserver_conf_file 205 | self.single_onion = single_onion 206 | self.drop_guards_interval_hours = drop_guards_interval_hours 207 | 208 | def run(self, do_onion=True, do_inet=True, tgen_model=None, tgen_client_conf=None, tgen_server_conf=None): 209 | ''' 210 | only `tgen_server_conf.listen_port` are "public" and need to be opened on the firewall. 211 | if `tgen_client_conf.connect_port` != `tgen_server_conf.listen_port`, then you should have installed a forwarding rule in the firewall. 212 | all ports need to be unique though, and unique among multiple onionperf instances. 213 | 214 | here are some sane defaults: 215 | tgen_client_conf.listen_port=58888, tgen_client_conf.connect_port=8080, tgen_client_conf.tor_ctl_port=59050, tgen_client_conf.tor_socks_port=59000, 216 | tgen_server_conf.listen_port=8080, tgen_server_conf.tor_ctl_port=59051, tgen_server_conf.tor_socks_port=59001 217 | ''' 218 | self.threads = [] 219 | self.done_event = threading.Event() 220 | 221 | if tgen_client_conf is None: 222 | tgen_client_conf = TGenConf(listen_port=58888, 223 | connect_ip='0.0.0.0', 224 | connect_port=8080, 225 | tor_ctl_port=59050, 226 | tor_socks_port=59000) 227 | if tgen_server_conf is None: 228 | tgen_server_conf = TGenConf(listen_port=8080, 229 | tor_ctl_port=59051, 230 | tor_socks_port=59001) 231 | 232 | # if ctrl-c is pressed, shutdown child processes properly 233 | try: 234 | # make sure stem and Tor supports ephemeral HS (version >= 0.2.7.1-alpha) 235 | # and also the NEWNYM mode that clears descriptor cache (version >= 0.2.7.3-rc) 236 | if do_onion: 237 | try: 238 | tor_version = get_system_tor_version(self.tor_bin_path) 239 | if tor_version < Requirement.ADD_ONION or tor_version < Version('0.2.7.3-rc'): # ADD_ONION is a stem 1.4.0 feature 240 | logging.warning("OnionPerf in onion mode requires Tor version >= 0.2.7.3-rc, you have {0}, aborting".format(tor_version)) 241 | return 242 | except: 243 | logging.warning("OnionPerf in onion mode requires stem version >= 1.4.0, you have {0}, aborting".format(stem_version)) 244 | return 245 | 246 | logging.info("Bootstrapping started...") 247 | logging.info("Log files for the client and server processes will be placed in {0}".format(self.datadir_path)) 248 | 249 | general_writables = [] 250 | tgen_client_writable, torctl_client_writable = None, None 251 | 252 | if do_onion or do_inet: 253 | tgen_model.port = tgen_server_conf.listen_port 254 | general_writables.append(self.__start_tgen_server(tgen_model)) 255 | 256 | if do_onion: 257 | logging.info("Onion Service private keys will be placed in {0}".format(self.privatedir_path)) 258 | # one must not have an open socks port when running a single 259 | # onion service. see tor's man page for more information. 260 | if self.single_onion: 261 | tgen_server_conf.tor_socks_port = 0 262 | tor_writable, torctl_writable = self.__start_tor_server(tgen_server_conf.tor_ctl_port, 263 | tgen_server_conf.tor_socks_port, 264 | {tgen_client_conf.connect_port:tgen_server_conf.listen_port}) 265 | general_writables.append(tor_writable) 266 | general_writables.append(torctl_writable) 267 | 268 | if do_onion or do_inet: 269 | tor_writable, torctl_client_writable = self.__start_tor_client(tgen_client_conf.tor_ctl_port, tgen_client_conf.tor_socks_port) 270 | general_writables.append(tor_writable) 271 | 272 | server_urls = [] 273 | if do_onion and self.hs_v3_service_id is not None: 274 | server_urls.append("{0}.onion:{1}".format(self.hs_v3_service_id, tgen_client_conf.connect_port)) 275 | if do_inet: 276 | connect_ip = tgen_client_conf.connect_ip if tgen_client_conf.connect_ip != '0.0.0.0' else util.get_ip_address() 277 | server_urls.append("{0}:{1}".format(connect_ip, tgen_client_conf.connect_port)) 278 | tgen_model.servers = server_urls 279 | 280 | if do_onion or do_inet: 281 | assert len(server_urls) > 0 282 | 283 | tgen_model.port = tgen_client_conf.listen_port 284 | tgen_model.socks_port = tgen_client_conf.tor_socks_port 285 | tgen_client_writable = self.__start_tgen_client(tgen_model) 286 | 287 | self.__start_log_processors(general_writables, tgen_client_writable, torctl_client_writable) 288 | 289 | logging.info("Bootstrapping finished, entering heartbeat loop") 290 | time.sleep(1) 291 | while True: 292 | if tgen_model.num_transfers: 293 | # This function blocks until our TGen client process 294 | # terminated on its own. 295 | self.__wait_for_tgen_client() 296 | break 297 | 298 | if self.__is_alive(): 299 | logging.info("All helper processes seem to be alive :)") 300 | else: 301 | logging.warning("Some parallel components failed too many times or have died :(") 302 | logging.info("We are in a broken state, giving up and exiting now") 303 | break 304 | 305 | logging.info("Next main process heartbeat is in 1 hour (helper processes run on their own schedule)") 306 | logging.info("press CTRL-C for graceful shutdown...") 307 | time.sleep(3600) 308 | else: 309 | logging.info("No measurement mode set, nothing to do") 310 | 311 | except KeyboardInterrupt: 312 | logging.info("Interrupt received, please wait for graceful shutdown") 313 | self.__is_alive() 314 | finally: 315 | logging.info("Cleaning up child processes now...") 316 | 317 | if self.hs_v3_service_id is not None: 318 | try: 319 | with Controller.from_port(port=self.hs_v3_control_port) as torctl: 320 | torctl.authenticate() 321 | torctl.remove_ephemeral_hidden_service(self.hs_v3_service_id) 322 | except: pass # this fails to authenticate if tor proc is dead 323 | 324 | # logging.disable(logging.INFO) 325 | self.done_event.set() 326 | for t in self.threads: 327 | logging.info("Joining {0} thread...".format(t.getName())) 328 | t.join() 329 | time.sleep(1) 330 | # logging.disable(logging.NOTSET) 331 | 332 | logging.info("Child processes terminated") 333 | logging.info("Child process cleanup complete!") 334 | logging.info("Exiting") 335 | 336 | def __start_log_processors(self, general_writables, tgen_writable, torctl_writable): 337 | # rotate the log files, and then parse out the measurement data 338 | logrotate_args = (general_writables, tgen_writable, torctl_writable, self.www_docroot, self.nickname, self.done_event) 339 | logrotate = threading.Thread(target=logrotate_thread_task, name="logrotate", args=logrotate_args) 340 | logrotate.start() 341 | self.threads.append(logrotate) 342 | 343 | def __start_tgen_client(self, tgen_model_conf): 344 | return self.__start_tgen("client", tgen_model_conf) 345 | 346 | def __start_tgen_server(self, tgen_model_conf): 347 | return self.__start_tgen("server", tgen_model_conf) 348 | 349 | def __start_tgen(self, name, tgen_model_conf): 350 | logging.info("Starting TGen {0} process on port {1}...".format(name, tgen_model_conf.port)) 351 | tgen_datadir = "{0}/tgen-{1}".format(self.datadir_path, name) 352 | if not os.path.exists(tgen_datadir): os.makedirs(tgen_datadir) 353 | 354 | tgen_confpath = "{0}/tgen.graphml.xml".format(tgen_datadir) 355 | if os.path.exists(tgen_confpath): os.remove(tgen_confpath) 356 | 357 | if tgen_model_conf.socks_port is None: 358 | model.ListenModel(tgen_port="{0}".format(tgen_model_conf.port)).dump_to_file(tgen_confpath) 359 | logging.info("TGen server running at 0.0.0.0:{0}".format(tgen_model_conf.port)) 360 | else: 361 | tgen_model = model.TorperfModel(tgen_model_conf) 362 | tgen_model.dump_to_file(tgen_confpath) 363 | 364 | tgen_logpath = "{0}/onionperf.tgen.log".format(tgen_datadir) 365 | tgen_writable = util.FileWritable(tgen_logpath) 366 | logging.info("Logging TGen {1} process output to {0}".format(tgen_logpath, name)) 367 | 368 | tgen_cmd = "{0} {1}".format(self.tgen_bin_path, tgen_confpath) 369 | # If we're running in "one-shot mode", TGen client will terminate on 370 | # its own and we don't need our watchdog to restart the process. 371 | no_relaunch = (name == "client" and tgen_model_conf.num_transfers) 372 | tgen_args = (tgen_cmd, tgen_datadir, tgen_writable, self.done_event, None, None, None, no_relaunch) 373 | tgen_watchdog = threading.Thread(target=watchdog_thread_task, name="tgen_{0}_watchdog".format(name), args=tgen_args) 374 | tgen_watchdog.start() 375 | self.threads.append(tgen_watchdog) 376 | 377 | return tgen_writable 378 | 379 | def create_tor_config(self, control_port, socks_port, tor_datadir, name): 380 | """ 381 | This function generates a tor configuration based on a default 382 | template. This template is appended to any tor configuration inherited 383 | via the BASETORRC environment variable. Configuration in any additional 384 | tor client/server config files are then appended depending on whether 385 | "name" points to client or server. Any additional client configuration 386 | specified as a string is also added if the client is being configured. 387 | 388 | Finally, if there is no specific mention of either using Entry Guards 389 | (by default enabled) or bridges (by default disabled) the configurator 390 | appends an option to override the use of Entry Guards, to avoid 391 | measuring the guard node multiple times. 392 | """ 393 | 394 | tor_config_template = self.base_config + "RunAsDaemon 0\nORPort 0\nDirPort 0\nControlPort {0}\nSocksPort {1}\nSocksListenAddress 127.0.0.1\nClientOnly 1\n\ 395 | WarnUnsafeSocks 0\nSafeLogging 0\nMaxCircuitDirtiness 60 seconds\nDataDirectory {2}\nDataDirectoryGroupReadable 1\nLog INFO stdout\n" 396 | tor_config = tor_config_template.format(control_port, socks_port, tor_datadir) 397 | if name == "server" and self.torserver_conf_file: 398 | with open(self.torserver_conf_file, 'r') as f: 399 | tor_config += f.read() 400 | if name == "client" and self.torclient_conf_file: 401 | with open(self.torclient_conf_file, 'r') as f: 402 | tor_config = tor_config + f.read() 403 | if name == "client" and self.additional_client_conf: 404 | tor_config += self.additional_client_conf 405 | if not 'UseEntryGuards' in tor_config and not 'UseBridges' in tor_config and self.drop_guards_interval_hours == 0: 406 | tor_config += "UseEntryGuards 0\n" 407 | if name == "server" and self.single_onion: 408 | tor_config += "HiddenServiceSingleHopMode 1\nHiddenServiceNonAnonymousMode 1\n" 409 | return tor_config 410 | 411 | def start_onion_service(self, 412 | control_port, 413 | hs_port_mapping, 414 | key_path): 415 | logging.info("Creating ephemeral hidden service...") 416 | 417 | with Controller.from_port(port=control_port) as torctl: 418 | torctl.authenticate() 419 | if not os.path.exists(key_path): 420 | response = torctl.create_ephemeral_hidden_service( 421 | hs_port_mapping, 422 | detached=True, 423 | await_publication=True, 424 | key_content='ED25519-V3') 425 | with open(key_path, 'w') as key_file: 426 | key_file.write('%s:%s' % (response.private_key_type, 427 | response.private_key)) 428 | else: 429 | with open(key_path) as key_file: 430 | key_type, key_content = key_file.read().split(':', 1) 431 | response = torctl.create_ephemeral_hidden_service( 432 | hs_port_mapping, 433 | detached=True, 434 | await_publication=True, 435 | key_content=key_content, 436 | key_type=key_type) 437 | self.hs_v3_service_id = response.service_id 438 | self.hs_v3_control_port = control_port 439 | 440 | logging.info("Ephemeral hidden service is available at {0}.onion".format(response.service_id)) 441 | return response.service_id 442 | 443 | def __start_tor_client(self, control_port, socks_port): 444 | return self.__start_tor("client", control_port, socks_port) 445 | 446 | def __start_tor_server(self, control_port, socks_port, hs_port_mapping): 447 | return self.__start_tor("server", control_port, socks_port, hs_port_mapping) 448 | 449 | def __start_tor(self, name, control_port, socks_port, hs_port_mapping=None): 450 | logging.info("Starting Tor {0} process with ControlPort={1}, SocksPort={2}...".format(name, control_port, socks_port)) 451 | tor_datadir = "{0}/tor-{1}".format(self.datadir_path, name) 452 | key_path_v3 = "{0}/os_key_v3".format(self.privatedir_path) 453 | 454 | if not os.path.exists(tor_datadir): os.makedirs(tor_datadir) 455 | tor_config = self.create_tor_config(control_port,socks_port,tor_datadir,name) 456 | tor_confpath = "{0}/torrc".format(tor_datadir) 457 | with open(tor_confpath, 'wt') as f: 458 | f.write(tor_config) 459 | 460 | tor_logpath = "{0}/onionperf.tor.log".format(tor_datadir) 461 | tor_writable = util.FileWritable(tor_logpath) 462 | logging.info("Logging Tor {0} process output to {1}".format(name, tor_logpath)) 463 | 464 | # from stem.process import launch_tor_with_config 465 | # tor_subp = launch_tor_with_config(tor_config, tor_cmd=self.tor_bin_path, completion_percent=100, init_msg_handler=None, timeout=None, take_ownership=False) 466 | tor_cmd = "{0} -f -".format(self.tor_bin_path) 467 | tor_stdin_bytes = str_tools._to_bytes(tor_config) 468 | tor_ready_str = "Bootstrapped 100" 469 | tor_ready_ev = threading.Event() 470 | tor_args = (tor_cmd, tor_datadir, tor_writable, self.done_event, tor_stdin_bytes, tor_ready_str, tor_ready_ev, False) 471 | tor_watchdog = threading.Thread(target=watchdog_thread_task, name="tor_{0}_watchdog".format(name), args=tor_args) 472 | tor_watchdog.start() 473 | self.threads.append(tor_watchdog) 474 | 475 | # wait until Tor finishes bootstrapping 476 | tor_ready_ev.wait() 477 | 478 | torctl_logpath = "{0}/onionperf.torctl.log".format(tor_datadir) 479 | torctl_writable = util.FileWritable(torctl_logpath) 480 | logging.info("Logging Tor {0} control port monitor output to {1}".format(name, torctl_logpath)) 481 | 482 | # give a few seconds to make sure Tor had time to start listening on the control port 483 | time.sleep(3) 484 | 485 | torctl_events = [e for e in monitor.get_supported_torctl_events() if e not in ['DEBUG', 'INFO', 'NOTICE', 'WARN', 'ERR']] 486 | newnym_interval_seconds = 300 487 | torctl_args = (control_port, torctl_writable, torctl_events, newnym_interval_seconds, self.drop_guards_interval_hours, self.done_event) 488 | torctl_helper = threading.Thread(target=monitor.tor_monitor_run, name="torctl_{0}_helper".format(name), args=torctl_args) 489 | torctl_helper.start() 490 | self.threads.append(torctl_helper) 491 | 492 | if hs_port_mapping is not None: 493 | self.start_onion_service(control_port, hs_port_mapping, key_path_v3) 494 | 495 | return tor_writable, torctl_writable 496 | 497 | def __wait_for_tgen_client(self): 498 | logging.info("Waiting for TGen client to finish.") 499 | for t in self.threads: 500 | if t.getName() == "tgen_client_watchdog": 501 | while t.is_alive(): 502 | time.sleep(1) 503 | logging.info("TGen client finished.") 504 | 505 | def __is_alive(self): 506 | all_alive = True 507 | for t in self.threads: 508 | t_name = t.getName() 509 | if t.is_alive(): 510 | logging.info("{0} is alive".format(t_name)) 511 | else: 512 | logging.warning("{0} is dead!".format(t_name)) 513 | all_alive = False 514 | return all_alive 515 | -------------------------------------------------------------------------------- /onionperf/model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | OnionPerf 3 | Authored by Rob Jansen, 2015 4 | Copyright 2015-2020 The Tor Project 5 | See LICENSE for licensing information 6 | ''' 7 | 8 | from abc import ABCMeta, abstractmethod 9 | from io import StringIO 10 | from networkx import read_graphml, write_graphml, DiGraph 11 | 12 | class TGenModel(object, metaclass=ABCMeta): 13 | ''' 14 | an action-dependency graph model for Shadow's traffic generator 15 | ''' 16 | 17 | def dump_to_string(self): 18 | s = StringIO() 19 | write_graphml(self.graph, s) 20 | return s.getvalue() 21 | 22 | def dump_to_file(self, filename): 23 | write_graphml(self.graph, filename) 24 | 25 | class TGenLoadableModel(TGenModel): 26 | 27 | def __init__(self, graph): 28 | self.graph = graph 29 | 30 | @classmethod 31 | def from_file(cls, filename): 32 | graph = read_graphml(filename) 33 | model_instance = cls(graph) 34 | return model_instance 35 | 36 | @classmethod 37 | def from_string(cls, string): 38 | s = StringIO() 39 | s.write(string) 40 | graph = read_graphml(s) 41 | model_instance = cls(graph) 42 | return model_instance 43 | 44 | class TGenModelConf(object): 45 | """Represents a TGen traffic model configuration.""" 46 | def __init__(self, pause_initial=300, num_transfers=1, transfer_size="5 MiB", 47 | continuous_transfers=False, pause_between=300, port=None, servers=[], 48 | socks_port=None): 49 | self.pause_initial = pause_initial 50 | self.pause_between = pause_between 51 | self.num_transfers = num_transfers 52 | self.transfer_size = transfer_size 53 | self.continuous_transfers = continuous_transfers 54 | self.port = port 55 | self.servers = servers 56 | self.socks_port = socks_port 57 | 58 | 59 | class GeneratableTGenModel(TGenModel, metaclass=ABCMeta): 60 | 61 | @abstractmethod 62 | def generate(self): 63 | pass 64 | 65 | class ListenModel(GeneratableTGenModel): 66 | 67 | def __init__(self, tgen_port="8888"): 68 | self.tgen_port = tgen_port 69 | self.graph = self.generate() 70 | 71 | def generate(self): 72 | g = DiGraph() 73 | g.add_node("start", serverport=self.tgen_port, loglevel="info", heartbeat="1 minute") 74 | return g 75 | 76 | 77 | class TorperfModel(GeneratableTGenModel): 78 | 79 | def __init__(self, config): 80 | self.config = config 81 | self.graph = self.generate() 82 | 83 | def generate(self): 84 | server_str = ','.join(self.config.servers) 85 | g = DiGraph() 86 | 87 | if self.config.socks_port is not None: 88 | g.add_node("start", 89 | serverport=self.config.port, 90 | peers=server_str, 91 | loglevel="info", 92 | heartbeat="1 minute", 93 | socksproxy="127.0.0.1:{0}".format(self.config.socks_port)) 94 | else: 95 | g.add_node("start", 96 | serverport=self.config.port, 97 | peers=server_str, 98 | loglevel="info", 99 | heartbeat="1 minute") 100 | 101 | g.add_node("pause_initial", 102 | time="%d seconds" % self.config.pause_initial) 103 | g.add_node("stream", 104 | sendsize="0", 105 | recvsize=self.config.transfer_size, 106 | timeout="270 seconds", 107 | stallout="0 seconds") 108 | g.add_node("pause_between", 109 | time="%d seconds" % self.config.pause_between) 110 | 111 | g.add_edge("start", "pause_initial") 112 | g.add_edge("pause_initial", "stream") 113 | g.add_edge("pause_initial", "pause_between") 114 | g.add_edge("pause_between", "stream") 115 | g.add_edge("pause_between", "pause_between") 116 | 117 | # only add an end node if we need to stop 118 | if not self.config.continuous_transfers: 119 | # one-shot mode, i.e., end after configured number of transfers 120 | g.add_node("end", 121 | count="%d" % self.config.num_transfers) 122 | # check for end condition after every transfer 123 | g.add_edge("stream", "end") 124 | 125 | return g 126 | 127 | 128 | def dump_example_tgen_torperf_model(domain_name, onion_name): 129 | # the server listens on 8888, the client uses Tor to come back directly, and using a hidden serv 130 | server = ListenModel(tgen_port="8888") 131 | public_server_str = "{0}:8888".format(domain_name) 132 | onion_server_str = "{0}:8890".format(onion_name) 133 | client = TorperfModel(tgen_port="8889", socksproxy="localhost:9001", tgen_servers=[public_server_str, onion_server_str]) 134 | 135 | # save to specified paths 136 | server.dump_to_file("tgen.server.torperf.graphml.xml") 137 | client.dump_to_file("tgen.client.torperf.graphml.xml") 138 | -------------------------------------------------------------------------------- /onionperf/monitor.py: -------------------------------------------------------------------------------- 1 | ''' 2 | OnionPerf 3 | Authored by Rob Jansen, 2015 4 | Copyright 2015-2020 The Tor Project 5 | See LICENSE for licensing information 6 | ''' 7 | 8 | import datetime 9 | from time import sleep 10 | from socket import gethostname 11 | from functools import partial 12 | 13 | # stem imports 14 | from stem.control import EventType, Controller, Signal 15 | 16 | def get_supported_torctl_events(): 17 | return list(EventType) 18 | 19 | class TorMonitor(object): 20 | 21 | def __init__(self, tor_ctl_port, writable, events=get_supported_torctl_events()): 22 | self.tor_ctl_port = tor_ctl_port 23 | self.writable = writable 24 | self.events = events 25 | 26 | def run(self, newnym_interval_seconds=None, drop_guards_interval_hours=0, done_ev=None): 27 | with Controller.from_port(port=self.tor_ctl_port) as torctl: 28 | torctl.authenticate() 29 | 30 | vers_str = "Starting torctl program on host {2} using Tor version {0} status={1}\n".format(torctl.get_info('version'), torctl.get_info('status/version/current'), gethostname()) 31 | self.__log(self.writable, vers_str) 32 | 33 | boot_str = "{0}\n".format(torctl.get_info('status/bootstrap-phase')) 34 | self.__log(self.writable, boot_str) 35 | 36 | # register for async events! 37 | # some events are only supported in newer versions of tor, so ignore errors from older tors 38 | event_handler = partial(TorMonitor.__handle_tor_event, self, self.writable,) 39 | for e in self.events: 40 | if e in EventType: 41 | # try to add all events that this stem supports 42 | # silently ignore those that our Tor does not support 43 | try: 44 | torctl.add_event_listener(event_handler, EventType[e]) 45 | except: 46 | self.__log(self.writable, "[WARNING] event %s is recognized by stem but not by tor\n" % e) 47 | pass 48 | else: 49 | try: 50 | torctl.add_event_listener(event_handler, e) 51 | except: 52 | self.__log(self.writable, "[ERROR] unrecognized event %s in tor\n" % e) 53 | return 54 | 55 | # let stem run its threads and log all of the events, until user interrupts 56 | try: 57 | interval_count = 0 58 | if newnym_interval_seconds is not None: 59 | next_newnym = newnym_interval_seconds 60 | next_drop_guards = 0 61 | while done_ev is None or not done_ev.is_set(): 62 | # if self.filepath != '-' and os.path.exists(self.filepath): 63 | # with open(self.filepath, 'rb') as sizef: 64 | # msg = "tor-ctl-logger[port={0}] logged {1} bytes to {2}, press CTRL-C to quit".format(self.tor_ctl_port, os.fstat(sizef.fileno()).st_size, self.filepath) 65 | # logging.info(msg) 66 | if drop_guards_interval_hours > 0 and interval_count >= next_drop_guards: 67 | next_drop_guards += drop_guards_interval_hours * 3600 68 | torctl.drop_guards() 69 | drop_timeouts_response = torctl.msg("DROPTIMEOUTS") 70 | if not drop_timeouts_response.is_ok(): 71 | self.__log(self.writable, "[WARNING] unrecognized command DROPTIMEOUTS in tor\n") 72 | sleep(1) 73 | interval_count += 1 74 | if newnym_interval_seconds is not None and interval_count >= next_newnym: 75 | next_newnym += newnym_interval_seconds 76 | torctl.signal(Signal.NEWNYM) 77 | 78 | except KeyboardInterrupt: 79 | pass # the user hit ctrl+c 80 | 81 | self.writable.close() 82 | 83 | def __handle_tor_event(self, writable, event): 84 | self.__log(writable, event.raw_content()) 85 | 86 | def __log(self, writable, msg): 87 | now = datetime.datetime.now() 88 | utcnow = datetime.datetime.utcnow() 89 | epoch = datetime.datetime(1970, 1, 1) 90 | unix_ts = (utcnow - epoch).total_seconds() 91 | writable.write("{0} {1:.02f} {2}".format(now.strftime("%Y-%m-%d %H:%M:%S"), unix_ts, msg)) 92 | 93 | def tor_monitor_run(tor_ctl_port, writable, events, newnym_interval_seconds, drop_guards_interval_hours, done_ev): 94 | torctl_monitor = TorMonitor(tor_ctl_port, writable, events) 95 | torctl_monitor.run(newnym_interval_seconds=newnym_interval_seconds, drop_guards_interval_hours=drop_guards_interval_hours, done_ev=done_ev) 96 | -------------------------------------------------------------------------------- /onionperf/reprocessing.py: -------------------------------------------------------------------------------- 1 | from onionperf.analysis import OPAnalysis 2 | from onionperf import util 3 | from functools import partial 4 | from multiprocessing import Pool, cpu_count 5 | import datetime 6 | import fnmatch 7 | import logging 8 | import os 9 | import re 10 | import sys 11 | 12 | 13 | def collect_logs(dirpath, pattern): 14 | logs = [] 15 | for root, dirnames, filenames in os.walk(dirpath): 16 | for filename in fnmatch.filter(sorted(filenames), pattern): 17 | logs.append(os.path.join(root, filename)) 18 | return logs 19 | 20 | 21 | def match(tgen_logs, tor_logs, date_filter): 22 | log_pairs = [] 23 | for tgen_log in tgen_logs: 24 | m = re.search(r'(\d+-\d+-\d+)', tgen_log) 25 | if m: 26 | date = m.group(0) 27 | fdate = datetime.datetime.strptime(date, "%Y-%m-%d") 28 | found = False 29 | if date_filter is None or util.do_dates_match(date_filter, fdate): 30 | for tor_log in tor_logs: 31 | if date in tor_log: 32 | log_pairs.append((tgen_log, tor_log, fdate)) 33 | found = True 34 | break 35 | if not found: 36 | logging.warning( 37 | 'Skipping file {0}, could not find a match for it'. 38 | format(tgen_log)) 39 | 40 | else: 41 | logging.warning( 42 | 'Filename {0} does not contain a date'.format(tgen_log)) 43 | if not log_pairs: 44 | logging.warning( 45 | 'Could not find any log matches. No analyses will be performed') 46 | return log_pairs 47 | 48 | 49 | def analyze_func(prefix, nick, pair): 50 | analysis = OPAnalysis(nickname=nick) 51 | logging.info('Analysing pair for date {0}'.format(pair[2])) 52 | analysis.add_tgen_file(pair[0]) 53 | analysis.add_torctl_file(pair[1]) 54 | analysis.analyze(date_filter=pair[2]) 55 | analysis.save(output_prefix=prefix) 56 | return 1 57 | 58 | 59 | def multiprocess_logs(log_pairs, prefix, nick=None): 60 | pool = Pool(cpu_count()) 61 | analyses = None 62 | try: 63 | func = partial(analyze_func, prefix, nick) 64 | mr = pool.map_async(func, log_pairs) 65 | pool.close() 66 | while not mr.ready(): 67 | mr.wait(1) 68 | except KeyboardInterrupt: 69 | logging.info("interrupted, terminating process pool") 70 | pool.terminate() 71 | pool.join() 72 | sys.exit() 73 | except Exception as e: 74 | logging.error(e) 75 | -------------------------------------------------------------------------------- /onionperf/tests/data/bin/script: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/torproject/onionperf/118d14990ce4821232ed6751c6a3d2b05e872dbe/onionperf/tests/data/bin/script -------------------------------------------------------------------------------- /onionperf/tests/data/bin/script_non_exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/torproject/onionperf/118d14990ce4821232ed6751c6a3d2b05e872dbe/onionperf/tests/data/bin/script_non_exe -------------------------------------------------------------------------------- /onionperf/tests/data/config: -------------------------------------------------------------------------------- 1 | UseBridges 1 2 | -------------------------------------------------------------------------------- /onionperf/tests/data/dirs/abcdefg.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/torproject/onionperf/118d14990ce4821232ed6751c6a3d2b05e872dbe/onionperf/tests/data/dirs/abcdefg.txt -------------------------------------------------------------------------------- /onionperf/tests/data/parse_error: -------------------------------------------------------------------------------- 1 | 2018-04-14 21:10:04 1523740204.809894 [message] [shd-tgen-transfer.c:803] [_tgentransfer_log] [transfer-error] transport TCP,17,NULL:37.218.247.40:26006,NULL:0.0.0.0:0,146.0.73.4:146.0.73.4:1313,state=SUCCESS,error=NONE transfer (null),26847,op-nl,NONE,0,(null),0,state=ERROR,error=AUTH total-bytes-read=1 total-bytes-write=0 payload-bytes-write=0/0 (-nan%) usecs-to-socket-create=0 usecs-to-socket-connect=8053676879205 usecs-to-proxy-init=-1 usecs-to-proxy-choice=-1 usecs-to-proxy-request=-1 usecs-to-proxy-response=-1 usecs-to-command=-1 usecs-to-response=-1 usecs-to-first-byte=-1 usecs-to-last-byte=-1 usecs-to-checksum=-1 2 | -------------------------------------------------------------------------------- /onionperf/tests/data/simplefile: -------------------------------------------------------------------------------- 1 | onionperf -------------------------------------------------------------------------------- /onionperf/tests/data/simplefile.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/torproject/onionperf/118d14990ce4821232ed6751c6a3d2b05e872dbe/onionperf/tests/data/simplefile.xz -------------------------------------------------------------------------------- /onionperf/tests/test_analysis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pkg_resources 3 | from nose.tools import * 4 | from onionperf import util 5 | from tgentools import analysis 6 | 7 | 8 | def absolute_data_path(relative_path=""): 9 | """ 10 | Returns an absolute path for test data given a relative path. 11 | """ 12 | return pkg_resources.resource_filename("onionperf", 13 | "tests/data/" + relative_path) 14 | 15 | DATA_DIR = absolute_data_path() 16 | LINE_ERROR = '2019-04-22 14:41:20 1555940480.647663 [message] [tgen-stream.c:1618] [_tgenstream_log] [stream-error] transport [fd=12,local=localhost:127.0.0.1:46878,proxy=localhost:127.0.0.1:43735,remote=dc34og3c3aqdqntblnxkstzfvh7iy7llojd4fi5j23y2po32ock2k7ad.onion:0.0.0.0:8080,state=ERROR,error=READ] stream [id=4,vertexid=stream5m,name=cyan,peername=(null),sendsize=0,recvsize=5242880,sendstate=SEND_COMMAND,recvstate=RECV_NONE,error=PROXY] bytes [total-bytes-recv=0,total-bytes-send=0,payload-bytes-recv=0,payload-bytes-send=0,payload-progress-recv=0.00%,payload-progress-send=100.00%] times [created-ts=5948325159988,usecs-to-socket-create=11,usecs-to-socket-connect=210,usecs-to-proxy-init=283,usecs-to-proxy-choice=348,usecs-to-proxy-request=412,usecs-to-proxy-response=-1,usecs-to-command=-1,usecs-to-response=-1,usecs-to-first-byte-recv=-1,usecs-to-last-byte-recv=-1,usecs-to-checksum-recv=-1,usecs-to-first-byte-send=-1,usecs-to-last-byte-send=-1,usecs-to-checksum-send=-1,now-ts=5948446579043]' 17 | 18 | NO_PARSE_LINE = '2018-04-14 21:10:04 1523740204.809894 [message] [tgen-stream.c:1618] [_tgenstream_log] [stream-error] transport [fd=17,local=localhost:127.0.0.1:46878,proxy=localhost:127.0.0.1:43735,remote=dc34og3c3aqdqntblnxkstzfvh7iy7llojd4fi5j23y2po32ock2k7ad.onion:0.0.0.0:8080,state=SUCCESS,error=NONE] stream [id=4,vertexid=stream5m,name=cyan,peername=(null),sendsize=0,recvsize=5242880,sendstate=SEND_COMMAND,recvstate=RECV_NONE,error=PROXY] bytes [total-bytes-recv=1,total-bytes-send=0,payload-bytes-recv=0,payload-bytes-send=0,payload-progress-recv=0.00%,payload-progress-send=100.00%] times [created-ts=5948325159988,usecs-to-socket-create=0,usecs-to-socket-connect=210,usecs-to-proxy-init=-1,usecs-to-proxy-choice=-1,usecs-to-proxy-request=-1,usecs-to-proxy-response=-1,usecs-to-command=-1,usecs-to-response=-1,usecs-to-first-byte-recv=-1,usecs-to-last-byte-recv=-1,usecs-to-checksum-recv=-1,usecs-to-first-byte-send=-1,usecs-to-last-byte-send=-1,usecs-to-checksum-send=-1,now-ts=5948446579043]' 19 | 20 | 21 | def test_stream_status_event(): 22 | stream = analysis.StreamStatusEvent(LINE_ERROR) 23 | assert_equals(stream.is_success, False) 24 | assert_equals(stream.is_error, False) 25 | assert_equals(stream.is_complete, False) 26 | assert_equals(stream.unix_ts_end, 1555940480.647663) 27 | assert_equals(stream.transport_info['local'], 'localhost:127.0.0.1:46878') 28 | assert_equals(stream.transport_info['proxy'], 'localhost:127.0.0.1:43735') 29 | assert_equals( 30 | stream.transport_info['remote'], 31 | 'dc34og3c3aqdqntblnxkstzfvh7iy7llojd4fi5j23y2po32ock2k7ad.onion:0.0.0.0:8080' 32 | ) 33 | assert_equals(stream.stream_id, '4:12:localhost:127.0.0.1:46878:dc34og3c3aqdqntblnxkstzfvh7iy7llojd4fi5j23y2po32ock2k7ad.onion:0.0.0.0:8080') 34 | assert_equals(stream.stream_info['name'], 'cyan') 35 | assert_equals(stream.stream_info['recvsize'], '5242880') 36 | assert_equals(stream.stream_info['peername'], '(null)') 37 | assert_equals(stream.stream_info['error'], 'PROXY') 38 | assert_equals(stream.byte_info['total-bytes-recv'], '0') 39 | assert_equals(stream.byte_info['total-bytes-send'], '0') 40 | assert_equals(stream.byte_info['payload-bytes-recv'], '0') 41 | 42 | 43 | def test_stream_complete_event_init(): 44 | complete = analysis.StreamCompleteEvent(LINE_ERROR) 45 | assert_equals(complete.is_complete, True) 46 | assert_equals(complete.time_info['usecs-to-proxy-init'], '283') 47 | assert_equals(complete.time_info['usecs-to-proxy-request'], '412') 48 | assert_equals(complete.time_info['usecs-to-proxy-choice'], '348') 49 | assert_equals(complete.time_info['usecs-to-socket-connect'], '210') 50 | assert_equals(complete.time_info['usecs-to-socket-create'], '11') 51 | assert_equals(complete.unix_ts_start, 1555940359.2286081) 52 | 53 | 54 | def test_stream_error_event(): 55 | error = analysis.StreamErrorEvent(LINE_ERROR) 56 | assert_equals(error.is_error, True) 57 | assert_equals(error.is_success, False) 58 | 59 | 60 | def test_stream_success_event_init(): 61 | success = analysis.StreamSuccessEvent(LINE_ERROR) 62 | assert_equals(success.is_success, True) 63 | 64 | 65 | def test_stream_object_init(): 66 | error = analysis.StreamErrorEvent(LINE_ERROR) 67 | s = analysis.Stream(error.stream_id) 68 | assert_equals(s.id, '4:12:localhost:127.0.0.1:46878:dc34og3c3aqdqntblnxkstzfvh7iy7llojd4fi5j23y2po32ock2k7ad.onion:0.0.0.0:8080') 69 | assert_equals(s.last_event, None) 70 | 71 | 72 | def test_stream_object_add_event(): 73 | error = analysis.StreamErrorEvent(LINE_ERROR) 74 | s = analysis.Stream(error.stream_id) 75 | s.add_event(error) 76 | assert_equals(s.last_event, error) 77 | 78 | 79 | @raises(KeyError) 80 | def test_stream_object_get_data_error(): 81 | error = analysis.StreamErrorEvent(LINE_ERROR) 82 | s = analysis.Stream(error.stream_id) 83 | s.add_event(error) 84 | s.get_data()['elapsed_seconds']['payload_progress_recv'] 85 | 86 | 87 | def test_stream_object_get_data_no_error(): 88 | success = analysis.StreamSuccessEvent(LINE_ERROR) 89 | s = analysis.Stream(success.stream_id) 90 | s.add_event(success) 91 | assert_true( 92 | s.get_data()['elapsed_seconds']['payload_progress_recv'] is not None) 93 | 94 | 95 | def test_stream_object_end_to_end(): 96 | error = analysis.StreamErrorEvent(LINE_ERROR) 97 | s = analysis.Stream(error.stream_id) 98 | s.add_event(error) 99 | assert_equals( 100 | s.get_data(), { 101 | 'is_success': False, 102 | 'is_error': True, 103 | 'is_complete': True, 104 | 'unix_ts_end': 1555940480.647663, 105 | 'transport_info': {'fd': '12', 106 | 'local': 'localhost:127.0.0.1:46878', 107 | 'proxy': 'localhost:127.0.0.1:43735', 108 | 'remote': 'dc34og3c3aqdqntblnxkstzfvh7iy7llojd4fi5j23y2po32ock2k7ad.onion:0.0.0.0:8080', 109 | 'state': 'ERROR', 110 | 'error': 'READ'}, 111 | 'stream_info': { 112 | 'id': '4', 113 | 'vertexid': 'stream5m', 114 | 'name': 'cyan', 115 | 'peername': '(null)', 116 | 'sendsize': '0', 117 | 'recvsize': '5242880', 118 | 'sendstate': 'SEND_COMMAND', 119 | 'recvstate': 'RECV_NONE', 120 | 'error': 'PROXY' 121 | }, 122 | 'byte_info': { 123 | 'total-bytes-recv': '0', 124 | 'total-bytes-send': '0', 125 | 'payload-bytes-recv': '0', 126 | 'payload-bytes-send': '0', 127 | 'payload-progress-recv': '0.00%', 128 | 'payload-progress-send': '100.00%' 129 | }, 130 | 'time_info': { 131 | 'created-ts': '5948325159988', 132 | 'usecs-to-socket-create': '11', 133 | 'usecs-to-socket-connect': '210', 134 | 'usecs-to-proxy-init': '283', 135 | 'usecs-to-proxy-choice': '348', 136 | 'usecs-to-proxy-request': '412', 137 | 'usecs-to-proxy-response': '-1', 138 | 'usecs-to-command': '-1', 139 | 'usecs-to-response': '-1', 140 | 'usecs-to-first-byte-recv': '-1', 141 | 'usecs-to-last-byte-recv': '-1', 142 | 'usecs-to-checksum-recv': '-1', 143 | 'usecs-to-first-byte-send': '-1', 144 | 'usecs-to-last-byte-send': '-1', 145 | 'usecs-to-checksum-send': '-1', 146 | 'now-ts': '5948446579043' 147 | }, 148 | 'stream_id': '4:12:localhost:127.0.0.1:46878:dc34og3c3aqdqntblnxkstzfvh7iy7llojd4fi5j23y2po32ock2k7ad.onion:0.0.0.0:8080', 149 | 'unix_ts_start': 1555940359.2286081 150 | }) 151 | 152 | def test_parsing_parse_error(): 153 | parser = analysis.TGenParser() 154 | parser.parse(util.DataSource(DATA_DIR + 'parse_error')) 155 | -------------------------------------------------------------------------------- /onionperf/tests/test_measurement.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pkg_resources 3 | from nose.tools import assert_equals 4 | from onionperf import measurement 5 | 6 | 7 | def absolute_data_path(relative_path=""): 8 | """ 9 | Returns an absolute path for test data given a relative path. 10 | """ 11 | return pkg_resources.resource_filename("onionperf", 12 | "tests/data/" + relative_path) 13 | 14 | 15 | DATA_DIR = absolute_data_path() 16 | 17 | 18 | def test_create_tor_config_env_var(): 19 | """ 20 | This test uses Measurement.create_tor_config to 21 | create a configuration string for tor when the BASETORRC env variable is set. 22 | It first sets the environment variable, then initializes an empty 23 | measurement and then calls create_tor_config with a series of well known 24 | variables. The resulting config is tested against the expected config for 25 | both client and server. Also 26 | this tests if the contents of the env variable are correctly recorded in 27 | the class attribute base_config. 28 | The environment variable is unset only if the test is successful. 29 | """ 30 | 31 | os.environ["BASETORRC"] = "UseBridges 1\n" 32 | meas = measurement.Measurement(None, None, None, None, None, None, None, None, 33 | None) 34 | known_config = "UseBridges 1\nRunAsDaemon 0\nORPort 0\nDirPort 0\nControlPort 9001\nSocksPort 9050\nSocksListenAddress 127.0.0.1\nClientOnly 1\n\ 35 | WarnUnsafeSocks 0\nSafeLogging 0\nMaxCircuitDirtiness 60 seconds\nDataDirectory /tmp/\nDataDirectoryGroupReadable 1\nLog INFO stdout\n" 36 | 37 | config_client = meas.create_tor_config(9001, 9050, "/tmp/", "client") 38 | config_server = meas.create_tor_config(9001, 9050, "/tmp/", "server") 39 | assert_equals(config_client, known_config) 40 | assert_equals(config_server, known_config) 41 | assert_equals(meas.base_config, "UseBridges 1\n") 42 | del os.environ["BASETORRC"] 43 | 44 | 45 | def test_create_tor_config_client_lines(): 46 | """ 47 | This test uses Measurement.create_tor_config to create a configuration 48 | string for tor when additional client config is specified. 49 | It initializes an empty measurement, setting the additional_client_config 50 | parameter. The resulting configuration is then tested against the expected 51 | configuration for both client and server. 52 | """ 53 | 54 | known_config = "RunAsDaemon 0\nORPort 0\nDirPort 0\nControlPort 9001\nSocksPort 9050\nSocksListenAddress 127.0.0.1\nClientOnly 1\n\ 55 | WarnUnsafeSocks 0\nSafeLogging 0\nMaxCircuitDirtiness 60 seconds\nDataDirectory /tmp/\nDataDirectoryGroupReadable 1\nLog INFO stdout\nUseBridges 1\n" 56 | 57 | known_config_server = "RunAsDaemon 0\nORPort 0\nDirPort 0\nControlPort 9001\nSocksPort 9050\nSocksListenAddress 127.0.0.1\nClientOnly 1\n\ 58 | WarnUnsafeSocks 0\nSafeLogging 0\nMaxCircuitDirtiness 60 seconds\nDataDirectory /tmp/\nDataDirectoryGroupReadable 1\nLog INFO stdout\nUseEntryGuards 0\n" 59 | 60 | meas = measurement.Measurement(None, None, None, None, None, 61 | "UseBridges 1\n", None, None, False) 62 | config_client = meas.create_tor_config(9001, 9050, "/tmp/", "client") 63 | config_server = meas.create_tor_config(9001, 9050, "/tmp/", "server") 64 | assert_equals(config_client, known_config) 65 | assert_equals(config_server, known_config_server) 66 | 67 | 68 | def test_create_tor_config_client_file(): 69 | """ 70 | This test uses Measurement.create_tor_config to create a configuration 71 | string for tor when additional client config is specified. 72 | It initializes an empty measurement, setting the additional_client_config 73 | parameter. The resulting configuration is then tested against the expected 74 | configuration for both client and server. 75 | """ 76 | 77 | known_config_server = "RunAsDaemon 0\nORPort 0\nDirPort 0\nControlPort 9001\nSocksPort 9050\nSocksListenAddress 127.0.0.1\nClientOnly 1\n\ 78 | WarnUnsafeSocks 0\nSafeLogging 0\nMaxCircuitDirtiness 60 seconds\nDataDirectory /tmp/\nDataDirectoryGroupReadable 1\nLog INFO stdout\nUseEntryGuards 0\n" 79 | 80 | known_config = "RunAsDaemon 0\nORPort 0\nDirPort 0\nControlPort 9001\nSocksPort 9050\nSocksListenAddress 127.0.0.1\nClientOnly 1\n\ 81 | WarnUnsafeSocks 0\nSafeLogging 0\nMaxCircuitDirtiness 60 seconds\nDataDirectory /tmp/\nDataDirectoryGroupReadable 1\nLog INFO stdout\nUseBridges 1\n" 82 | 83 | meas = measurement.Measurement(None, None, None, None, None, None, 84 | absolute_data_path("config"), None, False) 85 | config_client = meas.create_tor_config(9001, 9050, "/tmp/", "client") 86 | config_server = meas.create_tor_config(9001, 9050, "/tmp/", "server") 87 | assert_equals(config_client, known_config) 88 | assert_equals(config_server, known_config_server) 89 | 90 | 91 | def test_create_tor_config_server_file(): 92 | """ 93 | This test uses Measurement.create_tor_config to create a configuration 94 | string for tor when additional server config is specified in a file. 95 | It initializes an empty measurement, setting the additional_client_config 96 | parameter. The resulting configuration is then tested against the expected 97 | configuration for both client and server. 98 | """ 99 | 100 | known_config_server = "RunAsDaemon 0\nORPort 0\nDirPort 0\nControlPort 9001\nSocksPort 9050\nSocksListenAddress 127.0.0.1\nClientOnly 1\n\ 101 | WarnUnsafeSocks 0\nSafeLogging 0\nMaxCircuitDirtiness 60 seconds\nDataDirectory /tmp/\nDataDirectoryGroupReadable 1\nLog INFO stdout\nUseBridges 1\n" 102 | 103 | known_config = "RunAsDaemon 0\nORPort 0\nDirPort 0\nControlPort 9001\nSocksPort 9050\nSocksListenAddress 127.0.0.1\nClientOnly 1\n\ 104 | WarnUnsafeSocks 0\nSafeLogging 0\nMaxCircuitDirtiness 60 seconds\nDataDirectory /tmp/\nDataDirectoryGroupReadable 1\nLog INFO stdout\nUseEntryGuards 0\n" 105 | 106 | meas = measurement.Measurement(None, None, None, None, None, None, None, 107 | absolute_data_path("config"), False) 108 | config_client = meas.create_tor_config(9001, 9050, "/tmp/", "client") 109 | config_server = meas.create_tor_config(9001, 9050, "/tmp/", "server") 110 | assert_equals(config_client, known_config) 111 | assert_equals(config_server, known_config_server) 112 | -------------------------------------------------------------------------------- /onionperf/tests/test_reprocessing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pkg_resources 3 | import datetime 4 | import tempfile 5 | import shutil 6 | from nose.tools import * 7 | from onionperf import analysis 8 | from onionperf import reprocessing 9 | 10 | 11 | def absolute_data_path(relative_path=""): 12 | """ 13 | Returns an absolute path for test data given a relative path. 14 | """ 15 | return pkg_resources.resource_filename("onionperf", 16 | "tests/data/" + relative_path) 17 | 18 | 19 | DATA_DIR = absolute_data_path() 20 | 21 | def test_log_collection_tgen(): 22 | log_list = reprocessing.collect_logs(DATA_DIR, '*tgen.log') 23 | well_known_list = [ DATA_DIR + 'logs/onionperf.tgen.log', DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.tgen.log' ] 24 | assert_equals(log_list, well_known_list ) 25 | 26 | def test_log_collection_torctl(): 27 | log_list = reprocessing.collect_logs(DATA_DIR, '*torctl.log') 28 | well_known_list = [ DATA_DIR + 'logs/onionperf.torctl.log', DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.torctl.log' ] 29 | assert_equals(log_list, well_known_list ) 30 | 31 | def test_log_match(): 32 | tgen_logs = reprocessing.collect_logs(DATA_DIR, '*tgen.log') 33 | torctl_logs = reprocessing.collect_logs(DATA_DIR, '*torctl.log') 34 | log_pairs = reprocessing.match(tgen_logs, torctl_logs, None) 35 | well_known_list = [(DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.tgen.log', DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.torctl.log', datetime.datetime(2019, 1, 10, 0, 0))] 36 | assert_equals(log_pairs, well_known_list) 37 | 38 | def test_log_match_no_log_date(): 39 | tgen_logs = reprocessing.collect_logs(DATA_DIR, '*perf.tgen.log') 40 | torctl_logs = reprocessing.collect_logs(DATA_DIR, '*perf.torctl.log') 41 | log_pairs = reprocessing.match(tgen_logs, torctl_logs, None) 42 | well_known_list = [] 43 | assert_equals(log_pairs, well_known_list) 44 | 45 | def test_log_match_with_filter_date(): 46 | tgen_logs = reprocessing.collect_logs(DATA_DIR, '*tgen.log') 47 | torctl_logs = reprocessing.collect_logs(DATA_DIR, '*torctl.log') 48 | test_date = datetime.date(2019, 1, 10) 49 | log_pairs = reprocessing.match(tgen_logs, torctl_logs, test_date) 50 | well_known_list = [(DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.tgen.log', DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.torctl.log', datetime.datetime(2019, 1, 10, 0, 0))] 51 | assert_equals(log_pairs, well_known_list) 52 | 53 | def test_log_match_with_wrong_filter_date(): 54 | tgen_logs = reprocessing.collect_logs(DATA_DIR, '*tgen.log') 55 | torctl_logs = reprocessing.collect_logs(DATA_DIR, '*torctl.log') 56 | test_date = datetime.date(2017, 1, 1) 57 | log_pairs = reprocessing.match(tgen_logs, torctl_logs, test_date) 58 | well_known_list = [] 59 | assert_equals(log_pairs, well_known_list) 60 | 61 | def test_analyze_func_json(): 62 | pair = (DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.tgen.log', DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.torctl.log', datetime.datetime(2019, 1, 10, 0, 0)) 63 | work_dir = tempfile.mkdtemp() 64 | reprocessing.analyze_func(work_dir, None, pair) 65 | json_file = os.path.join(work_dir, "2019-01-10.onionperf.analysis.json.xz") 66 | assert(os.path.exists(json_file)) 67 | shutil.rmtree(work_dir) 68 | 69 | def test_multiprocess_logs(): 70 | pairs = [(DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.tgen.log', DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.torctl.log', datetime.datetime(2019, 1, 10, 0, 0))] 71 | work_dir = tempfile.mkdtemp() 72 | reprocessing.multiprocess_logs(pairs, work_dir) 73 | json_file = os.path.join(work_dir, "2019-01-10.onionperf.analysis.json.xz") 74 | assert(os.path.exists(json_file)) 75 | shutil.rmtree(work_dir) 76 | 77 | def test_end_to_end(): 78 | tgen_logs = reprocessing.collect_logs(DATA_DIR, '*tgen.log') 79 | torctl_logs = reprocessing.collect_logs(DATA_DIR, '*torctl.log') 80 | log_pairs = reprocessing.match(tgen_logs, torctl_logs, None) 81 | work_dir = tempfile.mkdtemp() 82 | reprocessing.multiprocess_logs(log_pairs, work_dir) 83 | json_file = os.path.join(work_dir, "2019-01-10.onionperf.analysis.json.xz") 84 | assert(os.path.exists(json_file)) 85 | shutil.rmtree(work_dir) 86 | -------------------------------------------------------------------------------- /onionperf/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import hashlib 3 | import os 4 | import pkg_resources 5 | import shutil 6 | import sys 7 | import tempfile 8 | 9 | from nose.tools import assert_equals 10 | 11 | from onionperf import util 12 | 13 | def absolute_data_path(relative_path=""): 14 | """ 15 | Returns an absolute path for test data given a relative path. 16 | """ 17 | return pkg_resources.resource_filename("onionperf", 18 | "tests/data/" + 19 | relative_path) 20 | 21 | DATA_DIR = absolute_data_path() 22 | 23 | def test_make_dir(): 24 | """ 25 | Creates a temporary working directory, and then a directory within it. 26 | The second directory is created using util.make_dir_path. 27 | Ensures that the path exists, is a directory and is not a symbolic link. 28 | Removes the temporary working directory only if successful. 29 | """ 30 | work_dir = tempfile.mkdtemp() 31 | test_path = os.path.join(work_dir, "test") 32 | util.make_dir_path(test_path) 33 | assert(os.path.exists(test_path)) 34 | assert(os.path.isdir(test_path)) 35 | assert(not os.path.islink(test_path)) 36 | shutil.rmtree(work_dir) 37 | 38 | def test_find_file_paths(): 39 | """ 40 | Uses util.find_file_paths to find an existing file in the test data directory, given a pattern. 41 | The function returns the full path to the file. 42 | """ 43 | pattern = ["abcdef"] 44 | paths = util.find_file_paths(DATA_DIR, pattern) 45 | print(paths) 46 | assert_equals(paths, [absolute_data_path("dirs/abcdefg.txt")]) 47 | 48 | def test_find_file_paths_with_dash(): 49 | """ 50 | Uses util.find_file_paths to find an existing file in the test data directory, given a 51 | pattern. Ensures the path returned by the function defaults to stdin if there 52 | is a dash detected at the end of the given directory. 53 | """ 54 | pattern = ["abcdef"] 55 | paths = util.find_file_paths(DATA_DIR + "/-", pattern) 56 | assert_equals(paths, ['-']) 57 | 58 | #def test_find_file_paths_pairs(): 59 | # """ 60 | # Uses util.find_file_paths_pairs to find existing files in the test data 61 | # directory matching either of two given patterns. 62 | # it returns tuples consisting of a list containing matching file and an empty list. 63 | # The position of the empty lists is dependent on which pattern was matched. 64 | # If a file matches the first pattern, the second item in the tuple will be empty. 65 | # If a file matches the second pattern, the first item in the tuple will be empty. 66 | # """ 67 | # first_pattern = ['.*tgen\.log'] 68 | # second_pattern = ['.*torctl\.log'] 69 | # paths = util.find_file_paths_pairs(DATA_DIR, first_pattern, second_pattern) 70 | # assert_equals(paths, [ 71 | # ([], [absolute_data_path("logs/onionperf20190101.torctl.log")]), 72 | # ([], [absolute_data_path("logs/onionperf.torctl.log")]), 73 | # ([absolute_data_path("logs/onionperf.tgen.log")], []), 74 | # ([absolute_data_path("logs/onionperf20190101.tgen.log")], []) 75 | # ]) 76 | 77 | def test_find_path_with_binpath(): 78 | """ 79 | Creates a temporary named file, uses util.find_path with a filename to find its 80 | full path and then compares is to that returned by the tempfile function. 81 | Removes the created named temporary file only if successful. 82 | """ 83 | temp_file = tempfile.NamedTemporaryFile() 84 | work_path = util.find_path(temp_file.name, temp_file.name) 85 | assert_equals(work_path, temp_file.name) 86 | temp_file.close() 87 | 88 | 89 | def test_find_path_with_which(): 90 | """ 91 | Creates a temporary named file, and makes it executable. 92 | Uses util.find_path with a name and search path to find its full 93 | path, and then compares is to that returned by the tempfile function. Removes 94 | the created named temporary file only if successful. 95 | """ 96 | 97 | temp_file = tempfile.NamedTemporaryFile() 98 | os.chmod(temp_file.name, 0o775) 99 | work_path = util.find_path(None, temp_file.name, tempfile.tempdir) 100 | assert_equals(work_path, temp_file.name) 101 | temp_file.close() 102 | 103 | def test_is_exe(): 104 | """ 105 | Uses util.is_exe to test if paths point to executable files. 106 | Checks an executable file path is accepted 107 | Checks a non-executable file path is not accepted 108 | Checks a directory path is not accepted 109 | """ 110 | assert(util.is_exe(absolute_data_path("bin/script"))) 111 | assert(not util.is_exe(absolute_data_path("bin/script_no_exe"))) 112 | assert(not util.is_exe(absolute_data_path("bin/"))) 113 | 114 | def test_which(): 115 | """ 116 | Uses util.which with an executable file and a search path in the test data 117 | directory. Checks the full path of the file is identified. 118 | """ 119 | test_binary = util.which("script", search_path=absolute_data_path("bin/")) 120 | assert_equals(test_binary, absolute_data_path("bin/script")) 121 | 122 | def test_which_not_executable(): 123 | """ 124 | Uses util.which to test an non-executable file 125 | in the test data directory. Checks the non-executable file is not 126 | identified as a program to run. 127 | """ 128 | test_binary = util.which( 129 | "script_non_exe", search_path=absolute_data_path("bin/")) 130 | assert_equals(test_binary, None) 131 | 132 | def test_which_full_path(): 133 | """ 134 | Uses util.which with the full path of an executable file and a 135 | search path. 136 | Checks the full path of the file is identified. 137 | """ 138 | test_binary = util.which( 139 | absolute_data_path("bin/script"), 140 | search_path=absolute_data_path("bin/")) 141 | assert_equals(test_binary, absolute_data_path("bin/script")) 142 | 143 | 144 | def test_date_to_string(): 145 | """ 146 | Uses util.date_to_string with a datetime object. 147 | Returns a correctly formatted string. 148 | """ 149 | date_object = datetime.datetime(2018, 11, 27, 11) 150 | date_string = util.date_to_string(date_object) 151 | assert_equals(date_string, "2018-11-27") 152 | 153 | def test_date_to_string_is_none(): 154 | """ 155 | Uses util.date_to_string with a None object. 156 | Returns an empty string. 157 | """ 158 | date_string = util.date_to_string(None) 159 | assert_equals(date_string, "") 160 | 161 | def test_dates_match(): 162 | """ 163 | Uses util.dates_match with two matching datetime objects. 164 | """ 165 | first_date = datetime.datetime(2018, 11, 27, 10) 166 | second_date = datetime.datetime(2018, 11, 27, 11) 167 | assert(util.do_dates_match(first_date, second_date)) 168 | 169 | def test_dates_match_false(): 170 | """ 171 | Uses util.dates_match with two non-matching datetime objects. 172 | """ 173 | first_date = datetime.datetime(2018, 11, 27, 10) 174 | second_date = datetime.datetime(2016, 11, 27, 11) 175 | assert_equals(util.do_dates_match(first_date, second_date), False) 176 | 177 | def test_find_ip_address_url(): 178 | """ 179 | Uses util.find_ip_address_url with a string containing an IPv4 address. 180 | The ip address is returned as a string. 181 | """ 182 | ip_address = util.find_ip_address_url("Your IP address appears to be: 70.70.70.70") 183 | assert_equals(ip_address, "70.70.70.70") 184 | 185 | def test_find_ip_address_url_none(): 186 | """ 187 | Uses util.find_ip_address_url with a string containing no IPv4 address. 188 | This should return None. 189 | """ 190 | ip_address = util.find_ip_address_url("Your IP address appears to be") 191 | assert_equals(ip_address, None) 192 | 193 | def test_get_random_free_port(): 194 | """ 195 | Uses util.get_random_free_port to get a port number. 196 | Asserts the port exists, and it is a high-numbered port 197 | between 10000 and 60000. 198 | """ 199 | port = util.get_random_free_port() 200 | assert(port is not None) 201 | assert(port < 60000) 202 | assert(port >= 10000) 203 | 204 | def test_data_source_stdin(): 205 | """ 206 | Creates a new util.DataSource object with stdin input. When calling 207 | util.DataSource.open(), this should set stdin as the DataSource.source 208 | for this object. 209 | """ 210 | test_data_source = util.DataSource("-") 211 | test_data_source.open() 212 | assert_equals(test_data_source.source, sys.stdin) 213 | 214 | def test_data_source_file(): 215 | """ 216 | Creates a new util.DataSource object with an uncompressed input file. When calling 217 | util.DataSource.open(), this should set the file handle as the DataSource.source 218 | for this object. 219 | DataSouce.source is verified against the contents of the input file. 220 | """ 221 | test_data_source = util.DataSource(absolute_data_path("simplefile")) 222 | test_data_source.open() 223 | data_source_file_handle = test_data_source.source 224 | data_source_contents = data_source_file_handle.read() 225 | assert_equals(data_source_contents, "onionperf") 226 | 227 | def test_data_source_compressed_file(): 228 | """ 229 | Creates a new util.DataSource object with a compressed input file. When 230 | calling util.DataSource.open(), this should set the output of an xzprocess (an 231 | uncompressed file handle) as the DataSource.source for this object, and set 232 | DataSource.compress to True. 233 | Verifies DataSource.compress is set to True. 234 | DataSouce.source is verified against the contents of the input file. 235 | """ 236 | test_data_source = util.DataSource(absolute_data_path("simplefile.xz")) 237 | test_data_source.open() 238 | data_source_file_handle = test_data_source.source 239 | data_source_contents = data_source_file_handle.read() 240 | assert_equals(data_source_contents, "onionperf") 241 | assert(test_data_source.compress) 242 | 243 | def test_file_writable(): 244 | """ 245 | Creates a new util.FileWritable object using a temporary filename. 246 | Writes a string to it using util.FileWritable.write(). 247 | The checksum of this file is compared to a good known checksum. 248 | The temporary file is only removed if the test is successful. 249 | """ 250 | temp_file = tempfile.NamedTemporaryFile() 251 | test_writable = util.FileWritable(temp_file.name) 252 | test_writable.write("onionperf") 253 | test_writable.close() 254 | expected_checksum = "5001ed4ab25b52543946fa63da829d4eeab1bd254c89ffdad0877186e074b385" 255 | with open(temp_file.name, 'rb') as f: 256 | file_bytes = f.read() 257 | file_checksum = hashlib.sha256(file_bytes).hexdigest() 258 | assert_equals(file_checksum, expected_checksum) 259 | temp_file.close() 260 | 261 | def test_file_writable_compressed(): 262 | """ 263 | Creates a new util.FileWritable object using a temporary filename and 264 | compression. Writes a string to it using util.FileWritable.write(). 265 | The checksum of this file is compared to a good known checksum. 266 | The temporary file is only removed if the test is successful. 267 | """ 268 | 269 | temp_file = tempfile.NamedTemporaryFile(suffix=".xz") 270 | test_writable = util.FileWritable(temp_file.name, True) 271 | test_writable.write("onionperf") 272 | test_writable.close() 273 | expected_checksum = "3556b3bee6bb56d0a42676cbbf5784ebe4151fe65b0797f42260f93212e2df11" 274 | with open(temp_file.name, 'rb') as f: 275 | file_bytes = f.read() 276 | file_checksum = hashlib.sha256(file_bytes).hexdigest() 277 | assert_equals(file_checksum, expected_checksum) 278 | temp_file.close() 279 | 280 | def test_file_writable_with_stout(): 281 | """ 282 | Creates a new util.FileWritable object using stdout. 283 | Checks the util.FileWritable.file attribute is set to stdout. 284 | """ 285 | test_writable = util.FileWritable("-") 286 | assert_equals(test_writable.file, sys.stdout) 287 | 288 | def test_file_writable_rotate_file(): 289 | """ 290 | Creates a temporary working directory. 291 | Creates a new util.FileWritable object in the working directory. 292 | Rotates file using util.FileWritable.rotate_file with a fixed date and time. 293 | Checks path log_archive has been created in the working directory. 294 | Checks path log_archive is a directory. 295 | Checks file with the appropiate name has been rotated in the log_archive directory. 296 | Removes working directory only if successful. 297 | """ 298 | work_dir = tempfile.mkdtemp() 299 | test_writable = util.FileWritable(os.path.join(work_dir, "logfile")) 300 | test_writable.write("onionperf") 301 | test_writable.rotate_file(datetime.datetime(2018, 11, 27, 0, 0, 0)) 302 | created_dir = os.path.join(work_dir, "log_archive") 303 | rotated_file = os.path.join(created_dir, "logfile_2018-11-27_00:00:00.gz") 304 | assert(os.path.exists(created_dir)) 305 | assert(os.path.isdir(created_dir)) 306 | assert(os.path.exists(rotated_file)) 307 | shutil.rmtree(work_dir) 308 | -------------------------------------------------------------------------------- /onionperf/util.py: -------------------------------------------------------------------------------- 1 | ''' 2 | OnionPerf 3 | Authored by Rob Jansen, 2015 4 | Copyright 2015-2020 The Tor Project 5 | See LICENSE for licensing information 6 | ''' 7 | 8 | import sys, os, socket, logging, random, re, shutil, datetime, urllib.request, urllib.parse, urllib.error, gzip, lzma 9 | from threading import Lock 10 | from io import StringIO 11 | from abc import ABCMeta, abstractmethod 12 | 13 | LINEFORMATS = "k-,r-,b-,g-,c-,m-,y-,k--,r--,b--,g--,c--,m--,y--,k:,r:,b:,g:,c:,m:,y:,k-.,r-.,b-.,g-.,c-.,m-.,y-." 14 | 15 | def make_dir_path(path): 16 | p = os.path.abspath(os.path.expanduser(path)) 17 | if not os.path.exists(p): 18 | os.makedirs(p) 19 | 20 | def find_file_paths(searchpath, patterns): 21 | paths = [] 22 | if searchpath.endswith("/-"): paths.append("-") 23 | else: 24 | for root, dirs, files in os.walk(searchpath): 25 | for name in files: 26 | found = False 27 | fpath = os.path.join(root, name) 28 | fbase = os.path.basename(fpath) 29 | for pattern in patterns: 30 | if re.search(pattern, fbase): found = True 31 | if found: paths.append(fpath) 32 | return paths 33 | 34 | def find_file_paths_pairs(searchpath, patterns_a, patterns_b): 35 | paths = [] 36 | for root, dirs, files in os.walk(searchpath): 37 | for name in files: 38 | fpath = os.path.join(root, name) 39 | fbase = os.path.basename(fpath) 40 | 41 | paths_a = [] 42 | found = False 43 | for pattern in patterns_a: 44 | if re.search(pattern, fbase): 45 | found = True 46 | if found: 47 | paths_a.append(fpath) 48 | 49 | paths_b = [] 50 | found = False 51 | for pattern in patterns_b: 52 | if re.search(pattern, fbase): 53 | found = True 54 | if found: 55 | paths_b.append(fpath) 56 | 57 | if len(paths_a) > 0 or len(paths_b) > 0: 58 | paths.append((paths_a, paths_b)) 59 | return paths 60 | 61 | def find_path(binpath, defaultname, search_path=None): 62 | # find the path to tor 63 | if binpath is not None: 64 | binpath = os.path.abspath(os.path.expanduser(binpath)) 65 | else: 66 | w = which(defaultname, search_path) 67 | if w is not None: 68 | binpath = os.path.abspath(os.path.expanduser(w)) 69 | else: 70 | logging.error("You did not specify a path to a '{0}' binary, and one does not exist in your PATH".format(defaultname)) 71 | return None 72 | # now make sure the path exists 73 | if os.path.exists(binpath): 74 | logging.info("Using '{0}' binary at {1}".format(defaultname, binpath)) 75 | else: 76 | logging.error("Path to '{0}' binary does not exist: {1}".format(defaultname, binpath)) 77 | return None 78 | # we found it and it exists 79 | return binpath 80 | 81 | def is_exe(fpath): 82 | return os.path.isfile(fpath) and os.access(fpath, os.X_OK) 83 | 84 | def which(program, search_path=None): 85 | if search_path is None: 86 | search_path = os.environ["PATH"] 87 | fpath, fname = os.path.split(program) 88 | if fpath: 89 | if is_exe(program): 90 | return program 91 | else: 92 | for path in search_path.split(os.pathsep): 93 | exe_file = os.path.join(path, program) 94 | if is_exe(exe_file): 95 | return exe_file 96 | return None 97 | 98 | def timestamp_to_seconds(stamp): # unix timestamp 99 | return float(stamp) 100 | 101 | def date_to_string(date_object): 102 | if date_object is not None: 103 | return "{:04d}-{:02d}-{:02d}".format(date_object.year, date_object.month, date_object.day) 104 | else: 105 | return "" 106 | 107 | def do_dates_match(date1, date2): 108 | year_matches = True if date1.year == date2.year else False 109 | month_matches = True if date1.month == date2.month else False 110 | day_matches = True if date1.day == date2.day else False 111 | if year_matches and month_matches and day_matches: 112 | return True 113 | else: 114 | return False 115 | 116 | def find_ip_address_url(data): 117 | """ 118 | Parses a string using a regular expression for identifying IPv4 addressses. 119 | If more than one IP address is found, only the first one is returned. 120 | If no IP address is found, the function returns None . 121 | 122 | :param data: string 123 | :returns: string 124 | """ 125 | 126 | ip_address = None 127 | if data is not None and len(data) > 0: 128 | ip_list = re.findall(r'[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3}', data) 129 | if ip_list is not None and len(ip_list) > 0: 130 | ip_address = ip_list[0] 131 | return ip_address 132 | 133 | def find_ip_address_local(): 134 | """ 135 | Determines the local IP address of the host by opening a socket 136 | connection to an external address. In doing so, the address used by the 137 | host for initiating connections can be retrieved and then returned. 138 | 139 | :returns: string 140 | """ 141 | 142 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 143 | s.connect(("8.8.8.8", 53)) 144 | ip_address = s.getsockname()[0] 145 | s.close() 146 | return ip_address 147 | 148 | def get_ip_address(): 149 | """ 150 | Determines the public IPv4 address of the vantage point using the 151 | check.torproject.org service. If it is not possible to reach the service, 152 | or to parse the result recieved, it will fall back to determining the local 153 | IP address used for outbound connections. 154 | 155 | :returns: string 156 | """ 157 | ip_address = None 158 | try: 159 | data = urllib.request.urlopen('https://check.torproject.org/').read().decode('utf-8') 160 | ip_address = find_ip_address_url(data) 161 | if not ip_address: 162 | logging.error( 163 | "Unable to determine IP address from check.torproject.org. " 164 | "The site was successfully contacted but the result could " 165 | "not be parsed. Maybe the service is down? Falling back to " 166 | "finding your IP locally...") 167 | ip_address = find_ip_address_local() 168 | except IOError: 169 | logging.warning( 170 | "An IOError occured attempting to contact check.torproject.org. " 171 | "This will affect measurements unless your machine has a public " 172 | "IP address. Falling back to finding your IP locally...") 173 | ip_address = find_ip_address_local() 174 | return ip_address 175 | 176 | def get_random_free_port(): 177 | """ 178 | Picks a random high port and checks its availability by opening a 179 | socket connection to localhost on this port. If this raises an exception 180 | the process is repeated until a free port is found. 181 | 182 | :returns: int 183 | """ 184 | 185 | while True: 186 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 187 | port = random.randint(10000, 60000) 188 | rc = s.connect_ex(('127.0.0.1', port)) 189 | s.close() 190 | if rc != 0: # error connecting, port is available 191 | return port 192 | 193 | class DataSource(object): 194 | def __init__(self, filename, compress=False): 195 | self.filename = filename 196 | self.compress = compress 197 | self.source = None 198 | 199 | def __iter__(self): 200 | if self.source is None: 201 | self.open() 202 | return self.source 203 | 204 | def __next__(self): 205 | return next(self.source) if self.source is not None else None 206 | 207 | def open(self, newline=None): 208 | if self.source is None: 209 | if self.filename == '-': 210 | self.source = sys.stdin 211 | elif self.compress or self.filename.endswith(".xz"): 212 | self.compress = True 213 | self.source = lzma.open(self.filename, mode='rt', newline=newline) 214 | elif self.filename.endswith(".gz"): 215 | self.compress = True 216 | self.source = gzip.open(self.filename, 'rt', newline=newline) 217 | else: 218 | self.source = open(self.filename, 'rt', newline=newline) 219 | 220 | def get_file_handle(self): 221 | if self.source is None: 222 | self.open() 223 | return self.source 224 | 225 | def close(self): 226 | if self.source is not None: self.source.close() 227 | 228 | 229 | class Writable(object, metaclass=ABCMeta): 230 | @abstractmethod 231 | def write(self, msg): 232 | pass 233 | 234 | @abstractmethod 235 | def close(self): 236 | pass 237 | 238 | class FileWritable(Writable): 239 | 240 | def __init__(self, filename, do_compress=False, do_truncate=False): 241 | self.filename = filename 242 | self.do_compress = do_compress 243 | self.do_truncate = do_truncate 244 | self.file = None 245 | self.lock = Lock() 246 | 247 | if self.filename == '-': 248 | self.file = sys.stdout 249 | elif self.do_compress or self.filename.endswith(".xz"): 250 | self.do_compress = True 251 | if not self.filename.endswith(".xz"): 252 | self.filename += ".xz" 253 | 254 | def write(self, msg): 255 | self.lock.acquire() 256 | if self.file is None: self.__open_nolock() 257 | if self.file is not None: self.file.write(msg) 258 | self.lock.release() 259 | 260 | def open(self): 261 | self.lock.acquire() 262 | self.__open_nolock() 263 | self.lock.release() 264 | 265 | def __open_nolock(self): 266 | if self.do_compress: 267 | self.file = lzma.open(self.filename, mode='wt') 268 | else: 269 | self.file = open(self.filename, 'wt' if self.do_truncate else 'at', 1) 270 | 271 | def close(self): 272 | self.lock.acquire() 273 | self.__close_nolock() 274 | self.lock.release() 275 | 276 | def __close_nolock(self): 277 | if self.file is not None: 278 | self.file.close() 279 | self.file = None 280 | 281 | def rotate_file(self, filename_datetime=datetime.datetime.now()): 282 | self.lock.acquire() 283 | 284 | # build up the new filename with an embedded timestamp and ending in .gz 285 | base = os.path.basename(self.filename) 286 | base_noext = os.path.splitext(os.path.splitext(base)[0])[0] 287 | ts = filename_datetime.strftime("%Y-%m-%d_%H:%M:%S") 288 | new_base = base.replace(base_noext, "{0}_{1}".format(base_noext, ts)) 289 | new_filename = self.filename.replace(base, "log_archive/{0}.gz".format(new_base)) 290 | 291 | make_dir_path(os.path.dirname(new_filename)) 292 | 293 | # close and copy the old file, then truncate and reopen the old file 294 | self.__close_nolock() 295 | with open(self.filename, 'rb') as f_in, gzip.open(new_filename, 'wb') as f_out: 296 | shutil.copyfileobj(f_in, f_out) 297 | with open(self.filename, 'ab') as f_in: 298 | f_in.truncate(0) 299 | self.__open_nolock() 300 | 301 | self.lock.release() 302 | # return new file name so it can be processed if desired 303 | return new_filename 304 | 305 | class MemoryWritable(Writable): 306 | 307 | def __init__(self): 308 | self.str_buffer = StringIO() 309 | 310 | def write(self, msg): 311 | self.str_buffer.write() 312 | 313 | def readline(self): 314 | return self.str_buffer.readline() 315 | 316 | def close(self): 317 | self.str_buffer.close() 318 | -------------------------------------------------------------------------------- /onionperf/visualization.py: -------------------------------------------------------------------------------- 1 | ''' 2 | OnionPerf 3 | Authored by Rob Jansen, 2015 4 | Copyright 2015-2020 The Tor Project 5 | See LICENSE for licensing information 6 | ''' 7 | 8 | import matplotlib; matplotlib.use('Agg') # for systems without X11 9 | from matplotlib.backends.backend_pdf import PdfPages 10 | import time 11 | from abc import abstractmethod, ABCMeta 12 | import matplotlib.pyplot as plt 13 | import pandas as pd 14 | from pandas.plotting import register_matplotlib_converters 15 | import seaborn as sns 16 | import datetime 17 | import numpy as np 18 | 19 | class Visualization(object, metaclass=ABCMeta): 20 | 21 | def __init__(self): 22 | self.datasets = [] 23 | register_matplotlib_converters() 24 | 25 | def add_dataset(self, analyses, label): 26 | self.datasets.append((analyses, label)) 27 | 28 | @abstractmethod 29 | def plot_all(self, output_prefix): 30 | pass 31 | 32 | class TGenVisualization(Visualization): 33 | 34 | def plot_all(self, output_prefix): 35 | if len(self.datasets) > 0: 36 | prefix = output_prefix + '.' if output_prefix is not None else '' 37 | ts = time.strftime("%Y-%m-%d_%H:%M:%S") 38 | self.__extract_data_frame() 39 | self.data.to_csv("{0}onionperf.viz.{1}.csv".format(prefix, ts)) 40 | sns.set_context("paper") 41 | self.page = PdfPages("{0}onionperf.viz.{1}.pdf".format(prefix, ts)) 42 | self.__plot_firstbyte_ecdf() 43 | self.__plot_firstbyte_time() 44 | self.__plot_lastbyte_ecdf() 45 | self.__plot_lastbyte_box() 46 | self.__plot_lastbyte_bar() 47 | self.__plot_lastbyte_time() 48 | self.__plot_throughput_ecdf() 49 | self.__plot_downloads_count() 50 | self.__plot_errors_count() 51 | self.__plot_errors_time() 52 | self.page.close() 53 | 54 | def __extract_data_frame(self): 55 | streams = [] 56 | for (analyses, label) in self.datasets: 57 | for analysis in analyses: 58 | for client in analysis.get_nodes(): 59 | tor_streams_by_source_port = {} 60 | tor_streams = analysis.get_tor_streams(client) 61 | for tor_stream in tor_streams.values(): 62 | if "source" in tor_stream and ":" in tor_stream["source"]: 63 | source_port = tor_stream["source"].split(":")[1] 64 | tor_streams_by_source_port.setdefault(source_port, []).append(tor_stream) 65 | tor_circuits = analysis.get_tor_circuits(client) 66 | tgen_streams = analysis.get_tgen_streams(client) 67 | tgen_transfers = analysis.get_tgen_transfers(client) 68 | while tgen_streams or tgen_transfers: 69 | error_code = None 70 | source_port = None 71 | unix_ts_end = None 72 | # Explanation of the math below for computing Mbps: For 1 MiB and 5 MiB 73 | # downloads we can extract the number of seconds that have elapsed between 74 | # receiving bytes 524,288 and 1,048,576, which is a total amount of 524,288 75 | # bytes or 4,194,304 bits or 4.194304 megabits. We want the reciprocal of 76 | # that value with unit megabits per second. 77 | if tgen_streams: 78 | stream_id, stream_data = tgen_streams.popitem() 79 | stream = {"id": stream_id, "label": label, 80 | "filesize_bytes": int(stream_data["stream_info"]["recvsize"]), 81 | "error_code": None} 82 | stream["server"] = "onion" if ".onion:" in stream_data["transport_info"]["remote"] else "public" 83 | if "time_info" in stream_data: 84 | s = stream_data["time_info"] 85 | if "usecs-to-first-byte-recv" in s: 86 | stream["time_to_first_byte"] = float(s["usecs-to-first-byte-recv"])/1000000 87 | if "usecs-to-last-byte-recv" in s: 88 | stream["time_to_last_byte"] = float(s["usecs-to-last-byte-recv"])/1000000 89 | if "elapsed_seconds" in stream_data: 90 | s = stream_data["elapsed_seconds"] 91 | if stream_data["stream_info"]["recvsize"] == "5242880" and "0.2" in s["payload_progress_recv"]: 92 | stream["mbps"] = 4.194304 / (s["payload_progress_recv"]["0.2"] - s["payload_progress_recv"]["0.1"]) 93 | if "error" in stream_data["stream_info"] and stream_data["stream_info"]["error"] != "NONE": 94 | error_code = stream_data["stream_info"]["error"] 95 | if "local" in stream_data["transport_info"] and len(stream_data["transport_info"]["local"].split(":")) > 2: 96 | source_port = stream_data["transport_info"]["local"].split(":")[2] 97 | if "unix_ts_end" in stream_data: 98 | unix_ts_end = stream_data["unix_ts_end"] 99 | if "unix_ts_start" in stream_data: 100 | stream["start"] = datetime.datetime.utcfromtimestamp(stream_data["unix_ts_start"]) 101 | elif tgen_transfers: 102 | transfer_id, transfer_data = tgen_transfers.popitem() 103 | stream = {"id": transfer_id, "label": label, 104 | "filesize_bytes": transfer_data["filesize_bytes"], 105 | "error_code": None} 106 | stream["server"] = "onion" if ".onion:" in transfer_data["endpoint_remote"] else "public" 107 | if "elapsed_seconds" in transfer_data: 108 | s = transfer_data["elapsed_seconds"] 109 | if "payload_progress" in s: 110 | if transfer_data["filesize_bytes"] == 1048576 and "1.0" in s["payload_progress"]: 111 | stream["mbps"] = 4.194304 / (s["payload_progress"]["1.0"] - s["payload_progress"]["0.5"]) 112 | if transfer_data["filesize_bytes"] == 5242880 and "0.2" in s["payload_progress"]: 113 | stream["mbps"] = 4.194304 / (s["payload_progress"]["0.2"] - s["payload_progress"]["0.1"]) 114 | if "first_byte" in s: 115 | stream["time_to_first_byte"] = s["first_byte"] 116 | if "last_byte" in s: 117 | stream["time_to_last_byte"] = s["last_byte"] 118 | if "error_code" in transfer_data and transfer_data["error_code"] != "NONE": 119 | error_code = transfer_data["error_code"] 120 | if "endpoint_local" in transfer_data and len(transfer_data["endpoint_local"].split(":")) > 2: 121 | source_port = transfer_data["endpoint_local"].split(":")[2] 122 | if "unix_ts_end" in transfer_data: 123 | unix_ts_end = transfer_data["unix_ts_end"] 124 | if "unix_ts_start" in transfer_data: 125 | stream["start"] = datetime.datetime.utcfromtimestamp(transfer_data["unix_ts_start"]) 126 | tor_stream = None 127 | tor_circuit = None 128 | if source_port and unix_ts_end: 129 | for s in tor_streams_by_source_port[source_port]: 130 | if abs(unix_ts_end - s["unix_ts_end"]) < 150.0: 131 | tor_stream = s 132 | break 133 | if tor_stream and "circuit_id" in tor_stream: 134 | circuit_id = tor_stream["circuit_id"] 135 | if str(circuit_id) in tor_circuits: 136 | tor_circuit = tor_circuits[circuit_id] 137 | if error_code: 138 | if error_code == "PROXY": 139 | error_code_parts = ["TOR"] 140 | else: 141 | error_code_parts = ["TGEN", error_code] 142 | if tor_stream: 143 | if "failure_reason_local" in tor_stream: 144 | error_code_parts.append(tor_stream["failure_reason_local"]) 145 | if "failure_reason_remote" in tor_stream: 146 | error_code_parts.append(tor_stream["failure_reason_remote"]) 147 | stream["error_code"] = "/".join(error_code_parts) 148 | 149 | if "filters" in analysis.json_db.keys() and analysis.json_db["filters"]["tor/circuits"]: 150 | if tor_circuit and "filtered_out" not in tor_circuit.keys(): 151 | streams.append(stream) 152 | else: 153 | streams.append(stream) 154 | self.data = pd.DataFrame.from_records(streams, index="id") 155 | 156 | def __plot_firstbyte_ecdf(self): 157 | for server in self.data["server"].unique(): 158 | self.__draw_ecdf(x="time_to_first_byte", hue="label", hue_name="Data set", 159 | data=self.data[self.data["server"] == server], 160 | title="Time to download first byte from {0} service".format(server), 161 | xlabel="Download time (s)", ylabel="Cumulative Fraction") 162 | 163 | def __plot_firstbyte_time(self): 164 | for bytes in np.sort(self.data["filesize_bytes"].unique()): 165 | for server in self.data["server"].unique(): 166 | self.__draw_timeplot(x="start", y="time_to_first_byte", hue="label", hue_name="Data set", 167 | data=self.data[(self.data["server"] == server) & (self.data["filesize_bytes"] == bytes)], 168 | title="Time to download first of {0} bytes from {1} service over time".format(bytes, server), 169 | xlabel="Download start time", ylabel="Download time (s)") 170 | 171 | def __plot_lastbyte_ecdf(self): 172 | for bytes in np.sort(self.data["filesize_bytes"].unique()): 173 | for server in self.data["server"].unique(): 174 | self.__draw_ecdf(x="time_to_last_byte", hue="label", hue_name="Data set", 175 | data=self.data[(self.data["server"] == server) & (self.data["filesize_bytes"] == bytes)], 176 | title="Time to download last of {0} bytes from {1} service".format(bytes, server), 177 | xlabel="Download time (s)", ylabel="Cumulative Fraction") 178 | 179 | def __plot_lastbyte_box(self): 180 | for bytes in np.sort(self.data["filesize_bytes"].unique()): 181 | for server in self.data["server"].unique(): 182 | self.__draw_boxplot(x="label", y="time_to_last_byte", 183 | data=self.data[(self.data["server"] == server) & (self.data["filesize_bytes"] == bytes)], 184 | title="Time to download last of {0} bytes from {1} service".format(bytes, server), 185 | xlabel="Data set", ylabel="Download time (s)") 186 | 187 | def __plot_lastbyte_bar(self): 188 | for bytes in np.sort(self.data["filesize_bytes"].unique()): 189 | for server in self.data["server"].unique(): 190 | self.__draw_barplot(x="label", y="time_to_last_byte", 191 | data=self.data[(self.data["server"] == server) & (self.data["filesize_bytes"] == bytes)], 192 | title="Mean time to download last of {0} bytes from {1} service".format(bytes, server), 193 | xlabel="Data set", ylabel="Downloads time (s)") 194 | 195 | def __plot_lastbyte_time(self): 196 | for bytes in np.sort(self.data["filesize_bytes"].unique()): 197 | for server in self.data["server"].unique(): 198 | self.__draw_timeplot(x="start", y="time_to_last_byte", hue="label", hue_name="Data set", 199 | data=self.data[(self.data["server"] == server) & (self.data["filesize_bytes"] == bytes)], 200 | title="Time to download last of {0} bytes from {1} service over time".format(bytes, server), 201 | xlabel="Download start time", ylabel="Download time (s)") 202 | 203 | def __plot_throughput_ecdf(self): 204 | for server in self.data["server"].unique(): 205 | self.__draw_ecdf(x="mbps", hue="label", hue_name="Data set", 206 | data=self.data[self.data["server"] == server], 207 | title="Throughput when downloading from {0} server".format(server), 208 | xlabel="Throughput (Mbps)", ylabel="Cumulative Fraction") 209 | 210 | def __plot_downloads_count(self): 211 | for bytes in np.sort(self.data["filesize_bytes"].unique()): 212 | for server in self.data["server"].unique(): 213 | self.__draw_countplot(x="label", 214 | data=self.data[(self.data["server"] == server) & (self.data["filesize_bytes"] == bytes)], 215 | xlabel="Data set", ylabel="Downloads completed (#)", 216 | title="Number of downloads of {0} bytes completed from {1} service".format(bytes, server)) 217 | 218 | def __plot_errors_count(self): 219 | for server in self.data["server"].unique(): 220 | if self.data[self.data["server"] == server]["error_code"].count() > 0: 221 | self.__draw_countplot(x="error_code", hue="label", hue_name="Data set", 222 | data=self.data[self.data["server"] == server], 223 | xlabel="Error code", ylabel="Downloads failed (#)", 224 | title="Number of downloads failed from {0} service".format(server)) 225 | 226 | def __plot_errors_time(self): 227 | for server in self.data["server"].unique(): 228 | if self.data[self.data["server"] == server]["error_code"].count() > 0: 229 | self.__draw_stripplot(x="start", y="error_code", hue="label", hue_name="Data set", 230 | data=self.data[self.data["server"] == server], 231 | xlabel="Download start time", ylabel="Error code", 232 | title="Downloads failed over time from {0} service".format(server)) 233 | 234 | def __draw_ecdf(self, x, hue, hue_name, data, title, xlabel, ylabel): 235 | data = data.dropna(subset=[x]) 236 | p0 = data[x].quantile(q=0.0, interpolation="lower") 237 | p99 = data[x].quantile(q=0.99, interpolation="higher") 238 | ranks = data.groupby(hue)[x].rank(pct=True) 239 | ranks.name = "rank_pct" 240 | result = pd.concat([data[[hue, x]], ranks], axis=1) 241 | result = result.append(pd.DataFrame({hue: data[hue].unique(), 242 | x: p0 - (p99 - p0) * 0.05, "rank_pct": 0.0}), 243 | ignore_index=True, sort=False) 244 | result = result.append(pd.DataFrame({hue: data[hue].unique(), 245 | x: p99 + (p99 - p0) * 0.05, "rank_pct": 1.0}), 246 | ignore_index=True, sort=False) 247 | result = result.rename(columns={hue: hue_name}) 248 | plt.figure() 249 | g = sns.lineplot(data=result, x=x, y="rank_pct", 250 | hue=hue_name, drawstyle="steps-post") 251 | g.set(title=title, xlabel=xlabel, ylabel=ylabel, 252 | xlim=(p0 - (p99 - p0) * 0.03, p99 + (p99 - p0) * 0.03)) 253 | sns.despine() 254 | self.page.savefig() 255 | plt.close() 256 | 257 | def __draw_timeplot(self, x, y, hue, hue_name, data, title, xlabel, ylabel): 258 | plt.figure() 259 | data = data.dropna(subset=[y]) 260 | data = data.rename(columns={hue: hue_name}) 261 | xmin = data[x].min() 262 | xmax = data[x].max() 263 | ymax = data[y].max() 264 | g = sns.scatterplot(data=data, x=x, y=y, hue=hue_name, alpha=0.5) 265 | g.set(title=title, xlabel=xlabel, ylabel=ylabel, 266 | xlim=(xmin - 0.03 * (xmax - xmin), xmax + 0.03 * (xmax - xmin)), 267 | ylim=(-0.05 * ymax, ymax * 1.05)) 268 | plt.xticks(rotation=10) 269 | sns.despine() 270 | self.page.savefig() 271 | plt.close() 272 | 273 | def __draw_boxplot(self, x, y, data, title, xlabel, ylabel): 274 | plt.figure() 275 | data = data.dropna(subset=[y]) 276 | g = sns.boxplot(data=data, x=x, y=y, sym="") 277 | g.set(title=title, xlabel=xlabel, ylabel=ylabel, ylim=(0, None)) 278 | sns.despine() 279 | self.page.savefig() 280 | plt.close() 281 | 282 | def __draw_barplot(self, x, y, data, title, xlabel, ylabel): 283 | plt.figure() 284 | data = data.dropna(subset=[y]) 285 | g = sns.barplot(data=data, x=x, y=y, ci=None) 286 | g.set(title=title, xlabel=xlabel, ylabel=ylabel) 287 | sns.despine() 288 | self.page.savefig() 289 | plt.close() 290 | 291 | def __draw_countplot(self, x, data, title, xlabel, ylabel, hue=None, hue_name=None): 292 | plt.figure() 293 | if hue is not None: 294 | data = data.rename(columns={hue: hue_name}) 295 | g = sns.countplot(data=data.dropna(subset=[x]), x=x, hue=hue_name) 296 | g.set(xlabel=xlabel, ylabel=ylabel, title=title) 297 | sns.despine() 298 | self.page.savefig() 299 | plt.close() 300 | 301 | def __draw_stripplot(self, x, y, hue, hue_name, data, title, xlabel, ylabel): 302 | plt.figure() 303 | data = data.rename(columns={hue: hue_name}) 304 | xmin = data[x].min() 305 | xmax = data[x].max() 306 | data = data.dropna(subset=[y]) 307 | g = sns.stripplot(data=data, x=x, y=y, hue=hue_name) 308 | g.set(title=title, xlabel=xlabel, ylabel=ylabel, 309 | xlim=(xmin - 0.03 * (xmax - xmin), xmax + 0.03 * (xmax - xmin))) 310 | plt.xticks(rotation=10) 311 | plt.yticks(rotation=80) 312 | sns.despine() 313 | self.page.savefig() 314 | plt.close() 315 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | matplotlib 3 | networkx 4 | nose-cov 5 | nose 6 | numpy 7 | pandas 8 | scipy 9 | seaborn 10 | stem >= 1.7.0 11 | tgentools 12 | -------------------------------------------------------------------------------- /schema/onionperf-3.0.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema", 3 | "$id": "https://gitlab.torproject.org/tpo/metrics/onionperf/-/raw/master/schema/onionperf-3.0.json", 4 | "type": "object", 5 | "title": "OnionPerf analysis JSON file format 3.0", 6 | "required": [ 7 | "data", 8 | "type", 9 | "version" 10 | ], 11 | "properties": { 12 | "data": { 13 | "type": "object", 14 | "title": "Measurement data by source name", 15 | "propertyNames": { 16 | "pattern": "^[A-Za-z0-9-]+$" 17 | }, 18 | "additionalProperties": { 19 | "type": "object", 20 | "title": "Measurement data from a single source", 21 | "required": [ 22 | "measurement_ip", 23 | "tgen", 24 | "tor" 25 | ], 26 | "properties": { 27 | "measurement_ip": { 28 | "type": "string", 29 | "title": "Public IP address of the measuring host." 30 | }, 31 | "tgen": { 32 | "type": "object", 33 | "title": "Measurement data obtained from client-side TGen logs", 34 | "required": [ 35 | "streams" 36 | ], 37 | "properties": { 38 | "streams": { 39 | "type": "object", 40 | "title": "Measurement data, by TGen stream identifier", 41 | "additionalProperties": { 42 | "type": "object", 43 | "title": "Information on a single measurement, obtained from a single [stream-success] or [stream-error] log message (except for elapsed_seconds)", 44 | "required": [ 45 | "byte_info", 46 | "is_complete", 47 | "is_error", 48 | "is_success", 49 | "stream_id", 50 | "stream_info", 51 | "time_info", 52 | "transport_info", 53 | "unix_ts_end", 54 | "unix_ts_start" 55 | ], 56 | "properties": { 57 | "byte_info": { 58 | "type": "object", 59 | "title": "Information on sent and received bytes", 60 | "required": [ 61 | "payload-bytes-recv", 62 | "payload-bytes-send", 63 | "payload-progress-recv", 64 | "payload-progress-send", 65 | "total-bytes-recv", 66 | "total-bytes-send" 67 | ], 68 | "properties": { 69 | "payload-bytes-recv": { 70 | "type": "string", 71 | "pattern": "^[0-9]+$", 72 | "title": "Number of payload bytes received" 73 | }, 74 | "payload-bytes-send": { 75 | "type": "string", 76 | "pattern": "^[0-9]+$", 77 | "title": "Number of payload bytes sent" 78 | }, 79 | "payload-progress-recv": { 80 | "type": "string", 81 | "pattern": "^[0-9]+\\.[0-9]+%$", 82 | "title": "Progress of receiving payload in percent" 83 | }, 84 | "payload-progress-send": { 85 | "type": "string", 86 | "pattern": "^[0-9]+\\.[0-9]+%$", 87 | "title": "Progress of sending payload in percent" 88 | }, 89 | "total-bytes-recv": { 90 | "type": "string", 91 | "pattern": "^[0-9]+$", 92 | "title": "Total number of bytes received" 93 | }, 94 | "total-bytes-send": { 95 | "type": "string", 96 | "pattern": "^[0-9]+$", 97 | "title": "Total number of bytes sent" 98 | } 99 | } 100 | }, 101 | "elapsed_seconds": { 102 | "type": "object", 103 | "title": "Elapsed seconds until a given number or fraction of payload bytes have been received or sent, obtained from [stream-status], [stream-success], and [stream-error] log messages, only included if the measurement was a success", 104 | "properties": { 105 | "payload_bytes_recv": { 106 | "type": "object", 107 | "title": "Number of received payload bytes", 108 | "propertyNames": { 109 | "pattern": "^[0-9]+$" 110 | }, 111 | "additionalProperties": { 112 | "type": "number", 113 | "title": "Elapsed seconds" 114 | } 115 | }, 116 | "payload_bytes_send": { 117 | "type": "object", 118 | "title": "Number of sent payload bytes", 119 | "propertyNames": { 120 | "pattern": "^[0-9]+$" 121 | }, 122 | "additionalProperties": { 123 | "type": "number", 124 | "title": "Elapsed seconds" 125 | } 126 | }, 127 | "payload_progress_recv": { 128 | "type": "object", 129 | "title": "Fraction of received payload bytes", 130 | "propertyNames": { 131 | "pattern": "^[01]\\.[0-9]$" 132 | }, 133 | "additionalProperties": { 134 | "type": "number", 135 | "title": "Elapsed seconds" 136 | } 137 | }, 138 | "payload_progress_send": { 139 | "type": "object", 140 | "title": "Fraction of sent payload bytes", 141 | "propertyNames": { 142 | "pattern": "^[01]\\.[0-9]$" 143 | }, 144 | "additionalProperties": { 145 | "type": "number", 146 | "title": "Elapsed seconds" 147 | } 148 | } 149 | } 150 | }, 151 | "is_complete": { 152 | "type": "boolean", 153 | "title": "Whether the stream finished, no matter the error state, which is always true, or otherwise the measurement would not be included here" 154 | }, 155 | "is_error": { 156 | "type": "boolean", 157 | "title": "Whether an error occurred" 158 | }, 159 | "is_success": { 160 | "type": "boolean", 161 | "title": "Whether the measurement was a success" 162 | }, 163 | "stream_id": { 164 | "type": "string", 165 | "title": "Stream identifier" 166 | }, 167 | "stream_info": { 168 | "type": "object", 169 | "title": "Information about the TGen stream", 170 | "required": [ 171 | "error", 172 | "id", 173 | "name", 174 | "peername", 175 | "recvsize", 176 | "recvstate", 177 | "sendsize", 178 | "sendstate", 179 | "vertexid" 180 | ], 181 | "properties": { 182 | "error": { 183 | "type": "string", 184 | "title": "Error code, or NONE if no error occurred" 185 | }, 186 | "id": { 187 | "type": "string", 188 | "title": "Stream numerical identifier, or 0 if the stream failed" 189 | }, 190 | "name": { 191 | "type": "string", 192 | "title": "Hostname of the TGen client" 193 | }, 194 | "peername": { 195 | "type": "string", 196 | "title": "Hostname of the TGen server" 197 | }, 198 | "recvsize": { 199 | "type": "string", 200 | "title": "Number of expected payload bytes in the response" 201 | }, 202 | "recvstate": { 203 | "type": "string", 204 | "title": "Last recorded receive state of the stream, one of RECV_{NONE,AUTHENTICATE,HEADER,MODEL,PAYLOAD,CHECKSUM,SUCCESS,ERROR}" 205 | }, 206 | "sendsize": { 207 | "type": "string", 208 | "title": "Number of expected payload bytes in the request" 209 | }, 210 | "sendstate": { 211 | "type": "string", 212 | "title": "Last recorded send state of the stream, one of SEND_{NONE,COMMAND,RESPONSE,PAYLOAD,CHECKSUM,FLUSH,SUCCESS,ERROR}" 213 | }, 214 | "vertexid": { 215 | "type": "string", 216 | "title": "Vertex identifier in the TGen model" 217 | } 218 | } 219 | }, 220 | "time_info": { 221 | "type": "object", 222 | "title": "Elapsed time until reaching given substeps in a measurement", 223 | "required": [ 224 | "created-ts", 225 | "now-ts", 226 | "usecs-to-checksum-recv", 227 | "usecs-to-checksum-send", 228 | "usecs-to-command", 229 | "usecs-to-first-byte-recv", 230 | "usecs-to-first-byte-send", 231 | "usecs-to-last-byte-recv", 232 | "usecs-to-last-byte-send", 233 | "usecs-to-proxy-choice", 234 | "usecs-to-proxy-init", 235 | "usecs-to-proxy-request", 236 | "usecs-to-proxy-response", 237 | "usecs-to-response", 238 | "usecs-to-socket-connect", 239 | "usecs-to-socket-create" 240 | ], 241 | "properties": { 242 | "created-ts": { 243 | "type": "string", 244 | "title": "Montonic system time when TGen created this stream, in microseconds since some arbitrary, fixed point in the past." 245 | }, 246 | "now-ts": { 247 | "type": "string", 248 | "title": "Montonic system time when TGen computed elapsed microseconds for this stream, in microseconds since some arbitrary, fixed point in the past." 249 | }, 250 | "usecs-to-checksum-recv": { 251 | "type": "string", 252 | "title": "Elapsed microseconds until the TGen client has received the checksum from the TGen server, or -1 if missing (step 11)" 253 | }, 254 | "usecs-to-checksum-send": { 255 | "type": "string", 256 | "title": "Elapsed microseconds until the TGen client has sent the checksum to the TGen server, or -1 if missing (step 11)" 257 | }, 258 | "usecs-to-command": { 259 | "type": "string", 260 | "title": "Elapsed microseconds until the TGen client has sent the command to the TGen server, or -1 if missing (step 7)" 261 | }, 262 | "usecs-to-first-byte-recv": { 263 | "type": "string", 264 | "title": "Elapsed microseconds until the TGen client has received the first payload byte, or -1 if missing (step 9)" 265 | }, 266 | "usecs-to-first-byte-send": { 267 | "type": "string", 268 | "title": "Elapsed microseconds until the TGen client has sent the first payload byte, or -1 if missing (step 9)" 269 | }, 270 | "usecs-to-last-byte-recv": { 271 | "type": "string", 272 | "title": "Elapsed microseconds until the TGen client has received the last payload byte, or -1 if missing (step 10)" 273 | }, 274 | "usecs-to-last-byte-send": { 275 | "type": "string", 276 | "title": "Elapsed microseconds until the TGen client has sent the last payload byte, or -1 if missing (step 10)" 277 | }, 278 | "usecs-to-proxy-choice": { 279 | "type": "string", 280 | "title": "Elapsed microseconds until the TGen client has received the SOCKS choice from the Tor client, or -1 if missing (step 4)" 281 | }, 282 | "usecs-to-proxy-init": { 283 | "type": "string", 284 | "title": "Elapsed microseconds until the TGen client has sent the SOCKS initialization to the Tor client, or -1 if missing (step 3)" 285 | }, 286 | "usecs-to-proxy-request": { 287 | "type": "string", 288 | "title": "Elapsed microseconds until the TGen client has sent the SOCKS request to the Tor client, or -1 if missing (step 5)" 289 | }, 290 | "usecs-to-proxy-response": { 291 | "type": "string", 292 | "title": "Elapsed microseconds until the TGen client has received the SOCKS response from the Tor client, or -1 if missing (step 6)" 293 | }, 294 | "usecs-to-response": { 295 | "type": "string", 296 | "title": "Elapsed microseconds until the TGen client has received the command from the TGen server, or -1 if missing (step 8)" 297 | }, 298 | "usecs-to-socket-connect": { 299 | "type": "string", 300 | "title": "Elapsed microseconds until the TGen client has connected to the Tor client's SOCKS port, or -1 if missing (step 2)" 301 | }, 302 | "usecs-to-socket-create": { 303 | "type": "string", 304 | "title": "Elapsed microseconds until the TGen client has opened a TCP connection to the Tor client's SOCKS port, or -1 if missing (step 1)" 305 | } 306 | } 307 | }, 308 | "transport_info": { 309 | "type": "object", 310 | "title": "Information about the TGen transport", 311 | "required": [ 312 | "error", 313 | "fd", 314 | "local", 315 | "proxy", 316 | "remote", 317 | "state" 318 | ], 319 | "properties": { 320 | "error": { 321 | "type": "string", 322 | "title": "Error code, or NONE if no error occurred" 323 | }, 324 | "fd": { 325 | "type": "string", 326 | "title": "File descriptor" 327 | }, 328 | "local": { 329 | "type": "string", 330 | "title": "Local host name, IP address, and TCP port" 331 | }, 332 | "proxy": { 333 | "type": "string", 334 | "title": "Proxy host name, IP address, and TCP port" 335 | }, 336 | "remote": { 337 | "type": "string", 338 | "title": "Remote host name, IP address, and TCP port" 339 | }, 340 | "state": { 341 | "type": "string", 342 | "title": "Last recorded state of the transport, one of CONNECT,INIT,CHOICE,REQUEST,AUTH_{REQUEST,RESPONSE},RESPONSE_{STATUS,TYPE,IPV4,NAMELEN,NAME},SUCCESS_{OPEN,EOF},ERROR" 343 | } 344 | } 345 | }, 346 | "unix_ts_end": { 347 | "type": "number", 348 | "title": "Final end time of the measurement, obtained from the log time of the [stream-success] or [stream-error] log message, given in seconds since the epoch" 349 | }, 350 | "unix_ts_start": { 351 | "type": "number", 352 | "title": "Initial start time of the measurement, obtained by subtracting the largest number of elapsed microseconds in time_info from unix_ts_end, given in seconds since the epoch" 353 | } 354 | } 355 | } 356 | } 357 | } 358 | }, 359 | "tor": { 360 | "type": "object", 361 | "title": "Metadata obtained from client-side Tor controller logs", 362 | "required": [ 363 | "circuits", 364 | "streams" 365 | ], 366 | "properties": { 367 | "circuits": { 368 | "type": "object", 369 | "title": "Information about Tor circuits, by circuit identifier, obtained from CIRC and CIRC_MINOR events, for all circuits created by the Tor client", 370 | "propertyNames": { 371 | "pattern": "^[0-9]+$" 372 | }, 373 | "additionalProperties": { 374 | "type": "object", 375 | "title": "Information about a Tor circuit", 376 | "required": [ 377 | "circuit_id", 378 | "elapsed_seconds", 379 | "unix_ts_end", 380 | "unix_ts_start" 381 | ], 382 | "additionalProperties": false, 383 | "properties": { 384 | "build_quantile": { 385 | "type": "number", 386 | "title": "Circuit build time quantile, obtained from the most recent BUILDTIMEOUT_SET event preceding the CIRC LAUNCHED event" 387 | }, 388 | "build_timeout": { 389 | "type": "integer", 390 | "title": "Circuit build time in milliseconds, obtained from the most recent BUILDTIMEOUT_SET event preceding the CIRC event with status LAUNCHED" 391 | }, 392 | "buildtime_seconds": { 393 | "type": "number", 394 | "title": "Build time in seconds, computed as time elapsed between CIRC LAUNCHED and CIRC BUILT events" 395 | }, 396 | "circuit_id": { 397 | "type": "integer", 398 | "title": "Circuit identifier, obtained from CIRC and CIRC_MINOR events" 399 | }, 400 | "elapsed_seconds": { 401 | "type": "array", 402 | "title": "Elapsed seconds until receiving and logging CIRC and CIRC_MINOR events", 403 | "items": { 404 | "type": "array", 405 | "title": "Elapsed seconds until reaching a given circuit status change", 406 | "items": [ 407 | { 408 | "type": "string", 409 | "title": "Circuit status change" 410 | }, 411 | { 412 | "type": "number", 413 | "title": "Elapsed seconds" 414 | } 415 | ] 416 | } 417 | }, 418 | "failure_reason_local": { 419 | "type": "string", 420 | "title": "Local failure reason, obtained from CIRC FAILED events" 421 | }, 422 | "failure_reason_remote": { 423 | "type": "string", 424 | "title": "Remote failure reason, obtained from CIRC FAILED events" 425 | }, 426 | "path": { 427 | "type": "array", 428 | "title": "Path information", 429 | "items": { 430 | "type": "array", 431 | "title": "Elapsed seconds until extending the circuit to a given relay", 432 | "items": [ 433 | { 434 | "type": "string", 435 | "pattern": "^\\$[0-9A-Z]{40}~[0-9a-zA-Z]{1,19}$", 436 | "title": "Relay fingerprint and nickname" 437 | }, 438 | { 439 | "type": "number", 440 | "minimum": 0, 441 | "title": "Elapsed seconds" 442 | } 443 | ] 444 | } 445 | }, 446 | "unix_ts_end": { 447 | "type": "number", 448 | "title": "Final end time of the circuit, obtained from the log time of the last CIRC CLOSED or CIRC FAILED event, given in seconds since the epoch" 449 | }, 450 | "unix_ts_start": { 451 | "type": "number", 452 | "title": "Initial start time of the circuit, obtained from the log time of the CIRC LAUNCHED event, given in seconds since the epoch" 453 | } 454 | } 455 | } 456 | }, 457 | "streams": { 458 | "type": "object", 459 | "title": "Information about Tor stream, by stream identifier, obtained from STREAM events, for all streams created by the Tor client", 460 | "propertyNames": { 461 | "pattern": "^[0-9]+$" 462 | }, 463 | "additionalProperties": { 464 | "type": "object", 465 | "title": "Information about a Tor stream", 466 | "required": [ 467 | "circuit_id", 468 | "elapsed_seconds", 469 | "stream_id", 470 | "target", 471 | "unix_ts_end", 472 | "unix_ts_start" 473 | ], 474 | "additionalProperties": false, 475 | "properties": { 476 | "circuit_id": { 477 | "title": "Circuit identifier, obtained from STREAM events" 478 | }, 479 | "elapsed_seconds": { 480 | "type": "array", 481 | "title": "Elapsed seconds until receiving and logging STREAM events", 482 | "items": { 483 | "type": "array", 484 | "items": [ 485 | { 486 | "type": "string", 487 | "title": "Stream purpose and STREAM event status" 488 | }, 489 | { 490 | "type": "number", 491 | "title": "Elapsed seconds" 492 | } 493 | ] 494 | } 495 | }, 496 | "failure_reason_local": { 497 | "type": "string", 498 | "title": "Local failure reason, obtained from STREAM FAILED events" 499 | }, 500 | "failure_reason_remote": { 501 | "type": "string", 502 | "title": "Remote failure reason, obtained from STREAM FAILED events" 503 | }, 504 | "source": { 505 | "type": "string", 506 | "title": "Stream source IP address and TCP port, obtained from STREAM NEW or STREAM NEWRESOLVE events" 507 | }, 508 | "stream_id": { 509 | "type": "integer", 510 | "title": "Stream identifier, unique at least for the lifetime of this stream" 511 | }, 512 | "target": { 513 | "type": "string", 514 | "title": "Stream target domain name and TCP port, obtained from STREAM events", 515 | "examples": [ 516 | "jzxfvaupigl7hkemf4jhfi2vrruvbb7ucyiwdolkkc2hf3xlm34f3qyd.onion:8080" 517 | ] 518 | }, 519 | "unix_ts_end": { 520 | "type": "number", 521 | "title": "Final end time of the stream, obtained from the log time of the last STREAM CLOSED or STREAM FAILED event, given in seconds since the epoch" 522 | }, 523 | "unix_ts_start": { 524 | "type": "number", 525 | "title": "Initial start time of the stream, obtained from the log time of the first STREAM NEW or STREAM NEWRESOLVE event, given in seconds since the epoch" 526 | } 527 | } 528 | } 529 | } 530 | } 531 | } 532 | } 533 | } 534 | }, 535 | "type": { 536 | "type": "string", 537 | "title": "Document type", 538 | "const": "onionperf" 539 | }, 540 | "version": { 541 | "type": "string", 542 | "title": "Document version", 543 | "pattern": "^3\\.[0-9]+$" 544 | } 545 | } 546 | } 547 | -------------------------------------------------------------------------------- /schema/onionperf-4.0.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema", 3 | "$id": "https://gitlab.torproject.org/tpo/metrics/onionperf/-/raw/master/schema/onionperf-4.0.json", 4 | "type": "object", 5 | "title": "OnionPerf analysis JSON file format 4.0", 6 | "required": [ 7 | "data", 8 | "type", 9 | "version" 10 | ], 11 | "properties": { 12 | "data": { 13 | "type": "object", 14 | "title": "Measurement data by source name", 15 | "propertyNames": { 16 | "pattern": "^[A-Za-z0-9-]+$" 17 | }, 18 | "additionalProperties": { 19 | "type": "object", 20 | "title": "Measurement data from a single source", 21 | "required": [ 22 | "measurement_ip", 23 | "tgen", 24 | "tor" 25 | ], 26 | "properties": { 27 | "measurement_ip": { 28 | "type": "string", 29 | "title": "Public IP address of the measuring host." 30 | }, 31 | "tgen": { 32 | "type": "object", 33 | "title": "Measurement data obtained from client-side TGen logs", 34 | "required": [ 35 | "streams" 36 | ], 37 | "properties": { 38 | "streams": { 39 | "type": "object", 40 | "title": "Measurement data, by TGen stream identifier", 41 | "additionalProperties": { 42 | "type": "object", 43 | "title": "Information on a single measurement, obtained from a single [stream-success] or [stream-error] log message (except for elapsed_seconds)", 44 | "required": [ 45 | "byte_info", 46 | "is_complete", 47 | "is_error", 48 | "is_success", 49 | "stream_id", 50 | "stream_info", 51 | "time_info", 52 | "transport_info", 53 | "unix_ts_end", 54 | "unix_ts_start" 55 | ], 56 | "properties": { 57 | "byte_info": { 58 | "type": "object", 59 | "title": "Information on sent and received bytes", 60 | "required": [ 61 | "payload-bytes-recv", 62 | "payload-bytes-send", 63 | "payload-progress-recv", 64 | "payload-progress-send", 65 | "total-bytes-recv", 66 | "total-bytes-send" 67 | ], 68 | "properties": { 69 | "payload-bytes-recv": { 70 | "type": "string", 71 | "pattern": "^[0-9]+$", 72 | "title": "Number of payload bytes received" 73 | }, 74 | "payload-bytes-send": { 75 | "type": "string", 76 | "pattern": "^[0-9]+$", 77 | "title": "Number of payload bytes sent" 78 | }, 79 | "payload-progress-recv": { 80 | "type": "string", 81 | "pattern": "^[0-9]+\\.[0-9]+%$", 82 | "title": "Progress of receiving payload in percent" 83 | }, 84 | "payload-progress-send": { 85 | "type": "string", 86 | "pattern": "^[0-9]+\\.[0-9]+%$", 87 | "title": "Progress of sending payload in percent" 88 | }, 89 | "total-bytes-recv": { 90 | "type": "string", 91 | "pattern": "^[0-9]+$", 92 | "title": "Total number of bytes received" 93 | }, 94 | "total-bytes-send": { 95 | "type": "string", 96 | "pattern": "^[0-9]+$", 97 | "title": "Total number of bytes sent" 98 | } 99 | } 100 | }, 101 | "elapsed_seconds": { 102 | "type": "object", 103 | "title": "Elapsed seconds until a given number or fraction of payload bytes have been received or sent, obtained from [stream-status], [stream-success], and [stream-error] log messages, only included if the measurement was a success", 104 | "properties": { 105 | "payload_bytes_recv": { 106 | "type": "object", 107 | "title": "Number of received payload bytes", 108 | "propertyNames": { 109 | "pattern": "^[0-9]+$" 110 | }, 111 | "additionalProperties": { 112 | "type": "number", 113 | "title": "Elapsed seconds" 114 | } 115 | }, 116 | "payload_bytes_send": { 117 | "type": "object", 118 | "title": "Number of sent payload bytes", 119 | "propertyNames": { 120 | "pattern": "^[0-9]+$" 121 | }, 122 | "additionalProperties": { 123 | "type": "number", 124 | "title": "Elapsed seconds" 125 | } 126 | }, 127 | "payload_progress_recv": { 128 | "type": "object", 129 | "title": "Fraction of received payload bytes", 130 | "propertyNames": { 131 | "pattern": "^[01]\\.[0-9]$" 132 | }, 133 | "additionalProperties": { 134 | "type": "number", 135 | "title": "Elapsed seconds" 136 | } 137 | }, 138 | "payload_progress_send": { 139 | "type": "object", 140 | "title": "Fraction of sent payload bytes", 141 | "propertyNames": { 142 | "pattern": "^[01]\\.[0-9]$" 143 | }, 144 | "additionalProperties": { 145 | "type": "number", 146 | "title": "Elapsed seconds" 147 | } 148 | } 149 | } 150 | }, 151 | "is_complete": { 152 | "type": "boolean", 153 | "title": "Whether the stream finished, no matter the error state, which is always true, or otherwise the measurement would not be included here" 154 | }, 155 | "is_error": { 156 | "type": "boolean", 157 | "title": "Whether an error occurred" 158 | }, 159 | "is_success": { 160 | "type": "boolean", 161 | "title": "Whether the measurement was a success" 162 | }, 163 | "stream_id": { 164 | "type": "string", 165 | "title": "Stream identifier" 166 | }, 167 | "stream_info": { 168 | "type": "object", 169 | "title": "Information about the TGen stream", 170 | "required": [ 171 | "error", 172 | "id", 173 | "name", 174 | "peername", 175 | "recvsize", 176 | "recvstate", 177 | "sendsize", 178 | "sendstate", 179 | "vertexid" 180 | ], 181 | "properties": { 182 | "error": { 183 | "type": "string", 184 | "title": "Error code, or NONE if no error occurred" 185 | }, 186 | "id": { 187 | "type": "string", 188 | "title": "Stream numerical identifier, or 0 if the stream failed" 189 | }, 190 | "name": { 191 | "type": "string", 192 | "title": "Hostname of the TGen client" 193 | }, 194 | "peername": { 195 | "type": "string", 196 | "title": "Hostname of the TGen server" 197 | }, 198 | "recvsize": { 199 | "type": "string", 200 | "title": "Number of expected payload bytes in the response" 201 | }, 202 | "recvstate": { 203 | "type": "string", 204 | "title": "Last recorded receive state of the stream, one of RECV_{NONE,AUTHENTICATE,HEADER,MODEL,PAYLOAD,CHECKSUM,SUCCESS,ERROR}" 205 | }, 206 | "sendsize": { 207 | "type": "string", 208 | "title": "Number of expected payload bytes in the request" 209 | }, 210 | "sendstate": { 211 | "type": "string", 212 | "title": "Last recorded send state of the stream, one of SEND_{NONE,COMMAND,RESPONSE,PAYLOAD,CHECKSUM,FLUSH,SUCCESS,ERROR}" 213 | }, 214 | "vertexid": { 215 | "type": "string", 216 | "title": "Vertex identifier in the TGen model" 217 | } 218 | } 219 | }, 220 | "time_info": { 221 | "type": "object", 222 | "title": "Elapsed time until reaching given substeps in a measurement", 223 | "required": [ 224 | "created-ts", 225 | "now-ts", 226 | "usecs-to-checksum-recv", 227 | "usecs-to-checksum-send", 228 | "usecs-to-command", 229 | "usecs-to-first-byte-recv", 230 | "usecs-to-first-byte-send", 231 | "usecs-to-last-byte-recv", 232 | "usecs-to-last-byte-send", 233 | "usecs-to-proxy-choice", 234 | "usecs-to-proxy-init", 235 | "usecs-to-proxy-request", 236 | "usecs-to-proxy-response", 237 | "usecs-to-response", 238 | "usecs-to-socket-connect", 239 | "usecs-to-socket-create" 240 | ], 241 | "properties": { 242 | "created-ts": { 243 | "type": "string", 244 | "title": "Montonic system time when TGen created this stream, in microseconds since some arbitrary, fixed point in the past." 245 | }, 246 | "now-ts": { 247 | "type": "string", 248 | "title": "Montonic system time when TGen computed elapsed microseconds for this stream, in microseconds since some arbitrary, fixed point in the past." 249 | }, 250 | "usecs-to-checksum-recv": { 251 | "type": "string", 252 | "title": "Elapsed microseconds until the TGen client has received the checksum from the TGen server, or -1 if missing (step 11)" 253 | }, 254 | "usecs-to-checksum-send": { 255 | "type": "string", 256 | "title": "Elapsed microseconds until the TGen client has sent the checksum to the TGen server, or -1 if missing (step 11)" 257 | }, 258 | "usecs-to-command": { 259 | "type": "string", 260 | "title": "Elapsed microseconds until the TGen client has sent the command to the TGen server, or -1 if missing (step 7)" 261 | }, 262 | "usecs-to-first-byte-recv": { 263 | "type": "string", 264 | "title": "Elapsed microseconds until the TGen client has received the first payload byte, or -1 if missing (step 9)" 265 | }, 266 | "usecs-to-first-byte-send": { 267 | "type": "string", 268 | "title": "Elapsed microseconds until the TGen client has sent the first payload byte, or -1 if missing (step 9)" 269 | }, 270 | "usecs-to-last-byte-recv": { 271 | "type": "string", 272 | "title": "Elapsed microseconds until the TGen client has received the last payload byte, or -1 if missing (step 10)" 273 | }, 274 | "usecs-to-last-byte-send": { 275 | "type": "string", 276 | "title": "Elapsed microseconds until the TGen client has sent the last payload byte, or -1 if missing (step 10)" 277 | }, 278 | "usecs-to-proxy-choice": { 279 | "type": "string", 280 | "title": "Elapsed microseconds until the TGen client has received the SOCKS choice from the Tor client, or -1 if missing (step 4)" 281 | }, 282 | "usecs-to-proxy-init": { 283 | "type": "string", 284 | "title": "Elapsed microseconds until the TGen client has sent the SOCKS initialization to the Tor client, or -1 if missing (step 3)" 285 | }, 286 | "usecs-to-proxy-request": { 287 | "type": "string", 288 | "title": "Elapsed microseconds until the TGen client has sent the SOCKS request to the Tor client, or -1 if missing (step 5)" 289 | }, 290 | "usecs-to-proxy-response": { 291 | "type": "string", 292 | "title": "Elapsed microseconds until the TGen client has received the SOCKS response from the Tor client, or -1 if missing (step 6)" 293 | }, 294 | "usecs-to-response": { 295 | "type": "string", 296 | "title": "Elapsed microseconds until the TGen client has received the command from the TGen server, or -1 if missing (step 8)" 297 | }, 298 | "usecs-to-socket-connect": { 299 | "type": "string", 300 | "title": "Elapsed microseconds until the TGen client has connected to the Tor client's SOCKS port, or -1 if missing (step 2)" 301 | }, 302 | "usecs-to-socket-create": { 303 | "type": "string", 304 | "title": "Elapsed microseconds until the TGen client has opened a TCP connection to the Tor client's SOCKS port, or -1 if missing (step 1)" 305 | } 306 | } 307 | }, 308 | "transport_info": { 309 | "type": "object", 310 | "title": "Information about the TGen transport", 311 | "required": [ 312 | "error", 313 | "fd", 314 | "local", 315 | "proxy", 316 | "remote", 317 | "state" 318 | ], 319 | "properties": { 320 | "error": { 321 | "type": "string", 322 | "title": "Error code, or NONE if no error occurred" 323 | }, 324 | "fd": { 325 | "type": "string", 326 | "title": "File descriptor" 327 | }, 328 | "local": { 329 | "type": "string", 330 | "title": "Local host name, IP address, and TCP port" 331 | }, 332 | "proxy": { 333 | "type": "string", 334 | "title": "Proxy host name, IP address, and TCP port" 335 | }, 336 | "remote": { 337 | "type": "string", 338 | "title": "Remote host name, IP address, and TCP port" 339 | }, 340 | "state": { 341 | "type": "string", 342 | "title": "Last recorded state of the transport, one of CONNECT,INIT,CHOICE,REQUEST,AUTH_{REQUEST,RESPONSE},RESPONSE_{STATUS,TYPE,IPV4,NAMELEN,NAME},SUCCESS_{OPEN,EOF},ERROR" 343 | } 344 | } 345 | }, 346 | "unix_ts_end": { 347 | "type": "number", 348 | "title": "Final end time of the measurement, obtained from the log time of the [stream-success] or [stream-error] log message, given in seconds since the epoch" 349 | }, 350 | "unix_ts_start": { 351 | "type": "number", 352 | "title": "Initial start time of the measurement, obtained by subtracting the largest number of elapsed microseconds in time_info from unix_ts_end, given in seconds since the epoch" 353 | } 354 | } 355 | } 356 | } 357 | } 358 | }, 359 | "tor": { 360 | "type": "object", 361 | "title": "Metadata obtained from client-side Tor controller logs", 362 | "required": [ 363 | "circuits", 364 | "streams" 365 | ], 366 | "properties": { 367 | "circuits": { 368 | "type": "object", 369 | "title": "Information about Tor circuits, by circuit identifier, obtained from CIRC and CIRC_MINOR events, for all circuits created by the Tor client", 370 | "propertyNames": { 371 | "pattern": "^[0-9]+$" 372 | }, 373 | "additionalProperties": { 374 | "type": "object", 375 | "title": "Information about a Tor circuit", 376 | "required": [ 377 | "circuit_id", 378 | "elapsed_seconds", 379 | "unix_ts_end", 380 | "unix_ts_start" 381 | ], 382 | "additionalProperties": false, 383 | "properties": { 384 | "build_quantile": { 385 | "type": "number", 386 | "title": "Circuit build time quantile, obtained from the most recent BUILDTIMEOUT_SET event preceding the CIRC LAUNCHED event" 387 | }, 388 | "build_timeout": { 389 | "type": "integer", 390 | "title": "Circuit build time in milliseconds, obtained from the most recent BUILDTIMEOUT_SET event preceding the CIRC event with status LAUNCHED" 391 | }, 392 | "buildtime_seconds": { 393 | "type": "number", 394 | "title": "Build time in seconds, computed as time elapsed between CIRC LAUNCHED and CIRC BUILT events" 395 | }, 396 | "circuit_id": { 397 | "type": "integer", 398 | "title": "Circuit identifier, obtained from CIRC and CIRC_MINOR events" 399 | }, 400 | "elapsed_seconds": { 401 | "type": "array", 402 | "title": "Elapsed seconds until receiving and logging CIRC and CIRC_MINOR events", 403 | "items": { 404 | "type": "array", 405 | "title": "Elapsed seconds until reaching a given circuit status change", 406 | "items": [ 407 | { 408 | "type": "string", 409 | "title": "Circuit status change" 410 | }, 411 | { 412 | "type": "number", 413 | "title": "Elapsed seconds" 414 | } 415 | ] 416 | } 417 | }, 418 | "failure_reason_local": { 419 | "type": "string", 420 | "title": "Local failure reason, obtained from CIRC FAILED events" 421 | }, 422 | "failure_reason_remote": { 423 | "type": "string", 424 | "title": "Remote failure reason, obtained from CIRC FAILED events" 425 | }, 426 | "filtered_out": { 427 | "type": "boolean", 428 | "title": "Whether this circuit has been filtered out when applying filters in `onionperf filter`." 429 | }, 430 | "path": { 431 | "type": "array", 432 | "title": "Path information", 433 | "items": { 434 | "type": "array", 435 | "title": "Elapsed seconds until extending the circuit to a given relay", 436 | "items": [ 437 | { 438 | "type": "string", 439 | "pattern": "^\\$[0-9A-Z]{40}~[0-9a-zA-Z]{1,19}$", 440 | "title": "Relay fingerprint and nickname" 441 | }, 442 | { 443 | "type": "number", 444 | "minimum": 0, 445 | "title": "Elapsed seconds" 446 | } 447 | ] 448 | } 449 | }, 450 | "unix_ts_end": { 451 | "type": "number", 452 | "title": "Final end time of the circuit, obtained from the log time of the last CIRC CLOSED or CIRC FAILED event, given in seconds since the epoch" 453 | }, 454 | "unix_ts_start": { 455 | "type": "number", 456 | "title": "Initial start time of the circuit, obtained from the log time of the CIRC LAUNCHED event, given in seconds since the epoch" 457 | } 458 | } 459 | } 460 | }, 461 | "streams": { 462 | "type": "object", 463 | "title": "Information about Tor stream, by stream identifier, obtained from STREAM events, for all streams created by the Tor client", 464 | "propertyNames": { 465 | "pattern": "^[0-9]+$" 466 | }, 467 | "additionalProperties": { 468 | "type": "object", 469 | "title": "Information about a Tor stream", 470 | "required": [ 471 | "circuit_id", 472 | "elapsed_seconds", 473 | "stream_id", 474 | "target", 475 | "unix_ts_end", 476 | "unix_ts_start" 477 | ], 478 | "additionalProperties": false, 479 | "properties": { 480 | "circuit_id": { 481 | "title": "Circuit identifier, obtained from STREAM events" 482 | }, 483 | "elapsed_seconds": { 484 | "type": "array", 485 | "title": "Elapsed seconds until receiving and logging STREAM events", 486 | "items": { 487 | "type": "array", 488 | "items": [ 489 | { 490 | "type": "string", 491 | "title": "Stream purpose and STREAM event status" 492 | }, 493 | { 494 | "type": "number", 495 | "title": "Elapsed seconds" 496 | } 497 | ] 498 | } 499 | }, 500 | "failure_reason_local": { 501 | "type": "string", 502 | "title": "Local failure reason, obtained from STREAM FAILED events" 503 | }, 504 | "failure_reason_remote": { 505 | "type": "string", 506 | "title": "Remote failure reason, obtained from STREAM FAILED events" 507 | }, 508 | "source": { 509 | "type": "string", 510 | "title": "Stream source IP address and TCP port, obtained from STREAM NEW or STREAM NEWRESOLVE events" 511 | }, 512 | "stream_id": { 513 | "type": "integer", 514 | "title": "Stream identifier, unique at least for the lifetime of this stream" 515 | }, 516 | "target": { 517 | "type": "string", 518 | "title": "Stream target domain name and TCP port, obtained from STREAM events", 519 | "examples": [ 520 | "jzxfvaupigl7hkemf4jhfi2vrruvbb7ucyiwdolkkc2hf3xlm34f3qyd.onion:8080" 521 | ] 522 | }, 523 | "unix_ts_end": { 524 | "type": "number", 525 | "title": "Final end time of the stream, obtained from the log time of the last STREAM CLOSED or STREAM FAILED event, given in seconds since the epoch" 526 | }, 527 | "unix_ts_start": { 528 | "type": "number", 529 | "title": "Initial start time of the stream, obtained from the log time of the first STREAM NEW or STREAM NEWRESOLVE event, given in seconds since the epoch" 530 | } 531 | } 532 | } 533 | } 534 | } 535 | } 536 | } 537 | } 538 | }, 539 | "filters": { 540 | "type": "object", 541 | "title": "Filters applied by type", 542 | "propertyNames": { 543 | "pattern": "^[A-Za-z/]" 544 | }, 545 | "additionalProperties": { 546 | "type": "array", 547 | "title": "Filters applied of a given type", 548 | "items": { 549 | "type": "object", 550 | "required": [ 551 | "name" 552 | ], 553 | "properties": { 554 | "name": { 555 | "type": "string", 556 | "title": "Filter name" 557 | }, 558 | "filepath": { 559 | "type": "string", 560 | "title": "File path" 561 | } 562 | } 563 | } 564 | } 565 | }, 566 | "type": { 567 | "type": "string", 568 | "title": "Document type", 569 | "const": "onionperf" 570 | }, 571 | "version": { 572 | "type": "string", 573 | "title": "Document version", 574 | "pattern": "^4\\.[0-9]+$" 575 | } 576 | } 577 | } 578 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from setuptools import setup 4 | 5 | with open('requirements.txt') as f: 6 | install_requires = f.readlines() 7 | 8 | setup(name='OnionPerf', 9 | version='0.8', 10 | description='A utility to monitor, measure, analyze, and visualize the performance of Tor and Onion Services', 11 | author='Rob Jansen', 12 | url='https://github.com/robgjansen/onionperf/', 13 | packages=['onionperf'], 14 | scripts=['onionperf/onionperf'], 15 | install_requires=install_requires 16 | ) 17 | --------------------------------------------------------------------------------