├── .dockerignore
├── .gitignore
├── .gitlab-ci.yml
├── .gitmodules
├── .mailmap
├── .readthedocs.yaml
├── COPYING
├── Dockerfile
├── NEWS
├── README.md
├── ci
│   ├── dnsjit
│   │   └── Dockerfile
│   ├── luacheck-run.sh
│   ├── mypy-run.sh
│   └── pylint-run.sh
├── configs
│   ├── doh-get.toml
│   ├── doh-post.toml
│   ├── doh.toml
│   ├── doq.toml
│   ├── dot.toml
│   ├── mixed.toml
│   ├── tcp.toml
│   └── udp.toml
├── docs
│   ├── analyzing-clients.md
│   ├── capturing-traffic.md
│   ├── configuration-file.md
│   ├── configuration-presets.md
│   ├── connection-chart.md
│   ├── extracting-clients.md
│   ├── img
│   │   ├── clients.png
│   │   ├── connections.png
│   │   ├── handshakes.png
│   │   ├── latency.png
│   │   └── response-rate.png
│   ├── index.md
│   ├── installation.md
│   ├── key-concepts.md
│   ├── latency-histogram.md
│   ├── performance-tuning.md
│   ├── raw-output.md
│   ├── replaying-traffic.md
│   ├── response-rate-chart.md
│   ├── showcase
│   │   ├── connections.png
│   │   └── handshakes.png
│   └── troubleshooting.md
├── mkdocs.yml
├── pcap
│   ├── count-packets-over-time.lua
│   ├── count-packets-per-ip.lua
│   ├── cut-pcap.lua
│   ├── extract-clients.lua
│   ├── filter-dnsq.lua
│   ├── generate-const-qps.lua
│   ├── limit-clients.lua
│   ├── merge-chunks.py
│   └── split-clients.lua
├── pylintrc
├── replay.py
├── replay
│   ├── dnssim
│   │   ├── .gitignore
│   │   ├── CHANGELOG.md
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   └── src
│   │       └── output
│   │           ├── dnssim.c
│   │           ├── dnssim.h
│   │           ├── dnssim.lua
│   │           └── dnssim
│   │               ├── common.c
│   │               ├── connection.c
│   │               ├── https2.c
│   │               ├── internal.h
│   │               ├── ll.h
│   │               ├── quic.c
│   │               ├── tcp.c
│   │               ├── tls.c
│   │               └── udp.c
│   ├── luaconfig.lua.j2
│   └── shotgun.lua
├── requirements.txt
└── tools
    ├── merge-data.py
    ├── mplhlpr
    │   ├── README
    │   ├── presentation.mplstyle
    │   ├── shotgun.mplstyle
    │   └── styles.py
    ├── plot-client-distribution.py
    ├── plot-connections.py
    ├── plot-latency.py
    ├── plot-packet-rate.py
    └── plot-response-rate.py

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Tests
.pytest_cache/
.mypy_cache/

# Data files
*.pcap

replay/dnssim/build*
replay/dnssim/compile_commands.json

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Tests
.pytest_cache/
.mypy_cache/

# Data files
*.pcap

# Built docs
site/

--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
variables:
  LC_ALL: C.UTF-8
  GIT_SUBMODULE_STRATEGY: recursive
  GIT_STRATEGY: clone # sometimes unclean submodule dirs otherwise
  DNSJIT_IMAGE_NAME: knot/shotgun/ci/dnsjit

stages:
  - docker
  - test
  - deploy

.rebuild-image-rule: &rebuild-image-rule
  if: $CI_PIPELINE_SOURCE == "push"
  changes:
    - ci/dnsjit/**/*
    - .gitlab-ci.yml

dnsjit-image:
  stage: docker
  services:
    - docker:23-dind
  image: docker:23-dind
  tags:
    - amd64
    - dind
  rules:
    - <<: *rebuild-image-rule
  script:
    # We concatenate '$CI_COMMIT_BRANCH$CI_COMMIT_TAG' here - they are exclusive,
    # only one of them should apply
    - docker login -u "$CI_REGISTRY_USER" -p "$CI_JOB_TOKEN" "$CI_REGISTRY"
    - docker pull "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:latest" || true # to use as many cached layers as possible (allowed to fail)
    - docker pull "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_BRANCH$CI_COMMIT_TAG" || true # to use as many cached layers as possible (allowed to fail)
    - docker build --build-arg "CI_REGISTRY=$CI_REGISTRY" --tag "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_SHORT_SHA" ci/dnsjit
    - test "$CI_COMMIT_BRANCH" == "$CI_DEFAULT_BRANCH"
      && docker tag "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_SHORT_SHA" "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:latest"
      && echo "Created 'latest' tag"
      || echo "Not on '$CI_DEFAULT_BRANCH' branch - skipping"
    - test -n "$CI_COMMIT_BRANCH$CI_COMMIT_TAG"
      && docker tag "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_SHORT_SHA" "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_BRANCH$CI_COMMIT_TAG"
      && docker push "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_BRANCH$CI_COMMIT_TAG"
      && echo "Pushed '$CI_COMMIT_BRANCH$CI_COMMIT_TAG' tag"
      || echo "Not on a branch/tag"
    - test "$CI_COMMIT_BRANCH" == "$CI_DEFAULT_BRANCH"
      && docker tag "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_SHORT_SHA" "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:latest"
      && docker push "$CI_REGISTRY/$DNSJIT_IMAGE_NAME:latest"
      && echo "Pushed 'latest' tag"
      || echo "Not on '$CI_DEFAULT_BRANCH' branch - skipping"

.debian: &debian
  # Concatenating $CI_COMMIT_BRANCH$CI_COMMIT_TAG - they are exclusive, only one of them applies
  image: $CI_REGISTRY/$DNSJIT_IMAGE_NAME:$CI_COMMIT_BRANCH$CI_COMMIT_TAG
  # This is unsupported in GitLab <16.1
  #needs: []
  #rules:
  #  - <<: *rebuild-image-rule
  #    needs:
  #      - job: dnsjit-image
  #        artifacts: false
  #  - when: on_success
  tags:
    - docker
    - linux
    - amd64

black:
  <<: *debian
  stage: test
  script:
    - python3 -m black --check . --extend-exclude 'replay/dnssim/vendor'

mypy:
  <<: *debian
  stage: test
  script:
    - ./ci/mypy-run.sh

pylint:
  <<: *debian
  stage: test
  script:
    - pip3 install -r requirements.txt
    - ./ci/pylint-run.sh

luacheck:
  <<: *debian
  stage: test
  script:
    - ./ci/luacheck-run.sh

.dnssim-build: &dnssim-build
  <<: *debian
  stage: test
  script:
    - mkdir replay/dnssim/build
    - cd replay/dnssim/build
    - echo "ASAN=$DNSSIM_ASAN UBSAN=$DNSSIM_UBSAN"
    - cmake .. -DASAN=${DNSSIM_ASAN} -DUBSAN=${DNSSIM_UBSAN}
    - make

dnssim-build:
  <<: *dnssim-build
  variables:
    DNSSIM_ASAN: 0
    DNSSIM_UBSAN: 0

dnssim-build-sanitizers:
  <<: *dnssim-build
  variables:
    DNSSIM_ASAN: 1
    DNSSIM_UBSAN: 1

docker:
  stage: deploy
  script:
    - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
    - docker build --no-cache -t "$CI_REGISTRY_IMAGE:$CI_COMMIT_TAG" .
    - docker push "$CI_REGISTRY_IMAGE:$CI_COMMIT_TAG"
  tags:
    - dind
  only:
    - tags

--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
[submodule "replay/dnssim/vendor/ngtcp2"]
	path = replay/dnssim/vendor/ngtcp2
	url = https://github.com/ngtcp2/ngtcp2.git

--------------------------------------------------------------------------------
/.mailmap:
--------------------------------------------------------------------------------
Nicki Křížek
Nicki Křížek

--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

version: 2

build:
  os: ubuntu-22.04
  tools:
    python: "3.11"

mkdocs:
  configuration: mkdocs.yml

# Build all formats
formats: all

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
ARG BASE_IMAGE=ubuntu:22.04
FROM $BASE_IMAGE AS runtime_base
LABEL maintainer="Petr Spacek"
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update -q
RUN apt-get upgrade -y -q
# required for PPA repo usage
RUN apt-get install -y -q -o APT::Install-Suggests=0 -o APT::Install-Recommends=0 \
    ca-certificates \
    lsb-release

# avoid PGP; the keyring download here would depend on TLS anyway,
# and I don't want to hardcode a PGP key ID here
RUN echo "deb [trusted=yes] https://ppa.launchpadcontent.net/dns-oarc/dnsjit/ubuntu `lsb_release -c -s` main" > /etc/apt/sources.list.d/dns-oarc.list
RUN apt-get update -q

# shotgun's runtime dependencies
RUN apt-get install -y -q -o APT::Install-Suggests=0 -o APT::Install-Recommends=0 \
    dnsjit \
    libnghttp2-14 \
    libuv1 \
    python3 \
    python3-pip

COPY requirements.txt /tmp/requirements.txt
RUN pip3 install -r /tmp/requirements.txt

# separate image for build, will not be tagged at the end
FROM runtime_base AS build_stage
RUN apt-get install -y -q -o APT::Install-Suggests=0 -o APT::Install-Recommends=0 \
    cmake \
    dnsjit-dev \
    g++ \
    gcc \
    git \
    jq \
    libnghttp2-dev \
    libuv1-dev \
    ninja-build \
    pkg-config \
    tshark

COPY . /shotgun
RUN mkdir /shotgun/replay/dnssim/build
WORKDIR /shotgun/replay/dnssim/build
RUN cmake .. -DCMAKE_BUILD_TYPE=Release -G Ninja
RUN cmake --build .
RUN cmake --install .

# copy only the installed artifacts and the Shotgun repo; throw away everything else
FROM runtime_base AS installed
COPY --from=build_stage /usr/local /usr/local
COPY . /shotgun
WORKDIR /shotgun
ENV PATH="${PATH}:/shotgun"

# cleanup intended for docker build --squash
RUN rm -rf /shotgun/.git

--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
Next
====

Incompatible changes
--------------------
- dnsjit 1.3.0+ is now required

Bugfixes
--------
- pcap/filter-dnsq: fix potential data mixup between packets

DNS Shotgun v20240219
=====================

Incompatible changes
--------------------
- CMake is now being used to build dnssim instead of Autotools
- GnuTLS 3.7.0+ is now required

Improvements
------------
- pcap/extract-clients: always reset UDP port numbers to 53 (!56)
- pcap/extract-clients: ability to write to stdout (!62)
- pcap/filter-dnsq: skip 'special' queries for \*.dotnxdomain.net (!58)
- pcap/split-clients: new tool to split larger PCAPs into smaller ones (!61)
- pcap/merge-chunks: allow disabling randomization (!67)
- tools/plot-latency: ability to diversify lines with linestyles (!69)
- tools/plot-response-rate: estimate worst-case drop caused by discarded packets (!74)
- tools/plot-packet-rate: handle incomplete last sampling period (!71)
- tools/plot-response-rate: ability to ignore RCODEs with small response rate (!73)
- pcap/filter-dnsq: ability to log malformed queries (!72)
- pcap/generate-const-qps: new tool to generate constant QPS (!33)
- tools: allow customizing plot charts with `SHOTGUN_MPLSTYLES` (!65)
- replay: `--preload` argument, mainly for dnssim debugging with sanitizers (!76)
- tools/plot-latency: use fractional values for humans in charts (!78)
- pcap/extract-clients: warn if some input packets were skipped (!80)
- dnssim: replace Autotools with CMake (!77, !86)
- configs: DoH configs with exclusively GET/POST methods (!82)
- tools/plot-response-rate: avoid division by zero (!89)
- tools/plot-latency: denser labels to improve logarithmic scale readability (!90)
- pcap/extract-clients: allow query rewriting - anonymization (!91)
- Support for DNS-over-QUIC :tada: (!75)

Bugfixes
--------
- tools/plot-response-rate: avoid white lines on white background (!55)
- tools/plot-client-distribution: properly handle file limit (!59)
- pcap: proper PCAP write error handling (!60)
- tools/plot-connections: set axis limits properly (!66)
- tools/plot-packet-rate: trim chart whitespace (!79)
- replay: do not exit silently when dnssim returns non-zero (!87)

DNS Shotgun v20210714
=====================

Incompatible changes
--------------------
- dnsjit 1.2.0+ is now required
- dnssim component was moved from dnsjit to this repository and has to be
  compiled separately (!52)

Improvements
------------
- replay: new option `stop_after_s` added to input (!40)
- tools/plot-packet-rate: new tool to plot packet rate in PCAPs (!46)
- replay/shotgun: support reading PCAP from stdin (!36)
- pcap/merge-chunks: utility to merge chunks on-the-fly (!37)
- tools/plot-response-rate: add `--rcodes-above-pct` option (!43)
- tools/plot-response-rate: add `--skip-total` option (!47)
- tools/plot-response-rate: add `--sum-rcodes` option (!47)
- tools/plot-latency: add `--group` option to plot aggregate data (!53)
- pcap/cut-pcap: new script to effectively trim a sorted PCAP (!50)
- configs: plot response rate chart with non-zero rcodes by default (!44)
- pcap/extract-clients: add --seed option for deterministic splitting (!34)
- pcap/filter-dnsq: improved malformed packet handling (!31)
- pcap/extract-clients: detect non-monotonic timestamps (!31)
- replay/shotgun: use faster mmpcap (!36)
- tools/plot-response-rate: add markers for plotting rcodes (!41)
- tools/plot-response-rate: improve color selection (!41, !42)

Bugfixes
--------
- pcap/extract-clients: fix imprecise timestamps and off-by-one error (!32)
- tools/plot-client-distribution: fix handling of empty query count intervals (!38)
- replay: add workaround to support /32 and /128 addresses (!39)


DNS Shotgun v20210203
=====================

- new user interface
  - replay.py is now the main tool instead of shotgun.lua
  - new configuration format to support complex scenarios
  - mixed-protocol traffic replay is supported
  - some scripts in `tools/` were renamed
- fast scripts for processing PCAP data
  - can be found in `pcap/`
  - these replaced the now removed pellet.py and scale.py
- added IPv4 support
- DoH is now fully supported
- comprehensive usage documentation is available at https://dns-shotgun.readthedocs.io
- minimum required dnsjit version is 1.1.0


DNS Shotgun v20200914
=====================

- fully supported UDP, TCP and DNS-over-TLS with
  [dnsjit](https://github.com/DNS-OARC/dnsjit) 1.0.0
- fully supported DNS-over-HTTPS with development version of dnsjit
- traffic can be replayed only over IPv6
- user interface
  - may be unstable
  - only very basic UI available
  - more complex scenarios are not supported yet
    (e.g. simultaneously using multiple protocols)
- pellet.py is functional, but it is very slow and requires python-dpkt from
  master

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DNS Shotgun

Realistic DNS benchmarking tool which supports multiple transport protocols:

- **DNS-over-TLS (DoT)**
- **DNS-over-HTTPS (DoH)**
- UDP
- TCP

*DNS Shotgun is capable of simulating hundreds of thousands of DoT/DoH
clients.*

Every client establishes its own connection(s) when communicating over
TCP-based protocols. This makes the tool uniquely suited for realistic DoT/DoH
benchmarks since its traffic patterns are very similar to those of real clients.

DNS Shotgun exports a number of statistics, such as query latencies, number of
handshakes and connections, response rate, response codes etc. in JSON format.
The toolchain also provides scripts that can plot these into readable charts.

## Features

- Supports DNS over UDP, TCP, TLS and HTTP/2
- Allows mixed-protocol simultaneous benchmark/testing
- Can bind to multiple source IP addresses
- Customizable client behaviour (idle time, TLS versions, HTTP method, ...)
- Replays captured queries over selected protocol(s) while keeping the original timing
- Suitable for high-performance realistic benchmarks
- Tools to plot charts from output data to evaluate results

## Caveats

- Requires captured traffic from clients
- Setup for proper benchmarks can be quite complex
- Isn't suitable for testing with a very low number of clients/queries
- Backward compatibility between versions isn't kept

## Documentation

**The latest documentation can be found at
<https://dns-shotgun.readthedocs.io/>.**

For installation and/or build instructions, see the
[Installation section](https://dns-shotgun.readthedocs.io/en/stable/installation.html)
of the documentation. The same section also contains instructions for building
the documentation itself.

## Showcase

The following charts highlight the unique capabilities of DNS Shotgun.
Measurements are demonstrated using DNS over TCP. In our test setup, DNS
Shotgun was able to keep sending/receiving:

- 400k queries per second over
- **500k simultaneously active TCP connections**, with about
- 25k handshakes per second, which amounts to
- 1.6M total established connections during the 60s test runtime.

![Active Connections](docs/showcase/connections.png)
![Handshakes](docs/showcase/handshakes.png)

### Socket statistics on server

```
# ss -s
Total: 498799 (kernel 0)
TCP:   498678 (estab 498466, closed 52, orphaned 0, synrecv 0, timewait 54/0), ports 0

Transport Total     IP        IPv6
*         0         -         -
RAW       4         1         3
UDP       19        2         17
TCP       498626    5         498621
INET      498649    8         498641
FRAG      0         0         0
```

### Test setup

- DNS over TCP against a [TCP echo server](https://gitlab.nic.cz/knot/echo-server)
- two physical servers: one for DNS Shotgun, another for the echo server
- both servers have 16 CPUs, 32 GB RAM and a 10GbE network card (up to 64 queues)
- servers were connected directly to each other - no latency
- the TCP network stack was tuned and there was no firewall

## License

DNS Shotgun is released under GPLv3 or later.

## Thanks

We'd like to thank the [Comcast Innovation
Fund](https://innovationfund.comcast.com) for sponsoring the work to support
the use of TCP, DoT and DoH protocols.

DNS Shotgun is built on top of the [dnsjit](https://github.com/DNS-OARC/dnsjit)
engine. We'd like to thank DNS-OARC and Jerry Lundström for the development and
continued support of dnsjit.
--------------------------------------------------------------------------------
/ci/dnsjit/Dockerfile:
--------------------------------------------------------------------------------
ARG DNSJIT_VERSION
ARG CI_REGISTRY=registry.nic.cz
FROM $CI_REGISTRY/knot/knot-resolver/ci/debian-12:knot-3.2

ENV DNSJIT_VERSION=${DNSJIT_VERSION:-1.3.0}

RUN apt-get update && \
    apt-get install -y cmake libgnutls28-dev libpcap-dev libck-dev

RUN pip install --break-system-packages --no-input \
    types-toml black mypy pylint

RUN wget "https://github.com/DNS-OARC/dnsjit/archive/refs/tags/v${DNSJIT_VERSION}.tar.gz" \
    --output-document 'dnsjit.tar.gz' && \
    tar -xzf dnsjit.tar.gz
WORKDIR dnsjit-${DNSJIT_VERSION}
RUN sh autogen.sh && \
    ./configure --prefix=/usr && \
    make && \
    make install

--------------------------------------------------------------------------------
/ci/luacheck-run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -e

luacheck --codes --formatter TAP .

--------------------------------------------------------------------------------
/ci/mypy-run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -e

# Find Python scripts
FILES=$(find . \
    -path './ci' -prune -o \
    -path './.git' -prune -o \
    -path './replay/dnssim/vendor' -prune -o \
    -name '*.py' -print)

python3 -m mypy \
    --explicit-package-bases \
    --ignore-missing-imports ${FILES}

--------------------------------------------------------------------------------
/ci/pylint-run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
set -e

# Find Python modules and standalone Python scripts
FILES=$(find . \
    -type d -exec test -e '{}/__init__.py' \; -print -prune -o \
    -path './ci' -prune -o \
    -path './.git' -prune -o \
    -path './replay/dnssim/vendor' -prune -o \
    -name '*.py' -print)

python3 -m pylint -j 0 --rcfile pylintrc ${FILES}

--------------------------------------------------------------------------------
/configs/doh-get.toml:
--------------------------------------------------------------------------------
# DNS-over-HTTPS traffic senders using HTTP/2 over TLS.
#
# The traffic sender configured below represents well-behaved clients
# using the GET method to send queries.
#
# For other client types, refer to the DoT or TCP configuration examples. All
# options used there also make sense for configuring DoH client behaviour, most
# notably connection idle timeout, TLS session resumption or TLS versions.
[traffic]

# Well-behaved DNS-over-HTTPS clients using GET method.
[traffic.DoH-GET]
protocol = "doh"
http_method = "GET"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

[charts.resumptions]
type = "connections"
kind = ["conn_hs", "tls_resumed"]
title = "TLS Session Resumption for New Connections"

--------------------------------------------------------------------------------
/configs/doh-post.toml:
--------------------------------------------------------------------------------
# DNS-over-HTTPS traffic senders using HTTP/2 over TLS.
#
# The traffic sender configured below represents well-behaved clients
# using the POST method to send queries.
#
# For other client types, refer to the DoT or TCP configuration examples. All
# options used there also make sense for configuring DoH client behaviour, most
# notably connection idle timeout, TLS session resumption or TLS versions.
[traffic]

# Well-behaved DNS-over-HTTPS clients using POST method.
[traffic.DoH-POST]
protocol = "doh"
http_method = "POST"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

[charts.resumptions]
type = "connections"
kind = ["conn_hs", "tls_resumed"]
title = "TLS Session Resumption for New Connections"

--------------------------------------------------------------------------------
/configs/doh.toml:
--------------------------------------------------------------------------------
# DNS-over-HTTPS traffic senders using HTTP/2 over TLS.
#
# The two traffic senders configured below both represent well-behaved
# clients, using either the GET or the POST method to send queries.
#
# For other client types, refer to the DoT or TCP configuration examples. All
# options used there also make sense for configuring DoH client behaviour, most
# notably connection idle timeout, TLS session resumption or TLS versions.
[traffic]

# Well-behaved DNS-over-HTTPS clients using GET method.
[traffic.DoH-GET]
protocol = "doh"
http_method = "GET"

# Well-behaved DNS-over-HTTPS clients using POST method.
[traffic.DoH-POST]
protocol = "doh"
http_method = "POST"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

[charts.resumptions]
type = "connections"
kind = ["conn_hs", "tls_resumed"]
title = "TLS Session Resumption for New Connections"

--------------------------------------------------------------------------------
/configs/doq.toml:
--------------------------------------------------------------------------------
# DNS-over-QUIC traffic senders.
#
# Below is a configuration example for DoQ clients. You may use this as
# inspiration to craft your own client-types.
#
# A GnuTLS priority string (set with the key `gnutls_priority`) may be used to
# control the TLS protocol version, the cipher suites used, etc. For more GnuTLS
# priority options, refer to: https://gnutls.org/manual/html_node/Priority-Strings.html
[traffic]

# Default DoQ clients
[traffic.DoQ]
protocol = "doq"

# Aggressive DNS-over-QUIC clients that terminate the connection as soon as queries are answered.
#[traffic.DoQ-no_idle]
#protocol = "doq"
#idle_timeout_s = 0

# DoQ clients without TLS Session Resumption
#[traffic.DoQ-no_tickets]
#protocol = "doq"
#gnutls_priority = "dnssim-default:%NO_TICKETS"
#zero_rtt = false # 0-RTT makes no sense without tickets

# DoQ clients without QUIC 0-RTT
#[traffic.DoQ-no_0rtt]
#protocol = "doq"
#zero_rtt = false

[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

[charts.resumptions]
type = "connections"
kind = ["conn_hs", "tls_resumed"]
title = "TLS Session Resumption for New Connections"

[charts.0rtt]
type = "connections"
kind = ["conn_hs", "quic_0rtt"]
title = "QUIC 0-RTT for New Connections"

--------------------------------------------------------------------------------
/configs/dot.toml:
--------------------------------------------------------------------------------
# DNS-over-TLS traffic senders.
#
# Below are some examples of DoT clients with various behaviours. You can use
# these as inspiration to craft your own client-types. Only the well-behaved
# client is enabled for this default "dot" scenario.
#
# A GnuTLS priority string can be used to control the TLS protocol version, the
# cipher suites used, TLS Session Resumption support, etc. For more GnuTLS
# priority options, refer to: https://gnutls.org/manual/html_node/Priority-Strings.html
[traffic]

# Well-behaved DoT clients utilizing both idle connections and TLS session resumption.
[traffic.DoT]
protocol = "dot"

# DoT clients without TLS Session Resumption
#[traffic.DoT-no_tickets]
#protocol = "dot"
#gnutls_priority = "dnssim-default:%NO_TICKETS"

# Aggressive DoT clients with TLS Session Resumption
#[traffic.DoT-no_idle]
#protocol = "dot"
#idle_timeout_s = 0

# Aggressive DoT clients without TLS Session Resumption
#[traffic.DoT-no_idle-no_tickets]
#protocol = "dot"
#idle_timeout_s = 0
#gnutls_priority = "dnssim-default:%NO_TICKETS"

# DoT clients with TLS 1.3
#[traffic.DoT-tls1_3]
#protocol = "dot"
#gnutls_priority = "dnssim-default:-VERS-ALL:+VERS-TLS1.3"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

[charts.resumptions]
type = "connections"
kind = ["conn_hs", "tls_resumed"]
title = "TLS Session Resumption for New Connections"

--------------------------------------------------------------------------------
/configs/mixed.toml:
--------------------------------------------------------------------------------
# Complex scenario simulating clients using multiple types of supported
# protocols (Do53 - UDP and TCP, DoH and DoT).
#
# Clients are randomly assigned exactly one of the configured traffic settings.
# The assignment respects the configured weights. In the following example:
#   60 % of clients use UDP
#   5 % + 5 % of clients use TCP (with/without utilizing idle connections)
#   10 % + 5 % of clients use DoT (with/without TLS Session Resumption)
#   10 % + 5 % of clients use DoH (using GET or POST)
#
# You can use this example as a starting point to create your own
# configurations with various clients and traffic distribution. Refer to the other
# example configs for more ideas of client types.
[traffic]

# DNS-over-UDP clients.
[traffic.UDP]
protocol = "udp"
weight = 60

# Well-behaved DNS-over-TCP clients.
[traffic.TCP]
protocol = "tcp"
weight = 5

# Aggressive DNS-over-TCP clients.
[traffic.TCP-no_idle]
protocol = "tcp"
weight = 5
idle_timeout_s = 0

# Well-behaved DNS-over-TLS clients.
[traffic.DoT]
protocol = "dot"
weight = 10

# Well-behaved DNS-over-TLS clients without TLS Session Resumption.
[traffic.DoT-no_tickets]
protocol = "dot"
weight = 5
gnutls_priority = "dnssim-default:%NO_TICKETS"

# Well-behaved DNS-over-HTTPS clients using GET method.
[traffic.DoH-GET]
protocol = "doh"
weight = 10
http_method = "GET"

# Well-behaved DNS-over-HTTPS clients using POST method.
[traffic.DoH-POST]
protocol = "doh"
weight = 5
http_method = "POST"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

--------------------------------------------------------------------------------
/configs/tcp.toml:
--------------------------------------------------------------------------------
# DNS-over-TCP traffic senders.
[traffic]

# Well-behaved DNS-over-TCP clients utilizing idle connections (default).
[traffic.TCP]
protocol = "tcp"

# Aggressive DNS-over-TCP clients that terminate the connection as soon as queries are answered.
#[traffic.TCP-no_idle]
#protocol = "tcp"
#idle_timeout_s = 0


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

[charts.connections]
type = "connections"
kind = "active"

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"

--------------------------------------------------------------------------------
/configs/udp.toml:
--------------------------------------------------------------------------------
# Plain DNS over UDP traffic sender.
[traffic]

# DNS-over-UDP clients.
[traffic.UDP]
protocol = "udp"


[charts]

[charts.latency]
type = "latency"

[charts.response-rate]
type = "response-rate"

[charts.response-rate-rcodes]
type = "response-rate"
rcodes-above-pct = 0

--------------------------------------------------------------------------------
/docs/analyzing-clients.md:
--------------------------------------------------------------------------------
# Analyzing Clients

When you've created a pellets file that is ready to use for DNS Shotgun replay,
you may want to verify you didn't distort the original client population. There
is a tool that can be used to compare client distribution and activity between
the original traffic capture and the pellets file.

!!! note
    This step is optional and may not be necessary for larger client
    populations or for client populations with similar behaviour. Nevertheless,
    it's better to check your assumptions.

First, you need to run the client analysis script for both the original capture
(or rather the `filtered.pcap` file) and the processed pellets file.

```
$ pcap/count-packets-per-ip.lua -r filtered.pcap --csv filtered.csv
$ pcap/count-packets-per-ip.lua -r pellets.pcap --csv pellets.csv
```

Then, you can use another tool to plot a chart of these results.

```
$ tools/plot-client-distribution.py -o clients.png filtered.csv pellets.csv
```

## Client distribution chart

The following chart demonstrates how queries are distributed among clients. It
can be used to see how active your clients are and which clients contribute
most of the overall queries your resolver receives.

!!! warning
    The following chart displays the absolute number of queries, not QPS. When
    comparing multiple distributions, always make sure to use PCAPs of the same
    duration.
![Client distribution chart](img/clients.png)

There are several blobs on the chart that represent groups of clients. The area
of each blob visually signifies the total amount of queries that were received
from these clients.

For each blob, you can locate its center and read the X and Y axes values.
Please note that both axes are logarithmic. On the Y-axis you can read the mean
number of queries that a client represented in the blob has sent. On the
X-axis, you can read the percentage of clients that are represented by this
blob.

In the example above, the first blob from the left shows that almost 80 % of
clients send fewer than 10 queries. Around 20 % of clients send between 10 and
100 queries. Even though the remaining clients represent around 1 % of the
total client population, we can see that these clients generate significant
query traffic.

The comparison shows the two samples are quite similar. If these differences
were significant, you might want to consider changes to the pellets files.
If you used `pcap/limit-clients.lua` to generate these, using a different
`-s/--seed` might help.

--------------------------------------------------------------------------------
/docs/capturing-traffic.md:
--------------------------------------------------------------------------------
# Capturing Traffic

When replaying traffic using DNS Shotgun, you need to provide it with a PCAP
that contains extracted client data, or "*pellets*". You may not use an
arbitrary PCAP file. Instead, you must pre-process the raw PCAP capture into
pellets as described in the following sections.

!!! note
    DNS Shotgun's measurements are only as good as the data you feed it.
    High-quality input data that accurately represents your clients is
    crucial for realistic benchmarking. Results can vary greatly for different
    client populations.

## Raw capture assumptions

To start, you need a traffic capture from your network to work with. It only
needs to contain UDP DNS queries from clients towards your resolver. Other
traffic may be present as well, but it will be filtered out.

### Packets must be sorted by increasing timestamp

Some network or hardware conditions may cause the packets to appear in a
different order. To ensure correct order, use the `reordercap` command from
tshark/wireshark.

```
$ reordercap raw.pcap ordered.pcap
```

### Unique IP means unique client

Clients need to be identified somehow in the captured traffic. We decided to
use the IP address to tell clients apart. This should be a reasonable
assumption, unless your clients are behind NAT.

!!! warning
    If your real clients are behind NAT, this has major consequences and should
    be accounted for, since multiple real clients will be bundled into a single
    simulated one.

### Only UDP packets are used

If a large number of your clients already use DoT, DoH or TCP, you need to
somehow get their queries into plain UDP format. For example, Knot Resolver can
[mirror](https://knot-resolver.readthedocs.io/en/v5.2.1/modules-policy.html#policy.MIRROR)
incoming queries to UDP.
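For illustration, a minimal sketch of such a mirroring policy for Knot Resolver
(the sink address `127.0.0.2` is a placeholder; consult the policy
documentation linked above for the exact syntax of your version):

```
-- kresd configuration sketch: copy every incoming query to a UDP sink,
-- e.g. a host where tcpdump captures the raw PCAP
policy.add(policy.all(policy.MIRROR('127.0.0.2')))
```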
## Filtering DNS queries

In this step, UDP DNS queries from clients are extracted from the raw PCAP. If
the raw capture includes queries from the resolver to upstream servers, it is
_crucial_ to provide the script with the resolver's IP address(es) to filter out
outgoing queries.

```
$ pcap/filter-dnsq.lua -r ordered.pcap -w filtered.pcap -a $RESOLVER_IP
```

!!! tip
    You may also use this script to work with traffic captured directly from an
    interface chosen with `-i`. See `--help` for usage.

--------------------------------------------------------------------------------
/docs/configuration-file.md:
--------------------------------------------------------------------------------
# Configuration File

!!! tip
    You can find configuration files for presets in
    [`configs/`](https://gitlab.nic.cz/knot/shotgun/-/tree/master/configs). They
    are an excellent starting point to create your own configurations.

Configuration is written in [TOML](https://toml.io/en/). There are multiple sections that may have additional subsections.

- `[traffic]` contains one or more subsections that each define client behaviour, including protocol
- `[charts]` is an optional section which can contain subsections that define charts that should be automatically plotted
- `[defaults.traffic]` is an optional section that makes it possible to specify defaults shared by all traffic senders
- `[input]` is an optional section that specifies how to read input data

## [traffic] section

You can define one or more traffic senders with specific client behaviour. Every traffic sender has a name and may have multiple parameters. At the very least, each traffic sender must define `protocol`.

This is an example of a minimal configuration file sending all traffic as DNS-over-TLS, using defaults for everything. The name of the traffic sender here is "DoT".

```
[traffic]
[traffic.DoT]
protocol = "dot"
```

The following configuration parameters for traffic senders are supported.

### protocol

- `udp`: DNS over UDP
- `tcp`: DNS over TCP
- `dot`: DNS over TLS over TCP
- `doh`: DNS over HTTP/2 over TLS over TCP
- `doq`: DNS over QUIC

### weight

When multiple traffic senders are defined, weight affects the client
distribution between them. Each weight is relative to the sum of all weights.

Integer or float. Defaults to 1.


### idle_timeout_s

Determines how long clients keep the connection in an idle state, i.e. leaving
it established after they have received all answers and currently have no more
queries to send. An idle timeout of 0 means the client will close the
connection as soon as possible.

Integer. Defaults to 10 seconds.
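Putting the parameters above together, here is a sketch of a configuration with
two DoT senders (the sender names are arbitrary). With weights 3 and 1, roughly
75 % of clients are assigned to the first sender and 25 % to the second:

```
[traffic]

# 3 out of every 4 clients: well-behaved DoT with the default idle timeout
[traffic.DoT]
protocol = "dot"
weight = 3

# 1 out of every 4 clients: closes the connection as soon as possible
[traffic.DoT-aggressive]
protocol = "dot"
weight = 1
idle_timeout_s = 0
```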
### gnutls_priority

[GnuTLS priority string](https://gnutls.org/manual/html_node/Priority-Strings.html)
which can be used to select the TLS protocol version and features, for example:

```
gnutls_priority = "dnssim-default:%NO_TICKETS" # don't use TLS Session Resumption
gnutls_priority = "dnssim-default:-VERS-ALL:+VERS-TLS1.3" # only use TLS 1.3
```

String.

A non-standard `dnssim-default` (case-sensitive) keyword is allowed at the
beginning of the priority string, optionally with additional keywords separated
by colons (`:`). For conventional TLS over TCP connections, this gets replaced
by `NORMAL`, which lets the system's GnuTLS library determine the default
settings. For QUIC, this always sets the minimum TLS version to 1.3 as dictated
by [section 4.2 of RFC 9001](https://www.rfc-editor.org/rfc/rfc9001.html#section-4.2)
and disables some of the less secure ciphers (settings taken from
[Knot DNS](https://gitlab.nic.cz/knot/knot-dns/-/blob/v3.3.2/src/libknot/quic/quic.c#L50)).

!!! warning
    When `%NO_TICKETS` is set here for the DNS-over-QUIC transport, `zero_rtt`
    needs to be **disabled**. Otherwise, DNS Shotgun may misbehave.


### http_method

- `GET`
- `POST`

### timeout_s

Individual query timeout in seconds.

Integer. Defaults to 2 seconds.

!!! warning
    Increasing the query timeout can negatively impact DNS Shotgun's
    performance and is not recommended.

### handshake_timeout_s

Timeout for establishing a connection in seconds.

Integer. Defaults to 5 seconds.

### zero_rtt

Whether 0-RTT (early data) is enabled. Only applicable to DNS-over-QUIC.

Boolean. Defaults to `true`.

### Advanced settings

You shouldn't use these unless you need to.

- `cpu_factor`: override the default CPU thread distribution (UDP: 1, TCP: 2, DoT/DoH: 3)
- `max_clients`: the number of clients each dnssim instance can hold (a per-thread setting)
- `channel_size`: the number of queries that can be buffered before a thread starts to block
- `batch_size`: the number of queries processed in each loop

### CLI overrides

The following options can be used to override the CLI options for `replay.py`.
Values in the configuration file always take precedence over CLI options.

- `server`: target server's IPv4/IPv6 address
- `dns_port`: target server's port for plain DNS (UDP and TCP)
- `dot_port`: target server's port for DNS-over-TLS
- `doh_port`: target server's port for DNS-over-HTTPS

## [charts] section

This section is optional and is only provided as a convenience to automate
plotting charts after the test. Anything defined in this section can be
achieved by using the plotting scripts directly.

Similarly to the `[traffic]` section, it also contains named subsections. Every
such subsection must contain `type`, which determines the chart that should be
plotted. For example:

```
[charts]
[charts.response-rate]
type = "response-rate"
```

### type

Type determines which chart will be plotted. The following charts are supported:

- `response-rate`: [Response Rate Chart](response-rate-chart.md)
- `latency`: [Latency Histogram](latency-histogram.md)
- `connections`: [Connection Chart](connection-chart.md)

### title

Title of the chart.

### output

Output filename for the chart. Various file extensions can be used. Defaults to SVG.

### Other parameters

These depend on the specific chart type. Generally, any option that can be
passed directly to the plotting scripts can also be specified in the config.
Refer to the tools' `--help` for possible options.
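For example, the following sketch (adapted from the bundled preset configs)
plots a handshake chart with a custom title and output filename; `kind` is a
chart-specific option passed through to the plotting script:

```
[charts]

[charts.handshakes]
type = "connections"
kind = ["conn_hs", "failed_hs"]
title = "Handshakes over Time"
output = "handshakes.png"
```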
## [defaults] section

### [defaults.traffic] section

This section can provide defaults for all traffic senders. If a specific
traffic sender re-defines the same parameter, the traffic sender-specific value
takes precedence over the default value.

Any parameter that can be specified for traffic senders in the `[traffic]` section
can also be specified in this section. For example, to override the default
behaviour and disable TLS Session Resumption, you can use:

```
[defaults]
[defaults.traffic]
gnutls_priority = "dnssim-default:%NO_TICKETS"
```

## [input] section

Optionally specifies how to read input data.

```
[input]
pcap = "/path/to/input.pcap"
stop_after_s = 600
```

### pcap

Path to the PCAP file; overrides the value specified by the `--read` command line option.
Intended as a shortcut when re-running a test with the same dataset again and again.

### stop_after_s

Time limit for the test, in seconds (integer).
Reading queries from the PCAP will stop at the first packet with a timestamp >= `stop_after_s`.

Defaults to no limit, i.e. read all packets from the PCAP.

!!! warning
    Using the `stop_after_s` option negatively impacts DNS Shotgun's read
    performance and slows down PCAP processing by 50 %. If this performance
    penalty is unacceptable, cut the PCAP using external tools and avoid using
    this option.

--------------------------------------------------------------------------------
/docs/configuration-presets.md:
--------------------------------------------------------------------------------
# Configuration Presets

You can either use a configuration preset or create your own configuration. It
is possible to replay the original traffic over various different protocols
with different client behaviours simultaneously. For example, you can split
your traffic into 60 % UDP, 20 % DoT and 20 % DoH.

The following use-cases are predefined for convenience, without the need to
create a configuration file. You can pass these values instead of a filepath
to the `-c/--config` option of the `replay.py` utility; an example invocation
is shown at the bottom of this page.

- `udp`
    - 100 % DNS-over-UDP clients
- `tcp`
    - 100 % well-behaved DNS-over-TCP clients
- `dot`
    - 100 % well-behaved DNS-over-TLS clients using TLS Session Resumption
- `doh`
    - 50 % well-behaved DNS-over-HTTPS GET clients using TLS Session Resumption
    - 50 % well-behaved DNS-over-HTTPS POST clients using TLS Session Resumption
- `doq`
    - 100 % well-behaved DNS-over-QUIC clients
- `mixed`
    - 60 % DNS-over-UDP clients
    - 5 % well-behaved DNS-over-TCP clients
    - 5 % aggressive DNS-over-TCP clients
    - 10 % well-behaved DNS-over-TLS clients using TLS Session Resumption
    - 5 % well-behaved DNS-over-TLS clients without TLS Session Resumption
    - 10 % well-behaved DNS-over-HTTPS GET clients using TLS Session Resumption
    - 5 % well-behaved DNS-over-HTTPS POST clients using TLS Session Resumption

!!! note
    You can find configuration files for presets in
    [`configs/`](https://gitlab.nic.cz/knot/shotgun/-/tree/master/configs). They
    are an excellent starting point to create your own configurations.
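A sketch of such an invocation, replaying a pellets file with the `dot` preset
(the `-s` server option is assumed here; refer to `replay.py --help` for the
exact flags):

```
$ ./replay.py --read pellets.pcap --config dot -s $SERVER_IP
```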
--------------------------------------------------------------------------------
/docs/connection-chart.md:
--------------------------------------------------------------------------------
# Connection Chart

The connection chart can be used to visualize connection-related information,
such as the number of active established connections, handshake attempts,
successful TLS Session Resumptions or failed handshakes.

```
$ tools/plot-connections.py -k active -- DoT.json
$ tools/plot-connections.py -k conn_hs tls_resumed failed_hs -t "Handshakes over Time" DoT.json
```

The optional parameter `-k/--kind` can be used to select which data should be
plotted. The following values are supported.

- `active` means the number of currently active established connections
- `conn_hs` means the number of TCP or QUIC handshake attempts in the last second
- `failed_hs` means the number of failed handshakes. All kinds of connection
  setup failures will be included, whether it's a TCP handshake timeout, a TLS
  negotiation failure or anything else.
- `tls_resumed` means the number of connections that were resumed with TLS
  Session Resumption during the last second

!!! tip
    Using `--` to separate a list of JSON files after specifying
    `-k/--kind` might be needed in some cases.

![connections](img/connections.png)
![handshakes](img/handshakes.png)

--------------------------------------------------------------------------------
/docs/extracting-clients.md:
--------------------------------------------------------------------------------
# Extracting Clients

Once you have the `filtered.pcap` with DNS queries from clients, you can
process them into *pellets* - the pre-processed input files for DNS Shotgun.
The entire content of these files will be used during the replay stage - all
clients, for the entire duration of the file.

The following example takes the entire `filtered.pcap` and transforms it into
pellets. The pellets file will contain all the clients and it will have the
same duration as the original file.

```
$ pcap/extract-clients.lua -r filtered.pcap -O $OUTPUT_DIR
```

The produced pellets file is ready to be used as the input for DNS Shotgun
replay.

## Splitting original capture into multiple pellets files

It can be useful to have a long original capture file, which contains more
clients and queries. However, since a pellets file is replayed in its
entirety, you may want to split the original file into multiple pellets files
with a shorter duration.

For example, if your initial capture file is 30 minutes long, you could split
it into fifteen two-minute pellets files with the `-d/--duration` option.

```
$ pcap/extract-clients.lua -r filtered.pcap -O $OUTPUT_DIR -d 120
```

!!! tip
    It is useful to keep a collection of these original pellets files of the
    same duration. They can later be combined to create different test cases.

## Scaling-up the traffic

If you want to stress-test your infrastructure, you can combine these pellets
files together to effectively scale-up the traffic. The pellets files are
created in a way that lets you simply use the `mergecap` utility to combine them.

```
$ mergecap -w scaled.pcap $OUTPUT_DIR/*
```
!!! warning
    You can only merge chunks that were created with the same duration when
    calling `extract-clients.lua`. Modifying the chunks in other ways, such as
    attempting to shift or extend the traffic, will produce unexpected results.
    For more information, see [this
    discussion](https://gitlab.nic.cz/knot/shotgun/-/merge_requests/32#note_196879).

## Limiting the traffic

It is also possible to take a pellets file and scale-down its traffic. This is
done on a per-client basis. Either a client's entire query stream will be
present, or the client won't be present at all.

To limit the overall traffic, you can select the portion of the clients that
should be included. This can range from 0 to 1. For example, let's suppose we
want to scale-down the number of clients in the pellets file to 30 %.

```
$ pcap/limit-clients.lua -r pellets.pcap -w limited.pcap -l 0.3
```

An alternative approach is to split an original large pellets file into multiple
smaller ones using the `split-clients.lua` script. To split one big pellets file
into 10 smaller ones, use:

```
$ pcap/split-clients.lua -r pellets.pcap -O output_directory -n 10
```

Every client is assigned to exactly one output file. All of a client's packets
remain intact and go into a single file.

--------------------------------------------------------------------------------
/docs/img/clients.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/img/clients.png

--------------------------------------------------------------------------------
/docs/img/connections.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/img/connections.png

--------------------------------------------------------------------------------
/docs/img/handshakes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/img/handshakes.png

--------------------------------------------------------------------------------
/docs/img/latency.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/img/latency.png

--------------------------------------------------------------------------------
/docs/img/response-rate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/img/response-rate.png

--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
# DNS Shotgun

Realistic DNS benchmarking tool which supports multiple transport protocols:

- **DNS-over-QUIC (DoQ)**
- **DNS-over-TLS (DoT)**
- **DNS-over-HTTPS (DoH)**
- UDP
- TCP

*DNS Shotgun is capable of simulating hundreds of thousands of DoQ/DoT/DoH
clients.*

Every client establishes its own connection(s) when communicating over TCP-based
protocols. This makes the tool uniquely suited for realistic DoT/DoH benchmarks
since its traffic patterns are very similar to those of real clients.

Similarly, each client establishes its own connection(s) when communicating over
QUIC, utilizing its capability of sending/receiving queries over a single secure
connection, but with multiple mutually independent streams.

DNS Shotgun exports a number of statistics, such as query latencies, number of
handshakes and connections, response rate, response codes etc. in JSON format.
The toolchain also provides scripts that can plot these into readable charts.

## Features

- Supports DNS over UDP, TCP, TLS, HTTP/2, and QUIC
- Allows mixed-protocol simultaneous benchmark/testing
- Can bind to multiple source IP addresses
- Customizable client behaviour (idle time, TLS versions, HTTP method, ...)
- Replays captured queries over selected protocol(s) while keeping the original timing
- Suitable for high-performance realistic benchmarks
- Tools to plot charts from output data to evaluate results

## Caveats

- Requires captured traffic from clients
- Setup for proper benchmarks can be quite complex
- Isn't suitable for testing with a very low number of clients/queries
- Backward compatibility between versions isn't kept

## Code Repository

[https://gitlab.nic.cz/knot/shotgun](https://gitlab.nic.cz/knot/shotgun)

--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
# Installation

There are two options for using DNS Shotgun. You can either use a pre-built
docker image, or install the dependencies, compile the dnssim module and use
the scripts from the repository directly.

## Docker Image

A pre-built image can be obtained from the [CZ.NIC DNS Shotgun
Registry](https://gitlab.nic.cz/knot/shotgun/container_registry/65).

```bash
$ docker pull registry.nic.cz/knot/shotgun:v20240219
```

Alternatively, you can build the image yourself from the Dockerfile in the repository.
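For example, a minimal sketch of a local build (the image tag `dns-shotgun` is
arbitrary; the Dockerfile sits at the repository root):

```bash
$ docker build -t dns-shotgun .
```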
### Docker Usage

- Make sure to run with `--network host`.
- Mount input/output directories and files with `-v/--volume`.
- Using `--privileged` might slightly improve performance if you don't mind the security risk.

```bash
$ docker run \
    --network host \
    -v "$PWD:/mnt" \
    registry.nic.cz/knot/shotgun:v20240219 \
    $COMMAND
```

## Using scripts from sources

You can use the toolchain scripts directly from the git repository. You need to
ensure you have the required dependencies installed, and compile and install
the dnssim module. Also make sure to check out a tagged version, as
development happens in the master branch.

```bash
$ git clone https://gitlab.nic.cz/knot/shotgun.git
$ cd shotgun
$ git checkout v20240219
$ git submodule update --init --recursive
$ cd replay/dnssim
$ mkdir build && cd build
$ cmake ..
$ cmake --build .
$ sudo cmake --install .
```

### Dependencies

When using the scripts directly, the following dependencies are needed.

If you only wish to process shotgun JSON output (e.g. plot charts), then dnsjit
and compiling the dnssim module are not required.
57 | 
58 | - [dnsjit 1.3+](https://github.com/DNS-OARC/dnsjit): Can be installed from [DNS-OARC
59 |   repositories](https://dev.dns-oarc.net/packages/).
60 | - libuv
61 | - libnghttp2
62 | 
63 | - Python 3.6 or later
64 | - Python dependencies from [requirements.txt](https://gitlab.nic.cz/knot/shotgun/-/blob/master/requirements.txt)
65 | - (optional) tshark/wireshark for some PCAP pre-processing
66 | 
67 | ## Documentation
68 | 
69 | **The latest documentation can be found at
70 | <https://dns-shotgun.readthedocs.io>** — chances are that is what you are
71 | looking at right now. The documentation is also available in human-readable
72 | Markdown files in the source tree's `docs` subdirectory.
73 | 
74 | You may wish to edit the documentation locally and preview those local changes.
75 | To do that, [install MkDocs](https://www.mkdocs.org/user-guide/installation/),
76 | then, in the sources directory, run:
77 | 
78 | ```bash
79 | $ mkdocs build
80 | ```
81 | 
82 | This will create a new `site` directory containing the Shotgun documentation in
83 | HTML format.
84 | 
85 | For testing the locally built documentation with live-rebuild, MkDocs' built-in
86 | development server may be used like so:
87 | 
88 | ```bash
89 | $ mkdocs serve
90 | ```
91 | 
-------------------------------------------------------------------------------- /docs/key-concepts.md: --------------------------------------------------------------------------------
1 | # Key Concepts
2 | 
3 | DNS Shotgun is capable of simulating real client behaviour by replaying
4 | captured traffic over selected protocol(s). The timing of the original queries as
5 | well as their content is kept intact.
6 | 
7 | Realistic high-performance benchmarking requires a complex setup, especially for
8 | TCP-based protocols. However, the authors of this tool have successfully used it
9 | to benchmark and test various DNS implementations with up to hundreds of
10 | thousands of clients (meaning _connections_ for TCP-based transports) using
11 | commodity hardware. This requires the [performance tuning](performance-tuning.md)
12 | described in a later section.
13 | 
14 | ## Client
15 | 
16 | These docs often mention "*clients*", and we often use client counts to describe DNS
17 | infrastructure throughput in addition to queries per second (QPS). What is
18 | considered a client and why does it matter?
19 | 
20 | A client is the origin of one or more queries and is supposed to represent a
21 | single device, i.e. anything from a CPE such as a home/office router to a mobile
22 | device. Since traffic patterns of various devices can vary greatly, it is
23 | crucial to use traffic that most accurately represents your real clients.
24 | 
25 | In plain DNS sent over UDP, the concept of a client doesn't matter, since UDP is a
26 | stateless protocol and a packet is just a packet. Thus, QPS throughput may be a
27 | sufficient metric for UDP.
28 | 
29 | In stateful DNS protocols, such as DoT, DoH or TCP, much of the overhead and
30 | performance cost is caused by establishing the connection over which queries
31 | are subsequently sent. Therefore, the concept of a client becomes crucial for
32 | benchmarking stateful protocols.
33 | 
34 | !!! note
35 |     As an extreme example, consider 10k QPS sent over a single DoH connection
36 |     versus establishing 10k DoH connections, each with 1 QPS. While both
37 |     scenarios have the same overall QPS, the second one will consume vastly more
38 |     resources, especially when establishing the connections.
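To get a rough idea of how many clients (unique source IP addresses) your
captured traffic contains, the bundled `pcap/count-packets-per-ip.lua` script
logs the total client count and writes per-IP packet statistics. For example
(the CSV filename is just an example):

```
$ pcap/count-packets-per-ip.lua -r input.pcap --csv clients.csv
```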
39 | 
40 | ### Client replay guarantees
41 | 
42 | DNS Shotgun aims to provide the most realistic client behaviour when replaying
43 | the traffic. When you run DNS Shotgun, the following guarantees apply when
44 | using a stateful protocol.
45 | 
46 | - **Multiple clients never share a single connection.**
47 | - **Each client attempts to establish at least one connection.**
48 | - **A client may have zero, one or more (rarely) active established connections
49 |   at any time**, depending on its traffic and behaviour.
50 | 
51 | ## Real traffic
52 | 
53 | A key focus of this toolchain is to make the benchmarks as realistic as
54 | possible. Therefore, no synthetic queries or clients are generated. To
55 | effectively use this tool, you need a large amount of source PCAPs.
56 | Ideally, these contain the traffic from your own network.
57 | 
58 | !!! note
59 |     In case you'd prefer to use synthetic clients/queries anyway, you can just
60 |     generate the traffic and capture it in a PCAP for further processing. Doing
61 |     that is outside the scope of this documentation.
62 | 
63 | ### Traffic replay guarantees
64 | 
65 | - **The content of DNS messages is left intact.** Messages without a proper DNS
66 |   header or question section will be discarded.
67 | - **The timing of the DNS messages is kept as close to the original traffic as
68 |   possible.** If the tool detects a time skew larger than one second, it aborts
69 |   the test. However, the real time difference may be slightly larger due to
70 |   various buffers.
71 | 
-------------------------------------------------------------------------------- /docs/latency-histogram.md: --------------------------------------------------------------------------------
1 | # Latency Histogram
2 | 
3 | This very useful chart is a bit difficult to read and understand, but it
4 | provides a great deal of information about the overall latency from the
5 | client-side perspective. We use the logarithmic percentile histogram to display
6 | this data. [This
7 | article](https://blog.powerdns.com/2017/11/02/dns-performance-metrics-the-logarithmic-percentile-histogram/)
8 | provides an in-depth explanation of the chart and how to interpret it.
9 | 
10 | ```
11 | $ tools/plot-latency.py -t "DNS Latency Overhead" UDP.json TCP.json DoT.json DoH.json
12 | ```
13 | 
14 | ![latency overhead](img/latency.png)
15 | 
16 | The chart above illustrates why comparing just the response rate isn't a
17 | sufficient metric. For all protocols compared in this case, you'd get around a
18 | 99.5 % response rate. However, when you examine the client latency, you can see
19 | clear differences.
20 | 
21 | In the chart, 80 % of all queries are represented by the rightmost part of the
22 | chart - between the "slowest percentile" of 20 % and 100 %. For these
23 | queries, the latency for UDP, TCP, DoT or DoH is the same, which is one
24 | round trip. These represent immediate answers from the resolver (e.g. cached or
25 | refused), which are sent either over UDP or over an already established
26 | connection (for stateful protocols). The latency is 10 ms, or 1 RTT.
27 | 
28 | The most interesting part is between the 5 % and 20 % slowest percentile. For
29 | these 15 % of all queries, there are major differences between the latency of
30 | UDP, TCP and DoT/DoH. This illustrates the latency cost of setting up a
31 | connection where none is present. UDP is stateless and requires just 1 RTT. TCP
32 | requires an extra round trip to establish the connection and the latency for the
33 | client becomes 2 RTTs.
Finally, both DoT and DoH require an additional round
34 | trip for the TLS handshake and thus the overall latency cost becomes 3 RTTs.
35 | 
36 | The trailing 5 % of queries show no difference between protocols, since these
37 | are queries that aren't answered from the cache and the delay is introduced by
38 | the communication between the resolver and the upstream servers. The last 0.5 %
39 | of queries aren't answered by the resolver within 2 seconds and are considered
40 | a timeout by the client.
41 | 
-------------------------------------------------------------------------------- /docs/performance-tuning.md: --------------------------------------------------------------------------------
1 | # Performance Tuning
2 | 
3 | Any high-performance benchmark setup requires a separate server for generating
4 | the traffic, which then sends the traffic to the target server under test. In
5 | order to scale up DNS Shotgun so it performs well under heavy load, some
6 | performance tuning and network adjustments are needed.
7 | 
8 | !!! tip
9 |     An example of the performance tuning we use in our benchmarks can be found
10 |     in our [ansible
11 |     role](https://gitlab.nic.cz/knot/resolver-benchmarking/-/tree/master/roles/tuning).
12 | 
13 | ## Number of file descriptors
14 | 
15 | Make sure the number of available file descriptors is sufficient. This is
16 | typically necessary when running DNS Shotgun from a terminal. When using Docker,
17 | the defaults are usually sufficient.
18 | 
19 | ```
20 | $ ulimit -n 1000000
21 | ```
22 | 
23 | ## Ephemeral port range
24 | 
25 | Extending the ephemeral port range gives the tool more outgoing ports to work with.
26 | 
27 | ```
28 | $ sysctl -w net.ipv4.ip_local_port_range="1025 60999"
29 | ```
30 | 
31 | ## NIC queues
32 | 
33 | High-end network cards typically have multiple queues. Ideally, you want to set
34 | their number to be the same as the number of available CPUs.
35 | 
36 | ```
37 | $ ethtool -L $INTERFACE combined $NCPU
38 | ```
39 | 
40 | !!! note
41 |     It's important that the NIC interrupts from different queues are handled
42 |     by different CPUs. If there are throughput issues, you may want to verify
43 |     this is the case.
44 | 
45 | ## UDP
46 | 
47 | DNS Shotgun can generate quite bursty traffic. Increasing the receiving
48 | server's socket memory can help to absorb these bursts. If this buffer isn't
49 | sufficient, it can cause packet loss.
50 | 
51 | ```
52 | $ sysctl -w net.core.rmem_default="8192000"
53 | ```
54 | 
55 | ## TCP, DoT, DoH
56 | 
57 | Tuning the network stack for TCP isn't as straightforward and it's network-card
58 | specific. It's best to refer to the [kernel
59 | documentation](https://www.kernel.org/doc/html/latest/networking/device_drivers/ethernet/intel/ixgb.html#improving-performance)
60 | for your specific network card.
61 | 
62 | ## conntrack
63 | 
64 | For our benchmarks, we don't use iptables or any firewall. The `conntrack`
65 | module in particular probably won't be able to handle serious load. Make sure
66 | the module isn't loaded by the kernel if you're not using it.
67 | 
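A quick way to verify this is to look for the module among the loaded kernel
modules; no output from the command below means it isn't loaded (on modern
kernels the module is typically called `nf_conntrack`):

```
$ lsmod | grep conntrack
```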
-------------------------------------------------------------------------------- /docs/raw-output.md: --------------------------------------------------------------------------------
1 | # Raw Output
2 | 
3 | In the output directory of DNS Shotgun's `replay.py` tool, the following
4 | structure is created. Let's assume we ran a configuration that configures two
5 | traffic senders - `DoT` and `DoH`.
6 | 
7 | ```
8 | $OUTDIR
9 | ├── .config                # ignore this directory
10 | │   └── luaconfig.lua      # for debugging purposes only
11 | ├── data                   # directory with raw JSON output
12 | │   ├── DoH                # "DoH" traffic sender data
13 | │   │   ├── DoH-01.json    # raw data from first thread of DoH traffic sender
14 | │   │   ├── DoH-02.json    # raw data from second thread of DoH traffic sender
15 | │   │   └── ...            # raw data from other threads of DoH traffic sender
16 | │   ├── DoH.json           # merged raw data from all DoH sender threads
17 | │   ├── DoT                # "DoT" traffic sender data
18 | │   │   ├── DoT-01.json    # raw data from first thread of DoT traffic sender
19 | │   │   ├── DoT-02.json    # raw data from second thread of DoT traffic sender
20 | │   │   └── ...            # raw data from other threads of DoT traffic sender
21 | │   └── DoT.json           # merged raw data from all DoT sender threads
22 | └── charts                 # directory with automatically plotted charts (if configured)
23 |     ├── latency.svg        # chart comparing latency of DoT and DoH clients
24 |     └── response-rate.svg  # chart comparing the response rate of DoT and DoH clients
25 | ```
26 | 
27 | ## data directory
28 | 
29 | This directory contains the raw JSON data. Since DNS Shotgun typically operates
30 | with multiple threads, the results for each traffic sender are also provided
31 | per thread. However, since you typically don't care how the clients were
32 | emulated across threads, but only about their aggregate behaviour, a data file
33 | that contains the combined results of all threads belonging to the configured
34 | traffic sender is also provided.
35 | 
36 | Every configured traffic sender will have its own output directory of the same
37 | name. Inside, per-thread raw data are available. The aggregate file is directly
38 | in the `data/` directory, as a JSON file named after the configured traffic
39 | sender. The aggregate file is the one you typically want to use.
40 | 
41 | !!! note
42 |     The raw JSON file is versioned and is not intended to be forward or
43 |     backward compatible with various DNS Shotgun versions. You should use the
44 |     same version of the toolchain for both replay and interpreting the data.
45 | 
46 | !!! tip
47 |     If you wish to explore, format or interpret the raw JSON data, the
48 |     [jq](https://stedolan.github.io/jq/) utility can be useful for some
49 |     rudimentary processing.
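For example, a couple of generic invocations (a sketch only; the exact field
names inside the JSON differ between toolchain versions, so explore
interactively):

```
$ jq . data/DoT.json | less    # pretty-print the raw data
$ jq keys data/DoT.json        # list the top-level fields
```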
50 | 
51 | ## charts directory
52 | 
53 | This directory may not be present if you didn't configure any charts to be
54 | automatically plotted in the configuration file. If it is available, it
55 | contains the plotted charts that are described in the following sections.
56 | 
57 | When charts are plotted automatically, they always display data for all the
58 | configured traffic senders with their predefined names. If you wish to
59 | customize the charts, omit certain senders etc., you can use the plotting
60 | scripts directly from the CLI. These can be found in the `tools/` directory
61 | and you can refer to their `--help` for usage.
62 | 
-------------------------------------------------------------------------------- /docs/replaying-traffic.md: --------------------------------------------------------------------------------
1 | # Replaying Traffic
2 | 
3 | Once you've prepared the input pellets file with clients and either have your
4 | own configuration file or know which preset you want to use, you can use the
5 | following script to run DNS Shotgun.
6 | 
7 | ```
8 | $ replay.py -r pellets.pcap -c udp -s ::1
9 | ```
10 | 
11 | !!! tip
12 |     Use the `--help` option to explore other options.
13 | 
14 | During the replay, there is quite a bit of logging information that looks like
15 | this.
16 | 
17 | ```
18 | UDP-01 notice: total processed: 267; answers: 0; discarded: 2; ongoing: 172
19 | ```
20 | 
21 | The important thing to look out for is the number of `discarded` packets. If
22 | nearly all the packets, or a large portion of them, are discarded, it almost
23 | certainly indicates an improper setup or bad input data. The test should be
24 | aborted and the reason investigated. Increasing the `-v/--verbosity`
25 | level might help.
26 | 
27 | ## Binding to multiple source addresses
28 | 
29 | When sending traffic against a single IP/port combination of the target server,
30 | the source IP address has a limited number of ports it can utilize. A single
31 | IP address is insufficient to achieve hundreds of thousands of clients.
32 | 
33 | 
34 | DNS Shotgun can bind to multiple source addresses with the `-b/--bind-net`
35 | option. You can specify either an IP address or a network range using CIDR
36 | notation. Multiple values (either IPs, ranges or any combination of those) can
37 | be specified. When using CIDR notation, the network and broadcast address won't
38 | be used.
39 | 
40 | 
41 | ```
42 | $ replay.py -r pellets.pcap -c tcp -s fd00:dead:beef::cafe -b fd00:dead:beef::/124
43 | ```
44 | 
45 | !!! tip
46 |     Our rule of thumb is to use at least one source IP address for every 30k
47 |     clients. However, using more addresses is certainly better and can help to
48 |     avoid weird behaviour, slow performance and other issues that require
49 |     in-depth troubleshooting.
50 | 
51 | !!! note
52 |     If you're limited by the number of source addresses you can use, utilizing
53 |     either IPv6 unique-local addresses (fd00::/8) or private IPv4 ranges could
54 |     be helpful.
55 | 
56 | ## Emulating link latency
57 | 
58 | !!! warning
59 |     This is an advanced topic and emulating latency isn't necessary for many
60 |     scenarios.
61 | 
62 | Overall latency will affect the user's experience with DNS resolution. It also
63 | becomes much more relevant when using TCP and TLS, since the handshakes
64 | introduce additional round trips. When benchmarks are done in a data center
65 | with two servers that are directly connected to each other with practically no
66 | latency, it can provide a skewed view of the expected end-user latency.
67 | 
68 | Luckily, the `netem` Network Emulator makes it very simple to emulate various
69 | network conditions. For example, emulating latency on the sender side can be
70 | done quite easily. The following command adds 10 ms latency to outgoing
71 | packets, effectively simulating an RTT of 10 ms.
72 | 
73 | ```
74 | $ tc qdisc add dev $INTERFACE root netem limit 10000000 delay 10ms
75 | ```
76 | 
77 | !!! tip
78 |     For more possibilities, refer to `man netem.8`. Using a sufficiently large
79 |     buffer (limit) is essential for proper operation.
80 | 
81 | However, beware that the settings affect the entire interface. If you're going
82 | to emulate latency, it's best if the resolver-client traffic is on a separate
83 | interface, so the resolver-upstream traffic isn't negatively impacted.
84 | 
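When you're done testing, the emulated latency can be removed again by deleting
the root qdisc:

```
$ tc qdisc del dev $INTERFACE root
```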
-------------------------------------------------------------------------------- /docs/response-rate-chart.md: --------------------------------------------------------------------------------
1 | # Response Rate Chart
2 | 
3 | This basic chart can display the overall response rate over time. It is also
4 | possible to plot specific response codes (RCODEs), such as `NOERROR`.
5 | 
6 | ```
7 | $ tools/plot-response-rate.py -r 0 -o rr.png UDP.json
8 | ```
9 | 
10 | !!! tip
11 |     The image format depends on the output filename extension chosen with
12 |     `-o/--output`. `svg` is used by default, but other formats such as `png`
13 |     are supported as well.
14 | 
15 | The following chart displays the answer rate and the rate of `NOERROR` answers.
16 | In this measurement, the resolver was started with a cold cache. We can see the
17 | overall response rate is close to 100 %. The `NOERROR` response rate slightly
18 | increases over time from 72 % to around 75 % as the cache warms up.
19 | 
20 | ![UDP response rate](img/response-rate.png)
21 | 
-------------------------------------------------------------------------------- /docs/showcase/connections.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/showcase/connections.png -------------------------------------------------------------------------------- /docs/showcase/handshakes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ-NIC/shotgun/3e05bf46997ed5b6fa1b96c9690b8ee807c3df71/docs/showcase/handshakes.png -------------------------------------------------------------------------------- /docs/troubleshooting.md: --------------------------------------------------------------------------------
1 | # Troubleshooting
2 | 
3 | ## failed to send udp packet: too many open files
4 | 
5 | Increase the number of file descriptors. (See
6 | [`man limits.conf`](https://www.man7.org/linux/man-pages/man5/limits.conf.5.html)
7 | and/or `ulimit --help`)
8 | 
9 | ## fatal: aborting, real time drifted ahead of simulated time
10 | 
11 | This happens when DNS Shotgun can't keep up with the traffic it's supposed to
12 | send/receive. The tool attempts to keep the realistic timing of the original
13 | data and it aborts if it fails to keep that promise. This can have multiple
14 | causes.
15 | 
16 | - You're pushing the tool beyond the limits of what it can do, e.g.:
17 |     - Not enough computing power (are all CPUs utilized?)
18 |     - Insufficient network throughput (is the network tuned properly? are there enough source IPs?)
19 |     - Unresponsive resolver and/or too high `timeout_s`
20 | - NIC interrupts aren't properly distributed among CPUs
21 | - A single thread is assigned too much traffic
22 |     - This typically shouldn't be the case, but if a specific traffic sender is
23 |       *always* causing this failure, tweaking `cpu_factor` and/or the number of
24 |       threads might help
25 | 
26 | ## critical: buffer capacity exceeded, threads are blocked
27 | 
28 | This is an indication that a specific thread filled up its buffer and is now
29 | causing the entire tool to slow down, which will eventually cause the crash
30 | described above if it goes on for too long. If it only happens for a specific
31 | traffic sender, tweaking `cpu_factor` to change the thread distribution could help.
32 | 
33 | ## various warnings
34 | 
35 | Especially under heavy load, there can occasionally be some warnings.
36 | Sometimes it's a GnuTLS connection error, a mismatched response etc. The general
37 | rule is that a few different warnings during heavy load probably aren't something
38 | to be too concerned about. Typically, it's when the output is spammed by the same
39 | warning over and over that you have a problem.
40 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: DNS Shotgun 2 | theme: 3 | name: readthedocs 4 | navigation_depth: 1 5 | nav: 6 | - "Overview": index.md 7 | - installation.md 8 | - key-concepts.md 9 | - "Input Data": 10 | - capturing-traffic.md 11 | - extracting-clients.md 12 | - analyzing-clients.md 13 | - "Replay": 14 | - configuration-presets.md 15 | - configuration-file.md 16 | - replaying-traffic.md 17 | - performance-tuning.md 18 | - troubleshooting.md 19 | - "Interpreting Results": 20 | - raw-output.md 21 | - response-rate-chart.md 22 | - latency-histogram.md 23 | - connection-chart.md 24 | markdown_extensions: 25 | - admonition 26 | -------------------------------------------------------------------------------- /pcap/count-packets-over-time.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- count-packets-over-time.lua: count number of packets in input PCAP in time intervals 4 | 5 | local input = require("dnsjit.input.pcap").new() 6 | local layer = require("dnsjit.filter.layer").new() 7 | local object = require("dnsjit.core.objects") 8 | local log = require("dnsjit.core.log").new("count-packets-over-time.lua") 9 | local getopt = require("dnsjit.lib.getopt").new({ 10 | { "r", "read", "", "input file to read", "?" }, 11 | { "s", "stats_period", 100, "period for printing stats (ms)", "?" }, 12 | { nil, "csv", "", "path to the output CSV file (default: stdout)", "?" }, 13 | }) 14 | 15 | log:enable("all") 16 | 17 | -- Parse arguments 18 | local args = {} 19 | getopt:parse() 20 | args.read = getopt:val("r") 21 | args.stats_period = getopt:val("s") 22 | args.csv = getopt:val("csv") 23 | 24 | -- Display help 25 | if getopt:val("help") then 26 | getopt:usage() 27 | return 28 | end 29 | 30 | -- Check arguments 31 | if args.stats_period <= 0 then 32 | log:fatal("stats_period must be grater than 0") 33 | end 34 | 35 | -- Set up input 36 | if args.read ~= "" then 37 | if input:open_offline(args.read) ~= 0 then 38 | log:fatal("failed to open input PCAP "..args.read) 39 | end 40 | log:notice("using input PCAP "..args.read) 41 | else 42 | getopt:usage() 43 | log:fatal("input must be specified, use -r/-i") 44 | end 45 | layer:producer(input) 46 | local produce, pctx = layer:produce() 47 | 48 | -- Set up CSV 49 | local csv_output 50 | if args.csv ~= "" then 51 | csv_output = io.open(args.csv, 'w') 52 | if csv_output == nil then 53 | log:fatal('failed to open "'..args.csv..'" for writing') 54 | else 55 | log:notice('writing output CSV to "'..args.csv..'"') 56 | end 57 | else 58 | csv_output = io.stdout 59 | end 60 | 61 | local Stats = {} 62 | local StatsCounters = {} 63 | 64 | function Stats.new(stats_period_ms, output, format) 65 | if stats_period_ms == nil then 66 | stats_period_ms = 1000 67 | end 68 | if stats_period_ms <= 0 then 69 | log:fatal("statistics interval must be greater than 0") 70 | end 71 | if format == nil then 72 | format = "time_s,period_time_since_ms,period_time_until_ms,period_packets,total_packets,period_pps,total_pps" 73 | end 74 | 75 | local self = setmetatable({ 76 | _stats_period_ms = stats_period_ms, 77 | _output = output, 78 | _format = format, 79 | _time_first_ms = nil, -- time of the very first received packet 80 | _time_next_ms = nil, -- time when next stats begins 81 | _time_last_ms = nil, -- time of the last received packet 82 | _period = 
StatsCounters.new(), 83 | _total = StatsCounters.new(), 84 | }, { __index = Stats }) 85 | 86 | if self._output ~= nil then 87 | self._output:write(format.."\n") 88 | end 89 | 90 | return self 91 | end 92 | 93 | function Stats:display() 94 | if self._output == nil then 95 | return 96 | end 97 | 98 | local period = self._period:tabulate("period") 99 | local total = self._total:tabulate("total") 100 | local additional = { 101 | time_s = string.format("%.03f", (self._period.time_until_ms - self._time_first_ms) / 1e3), 102 | } 103 | 104 | local outstr = string.gsub(self._format, "([_%w]+)", period) 105 | outstr = string.gsub(outstr, "([_%w]+)", total) 106 | outstr = string.gsub(outstr, "([_%w]+)", additional) 107 | 108 | self._output:write(outstr.."\n") 109 | end 110 | 111 | function Stats:receive(obj) 112 | local obj_pcap = obj:cast_to(object.PCAP) 113 | local time_pcap_ms = tonumber(obj_pcap.ts.sec) * 1e3 + tonumber(obj_pcap.ts.nsec) * 1e-6 114 | 115 | if self._time_first_ms == nil then 116 | self._time_first_ms = time_pcap_ms 117 | self._time_next_ms = self._time_first_ms + self._stats_period_ms 118 | self._period:begin(self._time_first_ms, self._time_next_ms) 119 | self._total:begin(self._time_first_ms, self._time_next_ms) 120 | end 121 | 122 | while time_pcap_ms >= self._time_next_ms do -- don't skip over 0-value intervals 123 | self._total = self._total + self._period 124 | self:display() 125 | 126 | local next_ms = self._time_next_ms + self._stats_period_ms 127 | self._period:begin(self._time_next_ms, next_ms) 128 | self._time_next_ms = next_ms 129 | end 130 | 131 | self._period.packets = self._period.packets + 1 132 | 133 | -- ensure monotonic update of time 134 | if self._time_last_ms == nil or time_pcap_ms > self._time_last_ms then 135 | self._time_last_ms = time_pcap_ms 136 | end 137 | end 138 | 139 | function Stats:finish() 140 | if self._time_last_ms == nil then 141 | self._log:warning("no packets received") 142 | return 143 | elseif self._time_last_ms < self._period.time_since_ms then 144 | -- this shouldn't happen, handling just in case 145 | self._log:fatal("last packet time is less than start of measurement interval") 146 | elseif self._time_last_ms == self._period.time_since_ms then 147 | -- avoid division by zero in calculations by adding an extra millisecond 148 | self._time_last_ms = self._time_last_ms + 1 149 | end 150 | self._period.time_until_ms = self._time_last_ms 151 | self._total = self._total + self._period 152 | self:display() 153 | end 154 | 155 | 156 | function StatsCounters.new() 157 | local self = setmetatable({ 158 | period_s = nil, 159 | time_since_ms = nil, 160 | time_until_ms = nil, 161 | packets = 0, 162 | }, { 163 | __index = StatsCounters, 164 | __add = function(op1, op2) 165 | op1.time_since_ms = math.min(op1.time_since_ms, op2.time_since_ms) 166 | op1.time_until_ms = math.max(op1.time_until_ms, op2.time_until_ms) 167 | op1.packets = op1.packets + op2.packets 168 | 169 | return op1 170 | end, 171 | }) 172 | 173 | return self 174 | end 175 | 176 | function StatsCounters:begin(time_since_ms, time_until_ms) 177 | self.packets = 0 178 | assert(time_until_ms > time_since_ms) 179 | self.time_since_ms = time_since_ms 180 | self.time_until_ms = time_until_ms 181 | end 182 | 183 | function StatsCounters:tabulate(prefix) 184 | if prefix == nil then 185 | prefix = "" 186 | elseif string.sub(prefix, -1) ~= "_" then 187 | prefix = prefix .. 
"_" 188 | end 189 | 190 | local res = {} 191 | local period_s = (self.time_until_ms - self.time_since_ms) / 1e3 192 | res[prefix.."time_since_ms"] = string.format("%d", self.time_since_ms) 193 | res[prefix.."time_until_ms"] = string.format("%d", self.time_until_ms) 194 | res[prefix.."packets"] = string.format("%d", self.packets) 195 | res[prefix.."pps"] = string.format("%d", self.packets / period_s) 196 | return res 197 | end 198 | 199 | function StatsCounters:count() 200 | self.packets = self.packets + 1 201 | end 202 | 203 | 204 | local stats = Stats.new(args.stats_period, csv_output) 205 | local obj 206 | while true do 207 | obj = produce(pctx) 208 | if obj == nil then break end 209 | stats:receive(obj) 210 | end 211 | stats:finish() 212 | 213 | csv_output:close() 214 | -------------------------------------------------------------------------------- /pcap/count-packets-per-ip.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- count-packets-per-ip.lua: provide packet summary for every source IP 4 | 5 | local input = require("dnsjit.input.pcap").new() 6 | local layer = require("dnsjit.filter.layer").new() 7 | local object = require("dnsjit.core.objects") 8 | local log = require("dnsjit.core.log").new("count-packets-per-ip.lua") 9 | local getopt = require("dnsjit.lib.getopt").new({ 10 | { "r", "read", "", "input file to read", "?" }, 11 | { nil, "csv", "", "path to the output CSV file (default: stdout)", "?" }, 12 | }) 13 | 14 | log:enable("all") 15 | 16 | -- Parse arguments 17 | local args = {} 18 | getopt:parse() 19 | args.read = getopt:val("r") 20 | args.csv = getopt:val("csv") 21 | 22 | -- Display help 23 | if getopt:val("help") then 24 | getopt:usage() 25 | return 26 | end 27 | 28 | -- Set up input 29 | if args.read ~= "" then 30 | if input:open_offline(args.read) ~= 0 then 31 | log:fatal("failed to open input PCAP "..args.read) 32 | end 33 | log:notice("using input PCAP "..args.read) 34 | else 35 | getopt:usage() 36 | log:fatal("input must be specified, use -r") 37 | end 38 | layer:producer(input) 39 | local produce, pctx = layer:produce() 40 | 41 | -- Set up CSV 42 | local csv_output 43 | if args.csv ~= "" then 44 | csv_output = io.open(args.csv, 'w') 45 | if csv_output == nil then 46 | log:fatal('failed to open "'..args.csv..'" for writing') 47 | else 48 | log:notice('writing output CSV to "'..args.csv..'"') 49 | end 50 | else 51 | csv_output = io.stdout 52 | end 53 | 54 | local clients = {} 55 | local now_ms, chunk_since_ms 56 | local n_clients = 0 57 | 58 | log:info("processing... 
(this may take up to minutes for very large files)") 59 | local obj, obj_pcap_in, obj_ip, client, src_ip 60 | while true do 61 | obj = produce(pctx) 62 | if obj == nil then break end 63 | 64 | obj_ip = obj:cast_to(object.IP) 65 | if obj_ip == nil then 66 | obj_ip = obj:cast_to(object.IP6) 67 | end 68 | 69 | obj_pcap_in = obj:cast_to(object.PCAP) 70 | if obj_ip ~= nil and obj_pcap_in ~= nil then 71 | now_ms = tonumber(obj_pcap_in.ts.sec) * 1e3 + tonumber(obj_pcap_in.ts.nsec) * 1e-6 72 | if chunk_since_ms == nil then 73 | chunk_since_ms = now_ms 74 | end 75 | 76 | src_ip = obj_ip:source() 77 | client = clients[src_ip] 78 | if client == nil then 79 | client = { 80 | packets = 0, 81 | since_ms = now_ms, 82 | until_ms = now_ms, 83 | } 84 | clients[src_ip] = client 85 | n_clients = n_clients + 1 86 | end 87 | client["packets"] = client["packets"] + 1 88 | client["until_ms"] = now_ms 89 | end 90 | end 91 | 92 | local duration_s = (now_ms - chunk_since_ms) / 1e3 93 | log:info(string.format("duration of input PCAP (s): %.3f", duration_s)) 94 | log:info(string.format("number of clients: %d", n_clients)) 95 | 96 | csv_output:write('"ip","ip_since_ms","ip_until_ms","packets","ip_chunk_qps"\n') 97 | for ip, data in pairs(clients) do 98 | csv_output:write('"') 99 | csv_output:write(ip) 100 | csv_output:write('",') 101 | csv_output:write(string.format("%d", data["since_ms"])) 102 | csv_output:write(',') 103 | csv_output:write(string.format("%d", data["until_ms"])) 104 | csv_output:write(',') 105 | csv_output:write(string.format("%d", data["packets"])) 106 | csv_output:write(',') 107 | csv_output:write(string.format("%.2f", data["packets"] / duration_s)) 108 | csv_output:write('\n') 109 | end 110 | csv_output:close() 111 | -------------------------------------------------------------------------------- /pcap/cut-pcap.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- cut-pcap.lua: Copy input PCAP to output until_s specified timestamp is reached. 4 | -- This is an efficient implementation of editcap -B for already sorted input. 5 | 6 | -- SPDX-FileCopyrightText: Internet Systems Consortium, Inc. 
("ISC") 7 | -- SPDX-License-Identifier: BSD-2-Clause 8 | 9 | local fpcap = require("dnsjit.input.fpcap") 10 | local log = require("dnsjit.core.log") 11 | local mmpcap = require("dnsjit.input.mmpcap") 12 | local object = require("dnsjit.core.objects") 13 | local output = require("dnsjit.output.pcap").new() 14 | 15 | local function open_pcap(filename) 16 | local input 17 | if filename == '-' then 18 | input = fpcap.new() 19 | if input:openfp(io.stdin) ~= 0 then 20 | log.fatal("failed to open PCAP on stdin") 21 | else 22 | log.debug('stdin opened using fpcap') 23 | end 24 | else 25 | input = mmpcap.new() 26 | if input:open(filename) ~= 0 then 27 | log.notice("failed to open PCAP with mmap, fallback to fpcap") 28 | input = fpcap.new() 29 | if input:open(filename) ~= 0 then 30 | log.fatal("failed to open PCAP with fpcap") 31 | else 32 | log.debug('file %s opened using fpcap', filename) 33 | end 34 | else 35 | log.debug('file %s opened using mmpcap', filename) 36 | end 37 | end 38 | local producer, pctx = input:produce() 39 | return producer, pctx 40 | end 41 | 42 | local function get_next_pkt(producer, pctx) 43 | local obj = producer(pctx) 44 | if obj ~= nil then 45 | return obj, obj:cast_to(object.PCAP) 46 | end 47 | end 48 | 49 | local out_filename 50 | local function check_output() 51 | if output:have_errors() then 52 | log.fatal("error writting to file %s", out_filename) 53 | end 54 | end 55 | 56 | log.enable("all") 57 | if #arg ~= 4 or not tonumber(arg[4]) then 58 | print("usage: "..arg[1].." ") 59 | print("Copy packets before specified timestamp from input to output PCAP") 60 | return 61 | end 62 | 63 | local in_filename = arg[2] 64 | out_filename = arg[3] 65 | local until_s = tonumber(arg[4]) 66 | 67 | if until_s ~= math.floor(until_s) or until_s <= 0 then 68 | log.fatal('unsupported stop timestamp: use an integer > 0') 69 | end 70 | 71 | local producer, pctx = open_pcap(in_filename) 72 | local cur_obj, cur_pkt = get_next_pkt(producer, pctx) 73 | if not cur_pkt then 74 | log.fatal('no packets in input pcap %s, terminating', in_filename) 75 | end 76 | 77 | log.info('opening output file %s', out_filename) 78 | output:open(out_filename, 79 | cur_pkt.linktype, 80 | cur_pkt.snaplen) 81 | local receiver, rctx = output:receive() 82 | 83 | local npackets = 0 84 | while cur_pkt do 85 | if cur_pkt.ts.sec >= until_s then 86 | log.info('timestamp %.f reached, stop', until_s) 87 | break 88 | end 89 | receiver(rctx, cur_obj) 90 | -- 91 | -- check output state only every 10 000 packets - optimization 92 | if npackets % 10000 == 0 then 93 | check_output() 94 | end 95 | npackets = npackets + 1 96 | cur_obj, cur_pkt = get_next_pkt(producer, pctx) 97 | end 98 | 99 | check_output() 100 | log.info('output %.f packets', npackets) 101 | log.debug('closing output file %s', out_filename) 102 | output:close() 103 | log.debug('output file %s closed', out_filename) 104 | -------------------------------------------------------------------------------- /pcap/extract-clients.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- extract-clients.lua: prepare PCAPs with client streams 4 | -- 5 | -- Process input PCAP and assign each client a unique IPv6 address. 6 | -- Optionally, the input PCAP can be split in into multiple chunks on desired 7 | -- duration. Output PCAP contains just RAWIP layer with IPv6 packets. 8 | -- 9 | -- For example, 600s of input with 10k unique clients could be split up into 10 | -- chunks of 60s. 
The output files combined have more than 10k clients, 11 | -- since a client is considered "unique" for every time chunk it appears in. 12 | -- Depending on the traffic, the output would have anywhere between 10k and 13 | -- 100k clients (combined). 14 | -- 15 | -- Other tools can then be used to merge these files to "scale up" the number 16 | -- of clients in a single time chunk. 17 | 18 | --- Check if a file or directory exists in this path 19 | local function exists(file) 20 | local ok, err, code = os.rename(file, file) 21 | if not ok then 22 | if code == 13 then 23 | -- Permission denied, but it exists 24 | return true 25 | end 26 | end 27 | return ok, err 28 | end 29 | 30 | local seed_def = os.time() + os.clock() / 1e6 31 | local dir = os.getenv("PWD") or "" 32 | local bit = require("bit") 33 | local ffi = require("ffi") 34 | local input = require("dnsjit.input.pcap").new() 35 | local output = require("dnsjit.output.pcap").new() 36 | local layer = require("dnsjit.filter.layer").new() 37 | local object = require("dnsjit.core.objects") 38 | local log = require("dnsjit.core.log").new("extract-clients.lua") 39 | local getopt = require("dnsjit.lib.getopt").new({ 40 | { "r", "read", "", "input file to read", "?" }, 41 | { "O", "outdir", dir, "directory for client chunks (must exist)", "?" }, 42 | { "d", "duration", 0, "duration of each chunk (in seconds, 0 means entire file)", "?" }, 43 | { "k", "keep", false, "keep last chunk even if it's incomplete", "?" }, 44 | { nil, "seed", seed_def, "seed for RNG", "?" }, 45 | { nil, "stdout", false, "output to stdout as a single file, no splits", "?" }, 46 | { nil, "query-rewrite", false, "rewrite all queries to . NS", "?" }, 47 | }) 48 | 49 | local SNAPLEN = 66000 50 | local LINKTYPE = 12 -- DLT_RAW in Linux, see https://github.com/the-tcpdump-group/libpcap/blob/master/pcap/dlt.h 51 | local HEADERSLEN = 40 + 8 -- IPv6 header and UDP header 52 | 53 | -- DNS payload WITHOUT message ID, query . NS +RD; used if --query-rewrite 54 | local DNS_PAYLOAD = "\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01" 55 | 56 | log:enable("all") 57 | 58 | -- Parse arguments 59 | local args = {} 60 | getopt:parse() 61 | args.read = getopt:val("r") 62 | args.duration = getopt:val("d") 63 | args.keep = getopt:val("k") 64 | args.outdir = getopt:val("O") 65 | args.seed = getopt:val("seed") 66 | args.stdout = getopt:val("stdout") 67 | args.query_rewrite = getopt:val("query-rewrite") 68 | math.randomseed(args.seed) 69 | 70 | -- Display help 71 | if getopt:val("help") then 72 | getopt:usage() 73 | return 74 | end 75 | 76 | -- Check arguments 77 | if args.stdout then 78 | if args.duration ~= 0 or args.outdir ~= "" then 79 | log.fatal("--stdout is mutualy exclusive with -d and -O, use -O ''") 80 | end 81 | end 82 | if args.duration < 0 then 83 | log:fatal("duration can't be negative") 84 | elseif args.duration == 0 then 85 | args.duration = math.huge 86 | log:notice("processing entire file as one chunk") 87 | else 88 | log:notice("file will be split every " .. args.duration .. " seconds") 89 | end 90 | if not args.stdout and (args.outdir == "" or not exists(args.outdir .. "/")) then 91 | log.fatal("output directory \"" .. args.outdir .. 
"\" doesn't exist") 92 | end 93 | 94 | -- Set up input 95 | if args.read ~= "" then 96 | if input:open_offline(args.read) ~= 0 then 97 | log:fatal("failed to open input PCAP "..args.read) 98 | end 99 | log:notice("using input PCAP "..args.read) 100 | else 101 | getopt:usage() 102 | log:fatal("input must be specified, use -r") 103 | end 104 | layer:producer(input) 105 | local produce, pctx = layer:produce() 106 | 107 | 108 | local i_chunk = 0 109 | local chunk_id 110 | local write, writectx 111 | local outfilename 112 | local function open_pcap() 113 | if args.stdout then 114 | outfilename = "-" 115 | else 116 | outfilename = args.outdir .. "/" .. chunk_id .. ".pcap" 117 | if exists(outfilename) then 118 | log:warning("chunk_id collision detected! skipping: " .. outfilename) 119 | return false 120 | end 121 | end 122 | if output:open(outfilename, LINKTYPE, SNAPLEN) ~= 0 then 123 | log:fatal("failed to open chunk file " .. outfilename) 124 | else 125 | log:notice("writing chunk: " .. outfilename) 126 | end 127 | write, writectx = output:receive() 128 | return true 129 | end 130 | 131 | 132 | local obj_pcap_out = ffi.new("core_object_pcap_t") 133 | obj_pcap_out.obj_type = object.PCAP 134 | 135 | local bytes = ffi.new("uint8_t[?]", SNAPLEN) 136 | bytes[0] = 0x60 -- IPv6 header 137 | -- UDP len in bytes[4]:bytes[5] 138 | bytes[6] = 0x11 -- next header: UDP 139 | bytes[8] = 0xfd -- bytes[8]:bytes[23] source IPv6 fd00:: 140 | bytes[39] = 0x01 -- dst IPv6 ::1 141 | obj_pcap_out.bytes = bytes 142 | 143 | local function put_uint16_be(dst, offset, src) 144 | dst[offset] = bit.rshift(bit.band(src, 0xff00), 8) 145 | dst[offset + 1] = bit.band(src, 0xff) 146 | end 147 | 148 | local function put_uint32_be(dst, offset, src) 149 | dst[offset] = bit.rshift(bit.band(src, 0xff000000), 24) 150 | dst[offset + 1] = bit.rshift(bit.band(src, 0xff0000), 16) 151 | dst[offset + 2] = bit.rshift(bit.band(src, 0xff00), 8) 152 | dst[offset + 3] = bit.band(src, 0xff) 153 | end 154 | 155 | local clients = {} 156 | local i_client = 0 157 | local ct_4b = ffi.typeof("uint8_t[4]") 158 | local now_ms, diff_ms, chunk_since_ms, chunk_until_ms 159 | 160 | local function check_output() 161 | if output:have_errors() then 162 | log:fatal("error writting to file %s", outfilename) 163 | end 164 | end 165 | 166 | local function chunk_init() 167 | local opened 168 | repeat 169 | -- assign random "unique" chunk ID 170 | bytes[16] = math.random(0, 255) 171 | bytes[17] = math.random(0, 255) 172 | bytes[18] = math.random(0, 255) 173 | bytes[19] = math.random(0, 255) 174 | chunk_id = string.format("%02x%02x%02x%02x", bytes[16], bytes[17], bytes[18], bytes[19]) 175 | opened = open_pcap() 176 | until(opened) 177 | 178 | clients = {} 179 | i_client = 0 180 | i_chunk = i_chunk + 1 181 | 182 | chunk_since_ms = chunk_until_ms or now_ms 183 | chunk_until_ms = chunk_since_ms + args.duration * 1e3 184 | end 185 | 186 | local function chunk_finalize() 187 | check_output() 188 | output:close() 189 | local duration_s = (chunk_until_ms - chunk_since_ms) / 1e3 190 | log:info(string.format(" duration_s: %.3f", duration_s)) 191 | log:info(string.format(" number of clients: %d", i_client)) 192 | if i_client == 0 then 193 | log:warning(" deleting empty chunk, double check your data") 194 | os.remove(outfilename) 195 | end 196 | end 197 | 198 | local obj, obj_pcap_in, obj_ip, obj_udp, obj_pl, client, src_ip, ip_len, prev_ms 199 | local npacketsin = 0 200 | local npacketsout = 0 201 | local npacketsskip = 0 202 | while true do 203 | obj = produce(pctx) 204 | if 
obj == nil then break end 205 | npacketsin = npacketsin + 1 206 | 207 | ip_len = 4 208 | obj_ip = obj:cast_to(object.IP) 209 | if obj_ip == nil then 210 | obj_ip = obj:cast_to(object.IP6) 211 | ip_len = 16 212 | end 213 | 214 | obj_udp = obj:cast_to(object.UDP) 215 | obj_pl = obj:cast_to(object.PAYLOAD) 216 | obj_pcap_in = obj:cast_to(object.PCAP) 217 | if obj_ip ~= nil and obj_udp ~= nil and obj_pl ~= nil and obj_pcap_in ~= nil then 218 | now_ms = tonumber(obj_pcap_in.ts.sec) * 1e3 + tonumber(obj_pcap_in.ts.nsec) * 1e-6 219 | if prev_ms then 220 | if (now_ms < prev_ms) then 221 | log:fatal('non-monotonic packet timestamp detected, exiting ' 222 | .. '(current ts %f < previous %f)', now_ms, prev_ms) 223 | break 224 | end 225 | end 226 | prev_ms = now_ms 227 | while chunk_until_ms == nil or now_ms >= chunk_until_ms do 228 | if chunk_until_ms ~= nil then 229 | chunk_finalize() 230 | end 231 | chunk_init() 232 | end 233 | 234 | src_ip = ffi.string(obj_ip.src, ip_len) 235 | client = clients[src_ip] 236 | if client == nil then 237 | client = { addr = ct_4b(), queries = 0 } 238 | put_uint32_be(client["addr"], 0, i_client) 239 | i_client = i_client + 1 240 | clients[src_ip] = client 241 | end 242 | client["queries"] = client["queries"] + 1 243 | ffi.copy(bytes + 20, client["addr"], 4) 244 | 245 | diff_ms = now_ms - chunk_since_ms 246 | obj_pcap_out.ts.sec = math.floor(diff_ms / 1e3) 247 | obj_pcap_out.ts.nsec = math.floor((diff_ms % 1e3) * 1e6) 248 | 249 | obj_pcap_out.len = HEADERSLEN + obj_pl.len 250 | obj_pcap_out.caplen = obj_pcap_out.len 251 | 252 | put_uint16_be(bytes, 4, obj_udp.ulen) -- IPv6 payload length 253 | put_uint16_be(bytes, 40, 0x0035) -- normalized src port 53 254 | put_uint16_be(bytes, 42, 0x0035) -- normalized dst port 53 255 | if args.query_rewrite then 256 | put_uint16_be(bytes, 44, 0x0019) -- UDP length incl. UDP header 257 | put_uint16_be(bytes, 46, 0x0000) -- checksum: disabled/ignored 258 | put_uint16_be(bytes, 48, math.random(0, 65535)) -- msg ID 259 | ffi.copy(bytes + HEADERSLEN + 2, DNS_PAYLOAD) 260 | else 261 | put_uint16_be(bytes, 44, obj_udp.ulen) 262 | put_uint16_be(bytes, 46, obj_udp.sum) 263 | ffi.copy(bytes + HEADERSLEN, obj_pl.payload, obj_pl.len) 264 | end 265 | 266 | -- check output state only every 10 000 packets - optimization 267 | if npacketsout % 10000 == 0 then 268 | check_output() 269 | end 270 | write(writectx, obj_pcap_out:uncast()) 271 | npacketsout = npacketsout + 1 272 | else 273 | npacketsskip = npacketsskip + 1 274 | end 275 | end 276 | if npacketsskip > 0 then 277 | log:warning(string.format("skipped %d non-IP or non-UDP packets (%f %%)", 278 | npacketsskip, npacketsskip / npacketsin * 100)) 279 | end 280 | 281 | if now_ms == nil then 282 | log:fatal("no valid packets found") 283 | end 284 | 285 | chunk_finalize() 286 | 287 | if args.duration ~= math.huge and not args.keep then 288 | log:notice("removing incomplete last chunk "..outfilename) 289 | os.remove(outfilename) 290 | end 291 | -------------------------------------------------------------------------------- /pcap/filter-dnsq.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- filter-dnsq.lua: obtain DNS queries from input PCAP / interface 4 | -- 5 | -- Process input and extract DNS queries into an output PCAP. 
6 | 7 | local ffi = require("ffi") 8 | local C = ffi.C 9 | local input = require("dnsjit.input.pcap").new() 10 | local output = require("dnsjit.output.pcap").new() 11 | local layer = require("dnsjit.filter.layer").new() 12 | local object = require("dnsjit.core.objects") 13 | local dns = require("dnsjit.core.object.dns").new() 14 | local dns_q = require("dnsjit.core.object.dns.q").new() 15 | local dns_rr = require("dnsjit.core.object.dns.rr").new() 16 | local labels = require("dnsjit.core.object.dns.label").new(127) 17 | local log = require("dnsjit.core.log").new("filter-dnsq.lua") 18 | local getopt = require("dnsjit.lib.getopt").new({ 19 | { "r", "read", "", "input file to read", "?" }, 20 | { "i", "interface", "", "capture interface", "?" }, 21 | { "w", "write", "", "output file to write (or /dev/null)", "?" }, 22 | { "p", "port", 53, "destination port to check for UDP DNS queries", "?" }, 23 | { "l", "log-malformed", false, "log why queries were considered as malformed", "?" }, 24 | { "m", "malformed", false, "include malformed queries", "?" }, 25 | { "M", "only-malformed", false, "include only malformed queries", "?" }, 26 | { "s", "skipped", false, "include queries for *.dotnxdomain.net, " 27 | .. "which would otherwise be skipped", "?" }, 28 | { "a", "address", "", "destination address (can be specified multiple times)", "?*" }, 29 | }) 30 | 31 | local AF_INET = 2 32 | local AF_INET6 = 10 33 | if ffi.os == "OSX" then 34 | AF_INET6 = 30 35 | end 36 | 37 | local args 38 | 39 | local function check_output() 40 | if output:have_errors() then 41 | log:fatal("error writting to file %s", args.write) 42 | end 43 | end 44 | 45 | ffi.cdef[[ 46 | int inet_pton(int af, const char* src, void* dst); 47 | int memcmp(const void *s1, const void *s2, size_t n); 48 | ]] 49 | 50 | log:enable("all") 51 | 52 | -- Parse arguments 53 | args = {} 54 | getopt:parse() 55 | args.read = getopt:val("r") 56 | args.interface = getopt:val("i") 57 | args.write = getopt:val("w") 58 | args.port = getopt:val("p") 59 | args.only_malformed = getopt:val("M") 60 | args.malformed = getopt:val("m") or args.only_malformed 61 | args.log_malformed = getopt:val("l") 62 | args.csv = getopt:val("csv") 63 | args.skipped = getopt:val("s") 64 | args.address = getopt:val("a") 65 | 66 | -- Display help 67 | if getopt:val("help") then 68 | getopt:usage() 69 | return 70 | end 71 | 72 | -- Check arguments 73 | if args.port <= 0 or args.port > 65535 then 74 | log:fatal("invalid port number") 75 | end 76 | 77 | -- Convert IPs to binary 78 | local addresses = {} 79 | if #args.address > 0 then 80 | for i, addr in ipairs(args.address) do 81 | local inet = ffi.new("uint8_t [16]") -- reserve enough memory for either IPv4 or IPv6 82 | local len = 4 83 | -- try parse as IPv4 84 | if C.inet_pton(AF_INET, addr, inet) ~= 1 then 85 | len = 16 86 | if C.inet_pton(AF_INET6, addr, inet) ~= 1 then 87 | log:fatal("failed to parse address as IPv4 or IPv6: "..addr) 88 | end 89 | end 90 | addresses[i] = { inet = inet, len = len } 91 | end 92 | end 93 | 94 | -- Set up input 95 | if args.read ~= "" then 96 | if input:open_offline(args.read) ~= 0 then 97 | log:fatal("failed to open input PCAP "..args.read) 98 | end 99 | log:notice("using input PCAP "..args.read) 100 | elseif args.interface ~= "" then 101 | input:create(args.interface) 102 | if input:activate() ~= 0 then 103 | log:fatal("failed to capture interface "..args.interface.." 
(insufficient permissions?)") 104 | end 105 | log:notice("capturing input interface "..args.interface) 106 | else 107 | getopt:usage() 108 | log:fatal("input must be specified, use -r/-i") 109 | end 110 | layer:producer(input) 111 | local produce, pctx = layer:produce() 112 | 113 | -- Set up output 114 | if args.write == "" then 115 | log:fatal("output must be specified, use -w; use /dev/null if you want just counters") 116 | elseif output:open(args.write, input:linktype(), input:snaplen()) ~= 0 then 117 | log:fatal("failed to open output PCAP "..args.write) 118 | else 119 | log:notice("using output PCAP "..args.write) 120 | end 121 | local write, writectx = output:receive() 122 | 123 | local function matches_addresses(ip, len) 124 | for _, addr in ipairs(addresses) do 125 | if addr.len == len and C.memcmp(ip, addr.inet, len) == 0 then 126 | return true 127 | end 128 | end 129 | return false 130 | end 131 | 132 | local function rr_idx_section_name(rr_idx, ancount, nscount) 133 | if rr_idx <= ancount then 134 | return string.format('answer RR idx %d', rr_idx) 135 | elseif rr_idx <= ancount + nscount then 136 | return string.format('authority RR idx %d', rr_idx - ancount) 137 | else 138 | return string.format('additional RR idx %d', rr_idx - ancount - nscount) 139 | end 140 | end 141 | 142 | local function log_packet(obj, reason) 143 | if not args.log_malformed then 144 | return 145 | end 146 | local obj_pcap = obj:cast_to(object.PCAP) 147 | log:info("timestamp %d.%d: %s", obj_pcap.ts.sec, obj_pcap.ts.nsec, reason) 148 | end 149 | 150 | local function is_skipped_qname(payload, qlabels, max_labels) 151 | local found_labels = 0 152 | -- iterate over label lengths to the or label array end 153 | for n = 1, max_labels do 154 | local qlabel = qlabels[n - 1] 155 | if qlabel.have_offset == 1 then 156 | return nil -- malformed, qname should not be compressed 157 | elseif qlabel.have_dn == 0 then 158 | break -- end of label array 159 | end 160 | -- have_dn == 1, continue to see if there are further labels 161 | found_labels = n 162 | end 163 | -- check if qname can have form *.dotnxdomain.net. 164 | if found_labels < 3 then 165 | return false 166 | end 167 | -- malformed, qname must be terminated with root label 168 | if qlabels[found_labels].length ~= 0 then 169 | return nil 170 | end 171 | 172 | -- is it in net.? 173 | local tld = qlabels[found_labels - 1] 174 | if tld.length ~= 3 then 175 | return false 176 | end 177 | local tlddata = ffi.cast('char *', payload + tld.offset + 1) 178 | if ffi.string(tlddata, tld.length):lower() ~= 'net' then 179 | return false 180 | end 181 | 182 | -- is it in dotnxdomain.net.? 
183 | local sld = qlabels[found_labels - 2] 184 | if sld.length ~= 11 then 185 | return false 186 | end 187 | local slddata = ffi.cast('char *', payload + sld.offset + 1) 188 | if ffi.string(slddata, sld.length):lower() ~= 'dotnxdomain' then 189 | return false 190 | end 191 | 192 | return true 193 | end 194 | 195 | 196 | local nmalformed = 0 197 | local nskipped = 0 198 | -- Filtering function that picks only DNS queries 199 | local function is_dnsq(obj) 200 | local payload = obj:cast_to(object.PAYLOAD) 201 | if payload == nil then return false end 202 | if payload.len < 12 then return false end -- ignore garbage smaller than DNS header size 203 | local udp = obj:cast_to(object.UDP) 204 | if udp == nil then return false end -- use only UDP packets 205 | if udp.dport ~= args.port then return false end 206 | 207 | if #addresses > 0 then -- check destination IP 208 | local ip_obj = obj:cast_to(object.IP) or obj:cast_to(object.IP6) 209 | local len = 4 210 | if ip_obj.obj_type == object.IP6 then len = 16 end 211 | if matches_addresses(ip_obj.dst, len) == false then return false end 212 | end 213 | 214 | dns:reset() 215 | dns.obj_prev = obj 216 | dns:parse_header() 217 | if dns.qr == 1 then return false end -- ignore DNS responses 218 | 219 | -- check that query isn't malformed 220 | if dns.qdcount > 0 then -- parse all questions 221 | for idx = 1, dns.qdcount do 222 | if dns:parse_q(dns_q, labels, 127) ~= 0 then 223 | log_packet(obj, 'cannot parse qname idx %d', idx) 224 | nmalformed = nmalformed + 1 225 | return args.malformed 226 | end 227 | local is_skipped = is_skipped_qname(dns.payload, labels, 127) 228 | if is_skipped == nil then 229 | log_packet(obj, 'too suspicious qname idx %d', idx) 230 | nmalformed = nmalformed + 1 231 | return args.malformed 232 | elseif is_skipped and not args.skipped then 233 | nskipped = nskipped + 1 234 | return false 235 | end 236 | end 237 | end 238 | local rrcount = dns.ancount + dns.nscount + dns.arcount 239 | if rrcount > 0 then -- parse all other RRs 240 | for idx = 1, rrcount do 241 | if dns:parse_rr(dns_rr, labels, 127) ~= 0 then 242 | log_packet(obj, string.format('malformed RR idx %d (%s)', idx, rr_idx_section_name(idx, dns.ancount, dns.nscount))) 243 | nmalformed = nmalformed + 1 244 | return args.malformed 245 | end 246 | end 247 | end 248 | return not args.only_malformed 249 | end 250 | 251 | local npackets_in = 0 252 | local npackets_out = 0 253 | local obj 254 | while true do 255 | obj = produce(pctx) 256 | if obj == nil then break end 257 | npackets_in = npackets_in + 1 258 | if is_dnsq(obj) then 259 | write(writectx, obj) 260 | npackets_out = npackets_out + 1 261 | if npackets_out % 10000 == 0 then 262 | check_output() 263 | end 264 | end 265 | end 266 | 267 | check_output() 268 | output:close() 269 | 270 | if npackets_out == 0 then 271 | log:fatal("no packets were matched by filter!") 272 | else 273 | log:notice("%0.f out of %0.f packets matched filter (%f %%)", 274 | npackets_out, npackets_in, npackets_out / npackets_in * 100) 275 | if nmalformed > 0 then 276 | local total 277 | if args.only_malformed then 278 | total = npackets_out 279 | else 280 | total = npackets_out + nmalformed 281 | end 282 | local malformed_desc 283 | if args.malformed then 284 | malformed_desc = "and written to output" 285 | else 286 | malformed_desc = "and omitted from output" 287 | end 288 | log:notice("%0.f malformed DNS packets detected " 289 | .. 
"(%f %% of matching packets) %s", 290 | nmalformed, nmalformed / total * 100, malformed_desc) 291 | else 292 | log:info("0 malformed DNS packets detected") 293 | end 294 | if nskipped > 0 then 295 | log:notice("%0.f skipped queries for *.dotnxdomain.net were " 296 | .. "omitted from output", nskipped) 297 | end 298 | end 299 | -------------------------------------------------------------------------------- /pcap/generate-const-qps.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | local bit = require("bit") 3 | 4 | -- For mysterious reasons this combination of write_uint32 implementations is fastest 5 | -- with QPS >= 10 and it gets only slower if implementations are unified (LuaJIT 2.1.0b3). 6 | -- It is slower for QPS < 10 but that's a corner case we are not optimizing for. 7 | local function write_uint32_le(output, src) 8 | local s = string.char( 9 | bit.band(src, 0xff), 10 | bit.rshift(bit.band(src, 0xff00), 8), 11 | bit.rshift(bit.band(src, 0xff0000), 16), 12 | bit.rshift(bit.band(src, 0xff000000), 24)) 13 | output:write(s) 14 | return s 15 | end 16 | 17 | local function write_uint32_be(output, src) 18 | output:write(string.char( 19 | bit.rshift(bit.band(src, 0xff000000), 24))) 20 | output:write(string.char( 21 | bit.rshift(bit.band(src, 0xff0000), 16))) 22 | output:write(string.char( 23 | bit.rshift(bit.band(src, 0xff00), 8))) 24 | output:write(string.char( 25 | bit.band(src, 0xff))) 26 | end 27 | 28 | local function write_uint16_be(output, src) 29 | output:write(string.char( 30 | bit.rshift(bit.band(src, 0xff00), 8))) 31 | output:write(string.char( 32 | bit.band(src, 0xff))) 33 | end 34 | 35 | 36 | -- https://wiki.wireshark.org/Development/LibpcapFileFormat 37 | local function write_pcap_header(output) 38 | output:write("\xD4\xC3\xB2\xA1") -- PCAP magic 39 | output:write("\x02\x00") -- major version number 40 | output:write("\x04\x00") -- minor version number 41 | output:write("\x00\x00\x00\x00") -- thizone: gmt to local correction 42 | output:write("\x00\x00\x00\x00") -- sigfigs: accuracy of timestamps, in practice always 0 43 | output:write("\xD0\x01\x01\x00") -- snaplen: max length of captured packets, in octets 44 | output:write("\x65\x00\x00\x00") -- linktype: data link type RAW_IP 45 | end 46 | 47 | local frame_start = 48 | -- PCAP packet header 49 | "\x41\x00\x00\x00" .. -- snap length 50 | "\x41\x00\x00\x00" .. -- original length 51 | -- IP layer 52 | "\x60\x00\x00\x00" .. -- IP version 6 and no flags 53 | "\x00\x19" .. -- IP payload length 54 | "\x11" .. -- protocol payload = UDP 55 | "\x00" .. -- hop limit 56 | "\xfd\x00\x00\x00\x00\x00\x00\x00\x02\x11\x66\x8e" -- source address WITHOUT last 4 octets 57 | 58 | local header_end = 59 | "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01" .. -- full dest address 60 | -- UDP 61 | "\x00\x35" .. -- source port 62 | "\x00\x35" .. -- dest port 63 | "\x00\x19" .. -- UDP length incl. UDP header (= payload + 8 bytes) 64 | "\x00\x00" -- checksum (disabled/ignored) 65 | 66 | -- DNS payload WITHOUT message ID, query . 
NS +RD 67 | local dns_payload = "\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01" 68 | 69 | local function write_frame(output, source_id) 70 | output:write(frame_start) 71 | write_uint32_be(output, source_id) 72 | output:write(header_end) 73 | write_uint16_be(output, math.random(0, 65535)) 74 | output:write(dns_payload) 75 | end 76 | 77 | local cache_sec 78 | local cache_sec_bytes 79 | local function write_timestamps(output, now_sec) 80 | local sec_int = math.floor(now_sec) 81 | local usec_int = math.floor((now_sec - sec_int) * 1e6) 82 | -- unix timestamp in seconds 83 | if cache_sec == sec_int then 84 | output:write(cache_sec_bytes) 85 | else 86 | cache_sec_bytes = write_uint32_le(output, sec_int) 87 | cache_sec = sec_int 88 | end 89 | -- microseconds since second 90 | write_uint32_le(output, usec_int) 91 | end 92 | 93 | local log = require("dnsjit.core.log").new("generate-const-qps.lua") 94 | local getopt = require("dnsjit.lib.getopt").new({ 95 | { "c", "clients", 1, "number of source IP addresses to generate", "?" }, 96 | { "q", "qps", 1, "queries per second to generate", "" }, 97 | { "t", "time-limit", math.huge, "length of query stream in seconds", "?" }, 98 | { "u", "uniform-clients", false, "do not randomize source IP addresses, do round robin", "?" }, 99 | { "w", "write", "-", "output file to write, - means stdout (default)", "?" }, 100 | }) 101 | local ok, left = pcall(getopt.parse, getopt) 102 | if not ok or #left > 0 or getopt:val("help") 103 | or getopt:val("clients") < 1 or getopt:val("clients") > 2^32 104 | or getopt:val("time-limit") <= 0 or getopt:val("qps") < 1 then 105 | log:info("Generate DNS query stream with uniform QPS and given number of source IP addresses.") 106 | getopt:usage() 107 | if not ok then 108 | log:fatal(left) 109 | elseif #left > 0 then 110 | log:fatal('unsupported extra arguments on command line') 111 | end 112 | return 113 | end 114 | 115 | local output 116 | if getopt:val("write") == "-" then 117 | output = io.stdout 118 | else 119 | output = io.open(getopt:val("write"), "w") 120 | end 121 | if output == nil then 122 | log:fatal("cannot open output file %s", getopt:val("write")) 123 | return 124 | end 125 | 126 | local clients = getopt:val("clients") 127 | local uniform_clients = clients == 1 or getopt:val("uniform-clients") 128 | local packet_interval = 1/getopt:val("qps") 129 | local end_sec = getopt:val("time-limit") 130 | 131 | write_pcap_header(output) 132 | 133 | local now_sec = 0 134 | local i_client = 0 135 | while (now_sec <= end_sec) 136 | do 137 | write_timestamps(output, now_sec) 138 | write_frame(output, i_client) 139 | now_sec = now_sec + packet_interval 140 | if uniform_clients then 141 | i_client = (i_client + 1) % clients 142 | else 143 | i_client = math.random(0, clients - 1) 144 | end 145 | end 146 | -------------------------------------------------------------------------------- /pcap/limit-clients.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | -- limit-clients.lua: randomize which clients (IPs) will be included in output 4 | -- 5 | -- Every unique IP (client) has the given chance to appear in the output file. 6 | -- If a client appears, all of its packets remain intact. If a client doesn't 7 | -- appear in the output, none of its packets will. 8 | -- 9 | -- This script can only scale-down (limit) the number of clients, i.e. the 10 | -- chance must be in range 0 to 1. 
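--
-- A usage sketch (hypothetical file names; keeps roughly 10 % of clients,
-- reproducibly, thanks to the fixed RNG seed):
--
--   dnsjit pcap/limit-clients.lua -r big.pcap -w small.pcap -l 0.1 --seed 42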
11 |
12 | local ffi = require("ffi")
13 | local input = require("dnsjit.input.pcap").new()
14 | local output = require("dnsjit.output.pcap").new()
15 | local layer = require("dnsjit.filter.layer").new()
16 | local object = require("dnsjit.core.objects")
17 | local log = require("dnsjit.core.log").new("limit-clients.lua")
18 | local getopt = require("dnsjit.lib.getopt").new({
19 |     { "r", "read", "", "input file to read", "?" },
20 |     { "w", "write", "", "output file to write", "?" },
21 |     { "l", "limit", 1.0, "chance for each client to appear, 0 to 1", "?" },
22 |     { nil, "seed", 0, "seed for RNG", "?" },
23 | })
24 |
25 | local SNAPLEN = 66000
26 | local LINKTYPE = 12 -- DLT_RAW in Linux, see https://github.com/the-tcpdump-group/libpcap/blob/master/pcap/dlt.h
27 |
28 | local args
29 |
30 | local function check_output()
31 |     if output:have_errors() then
32 |         log:fatal("error writing to file %s", args.write)
33 |     end
34 | end
35 |
36 | log:enable("all")
37 |
38 | -- Parse arguments
39 | args = {}
40 | getopt:parse()
41 | args.read = getopt:val("r")
42 | args.write = getopt:val("w")
43 | args.limit = getopt:val("l")
44 | args.seed = getopt:val("seed")
45 |
46 | -- Display help
47 | if getopt:val("help") then
48 |     getopt:usage()
49 |     return
50 | end
51 |
52 | -- Check arguments
53 | if args.limit <= 0 then
54 |     log:fatal("limit must be greater than 0")
55 | elseif args.limit > 1 then
56 |     log:fatal("limit can't be greater than 1")
57 | end
58 | math.randomseed(args.seed)
59 |
60 | -- Set up input
61 | if args.read ~= "" then
62 |     if input:open_offline(args.read) ~= 0 then
63 |         log:fatal("failed to open input PCAP " .. args.read)
64 |     end
65 |     log:notice("using input PCAP " .. args.read)
66 | else
67 |     getopt:usage()
68 |     log:fatal("input must be specified, use -r")
69 | end
70 | layer:producer(input)
71 | local produce, pctx = layer:produce()
72 |
73 | -- Set up output
74 | if args.write ~= "" then
75 |     if output:open(args.write, LINKTYPE, SNAPLEN) ~= 0 then
76 |         log:fatal("failed to open output PCAP " .. args.write)
77 |     else
78 |         log:notice("writing output PCAP: " ..
args.write) 79 | end 80 | else 81 | getopt:usage() 82 | log:fatal("output must be specified, use -w") 83 | end 84 | local write, writectx = output:receive() 85 | 86 | local clients = {} 87 | local n_present = 0 88 | local n_packets = 0 89 | 90 | local obj, obj_pcap_in, obj_ip, obj_pl, src_ip, ip_len, present 91 | while true do 92 | obj = produce(pctx) 93 | if obj == nil then break end 94 | 95 | ip_len = 4 96 | obj_ip = obj:cast_to(object.IP) 97 | if obj_ip == nil then 98 | obj_ip = obj:cast_to(object.IP6) 99 | ip_len = 16 100 | end 101 | 102 | obj_pl = obj:cast_to(object.PAYLOAD) 103 | obj_pcap_in = obj:cast_to(object.PCAP) 104 | if obj_ip ~= nil and obj_pl ~= nil and obj_pcap_in ~= nil then 105 | src_ip = ffi.string(obj_ip.src, ip_len) 106 | present = clients[src_ip] 107 | if present == nil then 108 | present = math.random() < args.limit 109 | if present then 110 | n_present = n_present + 1 111 | end 112 | clients[src_ip] = present 113 | end 114 | 115 | if present then 116 | write(writectx, obj) 117 | n_packets = n_packets + 1 118 | if n_packets % 10000 == 0 then 119 | check_output() 120 | end 121 | end 122 | end 123 | end 124 | 125 | check_output() 126 | log:info(string.format(" number of clients: %d", n_present)) 127 | -------------------------------------------------------------------------------- /pcap/merge-chunks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import argparse 3 | import os 4 | from pathlib import Path 5 | import random 6 | import sys 7 | 8 | 9 | def positive_int(val): 10 | i = int(val) 11 | if i <= 0: 12 | raise ValueError("must be greater than 0") 13 | return i 14 | 15 | 16 | def readable_directory(path): 17 | po = Path(path) 18 | if not po.is_dir(): 19 | raise ValueError("must be path to directory") 20 | return po 21 | 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser( 25 | description="Merge subset of PCAP chunks on the fly and write result to stdout" 26 | ) 27 | 28 | parser.add_argument( 29 | "nchunks", 30 | type=positive_int, 31 | help="Number of chunks to randomly select from source_dirs and merge", 32 | ) 33 | parser.add_argument( 34 | "source_dirs", 35 | nargs="+", 36 | type=readable_directory, 37 | help="Paths to directories with PCAP chunks", 38 | ) 39 | parser.add_argument( 40 | "--seed", 41 | default=0, 42 | type=int, 43 | help="Randomization seed (default: 0); use negative value to turn off randomization", 44 | ) 45 | args = parser.parse_args() 46 | 47 | # reproducible pseudorandomness 48 | random.seed(args.seed, version=2) 49 | 50 | pcaps = [] 51 | for dir_path in args.source_dirs: 52 | pcaps.extend( 53 | str(path) 54 | for path in dir_path.glob("**/*") 55 | if path.is_file() or path.is_fifo() 56 | ) 57 | 58 | if args.nchunks > len(pcaps): 59 | sys.exit(f"{args.nchunks} chunks requested but only {len(pcaps)} available") 60 | 61 | pcaps.sort() 62 | if args.seed >= 0: 63 | random.shuffle(pcaps) 64 | mergecap_args = ["mergecap", "-F", "pcap", "-w", "-"] 65 | mergecap_args.extend(pcaps[: args.nchunks]) 66 | 67 | sys.stderr.write(f"merging {args.nchunks} chunks into PCAP stream on stdout\n") 68 | sys.stderr.write(f"executing merge command: {mergecap_args}\n") 69 | sys.stderr.flush() 70 | 71 | os.execvp("mergecap", mergecap_args) 72 | 73 | 74 | if __name__ == "__main__": 75 | main() 76 | -------------------------------------------------------------------------------- /pcap/split-clients.lua: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env dnsjit
2 |
3 | -- split-clients.lua: separate clients (IPs) into multiple output files
4 | --
5 | -- Every unique IP (client) will be assigned to one output file.
6 | -- All of a client's packets remain intact and go into a single file.
7 |
8 | local ffi = require("ffi")
9 | local input = require("dnsjit.input.pcap").new()
10 | local layer = require("dnsjit.filter.layer").new()
11 | local object = require("dnsjit.core.objects")
12 | local log = require("dnsjit.core.log").new("split-clients.lua")
13 | local getopt = require("dnsjit.lib.getopt").new({
14 |     { "r", "read", "", "input file to read", "?" },
15 |     { "O", "outdir", "", "directory for client chunks (must exist)", "?" },
16 |     { "n", "noutputs", 0, "number of output files", "?" },
17 |     { nil, "seed", 0, "seed for RNG", "?" },
18 | })
19 |
20 | local SNAPLEN = 66000
21 | local LINKTYPE = 12 -- DLT_RAW in Linux, see https://github.com/the-tcpdump-group/libpcap/blob/master/pcap/dlt.h
22 |
23 | log:enable("all")
24 |
25 | --- Check if a file or directory exists in this path
26 | local function exists(file)
27 |     local ok, err, code = os.rename(file, file)
28 |     if not ok then
29 |         if code == 13 then
30 |             -- Permission denied, but it exists
31 |             return true
32 |         end
33 |     end
34 |     return ok, err
35 | end
36 |
37 | -- Error out if write failed
38 | local function check_output(output, filename)
39 |     if output:have_errors() then
40 |         log:fatal("error writing to file %s", filename)
41 |     end
42 | end
43 |
44 | -- Parse arguments
45 | local args = {}
46 | getopt:parse()
47 | args.read = getopt:val("r")
48 | args.outdir = getopt:val("O")
49 | args.noutputs = getopt:val("n")
50 | args.seed = getopt:val("seed")
51 | math.randomseed(args.seed)
52 |
53 | -- Display help
54 | if getopt:val("help") then
55 |     getopt:usage()
56 |     return
57 | end
58 |
59 | -- Prepare output directories
60 | if args.outdir == "" then
61 |     getopt:usage()
62 |     log:fatal("output directory must be specified, use -O")
63 | elseif not exists(args.outdir .. "/") then
64 |     log:fatal("output directory \"" .. args.outdir .. "\" doesn't exist")
65 | end
66 |
67 | -- Check arguments
68 | if args.noutputs <= 1 then
69 |     log:fatal("number of output files must be greater than 1")
70 | end
71 |
72 | -- Set up input
73 | if args.read ~= "" then
74 |     if input:open_offline(args.read) ~= 0 then
75 |         log:fatal("failed to open input PCAP " .. args.read)
76 |     end
77 |     log:notice("using input PCAP " .. args.read)
78 | else
79 |     getopt:usage()
80 |     log:fatal("input must be specified, use -r")
81 | end
82 |
83 | layer:producer(input)
84 | local produce, pctx = layer:produce()
85 |
86 | local outputs = {}
87 | local fname_padding = tostring(math.floor(math.log(args.noutputs, 10)) + 1) -- digits needed for zero-padded names
88 | for n = 1, args.noutputs do
89 |     local output = require("dnsjit.output.pcap").new()
90 |     local out_fname = string.format("%s/%0" .. fname_padding
91 |         .. "d.pcap", args.outdir, n)
92 |     if output:open(out_fname, LINKTYPE, SNAPLEN) ~= 0 then
93 |         log:fatal("failed to open chunk file " .. out_fname)
94 |     else
95 |         log:notice("created output PCAP: " ..
out_fname)
96 |     end
97 |
98 |     outputs[n] = { fn = out_fname, output = output, nclients = 0, npackets = 0 }
99 |     outputs[n]['write'], outputs[n]['writectx'] = output:receive()
100 | end
101 |
102 | local nclients = 0
103 | local client2output = {}
104 |
105 | local npackets = 0
106 | local ip_len = 16
107 | local obj, obj_ip, output_id, src_ip
108 | while true do
109 |     obj = produce(pctx)
110 |     if obj == nil then break end
111 |     npackets = npackets + 1
112 |
113 |     obj_ip = obj:cast_to(object.IP6)
114 |     assert(obj_ip ~= nil, 'no IPv6 header found, use output from '
115 |         .. 'extract-clients.lua as input for this script')
116 |
117 |     src_ip = ffi.string(obj_ip.src, ip_len)
118 |     output_id = client2output[src_ip]
119 |     if output_id == nil then
120 |         output_id = math.random(1, args.noutputs)
121 |         client2output[src_ip] = output_id
122 |         outputs[output_id]['nclients'] = outputs[output_id]['nclients'] + 1
123 |         nclients = nclients + 1
124 |     end
125 |     local output_tab = outputs[output_id]
126 |     output_tab.write(output_tab.writectx, obj)
127 |     output_tab.npackets = output_tab.npackets + 1
128 |     if output_tab.npackets % 10000 == 0 then
129 |         check_output(output_tab.output, output_tab.fn)
130 |     end
131 | end
132 |
133 | if npackets == 0 then
134 |     log:fatal("no input packets processed!")
135 | else
136 |     log:info("processed %0.f input packets", npackets)
137 | end
138 | for _, output in pairs(outputs) do
139 |     check_output(output['output'], output['fn'])
140 |     log:info("%s: clients: %0.f packets: %0.f", output['fn'], output['nclients'], output['npackets'])
141 | end
142 |
143 | -- stats for sanity checks: min, max, and (deviation / average * 100) for clients and packets
144 | local stats = {}
145 | local avgs = {nclients = nclients / args.noutputs, npackets = npackets / args.noutputs}
146 | for _, stat_name in pairs({'nclients', 'npackets'}) do
147 |     stats[stat_name] = {abs = {min = math.huge, max = -math.huge}, err = {}}
148 |     local abs = stats[stat_name].abs -- absolute values
149 |     local err = stats[stat_name].err -- deviation from per-output average as float
150 |     for func, _ in pairs(abs) do
151 |         for _, output in pairs(outputs) do
152 |             abs[func] = math[func](abs[func], output[stat_name])
153 |         end
154 |     end
155 |     local avg = avgs[stat_name]
156 |     -- percentage deviations from the average (expected value)
157 |     for func, _ in pairs(abs) do
158 |         err[func] = (abs[func] - avg) / avg
159 |     end
160 | end
161 | for stat_name, _ in pairs(stats) do
162 |     log:notice("deviation from average number of %s in range <%0.1f, %0.1f> %% per "
163 |         ..
"output file (average %0.f)", stat_name, 164 | stats[stat_name].err.min * 100, 165 | stats[stat_name].err.max * 100, 166 | avgs[stat_name]) 167 | end 168 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | 3 | disable= 4 | missing-docstring, 5 | too-few-public-methods, 6 | too-many-arguments, 7 | too-many-locals, 8 | too-many-return-statements, 9 | too-many-branches, 10 | fixme, 11 | unused-import, # checked by flake8 12 | line-too-long, # checked by flake8 13 | invalid-name, 14 | broad-except, 15 | global-statement, 16 | no-else-return, 17 | duplicate-code, 18 | 19 | 20 | [SIMILARITIES] 21 | min-similarity-lines=6 22 | ignore-comments=yes 23 | ignore-docstrings=yes 24 | ignore-imports=no 25 | 26 | [DESIGN] 27 | max-parents=10 28 | -------------------------------------------------------------------------------- /replay/dnssim/.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | 54 | # Automake 55 | Makefile.in 56 | aclocal.m4 57 | ar-lib 58 | autom4te.cache 59 | compile 60 | config.guess 61 | config.sub 62 | configure 63 | depcomp 64 | install-sh 65 | ltmain.sh 66 | m4/libtool.m4 67 | m4/ltoptions.m4 68 | m4/ltsugar.m4 69 | m4/ltversion.m4 70 | m4/lt~obsolete.m4 71 | missing 72 | config.h.in 73 | config.h.in~ 74 | test-driver 75 | 76 | # Configure 77 | Makefile 78 | config.log 79 | config.status 80 | libtool 81 | .libs 82 | .deps 83 | src/config.h 84 | src/stamp-h1 85 | build* 86 | .dirstamp 87 | 88 | # Project specific files 89 | -------------------------------------------------------------------------------- /replay/dnssim/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | dnssim forever 2 | ============== 3 | Since dnssim is now a part of Shotgun, all future changes will be logged in 4 | Shotgun's `NEWS` file. 
5 | 6 | dnssim v20210714 7 | ================ 8 | - dnssim was moved from dnsjit to shotgun, this implies new 9 | lua name: "shotgun.output.dnssim" 10 | 11 | dnssim v20210129 12 | ================ 13 | 14 | - Added DNS-over-HTTPS support with https2() 15 | - Added IPv4 support 16 | - Abort operation on insufficient file descriptors 17 | - Match QUESTION section of received responses 18 | - Improvements in connection state handling 19 | - Deprecate udp_only() in favor of udp() 20 | - Allow setting logger name with log(name) 21 | - Added check_version() and check_json_version() 22 | 23 | dnssim v20200723 24 | ================ 25 | 26 | - First released dnssim version with UDP, TCP and DoT support 27 | -------------------------------------------------------------------------------- /replay/dnssim/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(dnssim C) 3 | 4 | include(CheckIncludeFile) 5 | include(CheckIncludeFiles) 6 | include(CheckFunctionExists) 7 | 8 | option(DNSJIT_PATH "Path to dnsjit prefix" "") 9 | option(ASAN "Use AddressSanitizer" OFF) 10 | option(UBSAN "Use UndefinedBehaviorSanitizer" OFF) 11 | option(USE_SYSTEM_NGTCP2 "Use ngtcp2 from the system" OFF) 12 | 13 | 14 | ## Dependencies ################################################################ 15 | 16 | # GnuTLS 17 | find_package(GnuTLS REQUIRED) 18 | list(APPEND DNSSIM_LIBS GnuTLS::GnuTLS) 19 | 20 | # LibUV 21 | find_library(LIBUV_LIB libuv.so REQUIRED) 22 | list(APPEND DNSSIM_LIBS ${LIBUV_LIB}) 23 | check_include_file(uv.h HAS_UV_H) 24 | if (NOT HAS_UV_H) 25 | message(FATAL_ERROR "LibUV is required") 26 | endif () 27 | 28 | # nghttp2 29 | find_library(NGHTTP2_LIB libnghttp2.so REQUIRED) 30 | list(APPEND DNSSIM_LIBS ${NGHTTP2_LIB}) 31 | check_include_file(nghttp2/nghttp2.h HAS_NGHTTP2_H) 32 | if (NOT HAS_NGHTTP2_H) 33 | message(FATAL_ERROR "nghttp2 is required") 34 | endif () 35 | 36 | # ngtcp2 37 | if (USE_SYSTEM_NGTCP2) 38 | find_library(NGTCP2_LIB libngtcp2.so REQUIRED) 39 | list(APPEND DNSSIM_LIBS ${NGTCP2_LIB}) 40 | find_library(NGTCP2_GTLS_LIB libngtcp2_crypto_gnutls.so REQUIRED) 41 | list(APPEND DNSSIM_LIBS ${NGTCP2_GTLS_LIB}) 42 | check_include_files("ngtcp2/ngtcp2.h;ngtcp2/ngtcp2_crypto.h;ngtcp2/ngtcp2_crypto_gnutls.h" 43 | HAS_NGTCP2_H) 44 | 45 | if (NOT HAS_NGTCP2_H) 46 | message(FATAL_ERROR "ngtcp2 is required (and set to use system)") 47 | endif () 48 | else () 49 | function (add_ngtcp2) # wrapped in a function to ensure scope 50 | list(APPEND CMAKE_C_FLAGS -fPIC) 51 | set(ENABLE_STATIC_LIB ON) 52 | set(ENABLE_GNUTLS ON) 53 | set(ENABLE_QUICTLS OFF) 54 | set(ENABLE_OPENSSL OFF) 55 | add_subdirectory(vendor/ngtcp2 EXCLUDE_FROM_ALL SYSTEM) 56 | endfunction () 57 | add_ngtcp2() 58 | list(APPEND DNSSIM_LIBS ngtcp2_static) 59 | list(APPEND DNSSIM_LIBS ngtcp2_crypto_gnutls_static) 60 | endif () 61 | 62 | # dnsjit 63 | if (DNSJIT_PATH) 64 | include_directories(${DNSJIT_PATH}/include) 65 | endif () 66 | check_include_file(dnsjit/version.h HAS_DNSJIT_H) 67 | if (NOT HAS_DNSJIT_H) 68 | message(FATAL_ERROR "dnsjit is required (you may use DNSJIT_PATH to specify its install prefix)") 69 | endif () 70 | 71 | # syscalls 72 | check_function_exists(clock_gettime HAS_CLOCK_GETTIME) 73 | check_function_exists(clock_nanosleep HAS_CLOCK_NANOSLEEP) 74 | if (NOT HAS_CLOCK_GETTIME OR NOT HAS_CLOCK_NANOSLEEP) 75 | message(FATAL_ERROR "clock_gettime() and clock_nanosleep() are required") 76 | endif () 77 | 78 | 79 | ## Sanitizers 
################################################################## 80 | 81 | if (ASAN) 82 | list(APPEND DNSSIM_SANITIZERS address) 83 | endif () 84 | if (UBSAN) 85 | list(APPEND DNSSIM_SANITIZERS undefined) 86 | endif () 87 | 88 | if (DNSSIM_SANITIZERS) 89 | list(JOIN DNSSIM_SANITIZERS "," DNSSIM_SANITIZERS_JOINED) 90 | list(APPEND CMAKE_C_FLAGS -fsanitize=${DNSSIM_SANITIZERS_JOINED}) 91 | endif () 92 | 93 | 94 | ## DNSSIM ###################################################################### 95 | 96 | list(APPEND DNSSIM_OPTS -fno-strict-aliasing -Wall) 97 | include_directories(src) 98 | add_library(dnssim SHARED 99 | src/output/dnssim.c 100 | src/output/dnssim/common.c 101 | src/output/dnssim/connection.c 102 | src/output/dnssim/https2.c 103 | src/output/dnssim/quic.c 104 | src/output/dnssim/tcp.c 105 | src/output/dnssim/tls.c 106 | src/output/dnssim/udp.c 107 | ) 108 | target_link_libraries(dnssim PUBLIC ${DNSSIM_LIBS}) 109 | set_target_properties(dnssim PROPERTIES 110 | PREFIX "") 111 | target_link_options(dnssim PUBLIC ${DNSSIM_OPTS}) 112 | target_compile_options(dnssim PUBLIC ${DNSSIM_OPTS}) 113 | 114 | 115 | ## Install ##################################################################### 116 | 117 | install(TARGETS dnssim 118 | DESTINATION lib/lua/5.1/shotgun-output-dnssim/) 119 | install(FILES src/output/dnssim.lua 120 | DESTINATION share/lua/5.1/shotgun/output/) 121 | -------------------------------------------------------------------------------- /replay/dnssim/README.md: -------------------------------------------------------------------------------- 1 | # shotgun.output.dnssim 2 | -------------------------------------------------------------------------------- /replay/dnssim/src/output/dnssim.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) CZ.NIC, z.s.p.o. 2 | * SPDX-License-Identifier: GPL-3.0-or-later 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #ifndef __dnsjit_output_dnssim_h 9 | #define __dnsjit_output_dnssim_h 10 | 11 | #include 12 | 13 | typedef enum output_dnssim_transport { 14 | OUTPUT_DNSSIM_TRANSPORT_UDP_ONLY, 15 | OUTPUT_DNSSIM_TRANSPORT_UDP, 16 | OUTPUT_DNSSIM_TRANSPORT_TCP, 17 | OUTPUT_DNSSIM_TRANSPORT_TLS, 18 | OUTPUT_DNSSIM_TRANSPORT_HTTPS2, 19 | OUTPUT_DNSSIM_TRANSPORT_QUIC, 20 | } output_dnssim_transport_t; 21 | 22 | typedef enum output_dnssim_h2_method { 23 | OUTPUT_DNSSIM_H2_GET, 24 | OUTPUT_DNSSIM_H2_POST 25 | } output_dnssim_h2_method_t; 26 | 27 | typedef struct output_dnssim_stats output_dnssim_stats_t; 28 | struct output_dnssim_stats { 29 | output_dnssim_stats_t* prev; 30 | output_dnssim_stats_t* next; 31 | 32 | uint64_t* latency; 33 | 34 | uint64_t since_ms; 35 | uint64_t until_ms; 36 | 37 | uint64_t requests; 38 | uint64_t ongoing; 39 | uint64_t answers; 40 | 41 | /* Number of connections that are open at the end of the stats interval. */ 42 | uint64_t conn_active; 43 | 44 | /* Number of TCP/QUIC connection handshake attempts during the stats interval. */ 45 | uint64_t conn_handshakes; 46 | 47 | /* Number of connections that have been resumed with TLS session resumption. */ 48 | uint64_t conn_resumed; 49 | 50 | /* Number of QUIC connections that have used 0-RTT transport parameters to 51 | * initiate a new connection. */ 52 | uint64_t conn_quic_0rtt_loaded; 53 | uint64_t quic_0rtt_sent; 54 | uint64_t quic_0rtt_answered; 55 | 56 | /* Number of timed out connection handshakes during the stats interval. 
*/ 57 | uint64_t conn_handshakes_failed; 58 | 59 | uint64_t rcode_noerror; 60 | uint64_t rcode_formerr; 61 | uint64_t rcode_servfail; 62 | uint64_t rcode_nxdomain; 63 | uint64_t rcode_notimp; 64 | uint64_t rcode_refused; 65 | uint64_t rcode_yxdomain; 66 | uint64_t rcode_yxrrset; 67 | uint64_t rcode_nxrrset; 68 | uint64_t rcode_notauth; 69 | uint64_t rcode_notzone; 70 | uint64_t rcode_badvers; 71 | uint64_t rcode_badkey; 72 | uint64_t rcode_badtime; 73 | uint64_t rcode_badmode; 74 | uint64_t rcode_badname; 75 | uint64_t rcode_badalg; 76 | uint64_t rcode_badtrunc; 77 | uint64_t rcode_badcookie; 78 | uint64_t rcode_other; 79 | }; 80 | 81 | typedef struct output_dnssim { 82 | core_log_t _log; 83 | 84 | uint64_t processed; 85 | uint64_t discarded; 86 | uint64_t ongoing; 87 | 88 | output_dnssim_stats_t* stats_sum; 89 | output_dnssim_stats_t* stats_current; 90 | output_dnssim_stats_t* stats_first; 91 | 92 | size_t zero_rtt_data_initial_capacity; 93 | 94 | size_t max_clients; 95 | bool free_after_use; 96 | bool zero_rtt; 97 | 98 | uint64_t timeout_ms; 99 | uint64_t idle_timeout_ms; 100 | uint64_t handshake_timeout_ms; 101 | uint64_t stats_interval_ms; 102 | } output_dnssim_t; 103 | 104 | core_log_t* output_dnssim_log(); 105 | 106 | output_dnssim_t* output_dnssim_new(size_t max_clients); 107 | void output_dnssim_free(output_dnssim_t* self); 108 | 109 | void output_dnssim_log_name(output_dnssim_t* self, const char* name); 110 | void output_dnssim_set_transport(output_dnssim_t* self, output_dnssim_transport_t tr); 111 | int output_dnssim_target(output_dnssim_t* self, const char* ip, uint16_t port); 112 | int output_dnssim_bind(output_dnssim_t* self, const char* ip); 113 | int output_dnssim_tls_priority(output_dnssim_t* self, const char* priority, bool is_quic); 114 | int output_dnssim_run_nowait(output_dnssim_t* self); 115 | void output_dnssim_timeout_ms(output_dnssim_t* self, uint64_t timeout_ms); 116 | void output_dnssim_h2_uri_path(output_dnssim_t* self, const char* uri_path); 117 | void output_dnssim_h2_method(output_dnssim_t* self, const char* method); 118 | void output_dnssim_h2_zero_out_msgid(output_dnssim_t* self, bool zero_out_msgid); 119 | void output_dnssim_stats_collect(output_dnssim_t* self, uint64_t interval_ms); 120 | void output_dnssim_stats_finish(output_dnssim_t* self); 121 | 122 | core_receiver_t output_dnssim_receiver(); 123 | 124 | #endif 125 | -------------------------------------------------------------------------------- /replay/dnssim/src/output/dnssim/ll.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2019-2021 CZ.NIC, z.s.p.o. 2 | * SPDX-License-Identifier: GPL-3.0-or-later 3 | */ 4 | 5 | #ifndef __dnsjit_output_dnssim_ll_h 6 | #define __dnsjit_output_dnssim_ll_h 7 | 8 | #include 9 | 10 | /* Utility macros for linked list structures. 11 | * 12 | * - "list" is the pointer to the first node of the linked list 13 | * - "list" can be NULL if there are no nodes 14 | * - every node has "next", which points to the next node (can be NULL) 15 | */ 16 | 17 | /* Append a node to the list. 18 | * 19 | * Only a single node can be appended - node->next must be NULL. 
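 *
 * A minimal usage sketch (hypothetical node type, following the list/node
 * conventions above):
 *
 *   struct node { struct node* next; };
 *   struct node n = { .next = NULL };
 *   struct node* list = NULL;
 *   _ll_append(list, &n);
 *
 * Afterwards list == &n; appending another node would walk the next
 * pointers to the tail of the list.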
20 | */ 21 | #define _ll_append(list, node) \ 22 | { \ 23 | glassert((node)->next == NULL, "node->next must be null when appending"); \ 24 | if ((list) == NULL) \ 25 | (list) = (node); \ 26 | else if ((node) != NULL) { \ 27 | typeof(list) _current = (list); \ 28 | while (_current->next != NULL) \ 29 | _current = _current->next; \ 30 | _current->next = node; \ 31 | } \ 32 | } 33 | 34 | /* Remove a node from the list. 35 | * 36 | * In strict mode, the node must be present in the list. 37 | */ 38 | #define _ll_remove_template(list, currname, cond, strict, once, dealloc) \ 39 | do { \ 40 | if (strict) \ 41 | glassert((list), "list can't be null when removing nodes"); \ 42 | if ((list) != NULL) { \ 43 | bool _removed = false; \ 44 | typeof(list)* currname = &(list); \ 45 | while (*currname) { \ 46 | if ((cond)) { \ 47 | typeof(list) _c = *currname; \ 48 | (*currname) = _c->next; \ 49 | _c->next = NULL; \ 50 | _removed = true; \ 51 | if ((dealloc)) \ 52 | free(_c); \ 53 | if ((once)) \ 54 | break; \ 55 | } else { \ 56 | currname = &(*currname)->next; \ 57 | } \ 58 | } \ 59 | if (!_removed && (strict)) \ 60 | glfatal("list doesn't contain the node to be removed"); \ 61 | } \ 62 | } while (0) 63 | 64 | #define _ll_remove_node_template(list, node, strict) \ 65 | _ll_remove_template((list), curr, *curr == (node), strict, true, false) 66 | 67 | /* Remove the specified node from the list. */ 68 | #define _ll_remove(list, node) \ 69 | _ll_remove_node_template((list), (node), true) 70 | 71 | /* Remove the specified node from the list if it's present. */ 72 | #define _ll_try_remove(list, node) \ 73 | _ll_remove_node_template((list), (node), false) 74 | 75 | /* Remove all nodes for which `cond` is `true`. Here, `currname` is the name of 76 | * the pointer to the node currently checked by `cond`. I.e. in the first case, 77 | * `currname` will be `&list`, then `&list->next`, then `&list->next->next` etc. 78 | * 79 | * For `currname = c`, `cond` may be e.g. `(*c)->qry == qry`. */ 80 | #define _ll_remove_cond(list, currname, cond, dealloc) \ 81 | _ll_remove_template((list), currname, (cond), false, false, (dealloc)) 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /replay/dnssim/src/output/dnssim/tcp.c: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2019-2021 CZ.NIC, z.s.p.o. 2 | * SPDX-License-Identifier: GPL-3.0-or-later 3 | */ 4 | 5 | #include "output/dnssim.h" 6 | #include "output/dnssim/internal.h" 7 | #include "output/dnssim/ll.h" 8 | 9 | #include 10 | 11 | static core_log_t _log = LOG_T_INIT("output.dnssim"); 12 | 13 | static void _on_tcp_closed(uv_handle_t* handle) 14 | { 15 | _output_dnssim_connection_t* conn = (_output_dnssim_connection_t*)handle->data; 16 | mlassert(conn, "conn is nil"); 17 | mlassert(conn->transport_type == _OUTPUT_DNSSIM_CONN_TRANSPORT_TCP, "conn must have tcp transport type"); 18 | conn->state = _OUTPUT_DNSSIM_CONN_CLOSED; 19 | 20 | /* Orphan any queries that are still unresolved. */ 21 | _output_dnssim_conn_move_queries_to_pending((_output_dnssim_query_stream_t**)&conn->queued); 22 | _output_dnssim_conn_move_queries_to_pending((_output_dnssim_query_stream_t**)&conn->sent); 23 | 24 | /* TODO Improve client re-connect behavior in case the connection fails to 25 | * establish. Currently, queries are orphaned and attempted to be re-sent 26 | * along with the next query that triggers a new connection. 
27 |  *
28 |  * Attempting to establish a new connection immediately leads to performance
29 |  * issues if the number of these attempts doesn't have an upper limit. */
30 |     ///* Ensure orphaned queries are re-sent over a different connection. */
31 |     //if (_output_dnssim_handle_pending_queries(conn->client) != 0)
32 |     //    mlinfo("tcp: orphaned queries failed to be re-sent");
33 |
34 |     mlassert(conn->transport.tcp, "conn must have tcp handle when closing it");
35 |     free(conn->transport.tcp);
36 |     conn->transport.tcp = NULL;
37 |     _output_dnssim_conn_maybe_free(conn);
38 | }
39 |
40 | static void _on_tcp_query_written(uv_write_t* wr_req, int status)
41 | {
42 |     _output_dnssim_query_stream_t* qry = (_output_dnssim_query_stream_t*)wr_req->data;
43 |     mlassert(qry, "qry/wr_req->data is nil");
44 |     mlassert(qry->conn, "query must be associated with connection");
45 |     _output_dnssim_connection_t* conn = qry->conn;
46 |
47 |     free(qry->bufs[0].base);
48 |
49 |     if (qry->qry.state == _OUTPUT_DNSSIM_QUERY_PENDING_CLOSE) {
50 |         qry->qry.state = status < 0 ? _OUTPUT_DNSSIM_QUERY_WRITE_FAILED : _OUTPUT_DNSSIM_QUERY_SENT;
51 |         _output_dnssim_request_t* req = qry->qry.req;
52 |         _output_dnssim_close_query_tcp(qry);
53 |         _output_dnssim_maybe_free_request(req);
54 |         qry = NULL;
55 |     }
56 |
57 |     if (status < 0) {
58 |         if (status != UV_ECANCELED)
59 |             mlinfo("tcp write failed: %s", uv_strerror(status));
60 |         if (qry != NULL)
61 |             qry->qry.state = _OUTPUT_DNSSIM_QUERY_WRITE_FAILED;
62 |         _output_dnssim_conn_close(conn);
63 |         return;
64 |     }
65 |
66 |     if (qry == NULL)
67 |         return;
68 |
69 |     /* Mark query as sent and assign it to connection. */
70 |     mlassert(qry->qry.state == _OUTPUT_DNSSIM_QUERY_PENDING_WRITE_CB, "invalid query state");
71 |     qry->qry.state = _OUTPUT_DNSSIM_QUERY_SENT;
72 |     if (qry->conn->state == _OUTPUT_DNSSIM_CONN_ACTIVE) {
73 |         mlassert(qry->conn->queued, "conn has no queued queries");
74 |         _ll_remove(qry->conn->queued, &qry->qry);
75 |         _ll_append(qry->conn->sent, &qry->qry);
76 |     }
77 | }
78 |
79 | void _output_dnssim_tcp_write_query(_output_dnssim_connection_t* conn, _output_dnssim_query_stream_t* qry)
80 | {
81 |     mlassert(qry, "qry can't be null");
82 |     mlassert(qry->qry.state == _OUTPUT_DNSSIM_QUERY_PENDING_WRITE, "qry must be pending write");
83 |     mlassert(qry->qry.req, "req can't be null");
84 |     mlassert(qry->qry.req->dns_q, "dns_q can't be null");
85 |     mlassert(qry->qry.req->dns_q->obj_prev, "payload can't be null");
86 |     mlassert(conn, "conn can't be null");
87 |     mlassert(conn->transport_type == _OUTPUT_DNSSIM_CONN_TRANSPORT_TCP, "conn transport type must be tcp");
88 |     mlassert(conn->state == _OUTPUT_DNSSIM_CONN_ACTIVE, "connection state != ACTIVE");
89 |     mlassert(conn->client, "conn must be associated with client");
90 |     mlassert(conn->client->pending, "conn has no pending queries");
91 |
92 |     mldebug("tcp write dnsmsg id: %04x", qry->qry.req->dns_q->id);
93 |
94 |     core_object_payload_t* payload = (core_object_payload_t*)qry->qry.req->dns_q->obj_prev;
95 |     uint16_t* len;
96 |     mlfatal_oom(len = malloc(sizeof(uint16_t)));
97 |     *len = htons(payload->len);
98 |     qry->bufs[0] = uv_buf_init((char*)len, 2);
99 |     qry->bufs[1] = uv_buf_init((char*)payload->payload, payload->len);
100 |
101 |     qry->conn = conn;
102 |     _ll_remove(conn->client->pending, &qry->qry);
103 |     _ll_append(conn->queued, &qry->qry);
104 |
105 |     /* Stop idle timer, since there are queries to answer now.
*/
106 |     if (conn->idle_timer != NULL) {
107 |         conn->is_idle = false;
108 |         uv_timer_stop(conn->idle_timer);
109 |     }
110 |
111 |     qry->write_req.data = (void*)qry;
112 |     uv_write(&qry->write_req, (uv_stream_t*)conn->transport.tcp, qry->bufs, 2, _on_tcp_query_written);
113 |     qry->qry.state = _OUTPUT_DNSSIM_QUERY_PENDING_WRITE_CB;
114 | }
115 |
116 | static void _on_tcp_read(uv_stream_t* handle, ssize_t nread, const uv_buf_t* buf)
117 | {
118 |     _output_dnssim_connection_t* conn = (_output_dnssim_connection_t*)handle->data;
119 |     output_dnssim_t* self = conn->client->dnssim;
120 |
121 |     if (nread > 0) {
122 |         mldebug("tcp nread: %zd", nread);
123 |         switch (_self->transport) {
124 |         case OUTPUT_DNSSIM_TRANSPORT_TCP:
125 |             _output_dnssim_read_dns_stream(conn, nread, buf->base, -1);
126 |             break;
127 |         case OUTPUT_DNSSIM_TRANSPORT_TLS:
128 |         case OUTPUT_DNSSIM_TRANSPORT_HTTPS2:
129 | #if DNSSIM_HAS_GNUTLS
130 |             mlassert(conn->tls, "conn must have tls ctx");
131 |             conn->tls->buf = (uint8_t*)buf->base;
132 |             conn->tls->buf_pos = 0;
133 |             conn->tls->buf_len = nread;
134 |             _output_dnssim_tls_process_input_data(conn);
135 | #else
136 |             mlfatal(DNSSIM_MIN_GNUTLS_ERRORMSG);
137 | #endif
138 |             break;
139 |         default:
140 |             mlfatal("unsupported transport");
141 |             break;
142 |         }
143 |     } else if (nread < 0) {
144 |         if (nread != UV_EOF) {
145 |             mlinfo("tcp conn unexpected close: %s", uv_strerror(nread));
146 |             _output_dnssim_conn_close(conn);
147 |         } else {
148 |             _output_dnssim_conn_bye(conn);
149 |         }
150 |     }
151 |
152 |     if (buf->base != NULL)
153 |         free(buf->base);
154 | }
155 |
156 | static void _on_tcp_connected(uv_connect_t* conn_req, int status)
157 | {
158 |     _output_dnssim_connection_t* conn = (_output_dnssim_connection_t*)conn_req->handle->data;
159 |     mlassert(conn, "conn is nil");
160 |     mlassert(conn->transport_type == _OUTPUT_DNSSIM_CONN_TRANSPORT_TCP, "conn transport type must be tcp");
161 |
162 |     free(conn_req);
163 |
164 |     if (status < 0) {
165 |         mldebug("tcp connect failed: %s", uv_strerror(status));
166 |         _output_dnssim_conn_close(conn);
167 |         return;
168 |     }
169 |
170 |     mlassert(conn->state == _OUTPUT_DNSSIM_CONN_TRANSPORT_HANDSHAKE, "connection state != TCP_HANDSHAKE");
171 |     int ret = uv_read_start((uv_stream_t*)conn->transport.tcp, _output_dnssim_on_uv_alloc, _on_tcp_read);
172 |     if (ret < 0) {
173 |         mlwarning("tcp uv_read_start() failed: %s", uv_strerror(ret));
174 |         _output_dnssim_conn_close(conn);
175 |         return;
176 |     }
177 |
178 |     mldebug("tcp connected");
179 |     mlassert(conn->client, "conn must be associated with a client");
180 |     mlassert(conn->client->dnssim, "client must be associated with dnssim");
181 |     output_dnssim_t* self = conn->client->dnssim;
182 |     switch (_self->transport) {
183 |     case OUTPUT_DNSSIM_TRANSPORT_TCP:
184 |         _output_dnssim_conn_activate(conn);
185 |         break;
186 |     case OUTPUT_DNSSIM_TRANSPORT_TLS:
187 |     case OUTPUT_DNSSIM_TRANSPORT_HTTPS2:
188 | #if DNSSIM_HAS_GNUTLS
189 |         mldebug("init tls handshake");
190 |         _output_dnssim_tls_process_input_data(conn); /* Initiate TLS handshake.
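       (With no input buffered for the connection yet, this call presumably
       just drives gnutls_handshake() until it needs more bytes from the
       peer; this is an assumption based on its use here, the function
       itself is defined in tls.c.)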
*/ 191 | #else 192 | mlfatal(DNSSIM_MIN_GNUTLS_ERRORMSG); 193 | #endif 194 | break; 195 | default: 196 | lfatal("unsupported transport protocol"); 197 | break; 198 | } 199 | } 200 | 201 | static void _on_connection_timeout(uv_timer_t* handle) 202 | { 203 | _output_dnssim_connection_t* conn = (_output_dnssim_connection_t*)handle->data; 204 | _output_dnssim_conn_close(conn); 205 | } 206 | 207 | int _output_dnssim_tcp_connect(output_dnssim_t* self, _output_dnssim_connection_t* conn) 208 | { 209 | mlassert_self(); 210 | lassert(conn, "connection can't be null"); 211 | lassert(conn->transport.tcp == NULL, "connection already has a handle"); 212 | lassert(conn->handshake_timer == NULL, "connection already has a handshake timer"); 213 | lassert(conn->idle_timer == NULL, "connection already has idle timer"); 214 | lassert(conn->state == _OUTPUT_DNSSIM_CONN_INITIALIZED, "connection state != INITIALIZED"); 215 | 216 | conn->transport_type = _OUTPUT_DNSSIM_CONN_TRANSPORT_TCP; 217 | lfatal_oom(conn->transport.tcp = malloc(sizeof(uv_tcp_t))); 218 | conn->transport.tcp->data = (void*)conn; 219 | int ret = uv_tcp_init(&_self->loop, conn->transport.tcp); 220 | if (ret < 0) { 221 | lwarning("failed to init uv_tcp_t"); 222 | goto failure; 223 | } 224 | 225 | ret = _output_dnssim_bind_before_connect(self, (uv_handle_t*)conn->transport.tcp); 226 | if (ret < 0) 227 | goto failure; 228 | 229 | /* Set connection parameters. */ 230 | ret = uv_tcp_nodelay(conn->transport.tcp, 1); 231 | if (ret < 0) 232 | lwarning("tcp: failed to set TCP_NODELAY: %s", uv_strerror(ret)); 233 | 234 | /* Set connection handshake timeout. */ 235 | lfatal_oom(conn->handshake_timer = malloc(sizeof(uv_timer_t))); 236 | uv_timer_init(&_self->loop, conn->handshake_timer); 237 | conn->handshake_timer->data = (void*)conn; 238 | uv_timer_start(conn->handshake_timer, _on_connection_timeout, self->handshake_timeout_ms, 0); 239 | 240 | /* Set idle connection timer. */ 241 | if (self->idle_timeout_ms > 0) { 242 | lfatal_oom(conn->idle_timer = malloc(sizeof(uv_timer_t))); 243 | uv_timer_init(&_self->loop, conn->idle_timer); 244 | conn->idle_timer->data = (void*)conn; 245 | 246 | /* Start and stop the timer to set the repeat value without running the timer. 
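       libuv keeps the repeat value across uv_timer_stop(), so the idle
       handling code can later re-arm this timer, presumably via
       uv_timer_again(), without having to pass the interval again.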
*/ 247 | uv_timer_start(conn->idle_timer, _on_connection_timeout, self->idle_timeout_ms, self->idle_timeout_ms); 248 | uv_timer_stop(conn->idle_timer); 249 | } 250 | 251 | mldebug("tcp connecting"); 252 | uv_connect_t* conn_req; 253 | lfatal_oom(conn_req = malloc(sizeof(uv_connect_t))); 254 | ret = uv_tcp_connect(conn_req, conn->transport.tcp, (struct sockaddr*)&_self->target, _on_tcp_connected); 255 | if (ret < 0) 256 | goto failure; 257 | 258 | conn->stats->conn_handshakes++; 259 | self->stats_sum->conn_handshakes++; 260 | conn->state = _OUTPUT_DNSSIM_CONN_TRANSPORT_HANDSHAKE; 261 | return 0; 262 | failure: 263 | _output_dnssim_conn_close(conn); 264 | return ret; 265 | } 266 | 267 | void _output_dnssim_tcp_close(_output_dnssim_connection_t* conn) 268 | { 269 | mlassert(conn, "conn can't be nil"); 270 | mlassert(conn->transport_type == _OUTPUT_DNSSIM_CONN_TRANSPORT_TCP, "conn transport type must be tcp"); 271 | 272 | if (conn->transport.tcp != NULL) { 273 | uv_read_stop((uv_stream_t*)conn->transport.tcp); 274 | uv_close((uv_handle_t*)conn->transport.tcp, _on_tcp_closed); 275 | } 276 | } 277 | 278 | int _output_dnssim_create_query_tcp(output_dnssim_t* self, _output_dnssim_request_t* req) 279 | { 280 | mlassert_self(); 281 | lassert(req, "req is nil"); 282 | lassert(req->client, "request must have a client associated with it"); 283 | 284 | _output_dnssim_query_stream_t* qry; 285 | 286 | lfatal_oom(qry = calloc(1, sizeof(_output_dnssim_query_stream_t))); 287 | 288 | qry->qry.transport = OUTPUT_DNSSIM_TRANSPORT_TCP; 289 | qry->qry.req = req; 290 | qry->qry.state = _OUTPUT_DNSSIM_QUERY_PENDING_WRITE; 291 | req->qry = &qry->qry; // TODO change when adding support for multiple Qs for req 292 | _ll_append(req->client->pending, &qry->qry); 293 | 294 | return _output_dnssim_handle_pending_queries(req->client); 295 | } 296 | 297 | void _output_dnssim_close_query_tcp(_output_dnssim_query_stream_t* qry) 298 | { 299 | mlassert(qry, "qry can't be null"); 300 | mlassert(qry->qry.req, "query must be part of a request"); 301 | _output_dnssim_request_t* req = qry->qry.req; 302 | mlassert(req->client, "request must belong to a client"); 303 | 304 | if ((qry->qry.state == _OUTPUT_DNSSIM_QUERY_PENDING_WRITE_CB || qry->qry.state == _OUTPUT_DNSSIM_QUERY_PENDING_CLOSE)) { 305 | /* Query can't be freed until uv callback is called. */ 306 | qry->qry.state = _OUTPUT_DNSSIM_QUERY_PENDING_CLOSE; 307 | return; 308 | } 309 | 310 | _ll_try_remove(req->client->pending, &qry->qry); 311 | if (qry->conn) { 312 | _output_dnssim_connection_t* conn = qry->conn; 313 | _ll_try_remove(conn->queued, &qry->qry); /* edge-case of cancelled queries */ 314 | _ll_try_remove(conn->sent, &qry->qry); 315 | qry->conn = NULL; 316 | _output_dnssim_conn_idle(conn); 317 | } 318 | 319 | _ll_remove(req->qry, &qry->qry); 320 | free(qry); 321 | } 322 | -------------------------------------------------------------------------------- /replay/dnssim/src/output/dnssim/udp.c: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2019-2021 CZ.NIC, z.s.p.o. 
2 |  * SPDX-License-Identifier: GPL-3.0-or-later
3 |  */
4 |
5 | #include "output/dnssim.h"
6 | #include "output/dnssim/internal.h"
7 | #include "output/dnssim/ll.h"
8 |
9 | static core_log_t _log = LOG_T_INIT("output.dnssim");
10 |
11 | static int _process_udp_response(uv_udp_t* handle, ssize_t nread, const uv_buf_t* buf)
12 | {
13 |     _output_dnssim_query_udp_t* qry = (_output_dnssim_query_udp_t*)handle->data;
14 |     _output_dnssim_request_t* req;
15 |     core_object_payload_t payload = CORE_OBJECT_PAYLOAD_INIT(NULL);
16 |     core_object_dns_t dns_a = CORE_OBJECT_DNS_INIT(&payload);
17 |     mlassert(qry, "qry is nil");
18 |     mlassert(qry->qry.req, "query must be part of a request");
19 |     req = qry->qry.req;
20 |
21 |     payload.payload = (uint8_t*)buf->base;
22 |     payload.len = nread;
23 |
24 |     dns_a.obj_prev = (core_object_t*)&payload;
25 |     int ret = core_object_dns_parse_header(&dns_a);
26 |     if (ret != 0) {
27 |         mldebug("udp response malformed");
28 |         return _ERR_MALFORMED;
29 |     }
30 |     if (dns_a.id != req->dns_q->id) {
31 |         mldebug("udp response msgid mismatch %x(q) != %x(a)", req->dns_q->id, dns_a.id);
32 |         return _ERR_MSGID;
33 |     }
34 |     if (dns_a.tc == 1) {
35 |         mldebug("udp response has TC=1");
36 |         return _ERR_TC;
37 |     }
38 |     ret = _output_dnssim_answers_request(req, &dns_a);
39 |     if (ret != 0) {
40 |         mlwarning("udp response question mismatch");
41 |         return _ERR_QUESTION;
42 |     }
43 |
44 |     _output_dnssim_request_answered(req, &dns_a, false);
45 |     return 0;
46 | }
47 |
48 | static void _on_udp_query_recv(uv_udp_t* handle, ssize_t nread, const uv_buf_t* buf, const struct sockaddr* addr, unsigned flags)
49 | {
50 |     if (nread > 0) {
51 |         mldebug("udp recv: %zd", nread);
52 |
53 |         // TODO handle TC=1
54 |         _process_udp_response(handle, nread, buf);
55 |     }
56 |
57 |     if (buf->base != NULL) {
58 |         free(buf->base);
59 |     }
60 | }
61 |
62 | static void _on_query_udp_closed(uv_handle_t* handle)
63 | {
64 |     _output_dnssim_query_udp_t* qry = (_output_dnssim_query_udp_t*)handle->data;
65 |     _output_dnssim_request_t* req;
66 |     mlassert(qry, "qry is nil");
67 |     mlassert(qry->qry.req, "query must be part of a request");
68 |     req = qry->qry.req;
69 |
70 |     free(qry->handle);
71 |
72 |     _ll_remove(req->qry, &qry->qry);
73 |     free(qry);
74 |
75 |     if (req->qry == NULL)
76 |         _output_dnssim_maybe_free_request(req);
77 | }
78 |
79 | void _output_dnssim_close_query_udp(_output_dnssim_query_udp_t* qry)
80 | {
81 |     int ret;
82 |     mlassert(qry, "qry is nil");
83 |
84 |     ret = uv_udp_recv_stop(qry->handle);
85 |     if (ret < 0) {
86 |         mldebug("failed uv_udp_recv_stop(): %s", uv_strerror(ret));
87 |     }
88 |
89 |     uv_close((uv_handle_t*)qry->handle, _on_query_udp_closed);
90 | }
91 |
92 | int _output_dnssim_create_query_udp(output_dnssim_t* self, _output_dnssim_request_t* req)
93 | {
94 |     int ret;
95 |     _output_dnssim_query_udp_t* qry;
96 |     core_object_payload_t* payload;
97 |     mlassert_self();
98 |     lassert(req, "req is nil");
99 |     payload = (core_object_payload_t*)req->dns_q->obj_prev;
100 |
101 |     lfatal_oom(qry = calloc(1, sizeof(_output_dnssim_query_udp_t)));
102 |     lfatal_oom(qry->handle = malloc(sizeof(uv_udp_t)));
103 |
104 |     qry->qry.transport = OUTPUT_DNSSIM_TRANSPORT_UDP;
105 |     qry->qry.req = req;
106 |     qry->buf = uv_buf_init((char*)payload->payload, payload->len);
107 |     qry->handle->data = (void*)qry;
108 |     ret = uv_udp_init(&_self->loop, qry->handle);
109 |     if (ret < 0) {
110 |         lwarning("failed to init uv_udp_t");
111 |         goto failure;
112 |     }
113 |     _ll_append(req->qry, &qry->qry);
114 |
115 |     ret = _output_dnssim_bind_before_connect(self,
(uv_handle_t*)qry->handle); 116 | if (ret < 0) 117 | return ret; 118 | 119 | ret = uv_udp_try_send(qry->handle, &qry->buf, 1, (struct sockaddr*)&_self->target); 120 | if (ret < 0) { 121 | lwarning("failed to send udp packet: %s", uv_strerror(ret)); 122 | return ret; 123 | } 124 | 125 | // listen for reply 126 | ret = uv_udp_recv_start(qry->handle, _output_dnssim_on_uv_alloc, _on_udp_query_recv); 127 | if (ret < 0) { 128 | lwarning("failed uv_udp_recv_start(): %s", uv_strerror(ret)); 129 | return ret; 130 | } 131 | 132 | return 0; 133 | failure: 134 | free(qry->handle); 135 | free(qry); 136 | return ret; 137 | } 138 | -------------------------------------------------------------------------------- /replay/luaconfig.lua.j2: -------------------------------------------------------------------------------- 1 | local config = {} 2 | 3 | config.pcap = "{{ pcap }}" 4 | config.verbosity = {{ verbosity|int }} 5 | config.drift_s = {{ drift_s|default(1)|int }} 6 | config.stop_after_s = {{ stop_after_s|default('nil') }} 7 | 8 | config.threads = {} 9 | {% for thread in threads %} 10 | config.threads[{{ loop.index }}] = { 11 | name = "{{ thread.name }}", 12 | target_ip = "{{ thread.target_ip }}", 13 | target_port = {{ thread.target_port|int }}, 14 | weight = {{ thread.weight }}, 15 | timeout_s = {{ thread.timeout_s|default(2)|int }}, 16 | handshake_timeout_s = {{ thread.handshake_timeout_s|default(5)|int }}, 17 | idle_timeout_s = {{ thread.idle_timeout_s|default(10)|int }}, 18 | protocol_func = "{{ thread.protocol_func|default('udp') }}", 19 | gnutls_priority = "{{ thread.gnutls_priority|default('dnssim-default') }}", 20 | zero_rtt = {{ thread.zero_rtt|default(1)|int }}, 21 | http_method = "{{ thread.http_method|default('GET') }}", 22 | output_file = "{{ thread.output_file }}", 23 | channel_size = {{ thread.channel_size|default(2048)|int }}, 24 | max_clients = {{ thread.max_clients|default(200000)|int }}, 25 | batch_size = {{ thread.batch_size|default(32)|int }}, 26 | bind_ips = { 27 | {%- for ip in thread.bind_ips %} 28 | "{{ ip|string }}", 29 | {%- endfor %} 30 | } 31 | } 32 | {% endfor %} 33 | 34 | return config 35 | 36 | -- vim: syntax=lua 37 | -------------------------------------------------------------------------------- /replay/shotgun.lua: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env dnsjit 2 | 3 | local object = require("dnsjit.core.objects") 4 | local log = require("dnsjit.core.log") 5 | local dnssim = require("shotgun.output.dnssim") 6 | 7 | local DNSSIM_REQ_VERSION = 20240219 8 | local has_check_version, version = pcall(dnssim.check_version, DNSSIM_REQ_VERSION) 9 | if not has_check_version or version == nil then 10 | log.fatal(string.format( 11 | "Newer dnssim is required. 
Minimum version of dnssim component is v%d.",
12 |         DNSSIM_REQ_VERSION))
13 | end
14 |
15 | local getopt = require("dnsjit.lib.getopt").new({})
16 |
17 | local confpath = unpack(getopt:parse())
18 | if confpath == nil then
19 |     log.fatal("lua config file must be specified as first argument")
20 | end
21 | local ok, config = pcall(dofile, confpath)
22 | if not ok then
23 |     log.fatal("failed to load lua config file \""..confpath.."\": "..tostring(config))
24 | end
25 |
26 | if config.verbosity > 0 then
27 |     log.enable("warning")
28 | end
29 | if config.verbosity > 1 then
30 |     log.enable("notice")
31 | end
32 | if config.verbosity > 2 then
33 |     log.display_file_line(true)
34 | end
35 | if config.verbosity > 3 then
36 |     log.enable("info")
37 | end
38 | if config.verbosity > 4 then
39 |     log.enable("debug")
40 | end
41 |
42 | local function send_thread_main(thr)
43 |     local channel = thr:pop()
44 |     local running
45 |
46 |     local max_clients = thr:pop()
47 |     local name = thr:pop()
48 |     local target_ip = thr:pop()
49 |     local target_port = thr:pop()
50 |     local timeout_s = thr:pop()
51 |     local handshake_timeout_s = thr:pop()
52 |     local idle_timeout_s = thr:pop()
53 |     local protocol_func = thr:pop()
54 |     local gnutls_priority = thr:pop()
55 |     local zero_rtt = thr:pop()
56 |     local http_method = thr:pop()
57 |     local output_file = thr:pop()
58 |     local batch_size = thr:pop()
59 |     local nbind = thr:pop()
60 |
61 |     local output = require("shotgun.output.dnssim").new(max_clients)
62 |     -- luacheck: ignore log
63 |     local log = output:log(name)
64 |
65 |     output:target(target_ip, target_port)
66 |     output:timeout(timeout_s)
67 |     output:handshake_timeout(handshake_timeout_s)
68 |     output:idle_timeout(idle_timeout_s)
69 |     output:zero_rtt(zero_rtt)
70 |
71 |     if protocol_func == "udp" then
72 |         output:udp()
73 |     elseif protocol_func == "tcp" then
74 |         output:tcp()
75 |     elseif protocol_func == "tls" then
76 |         output:tls(gnutls_priority)
77 |     elseif protocol_func == "https2" then
78 |         output:https2({ method = http_method }, gnutls_priority)
79 |     elseif protocol_func == "quic" then
80 |         output:quic(gnutls_priority)
81 |     else
82 |         log:fatal("unknown protocol_func: " ..
protocol_func) 83 | end 84 | 85 | output:stats_collect(1) 86 | output:free_after_use(true) 87 | 88 | for _ = 1, nbind do 89 | output:bind(thr:pop()) 90 | end 91 | 92 | local recv, rctx = output:receive() 93 | local i_full = 0 94 | while true do 95 | local obj 96 | local i = 0 97 | 98 | if channel:full() then 99 | i_full = i_full + 1 100 | if i_full == 1 then 101 | log:debug("buffer capacity reached") 102 | elseif i_full == 4 then 103 | log:info("buffer capacity reached") 104 | elseif i_full == 16 then 105 | log:warning("buffer capacity exceeded, threads may become blocked") 106 | elseif i_full % 64 == 0 then 107 | log:critical("buffer capacity exceeded, threads are blocked") 108 | end 109 | else 110 | if i_full >= 16 then 111 | log:notice("buffer capacity restored") 112 | end 113 | i_full = 0 114 | end 115 | 116 | -- read available data from channel 117 | while i < batch_size do 118 | obj = channel:try_get() 119 | if obj == nil then break end 120 | recv(rctx, obj) 121 | i = i + 1 122 | end 123 | 124 | -- execute libuv loop 125 | running = output:run_nowait() 126 | 127 | -- check if channel is still open 128 | if obj == nil and channel.closed == 1 then 129 | output:stats_finish() 130 | break 131 | end 132 | end 133 | 134 | -- finish processing outstanding requests 135 | while running ~= 0 do 136 | running = output:run_nowait() 137 | end 138 | 139 | output:export(output_file) 140 | end 141 | 142 | 143 | ---- setup input 144 | local delay = require("dnsjit.filter.timing").new() 145 | local layer = require("dnsjit.filter.layer").new() 146 | local ipsplit = require("dnsjit.filter.ipsplit").new() 147 | local copy = require("dnsjit.filter.copy").new() 148 | local input 149 | 150 | if config.pcap == '-' then 151 | input = require("dnsjit.input.fpcap").new() 152 | if input:openfp(io.stdin) ~= 0 then 153 | log.fatal("failed to open PCAP on stdin") 154 | end 155 | else 156 | input = require("dnsjit.input.mmpcap").new() 157 | if input:open(config.pcap) ~= 0 then 158 | log.notice("failed to open PCAP with mmap, fallback to fpcap") 159 | input = require("dnsjit.input.fpcap").new() 160 | if input:open(config.pcap) ~= 0 then 161 | log.fatal("failed to open PCAP with fpcap") 162 | end 163 | end 164 | end 165 | delay:realtime(config.drift_s) 166 | delay:producer(input) 167 | layer:producer(delay) 168 | ipsplit:overwrite_dst() 169 | ipsplit:random() 170 | 171 | -- setup threads 172 | local thread = require("dnsjit.core.thread") 173 | local channel = require("dnsjit.core.channel") 174 | local threads = {} 175 | local channels = {} 176 | 177 | ---- initialize send threads 178 | for i, thrconf in ipairs(config.threads) do 179 | channels[i] = channel.new(thrconf.channel_size) 180 | ipsplit:receiver(channels[i], thrconf.weight) 181 | 182 | threads[i] = thread.new() 183 | threads[i]:start(send_thread_main) 184 | threads[i]:push(channels[i]) 185 | threads[i]:push(thrconf.max_clients) 186 | threads[i]:push(thrconf.name) 187 | threads[i]:push(thrconf.target_ip) 188 | threads[i]:push(thrconf.target_port) 189 | threads[i]:push(thrconf.timeout_s) 190 | threads[i]:push(thrconf.handshake_timeout_s) 191 | threads[i]:push(thrconf.idle_timeout_s) 192 | threads[i]:push(thrconf.protocol_func) 193 | threads[i]:push(thrconf.gnutls_priority) 194 | threads[i]:push(thrconf.zero_rtt) 195 | threads[i]:push(thrconf.http_method) 196 | threads[i]:push(thrconf.output_file) 197 | threads[i]:push(thrconf.batch_size) 198 | threads[i]:push(#thrconf.bind_ips) 199 | for _, bind_ip in ipairs(thrconf.bind_ips) do 200 | threads[i]:push(bind_ip) 
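        -- NOTE: the push() sequence above must mirror the thr:pop() order at
        -- the top of send_thread_main, since values are passed by position
        -- only.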
201 | end 202 | end 203 | 204 | copy:obj_type(object.PAYLOAD) 205 | copy:obj_type(object.IP6) 206 | copy:receiver(ipsplit) 207 | 208 | 209 | -- process PCAP 210 | local prod, pctx = layer:produce() 211 | local recv, rctx = copy:receive() 212 | while true do 213 | local obj = prod(pctx) 214 | if obj == nil then break end 215 | if config.stop_after_s then 216 | local obj_pcap_in = obj:cast_to(object.PCAP) 217 | if obj_pcap_in.ts.sec >= config.stop_after_s then 218 | break 219 | end 220 | end 221 | recv(rctx, obj) 222 | end 223 | log.notice('processed %.0f packets from input PCAP', input:packets()) 224 | 225 | -- teardown 226 | for i, _ in ipairs(config.threads) do 227 | channels[i]:close() 228 | end 229 | for i, _ in ipairs(config.threads) do 230 | threads[i]:stop() 231 | end 232 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jinja2 2 | toml 3 | matplotlib 4 | -------------------------------------------------------------------------------- /tools/merge-data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import json 5 | import logging 6 | import os 7 | import sys 8 | import traceback 9 | 10 | 11 | JSON_VERSION = 20200527 12 | DEFAULT_FILENAME = "shotgun-all.json" 13 | 14 | 15 | class VersionError(RuntimeError): 16 | def __init__(self): 17 | super().__init__( 18 | "Older formats of JSON data aren't supported. " 19 | "Use older tooling or re-run the tests with newer shotgun." 20 | ) 21 | 22 | 23 | class MismatchData(RuntimeError): 24 | pass 25 | 26 | 27 | class MissingData(RuntimeError): 28 | def __init__(self, field): 29 | super().__init__(f'Field "{field}" is missing in one or more files.') 30 | 31 | 32 | class MergeFailed(RuntimeError): 33 | def __init__(self, field): 34 | super().__init__(f'Failed to merge field "{field}".') 35 | 36 | 37 | def first(iterable): 38 | assert len(iterable) >= 1 39 | return iterable[0] 40 | 41 | 42 | def same(iterable): 43 | assert len(iterable) >= 1 44 | if not all(val == iterable[0] for val in iterable): 45 | raise MismatchData 46 | return iterable[0] 47 | 48 | 49 | def merge_latency(iterable): 50 | assert len(iterable) >= 1 51 | latency = list(iterable[0]) 52 | for latency_data in iterable[1:]: 53 | if len(latency_data) != len(latency): 54 | raise MismatchData 55 | for i, _ in enumerate(latency_data): 56 | latency[i] += latency_data[i] 57 | return latency 58 | 59 | 60 | DATA_STRUCTURE_STATS = { 61 | "since_ms": min, 62 | "until_ms": max, 63 | "requests": sum, 64 | "ongoing": sum, 65 | "answers": sum, 66 | "conn_active": sum, 67 | "conn_resumed": sum, 68 | "conn_handshakes": sum, 69 | "conn_quic_0rtt_loaded": sum, 70 | "quic_0rtt_sent": sum, 71 | "quic_0rtt_answered": sum, 72 | "conn_handshakes_failed": sum, 73 | "rcode_noerror": sum, 74 | "rcode_formerr": sum, 75 | "rcode_servfail": sum, 76 | "rcode_nxdomain": sum, 77 | "rcode_notimp": sum, 78 | "rcode_refused": sum, 79 | "rcode_yxdomain": sum, 80 | "rcode_yxrrset": sum, 81 | "rcode_nxrrset": sum, 82 | "rcode_notauth": sum, 83 | "rcode_notzone": sum, 84 | "rcode_badvers": sum, 85 | "rcode_badkey": sum, 86 | "rcode_badtime": sum, 87 | "rcode_badmode": sum, 88 | "rcode_badname": sum, 89 | "rcode_badalg": sum, 90 | "rcode_badtrunc": sum, 91 | "rcode_badcookie": sum, 92 | "rcode_other": sum, 93 | "latency": merge_latency, 94 | } 95 | 96 | 97 | def merge_stats(iterable): 98 | return 
merge_fields(DATA_STRUCTURE_STATS, iterable)
99 | 
100 | 
101 | def merge_periodic_stats(iterable):
102 |     out = []
103 | 
104 |     for i in range(max(len(stats_periodic) for stats_periodic in iterable)):
105 |         to_merge = []
106 |         for stats_periodic in iterable:
107 |             try:
108 |                 stats = stats_periodic[i]
109 |             except IndexError:
110 |                 continue
111 |             else:
112 |                 to_merge.append(stats)
113 |         out.append(merge_stats(to_merge))
114 | 
115 |     return out
116 | 
117 | 
118 | DATA_STRUCTURE_ROOT = {
119 |     "version": same,
120 |     "merged": lambda x: True,
121 |     "stats_interval_ms": same,
122 |     "timeout_ms": same,
123 |     "discarded": sum,
124 |     "stats_sum": merge_stats,
125 |     "stats_periodic": merge_periodic_stats,
126 | }
127 | 
128 | 
129 | def merge_fields(fields, thread_data):
130 |     out = {}
131 |     for field, merge_func in fields.items():
132 |         try:
133 |             field_data = [data[field] for data in thread_data]
134 |         except KeyError as exc:
135 |             raise MissingData(field) from exc
136 |         try:
137 |             out[field] = merge_func(field_data)
138 |         except Exception as exc:
139 |             raise MergeFailed(field) from exc
140 |     return out
141 | 
142 | 
143 | def merge_data(thread_data):
144 |     assert len(thread_data) >= 1
145 |     try:
146 |         if thread_data[0]["version"] != JSON_VERSION:
147 |             raise VersionError
148 |     except KeyError as exc:
149 |         raise VersionError from exc
150 |     return merge_fields(DATA_STRUCTURE_ROOT, thread_data)
151 | 
152 | 
153 | def main():
154 |     logging.basicConfig(
155 |         format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG
156 |     )
157 | 
158 |     parser = argparse.ArgumentParser(description="Merge JSON shotgun results")
159 | 
160 |     parser.add_argument("json_file", nargs="+", help="Paths to per-thread JSON results")
161 |     parser.add_argument(
162 |         "-o", "--output", default=DEFAULT_FILENAME, help="Output JSON file"
163 |     )
164 |     args = parser.parse_args()
165 | 
166 |     outpath = args.output
167 |     if outpath == DEFAULT_FILENAME:
168 |         outpath = os.path.join(os.path.dirname(args.json_file[0]), outpath)
169 | 
170 |     try:
171 |         thread_data = []
172 |         for path in args.json_file:
173 |             with open(path, encoding="utf-8") as f:
174 |                 thread_data.append(json.load(f))
175 | 
176 |         merged = merge_data(thread_data)
177 | 
178 |         with open(outpath, "w", encoding="utf-8") as f:
179 |             json.dump(merged, f)
180 |         logging.info("DONE: merged shotgun results saved as %s", outpath)
181 |     except (FileNotFoundError, VersionError) as exc:
182 |         logging.critical("%s", exc)
183 |         sys.exit(1)
184 |     except (MergeFailed, MissingData) as exc:
185 |         logging.debug(traceback.format_exc())
186 |         logging.critical("%s", exc)
187 |         sys.exit(1)
188 |     except Exception as exc:
189 |         logging.critical("uncaught exception: %s", exc)
190 |         logging.debug(traceback.format_exc())
191 |         sys.exit(1)
192 | 
193 | 
194 | if __name__ == "__main__":
195 |     main()
196 | 
--------------------------------------------------------------------------------
/tools/mplhlpr/README:
--------------------------------------------------------------------------------
1 | *.mplstyle files in this directory augment the system-wide Matplotlib config
2 | located in site-packages/matplotlib/mpl-data/matplotlibrc
3 | (relative to your Python installation).
4 | 
5 | Matplotlib styles used by DNS Shotgun plotters are defined as a comma-delimited
6 | list of styles in the SHOTGUN_MPLSTYLES environment variable. It defaults to
7 | "shotgun" and thus by default loads the file "shotgun.mplstyle" from this directory.
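
Styles can also be given as file paths (taken verbatim, tried first), so a
hypothetical local override file can be mixed with the bundled styles, e.g.:

  SHOTGUN_MPLSTYLES="shotgun,/home/user/my-overrides.mplstyle" \
      python3 tools/plot-latency.py shotgun-all.json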
8 | 
9 | Items listed later in the list can redefine values from preceding files, e.g.
10 | SHOTGUN_MPLSTYLES="shotgun,presentation,fast,grayscale"
11 | starts with the "shotgun" style as base and the "presentation" style redefines
12 | individual settings.
13 | 
14 | Matplotlib provides its own styles, e.g. "fast", "tableau-colorblind10" etc.
15 | See site-packages/matplotlib/mpl-data/stylelib.
16 | 
--------------------------------------------------------------------------------
/tools/mplhlpr/presentation.mplstyle:
--------------------------------------------------------------------------------
1 | # Intended as an augment to the "shotgun" style.
2 | # Size and font are meant to be suitable for widescreen presentations
3 | # with slides 33.87 cm (width) x 19.05 cm (height)
4 | #
5 | # Usage:
6 | # export SHOTGUN_MPLSTYLES="shotgun,presentation"
7 | #
8 | # If resulting files are too big or rendering too slow, try adding "fast".
9 | # export SHOTGUN_MPLSTYLES="shotgun,presentation,fast"
10 | 
11 | ###### FONT
12 | font.family: sans-serif
13 | #font.style: normal
14 | #font.variant: normal
15 | #font.weight: normal
16 | #font.stretch: normal
17 | font.size: 14
18 | 
19 | font.sans-serif: Mada, DejaVu Sans, Bitstream Vera Sans, Computer Modern Sans Serif, Lucida Grande, Verdana, Geneva, Lucid, Arial, Helvetica, Avant Garde, sans-serif
20 | 
21 | ###### AXES
22 | # hide chart title - slide has its own
23 | axes.titlesize: 0
24 | 
25 | ###### FIGURE
26 | #figure.labelsize: large  # size of the figure label (``Figure.sup[x|y]label()``)
27 | figure.figsize: 10.161, 4.755  # figure size in inches
28 | #figure.dpi: 100  # figure dots per inch
29 | #figure.facecolor: white  # figure face color
30 | #figure.edgecolor: white  # figure edge color
31 | #figure.frameon: True  # enable figure frame
32 | 
33 | 
34 | ###### Tweak to suit target presentation software
35 | 
36 | ## SAVING FIGURES
37 | #savefig.format: svg  # {png, ps, pdf, svg}
38 | #savefig.transparent: False  # whether figures are saved with a transparent
39 |                              # background by default
40 | 
41 | 
42 | ### ps backend params
43 | #ps.papersize: letter  # {auto, letter, legal, ledger, A0-A10, B0-B10}
44 | #ps.useafm: False  # use of AFM fonts, results in small files
45 | #ps.usedistiller: False  # {ghostscript, xpdf, None}
46 |                          # Experimental: may produce smaller files.
47 |                          # xpdf intended for production of publication quality files,
48 |                          # but requires ghostscript, xpdf and ps2eps
49 | #ps.distiller.res: 6000  # dpi
50 | #ps.fonttype: 3  # Output Type 3 (Type3) or Type 42 (TrueType)
51 | 
52 | ### PDF backend params
53 | #pdf.compression: 6  # integer from 0 to 9
54 |                      # 0 disables compression (good for debugging)
55 | #pdf.fonttype: 3  # Output Type 3 (Type3) or Type 42 (TrueType)
56 | #pdf.use14corefonts: False
57 | #pdf.inheritcolor: False
58 | 
59 | ### SVG backend params
60 | #svg.image_inline: True  # Write raster image data directly into the SVG file
61 | #svg.fonttype: path  # How to handle SVG fonts:
62 |                      # path: Embed characters as paths -- supported
63 |                      #       by most SVG renderers
64 |                      # None: Assume fonts are installed on the
65 |                      #       machine where the SVG will be viewed.
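
# Illustrative example: to emit opaque PNGs for pasting into slide decks, one
# could uncomment and set (standard matplotlibrc keys, values just an example):
#savefig.format: png
#savefig.transparent: False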
66 | 
--------------------------------------------------------------------------------
/tools/mplhlpr/shotgun.mplstyle:
--------------------------------------------------------------------------------
1 | #### MATPLOTLIBRC FORMAT
2 | grid.color: "#dddddd"  # grid color
3 | grid.linestyle: "dotted"  # default: solid
4 | 
5 | figure.figsize: 16, 9  # inches
6 | 
7 | # automatically make plot elements fit on the figure
8 | figure.constrained_layout.use: True
9 | # autolayout is not compatible with constrained_layout
10 | figure.autolayout: False
11 | 
12 | lines.linestyle: "dotted"
13 | lines.marker: "x"
14 | 
15 | # deterministic SVG output for easier diff-ing
16 | svg.hashsalt: 0  # If not None, use this string as hash salt instead of uuid4
17 | 
--------------------------------------------------------------------------------
/tools/mplhlpr/styles.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """
3 | Load Matplotlib styles specified by the SHOTGUN_MPLSTYLES env var.
4 | 
5 | Multiple styles are separated by commas:
6 |     SHOTGUN_MPLSTYLES=shotgun,tableau-colorblind10,fast
7 | 
8 | Default is "shotgun", which is distributed with Shotgun itself.
9 | 
10 | Load attempts are done in this order:
11 |     1. Style name taken verbatim as a file path
12 |     2. Relative base name resolved against the directory of this module
13 |     3. Matplotlib-supplied style name (see the matplotlib.style module)
14 | """
15 | from pathlib import Path
16 | import os
17 | 
18 | import matplotlib.style
19 | 
20 | SCRIPT_DIR = Path(__file__).parent
21 | 
22 | 
23 | # style name "default" would be nicer, but it is reserved by matplotlib
24 | def configure_mpl_styles(comma_list=os.environ.get("SHOTGUN_MPLSTYLES", "shotgun")):
25 |     styles = comma_list.split(",")
26 |     for style in styles:
27 |         candidates = [Path(style), SCRIPT_DIR / f"{style}.mplstyle", style]
28 |         for candidate in candidates:
29 |             if isinstance(candidate, Path) and not candidate.exists():
30 |                 continue
31 |             # raises if the style cannot be found
32 |             matplotlib.style.use(candidate)
33 |             break
34 | 
35 | 
36 | def ax_set_title(ax, title):
37 |     """show title only if style defines titlesize > 0"""
38 |     titlesize = matplotlib.rcParams["axes.titlesize"]
39 |     if not isinstance(titlesize, (int, float)) or titlesize > 0:
40 |         ax.set_title(title)
41 | 
--------------------------------------------------------------------------------
/tools/plot-client-distribution.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | # pylint: disable=wrong-import-order,wrong-import-position
4 | import argparse
5 | import csv
6 | import logging
7 | import os
8 | import statistics
9 | import sys
10 | import traceback
11 | from typing import Dict, List, Union
12 | 
13 | # Force matplotlib to use a different backend to handle machines without a display
14 | from cycler import cycler
15 | import matplotlib
16 | 
17 | matplotlib.use("Agg")
18 | import matplotlib.colors
19 | from matplotlib.lines import Line2D
20 | import matplotlib.pyplot as plt
21 | 
22 | import mplhlpr.styles
23 | 
24 | SCALE_MAGIC = 10000
25 | 
26 | 
27 | def init_plot(title):
28 |     _, ax = plt.subplots()
29 | 
30 |     ax.set_xscale("log")
31 |     ax.set_yscale("log")
32 | 
33 |     ax.grid(True, which="major")
34 |     ax.grid(True, which="minor")
35 |     ax.set_ylim(0.00009, 110)
36 | 
37 |     ax.set_xlabel("Number of queries per client")
38 |     ax.set_ylabel("Percentage of clients")
39 |     mplhlpr.styles.ax_set_title(ax, title)
40 | 
41 |     colors = [
42 | 
matplotlib.colors.to_rgba(c) 43 | for c in plt.rcParams["axes.prop_cycle"].by_key()["color"] 44 | ] 45 | default_cycler = cycler(hatch=[None, "++", "xx", "oo"]) * cycler(facecolor=colors) 46 | 47 | return ax, default_cycler 48 | 49 | 50 | def count_client_queries( 51 | filename: str, 52 | ) -> Dict[str, int]: 53 | with open(filename, newline="", encoding="utf-8") as csvfile: 54 | reader = csv.DictReader( 55 | csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_NONNUMERIC 56 | ) 57 | return {row["ip"]: int(row["packets"]) for row in reader} 58 | 59 | 60 | def plot_client_query_scatter(ax, clients: Dict[str, int], plot_props): 61 | data = clients.values() 62 | 63 | x = [] 64 | y = [] 65 | s = [] # type: List[Union[float,int]] 66 | sanity_nsamples = 0 67 | step_multiplier = 10 68 | lmin = 0 69 | lmax = step_multiplier 70 | while lmin <= max(data): 71 | samples = list(n for n in data if lmin <= n < lmax) 72 | if len(samples) == 0: # an empty interval 73 | logging.info(" [%d-%d) queries per client: 0 clients", lmin, lmax) 74 | else: 75 | sanity_nsamples += len(samples) 76 | x.append(statistics.mean(samples)) 77 | y.append(len(samples) / len(data) * 100) 78 | s.append(sum(samples)) 79 | logging.info( 80 | " [%d-%d) queries per client: %d (%.2f %%) clients; %d queries total", 81 | lmin, 82 | lmax, 83 | len(samples), 84 | y[-1], 85 | int(s[-1]), 86 | ) 87 | lmin = lmax 88 | lmax *= step_multiplier 89 | 90 | assert sanity_nsamples == len(data) 91 | logging.info(" total: %d clients; %d queries", len(data), int(sum(s))) 92 | 93 | # normalize size 94 | s_tot = sum(s) 95 | s = [size * (SCALE_MAGIC / s_tot) for size in s] 96 | 97 | ax.scatter(x, y, s, alpha=0.5, **plot_props) 98 | ax.scatter(x, y, linewidth=1, marker="x", alpha=0.5, **plot_props) 99 | 100 | 101 | def main(): 102 | logging.basicConfig( 103 | format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG 104 | ) 105 | logger = logging.getLogger("matplotlib") 106 | # set WARNING for Matplotlib 107 | logger.setLevel(logging.WARNING) 108 | 109 | mplhlpr.styles.configure_mpl_styles() 110 | 111 | parser = argparse.ArgumentParser( 112 | description="Analyze query distribution among clients in input pcap" 113 | ) 114 | parser.add_argument( 115 | "csv", 116 | nargs="+", 117 | help="CSV(s) to visualize (output from count-packets-per-ip.lua)", 118 | ) 119 | parser.add_argument( 120 | "-o", 121 | "--output", 122 | type=str, 123 | default="clients.svg", 124 | help="output filename (default: clients.svg)", 125 | ) 126 | args = parser.parse_args() 127 | 128 | ax, plot_cycler = init_plot("Query distribution among clients") 129 | handles = [] 130 | lines = [] 131 | labels = [] 132 | 133 | if len(plot_cycler) < len(args.csv): 134 | logging.critical( 135 | "more than %d input files at once is not supported, got %d", 136 | len(plot_cycler), 137 | len(args.csv), 138 | ) 139 | sys.exit(3) 140 | for plot_props, csv_inf in zip(plot_cycler, args.csv): 141 | label = os.path.basename(csv_inf) 142 | logging.info("Processing: %s", label) 143 | try: 144 | clients_qps = count_client_queries(csv_inf) 145 | except FileNotFoundError as exc: 146 | logging.critical("%s", exc) 147 | sys.exit(1) 148 | except Exception as exc: 149 | logging.critical("uncaught exception: %s", exc) 150 | logging.debug(traceback.format_exc()) 151 | sys.exit(1) 152 | else: 153 | labels.append(label) 154 | lines.append(matplotlib.patches.Patch(**plot_props)) 155 | handles.append(plot_client_query_scatter(ax, clients_qps, plot_props)) 156 | 157 | ax.legend(lines, labels, loc="lower 
left") 158 | plt.savefig(args.output) 159 | sys.exit(0) 160 | 161 | 162 | if __name__ == "__main__": 163 | main() 164 | -------------------------------------------------------------------------------- /tools/plot-connections.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | from itertools import cycle 5 | import json 6 | import logging 7 | import math 8 | import os 9 | import sys 10 | 11 | # pylint: disable=wrong-import-order,wrong-import-position 12 | import matplotlib 13 | 14 | matplotlib.use("Agg") 15 | import matplotlib.pyplot as plt 16 | 17 | import mplhlpr.styles 18 | 19 | JSON_VERSION = 20200527 20 | 21 | COLOR_ACTIVE = cycle(["royalblue", "cornflowerblue", "darkblue", "lightsteelblue"]) 22 | COLOR_CONN_HS = cycle(["forestgreen", "limegreen", "darkgreen", "lightgreen"]) 23 | COLOR_QUIC_0RTT = cycle( 24 | ["darkolivegreen", "darkseagreen", "darkslategray", "greenyellow"] 25 | ) 26 | COLOR_QUIC_0RTT_SENT = cycle(["crimson", "brown", "firebrick", "indianred"]) 27 | COLOR_QUIC_0RTT_ANSWERED = cycle(["khaki", "moccasin", "peru", "wheat"]) 28 | COLOR_TLS_RESUMED = cycle(["orange", "moccasin", "darkorange", "antiquewhite"]) 29 | COLOR_FAILED_HS = cycle(["gray", "silver", "black", "gainsboro"]) 30 | 31 | 32 | sinames = ["", " k", " M", " G", " T"] 33 | 34 | 35 | def siname(n): 36 | try: 37 | n = float(n) 38 | except ValueError: 39 | return n 40 | 41 | siidx = max( 42 | 0, 43 | min(len(sinames) - 1, int(math.floor(0 if n == 0 else math.log10(abs(n)) / 3))), 44 | ) 45 | return f"{(n / 10 ** (3 * siidx)):.0f}{sinames[siidx]}" 46 | 47 | 48 | def init_plot(title): 49 | _, ax = plt.subplots() 50 | 51 | ax.set_xlabel("Time [s]") 52 | ax.set_ylabel("Number of connections") 53 | mplhlpr.styles.ax_set_title(ax, title) 54 | 55 | ax.grid(True, axis="x", which="major") 56 | ax.grid(True, axis="y", which="major") 57 | ax.grid(True, axis="y", which="minor") 58 | 59 | return ax 60 | 61 | 62 | def plot(ax, data, label, eval_func, min_timespan=0, color=None): 63 | stats_periodic = data["stats_periodic"][ 64 | :-1 65 | ] # omit the last often misleading datapoint 66 | time_offset = stats_periodic[0]["since_ms"] 67 | 68 | xvalues = [] 69 | yvalues = [] 70 | for stats in stats_periodic: 71 | timespan = stats["until_ms"] - stats["since_ms"] 72 | if timespan < min_timespan: 73 | continue 74 | time = (stats["until_ms"] - time_offset) / 1000 75 | xvalues.append(time) 76 | yvalues.append(eval_func(stats)) 77 | 78 | ax.plot(xvalues, yvalues, label=label, color=color) 79 | 80 | 81 | def main(): 82 | logging.basicConfig( 83 | format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG 84 | ) 85 | logger = logging.getLogger("matplotlib") 86 | # set WARNING for Matplotlib 87 | logger.setLevel(logging.WARNING) 88 | 89 | mplhlpr.styles.configure_mpl_styles() 90 | 91 | parser = argparse.ArgumentParser( 92 | description="Plot connections over time from shotgun experiment" 93 | ) 94 | 95 | parser.add_argument("json_file", nargs="+", help="Shotgun results JSON file(s)") 96 | parser.add_argument( 97 | "-t", "--title", default="Connections over Time", help="Graph title" 98 | ) 99 | parser.add_argument( 100 | "-o", "--output", default="connections.svg", help="Output graph filename" 101 | ) 102 | parser.add_argument( 103 | "-k", 104 | "--kind", 105 | nargs="+", 106 | choices=[ 107 | "active", 108 | "conn_hs", 109 | "tcp_hs", # same as conn_hs - backwards compatibility 110 | "quic_0rtt", 111 | "quic_0rtt_sent", 112 | 
"quic_0rtt_answered", 113 | "tls_resumed", 114 | "failed_hs", 115 | ], 116 | default=["active", "conn_hs", "tls_resumed", "failed_hs"], 117 | help="Which data should be rendered", 118 | ) 119 | args = parser.parse_args() 120 | 121 | # initialize graph 122 | ax = init_plot(args.title) 123 | 124 | for json_path in args.json_file: 125 | try: 126 | with open(json_path, encoding="utf-8") as f: 127 | data = json.load(f) 128 | except FileNotFoundError as exc: 129 | logging.critical("%s", exc) 130 | sys.exit(1) 131 | 132 | try: 133 | assert data["version"] == JSON_VERSION 134 | except (KeyError, AssertionError): 135 | logging.critical( 136 | "Older formats of JSON data aren't supported. " 137 | "Use older tooling or re-run the tests with newer shotgun." 138 | ) 139 | sys.exit(1) 140 | 141 | if data["discarded"] != 0: 142 | logging.warning("%d discarded packets may skew results!", data["discarded"]) 143 | 144 | name = os.path.splitext(os.path.basename(os.path.normpath(json_path)))[0] 145 | 146 | if "active" in args.kind: 147 | plot( 148 | ax, 149 | data, 150 | label=f"Active ({name})", 151 | color=next(COLOR_ACTIVE), 152 | eval_func=lambda stats: stats["conn_active"], 153 | ) 154 | if "conn_hs" in args.kind or "tcp_hs" in args.kind: 155 | plot( 156 | ax, 157 | data, 158 | label=f"Handshakes ({name})", 159 | color=next(COLOR_CONN_HS), 160 | eval_func=lambda stats: stats["conn_handshakes"], 161 | ) 162 | if "quic_0rtt" in args.kind: 163 | plot( 164 | ax, 165 | data, 166 | label=f"QUIC 0RTT ({name})", 167 | color=next(COLOR_QUIC_0RTT), 168 | eval_func=lambda stats: stats["conn_quic_0rtt_loaded"], 169 | ) 170 | if "quic_0rtt_sent" in args.kind: 171 | plot( 172 | ax, 173 | data, 174 | label=f"QUIC 0RTT sent ({name})", 175 | color=next(COLOR_QUIC_0RTT_SENT), 176 | eval_func=lambda stats: stats["quic_0rtt_sent"], 177 | ) 178 | if "quic_0rtt_answered" in args.kind: 179 | plot( 180 | ax, 181 | data, 182 | label=f"QUIC 0RTT answered ({name})", 183 | color=next(COLOR_QUIC_0RTT_ANSWERED), 184 | eval_func=lambda stats: stats["quic_0rtt_answered"], 185 | ) 186 | if "tls_resumed" in args.kind: 187 | plot( 188 | ax, 189 | data, 190 | label=f"TLS Resumed ({name})", 191 | color=next(COLOR_TLS_RESUMED), 192 | eval_func=lambda stats: stats["conn_resumed"], 193 | ) 194 | if "failed_hs" in args.kind: 195 | plot( 196 | ax, 197 | data, 198 | label=f"Failed Handshakes ({name})", 199 | color=next(COLOR_FAILED_HS), 200 | eval_func=lambda stats: stats["conn_handshakes_failed"], 201 | ) 202 | 203 | # set axis boundaries 204 | ax.set_xlim(xmin=0) 205 | ax.set_ylim(ymin=0) 206 | 207 | plt.legend() 208 | plt.savefig(args.output) 209 | 210 | 211 | if __name__ == "__main__": 212 | main() 213 | -------------------------------------------------------------------------------- /tools/plot-latency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # NOTE: Due to a weird bug, numpy is detected as a 3rd party module, while lmdb 4 | # is not and pylint complains about wrong-import-order. 5 | # Since these checks have to be disabled for matplotlib imports anyway, they 6 | # were moved a bit higher up to avoid the issue. 
7 | # pylint: disable=wrong-import-order,wrong-import-position
8 | import argparse
9 | import collections
10 | import itertools
11 | import logging
12 | import json
13 | import math
14 | import os
15 | import re
16 | import sys
17 | 
18 | import numpy as np
19 | 
20 | # Force matplotlib to use a different backend to handle machines without a display
21 | import matplotlib
22 | import matplotlib.ticker as mtick
23 | 
24 | matplotlib.use("Agg")
25 | import matplotlib.pyplot as plt
26 | 
27 | import mplhlpr.styles
28 | 
29 | JSON_VERSION = 20200527
30 | MIN_X_EXP = -1
31 | MAX_X_EXP = 2
32 | 
33 | sinames = ["", " k", " M", " G", " T"]
34 | 
35 | 
36 | def siname(n):
37 |     try:
38 |         n = float(n)
39 |     except ValueError:
40 |         return n
41 | 
42 |     siidx = max(
43 |         0,
44 |         min(len(sinames) - 1, int(math.floor(0 if n == 0 else math.log10(abs(n)) / 3))),
45 |     )
46 |     return f"{(n / 10 ** (3 * siidx)):.0f}{sinames[siidx]}"
47 | 
48 | 
49 | def init_plot(title):
50 |     _, ax = plt.subplots()
51 | 
52 |     fmt = mtick.FormatStrFormatter("%g")
53 |     maj_loc = mtick.LogLocator(subs=[(x / 10) for x in range(0, 10)])
54 | 
55 |     ax.set_xscale("log")
56 |     ax.xaxis.set_major_formatter(fmt)
57 |     ax.xaxis.set_major_locator(maj_loc)
58 |     ax.set_yscale("log")
59 |     ax.yaxis.set_major_formatter(fmt)
60 |     ax.yaxis.set_major_locator(maj_loc)
61 | 
62 |     ax.grid(True, which="major")
63 |     ax.grid(True, which="minor")
64 | 
65 |     ax.margins(x=0)
66 | 
67 |     ax.set_xlabel("Slowest percentile")
68 |     ax.set_ylabel("Response time [ms]")
69 |     mplhlpr.styles.ax_set_title(ax, title)
70 | 
71 |     return ax
72 | 
73 | 
74 | def get_percentile_latency(latency_data, percentile):
75 |     total = sum(latency_data)
76 |     ipercentile = math.ceil((100 - percentile) / 100 * total - 1)
77 |     assert ipercentile <= total
78 |     i = 0
79 |     for latency, n in enumerate(latency_data):
80 |         i += n
81 |         if ipercentile <= i:
82 |             return latency
83 |     raise RuntimeError("percentile not found")
84 | 
85 | 
86 | def get_xy_from_histogram(latency_histogram):
87 |     percentiles = np.logspace(MIN_X_EXP, MAX_X_EXP, num=200)
88 |     y = [get_percentile_latency(latency_histogram, pctl) for pctl in percentiles]
89 |     return percentiles, y
90 | 
91 | 
92 | def merge_latency(data, since=0, until=float("+inf")):
93 |     """generate latency histogram for given period"""
94 |     # add 100ms tolerance for interval beginning / end
95 |     since_ms = data["stats_sum"]["since_ms"] + since * 1000 - 100
96 |     until_ms = data["stats_sum"]["since_ms"] + until * 1000 + 100
97 | 
98 |     latency = []
99 |     requests = 0
100 |     start = None
101 |     end = None
102 |     for stats in data["stats_periodic"]:
103 |         if stats["since_ms"] < since_ms:
104 |             continue
105 |         if stats["until_ms"] >= until_ms:
106 |             break
107 |         requests += stats["requests"]
108 |         end = stats["until_ms"]
109 |         if not latency:
110 |             latency = list(stats["latency"])
111 |             start = stats["since_ms"]
112 |         else:
113 |             assert len(stats["latency"]) == len(latency)
114 |             for i, _ in enumerate(stats["latency"]):
115 |                 latency[i] += stats["latency"][i]
116 | 
117 |     if not latency:
118 |         raise RuntimeError("no samples matching this interval")
119 | 
120 |     qps = requests / (end - start) * 1000  # convert from ms
121 |     return latency, qps
122 | 
123 | 
124 | class NamedGroupAction(argparse.Action):
125 |     def __call__(self, parser, namespace, values, option_string=None):
126 |         if not isinstance(values, list) or len(values) <= 1:
127 |             raise argparse.ArgumentError(
128 |                 self,
129 |                 "name required at first position, followed by one or more paths to JSON files",
130 |             )
131 |         groups =
getattr(namespace, self.dest) or {} 132 | group_name = values[0] 133 | try: 134 | groups[group_name] = [ 135 | argparse.FileType()(filename) for filename in values[1:] 136 | ] 137 | except argparse.ArgumentTypeError as ex: 138 | raise argparse.ArgumentError(self, ex) 139 | setattr(namespace, self.dest, groups) 140 | 141 | 142 | LINE_STYLES = matplotlib.cbook.ls_mapper.values() 143 | 144 | 145 | class LineStyleAction(argparse.Action): 146 | def __call__(self, parser, namespace, values, option_string=None): 147 | try: 148 | regex = re.compile(values[0]) 149 | except re.error as e: 150 | raise argparse.ArgumentError( 151 | self, f"first linestyle argument is not a regex: {e}" 152 | ) 153 | style = values[1] 154 | if style not in LINE_STYLES: 155 | raise argparse.ArgumentError( 156 | self, 157 | f"second linestyle argument must be one of: {', '.join(LINE_STYLES)}", 158 | ) 159 | linestyles = getattr(namespace, self.dest) or {} 160 | linestyles[regex] = style 161 | setattr(namespace, self.dest, linestyles) 162 | 163 | 164 | def read_json(file_obj): 165 | data = json.load(file_obj) 166 | 167 | try: 168 | assert data["version"] == JSON_VERSION 169 | except (KeyError, AssertionError): 170 | logging.critical( 171 | "Older formats of JSON data aren't supported. " 172 | "Use older tooling or re-run the tests with newer shotgun." 173 | ) 174 | sys.exit(1) 175 | 176 | return data 177 | 178 | 179 | def parse_args(): 180 | parser = argparse.ArgumentParser( 181 | description="Plot query response time histogram from shotgun results" 182 | ) 183 | parser.add_argument("-t", "--title", default="Response Latency", help="Graph title") 184 | parser.add_argument( 185 | "-o", 186 | "--output", 187 | type=str, 188 | default="latency.svg", 189 | help="output filename (default: latency.svg)", 190 | ) 191 | parser.add_argument( 192 | "--since", 193 | type=float, 194 | default=0, 195 | help="Omit data before this time (secs since test start)", 196 | ) 197 | parser.add_argument( 198 | "--until", 199 | type=float, 200 | default=float("+inf"), 201 | help="Omit data after this time (secs since test start)", 202 | ) 203 | parser.add_argument( 204 | "--linestyle", 205 | nargs=2, 206 | action=LineStyleAction, 207 | default={}, 208 | help=( 209 | "change style for series with names matching regex; " 210 | "name_regex linestyle_name (can be specified multiple times)" 211 | ), 212 | ) 213 | 214 | input_args = parser.add_argument_group( 215 | title="input data", 216 | description="Shotgun result JSON file(s) to plot as individual data sets" 217 | " or groups aggregated to min/avg/max.", 218 | ) 219 | input_args.add_argument( 220 | "-g", 221 | "--group", 222 | nargs="+", 223 | action=NamedGroupAction, 224 | default={}, 225 | help="group_name json_file [json_file ...]; can be used multiple times", 226 | ) 227 | input_args.add_argument( 228 | "json_file", 229 | nargs="*", 230 | type=argparse.FileType(), 231 | help="JSON file(s) to plot individually", 232 | ) 233 | 234 | args = parser.parse_args() 235 | if not args.json_file and not args.group: 236 | parser.error( 237 | "at least one input JSON file required (individually or in a group)" 238 | ) 239 | return args 240 | 241 | 242 | def main(): 243 | logging.basicConfig( 244 | format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG 245 | ) 246 | logger = logging.getLogger("matplotlib") 247 | # set WARNING for Matplotlib 248 | logger.setLevel(logging.WARNING) 249 | 250 | mplhlpr.styles.configure_mpl_styles() 251 | 252 | args = parse_args() 253 | 254 | groups = 
collections.defaultdict(list) 255 | ax = init_plot(args.title) 256 | 257 | for json_file in args.json_file: 258 | logging.info("processing %s", json_file.name) 259 | data = read_json(json_file) 260 | name = os.path.splitext(os.path.basename(os.path.normpath(json_file.name)))[0] 261 | groups[name].append(data) 262 | 263 | for name, group_files in args.group.items(): 264 | for json_file in group_files: 265 | logging.info("processing group %s: %s", name, json_file.name) 266 | data = read_json(json_file) 267 | groups[name].append(data) 268 | 269 | for name, group_data in groups.items(): 270 | pos_inf = float("inf") 271 | neg_inf = float("-inf") 272 | group_x = [] # we use the same X coordinates for all runs 273 | group_ymin = [] 274 | group_ymax = [] 275 | group_ysum = [] 276 | for run_data in group_data: 277 | latency, qps = merge_latency(run_data, args.since, args.until) 278 | label = f"{name} ({siname(qps)} QPS)" 279 | group_x, run_y = get_xy_from_histogram(latency) 280 | if len(group_data) == 1: # no reason to compute aggregate values 281 | group_ysum = run_y 282 | break 283 | group_ysum = [ 284 | old + new 285 | for old, new in itertools.zip_longest(group_ysum, run_y, fillvalue=0) 286 | ] 287 | group_ymin = [ 288 | min(old, new) 289 | for old, new in itertools.zip_longest( 290 | group_ymin, run_y, fillvalue=pos_inf 291 | ) 292 | ] 293 | group_ymax = [ 294 | max(old, new) 295 | for old, new in itertools.zip_longest( 296 | group_ymax, run_y, fillvalue=neg_inf 297 | ) 298 | ] 299 | if len(group_data) > 1: 300 | group_yavg = [ysum / len(group_data) for ysum in group_ysum] 301 | ax.fill_between(group_x, group_ymin, group_ymax, alpha=0.2) 302 | else: 303 | group_yavg = group_ysum 304 | linestyle = "solid" 305 | for name_re, style in args.linestyle.items(): 306 | if name_re.search(name): 307 | linestyle = style 308 | ax.plot(group_x, group_yavg, lw=2, label=label, marker="", linestyle=linestyle) 309 | 310 | plt.legend() 311 | plt.savefig(args.output) 312 | 313 | 314 | if __name__ == "__main__": 315 | main() 316 | -------------------------------------------------------------------------------- /tools/plot-packet-rate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import csv 5 | import logging 6 | import math 7 | import os 8 | import statistics 9 | import sys 10 | from typing import Dict, Tuple 11 | 12 | # pylint: disable=wrong-import-order,wrong-import-position 13 | from cycler import cycler 14 | import matplotlib 15 | import matplotlib.colors 16 | 17 | matplotlib.use("Agg") 18 | import matplotlib.pyplot as plt 19 | 20 | import mplhlpr.styles 21 | 22 | sinames = ["", " k", " M", " G", " T"] 23 | 24 | 25 | def init_plot(title): 26 | _, ax = plt.subplots() 27 | 28 | ax.set_xlabel("Time [s]") 29 | ax.set_ylabel("Packets per sampling period") 30 | mplhlpr.styles.ax_set_title(ax, title) 31 | 32 | ax.grid(True, axis="x", which="both") 33 | ax.grid(True, axis="y", which="both") 34 | plt.minorticks_on() 35 | 36 | default_cycler = cycler(marker=["x", "o", "v", "s"]) * cycler( 37 | color=list(matplotlib.colors.TABLEAU_COLORS.keys()) 38 | ) 39 | 40 | return ax, default_cycler 41 | 42 | 43 | def plot(ax, data, label, since, until, line_props): 44 | xvalues = [] 45 | yvalues = [] 46 | for time_s, rate in data.items(): 47 | xvalues.append(time_s) 48 | yvalues.append(rate) 49 | 50 | ax.plot(xvalues, yvalues, label=label, **line_props) 51 | ax.set_xlim(xmin=since) 52 | if not math.isfinite(until): 53 | until = 
xvalues[-1]
54 |     if until > 1:
55 |         until = math.ceil(until)
56 |     ax.set_xlim(xmax=until)
57 | 
58 | 
59 | def parse_csv(csv_f, since: float, until: float) -> Tuple[float, Dict[float, float]]:
60 |     """
61 |     Parse CSV and return tuple (period, xydata).
62 |     Period between samples is float or NaN if it varies by more than 1 ms.
63 |     XY points are in format Dict[time_s] = period_packets value.
64 |     """
65 |     data = {}
66 |     prev_time = None
67 |     period = None
68 |     prev_period = None
69 |     for row in csv.DictReader(csv_f):
70 |         now = float(row["time_s"])
71 |         if now < since:
72 |             continue
73 |         if now > until:
74 |             break
75 | 
76 |         if prev_time is not None:
77 |             if not period:
78 |                 period = now - prev_time
79 |             elif math.isnan(period):
80 |                 prev_period = period
81 |             elif abs(period - abs(now - prev_time)) > 0.001:
82 |                 logging.warning(
83 |                     "file %s: sampling period has changed between samples %f and %f",
84 |                     csv_f.name,
85 |                     prev_time,
86 |                     now,
87 |                 )
88 |                 prev_period = period
89 |                 period = float("nan")  # varies, undefined
90 | 
91 |         prev_time = now
92 |         data[now] = float(row["period_packets"])
93 | 
94 |     if not prev_time or not period:
95 |         raise ValueError("at least two data rows are required")
96 | 
97 |     # ignore period change, but on the last sample only
98 |     if prev_period and not math.isnan(prev_period):
99 |         period = prev_period
100 |         logging.info("period change has happened only on the last sample, ignoring")
101 |     return period, data
102 | 
103 | 
104 | def xyrate_average(
105 |     xyrate: Dict[float, float], orig_period: float, avg_n_samples: int
106 | ) -> Dict[float, float]:
107 |     """
108 |     Transform dictionary with [X]=Y values by averaging Y values of avg_n_samples
109 |     consecutive points on X (time) axis.
110 |     """
111 |     orig_start_time = min(xyrate)
112 |     # first sample is at the end of first period
113 |     # our new average should point to the middle of all samples we are averaging over
114 |     avg_start_time = (orig_start_time - orig_period) + (avg_n_samples * orig_period / 2)
115 | 
116 |     # flatten the XY chart into a sorted list; [0] corresponds to orig_start_time
117 |     orig_rate_vals = list(xyrate[time] for time in sorted(xyrate))
118 |     avg_xy = {}
119 |     avg_idx = 0
120 |     avg_last_idx = int(
121 |         len(orig_rate_vals) / avg_n_samples
122 |     )  # ignore incomplete samples at the end
123 |     while avg_idx < avg_last_idx:
124 |         orig_idx = avg_idx * avg_n_samples
125 |         # beware: indexing from 0, sample 0 is at the end of the first period
126 |         avg_now = avg_start_time + orig_period * (orig_idx + 1)
127 |         avg_xy[avg_now] = statistics.mean(
128 |             orig_rate_vals[orig_idx : orig_idx + avg_n_samples]
129 |         )
130 |         avg_idx += 1
131 |     return avg_xy
132 | 
133 | 
134 | def main():
135 |     logging.basicConfig(
136 |         format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG
137 |     )
138 |     logger = logging.getLogger("matplotlib")
139 |     # set WARNING for Matplotlib
140 |     logger.setLevel(logging.WARNING)
141 | 
142 |     mplhlpr.styles.configure_mpl_styles()
143 | 
144 |     parser = argparse.ArgumentParser(description="Plot packet rate")
145 | 
146 |     parser.add_argument(
147 |         "csv_file", nargs="+", help="CSV produced by count-packets-over-time.lua"
148 |     )
149 |     parser.add_argument(
150 |         "-t", "--title", default="Packet rate in traffic sample", help="Graph title"
151 |     )
152 |     parser.add_argument(
153 |         "-o", "--output", default="packet_rate.svg", help="Output graph filename"
154 |     )
155 |     parser.add_argument(
156 |         "--since",
157 |         type=float,
158 |         default=0,
159 |         help="Omit data before this time (secs since test
start)", 160 | ) 161 | parser.add_argument( 162 | "--until", 163 | type=float, 164 | default=float("+inf"), 165 | help="Omit data after this time (secs since test start)", 166 | ) 167 | parser.add_argument( 168 | "--average", type=float, help="Average samples over specified period (secs)" 169 | ) 170 | 171 | args = parser.parse_args() 172 | 173 | # initialize graph 174 | ax, plot_props = init_plot(args.title) 175 | 176 | if len(plot_props) < len(args.csv_file): 177 | logging.critical( 178 | "more than %d input files at once is not supported, got %d", 179 | len(plot_props), 180 | len(args.csv_file), 181 | ) 182 | sys.exit(3) 183 | for csv_path, line_props in zip(args.csv_file, plot_props): 184 | try: 185 | with open(csv_path, encoding="utf-8") as f: 186 | period, xyrate = parse_csv(f, args.since, args.until) 187 | except FileNotFoundError as exc: 188 | logging.critical("%s", exc) 189 | sys.exit(1) 190 | 191 | name = os.path.splitext(os.path.basename(os.path.normpath(csv_path)))[0] 192 | if not math.isnan(period): 193 | period_str = f"sampling period {round(period, 4)} s" 194 | else: 195 | period_str = "variable sampling period" 196 | 197 | if args.average: 198 | if not math.isfinite(period): 199 | logging.critical( 200 | "file %s: refusing to average samples with a variable " 201 | "sampling period", 202 | csv_path, 203 | ) 204 | sys.exit(2) 205 | n_samples = args.average / period 206 | if abs(round(n_samples) - n_samples) > 0.0001: 207 | logging.critical( 208 | "file %s: averaging period %f is not an integer multiple " 209 | "of the original period %f", 210 | csv_path, 211 | args.average, 212 | period, 213 | ) 214 | sys.exit(3) 215 | n_samples = round(n_samples) 216 | period_str = ( 217 | f"avg {n_samples} samples with period {round(period, 4)} s " 218 | f"= new period {round(n_samples * period, 4)} s" 219 | ) 220 | xyrate = xyrate_average(xyrate, period, n_samples) 221 | plot(ax, xyrate, f"{name} ({period_str})", args.since, args.until, line_props) 222 | 223 | plt.legend() 224 | plt.savefig(args.output) 225 | 226 | 227 | if __name__ == "__main__": 228 | main() 229 | -------------------------------------------------------------------------------- /tools/plot-response-rate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import collections 5 | import itertools 6 | import json 7 | import logging 8 | import math 9 | import os.path 10 | import sys 11 | 12 | # pylint: disable=wrong-import-order,wrong-import-position 13 | import matplotlib 14 | import matplotlib.colors as mcolors 15 | from matplotlib.ticker import MultipleLocator 16 | 17 | matplotlib.use("Agg") 18 | import matplotlib.pyplot as plt 19 | 20 | import mplhlpr.styles 21 | 22 | JSON_VERSION = 20200527 23 | 24 | 25 | StatRcode = collections.namedtuple("StatRcode", ["field", "label"]) 26 | 27 | RCODES = { 28 | 0: StatRcode("rcode_noerror", "NOERROR"), 29 | 1: StatRcode("rcode_formerr", "FORMERR"), 30 | 2: StatRcode("rcode_servfail", "SERVFAIL"), 31 | 3: StatRcode("rcode_nxdomain", "NXDOMAIN"), 32 | 4: StatRcode("rcode_notimp", "NOTIMP"), 33 | 5: StatRcode("rcode_refused", "REFUSED"), 34 | 6: StatRcode("rcode_yxdomain", "YXDOMAIN"), 35 | 7: StatRcode("rcode_yxrrset", "YXRRSET"), 36 | 8: StatRcode("rcode_nxrrset", "NXRRSET"), 37 | 9: StatRcode("rcode_notauth", "NOTAUTH"), 38 | 10: StatRcode("rcode_notzone", "NOTZONE"), 39 | 16: StatRcode("rcode_badvers", "BADVERS"), 40 | 17: StatRcode("rcode_badkey", "BADKEY"), 41 | 18: StatRcode("rcode_badtime", 
"BADTIME"), 42 | 19: StatRcode("rcode_badmode", "BADMODE"), 43 | 20: StatRcode("rcode_badname", "BADNAME"), 44 | 21: StatRcode("rcode_badalg", "BADALG"), 45 | 22: StatRcode("rcode_badtrunc", "BADTRUNC"), 46 | 23: StatRcode("rcode_badcookie", "BADCOOKIE"), 47 | 100000: StatRcode("rcode_other", "other"), 48 | } 49 | 50 | RCODES_TO_NUM = {rcodestat.field: number for number, rcodestat in RCODES.items()} 51 | 52 | RCODE_MARKERS = {1: "f", 2: "s", 3: "n", 4: "i", 5: "r", 100000: "?"} 53 | 54 | RCODE_COLORS = { 55 | 0: "tab:green", 56 | 1: "tab:brown", 57 | 2: "tab:red", 58 | 3: "tab:blue", 59 | 4: "tab:pink", 60 | 5: "tab:orange", 61 | 6: "tab:purple", 62 | 7: "tab:olive", 63 | 8: "tab:cyan", 64 | 9: "#f0944d", 65 | 10: "#840000", 66 | 11: "#bc13fe", 67 | 12: "#601ef9", 68 | 13: "#bbf90f", 69 | 14: "#fffd01", 70 | 15: "#4f738e", 71 | 16: "#ac7e04", 72 | 17: "#5d1451", 73 | 18: "#fdb0c0", 74 | 19: "#fd3c06", 75 | 20: "#536267", 76 | 21: "#a03623", 77 | 22: "#b7e1a1", 78 | 23: "#0a888a", 79 | 100000: "#000000", 80 | } 81 | 82 | sinames = ["", " k", " M", " G", " T"] 83 | 84 | 85 | def siname(n): 86 | try: 87 | n = float(n) 88 | except ValueError: 89 | return n 90 | 91 | siidx = max( 92 | 0, 93 | min(len(sinames) - 1, int(math.floor(0 if n == 0 else math.log10(abs(n)) / 3))), 94 | ) 95 | return f"{(n / 10 ** (3 * siidx)):.0f}{sinames[siidx]}" 96 | 97 | 98 | def stat_field_rate(field): 99 | def inner(stats): 100 | if stats["requests"] == 0: 101 | return float("nan") 102 | if callable(field): 103 | field_val = field(stats) 104 | else: 105 | field_val = stats[field] 106 | return 100.0 * field_val / stats["requests"] 107 | 108 | return inner 109 | 110 | 111 | response_rate = stat_field_rate("answers") 112 | 113 | 114 | def init_plot(title): 115 | _, ax = plt.subplots() 116 | 117 | ax.set_xlabel("Time [s]") 118 | ax.set_ylabel("Response Rate [%]") 119 | mplhlpr.styles.ax_set_title(ax, title) 120 | 121 | ax.grid(True, axis="x", which="major") 122 | 123 | ax.yaxis.set_major_locator(MultipleLocator(10)) 124 | ax.grid(True, axis="y", which="major") 125 | 126 | ax.yaxis.set_minor_locator(MultipleLocator(2)) 127 | ax.grid(True, axis="y", which="minor") 128 | 129 | return ax 130 | 131 | 132 | def set_axes_limits(ax): 133 | bottom, top = ax.get_ylim() 134 | bottom = math.floor(bottom / 10) * 10 135 | top = math.ceil(top / 10) * 10 136 | top = top + 1 if top <= 100 else 101 137 | bottom = bottom - 1 if bottom >= 0 else -1 138 | ax.set_ylim(bottom, top) 139 | 140 | 141 | def plot_response_rate( 142 | ax, 143 | data, 144 | label, 145 | eval_func=None, 146 | min_timespan=0, 147 | min_rate=0, 148 | marker=None, 149 | linestyle=None, 150 | color=None, 151 | ): 152 | stats_periodic = data["stats_periodic"] 153 | time_offset = stats_periodic[0]["since_ms"] 154 | 155 | if not eval_func: 156 | eval_func = response_rate 157 | 158 | xvalues = [] 159 | yvalues = [] 160 | for stats in stats_periodic: 161 | timespan = stats["until_ms"] - stats["since_ms"] 162 | if timespan < min_timespan: 163 | continue 164 | time = (stats["until_ms"] - time_offset) / 1000 165 | xvalues.append(time) 166 | yvalues.append(eval_func(stats)) 167 | 168 | if not min_rate or max(yvalues) >= min_rate: 169 | ax.plot( 170 | xvalues, 171 | yvalues, 172 | label=label, 173 | marker=marker, 174 | linestyle=linestyle, 175 | color=color, 176 | ) 177 | 178 | 179 | def rcode_to_int(rcode: str) -> int: 180 | try: 181 | return int(rcode) 182 | except ValueError: 183 | pass 184 | 185 | try: 186 | return RCODES_TO_NUM[f"rcode_{rcode.lower()}"] 187 | except 
KeyError:
188 |         raise argparse.ArgumentTypeError(f'unsupported rcode "{rcode}"') from None
189 | 
190 | 
191 | def main():
192 |     logging.basicConfig(
193 |         format="%(asctime)s %(levelname)8s %(message)s", level=logging.DEBUG
194 |     )
195 |     logger = logging.getLogger("matplotlib")
196 |     # set WARNING for Matplotlib
197 |     logger.setLevel(logging.WARNING)
198 | 
199 |     mplhlpr.styles.configure_mpl_styles()
200 | 
201 |     parser = argparse.ArgumentParser(
202 |         description="Plot response rate from shotgun experiment"
203 |     )
204 | 
205 |     parser.add_argument("json_file", nargs="+", help="Shotgun results JSON file(s)")
206 |     parser.add_argument(
207 |         "-t", "--title", default="Response Rate over Time", help="Graph title"
208 |     )
209 |     parser.add_argument(
210 |         "-o", "--output", default="response_rate.svg", help="Output graph filename"
211 |     )
212 |     parser.add_argument(
213 |         "-T",
214 |         "--skip-total",
215 |         action="store_const",
216 |         const="True",
217 |         help="Don't plot the line for total response rate (plotted by default)",
218 |     )
219 |     parser.add_argument(
220 |         "-r",
221 |         "--rcode",
222 |         nargs="*",
223 |         type=rcode_to_int,
224 |         help="RCODE(s) to plot in addition to answer rate",
225 |     )
226 |     parser.add_argument(
227 |         "-R",
228 |         "--rcodes-above-pct",
229 |         type=float,
230 |         help="Add RCODE(s) representing more than the specified percentage "
231 |         "of all answers (short spikes might not be shown if the percentage "
232 |         "is too high)",
233 |     )
234 |     parser.add_argument(
235 |         "-i",
236 |         "--ignore-rcodes-rate-pct",
237 |         type=float,
238 |         help="Remove RCODE(s) whose response rate never exceeds the specified value "
239 |         "(a single spike will cause the RCODE to show)",
240 |     )
241 |     parser.add_argument(
242 |         "-s", "--sum-rcodes", nargs="*", type=rcode_to_int, help="Plot sum of RCODE(s)"
243 |     )
244 |     args = parser.parse_args()
245 | 
246 |     # initialize graph
247 |     ax = init_plot(args.title)
248 | 
249 |     colors = list(mcolors.TABLEAU_COLORS.keys()) + list(mcolors.BASE_COLORS.keys())
250 |     colors.remove("w")  # avoid white line on white background
251 |     for json_path, color in itertools.zip_longest(
252 |         args.json_file, colors[: len(args.json_file)]
253 |     ):
254 |         try:
255 |             process_file(json_path, color, args, ax)
256 |         except (FileNotFoundError, NotImplementedError) as exc:
257 |             logging.critical("%s: %s", json_path, exc)
258 |             sys.exit(1)
259 | 
260 |     set_axes_limits(ax)
261 | 
262 |     plt.legend()
263 |     plt.savefig(args.output)
264 | 
265 | 
266 | def process_file(json_path, json_color, args, ax):
267 |     with open(json_path, encoding="utf-8") as f:
268 |         data = json.load(f)
269 |     try:
270 |         assert data["version"] == JSON_VERSION
271 |     except (KeyError, AssertionError):
272 |         raise NotImplementedError(
273 |             "Older formats of JSON data aren't supported. "
274 |             "Use older tooling or re-run the tests with newer shotgun."
275 |         ) from None
276 | 
277 |     if data["discarded"] != 0:
278 |         proportion_all_perc = data["discarded"] / data["stats_sum"]["requests"] * 100
279 |         proportion_one_sec_perc = (
280 |             data["discarded"]
281 |             / min(
282 |                 sample["requests"]
283 |                 for sample in data["stats_periodic"]
284 |                 if sample["requests"] > 0
285 |             )
286 |             * 100
287 |         )
288 |         logging.warning(
289 |             "%d discarded packets may skew results!
Discarded %.1f %% of all " 290 | "requests; theoretical worst case %.1f %% loss if all discarded packets " 291 | "happened to be in one %d ms sample", 292 | data["discarded"], 293 | proportion_all_perc, 294 | proportion_one_sec_perc, 295 | data["stats_interval_ms"], 296 | ) 297 | 298 | timespan = (data["stats_sum"]["until_ms"] - data["stats_sum"]["since_ms"]) / 1000 299 | qps = data["stats_sum"]["requests"] / timespan 300 | name = os.path.splitext(os.path.basename(os.path.normpath(json_path)))[0] 301 | label = f"{name} ({siname(qps)} QPS)" 302 | min_timespan = data["stats_interval_ms"] / 2 303 | 304 | if not args.skip_total: 305 | plot_response_rate(ax, data, label, min_timespan=min_timespan, color=json_color) 306 | 307 | draw_rcodes = set(args.rcode or []) 308 | sum_rcodes = set(args.sum_rcodes or []) 309 | if args.rcodes_above_pct is not None: 310 | threshold = data["stats_sum"]["answers"] * args.rcodes_above_pct / 100 311 | rcodes_above_limit = set( 312 | RCODES_TO_NUM[key] 313 | for key, cnt in data["stats_sum"].items() 314 | if key.startswith("rcode_") and cnt > threshold 315 | ) 316 | draw_rcodes = draw_rcodes.union(rcodes_above_limit) 317 | 318 | if draw_rcodes: 319 | if len(args.json_file) > 1: 320 | # same color for all rcodes from one JSON 321 | cur_rcode_colors = collections.defaultdict(lambda: json_color) 322 | else: 323 | # single JSON - different color for each RCODE 324 | cur_rcode_colors = RCODE_COLORS 325 | for rcode in draw_rcodes: 326 | try: 327 | stat_rcode = RCODES[rcode] 328 | symbol = RCODE_MARKERS.get(rcode, str(rcode)) 329 | except KeyError: 330 | logging.error("Unsupported RCODE: %s", rcode) 331 | continue 332 | 333 | eval_func = stat_field_rate(stat_rcode.field) 334 | rcode_label = f"{label} {stat_rcode.label}" 335 | 336 | plot_response_rate( 337 | ax, 338 | data, 339 | rcode_label, 340 | eval_func=eval_func, 341 | min_timespan=min_timespan, 342 | min_rate=args.ignore_rcodes_rate_pct, 343 | marker=f"${symbol}$", 344 | color=cur_rcode_colors[rcode], 345 | ) 346 | 347 | if sum_rcodes: 348 | 349 | def sum_rate(stats): 350 | return sum(stats[RCODES[ircode].field] for ircode in sum_rcodes) 351 | 352 | eval_func = stat_field_rate(sum_rate) 353 | 354 | sum_label = " ".join(RCODES[ircode].label for ircode in sum_rcodes) 355 | plot_response_rate( 356 | ax, 357 | data, 358 | f"{label} {sum_label}", 359 | eval_func=eval_func, 360 | min_timespan=min_timespan, 361 | marker="$\\sum$", 362 | color=json_color, 363 | ) 364 | 365 | 366 | if __name__ == "__main__": 367 | main() 368 | --------------------------------------------------------------------------------