├── .editorconfig
├── .gitignore
├── CODEOWNERS
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── PULL_REQUEST_TEMPLATE.md
├── README.md
├── attic
    ├── README.md
    ├── diagrams
    │   ├── mlab-ooni-dataflow.pdf
    │   ├── mlab-ooni-dataflow.svg
    │   ├── ooni-backend-diagram.pdf
    │   └── ooni-backend-diagram.svg
    ├── probe-legacy-release-procedure.md
    ├── td-mlab-notes.md
    └── td-spec.md
├── backends
    ├── README.md
    ├── bk-001-ooni-backend.md
    ├── bk-002-orchestra.md
    ├── bk-003-collector.md
    ├── bk-004-bouncer.md
    ├── bk-005-ooni-run-v2.md
    ├── bk-006-anon-creds.md
    ├── th-000-example.md
    ├── th-001-tcp-echo.md
    ├── th-002-http-return-json-headers.md
    ├── th-003-dns.md
    ├── th-004-raw-tcp-echo.md
    ├── th-005-raw-udp-echo.md
    ├── th-006-reverse-traceroute.md
    └── th-007-websteps.md
├── data-formats
    ├── README.md
    ├── df-000-base.md
    ├── df-001-httpt.md
    ├── df-002-dnst.md
    ├── df-003-scapy.md
    ├── df-004-tcpt.md
    ├── df-005-tcpconnect.md
    ├── df-006-tlshandshake.md
    ├── df-007-errors.md
    ├── df-008-netevents.md
    └── df-009-tunnel.md
├── nettests
    ├── README.md
    ├── ts-000-example.md
    ├── ts-001-bridget.md
    ├── ts-002-dns-consistency.md
    ├── ts-003-http-requests.md
    ├── ts-004-http-host.md
    ├── ts-005-dns-spoof.md
    ├── ts-006-header-field-manipulation.md
    ├── ts-007-http-invalid-request-line.md
    ├── ts-008-tcp-connect.md
    ├── ts-009-multi-protocol-traceroute.md
    ├── ts-010-captive-portal.md
    ├── ts-011-bridge-reachability.md
    ├── ts-012-dns-injection.md
    ├── ts-013-lantern.md
    ├── ts-014-meek-fronted-requests.md
    ├── ts-015-psiphon.md
    ├── ts-016-openvpn.md
    ├── ts-016-vanilla-tor.md
    ├── ts-017-web-connectivity.md
    ├── ts-018-whatsapp.md
    ├── ts-019-facebook-messenger.md
    ├── ts-020-telegram.md
    ├── ts-021-dash.md
    ├── ts-022-ndt.md
    ├── ts-023-tor.md
    ├── ts-024-sni-blocking.md
    ├── ts-025-stun-reachability.md
    ├── ts-026-riseupvpn.md
    ├── ts-027-urlgetter.md
    ├── ts-028-dnscheck.md
    ├── ts-029-signal.md
    ├── ts-030-torsf.md
    ├── ts-031-quicping.md
    ├── ts-032-tcpping.md
    ├── ts-033-tlsping.md
    ├── ts-034-simplequicping.md
    ├── ts-035-dnsping.md
    ├── ts-036-browser_web.md
    ├── ts-037-tlsmiddlebox.md
    ├── ts-038-port-filtering.md
    ├── ts-039-echcheck.md
    └── ts-040-openvpn.md
├── policies
    └── po-001-measurements-and-urls.md
├── probe
    ├── README.md
    ├── architecture.png
    └── interaction.png
└── techniques
    ├── Readme.md
    ├── tq-000-timing-information-collection.md
    ├── tq-001-DNS-UDP-censorship-transparency.md
    ├── tq-002-second-DNS-reply.md
    ├── tq-003-DNS-latency-of-non-cached-domain.md
    ├── tq-004-DNS-TCP-DoT-DoH-against-same-resolver.md
    ├── tq-005-DNS-Resource-Record-TTL-ticks-in-cache.md
    ├── tq-006-another-NXDOMAIN-domain-in-same-zone.md
    ├── tq-007-UDP-information-collection.md
    ├── tq-008-DNS-delegation-chain-from-root.md
    ├── tq-009-subdomains-of-possibly-censored-domains.md
    ├── tq-010-SOAs-and-NSes-for-possibly-censored-domain.md
    ├── tq-011-both-A-and-AAAA-DNS-queries.md
    ├── tq-012-traceroute.md
    ├── tq-013-traceroute-with-payload.md
    ├── tq-014-TCP-injection-blocking-with-BPF.md
    ├── tq-015-packet-headers-exfiltration-with-BPF.md
    ├── tq-016-request-to-blackhole.md
    ├── tq-017-request-to-discard-test-helper.md
    ├── tq-018-request-to-dummy-proxy-test-helper.md
    ├── tq-019-prepend-and-append-to-possibly-censored-domain.md
    ├── tq-020-0x20-hack-for-possibly-censored-domain.md
    ├── tq-021-TLS-cert-recording.md
    ├── tq-022-test-helper-L7-checks-for-possibly-censored-IPs.md
    ├── tq-023-OpenVPN-Control-Hard-Reset.md
    ├── tq-024-TCP-segmentation.md
    ├── tq-025-saving-bandwidth-in-repeated-HTTP-requests.md
    ├── tq-026-SNI-free-and-fake-SNI-TLS-ClientHello.md
    ├── tq-027-stuffed-TLS-ClientHello.md
    ├── tq-028-stuffed-HTTP-camouflage.md
    ├── tq-029-staying-alive-HTTP-camouflage.md
    ├── tq-030-fingerprinting-requests-for-transparent-proxies.md
    ├── tq-031-attempt-ecn.md
    ├── tq-032-attempt-h2-upgrade.md
    ├── tq-033-attempt-tfo.md
    ├── tq-034-block-udp.md
    ├── tq-034-request-dnssec-records.md
    ├── tq-035-block-quic.md
    └── tq-999-backlog.md


/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | end_of_line = lf
 5 | insert_final_newline = true
 6 | indent_style = tab
 7 | indent_size = 4
 8 | trim_trailing_whitespace = true
 9 | 
10 | [*.md]
11 | indent_style = space
12 | 
13 | [*.{yml,yaml}]
14 | indent_style = space
15 | indent_size = 2
16 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | .DS_Store
3 | /ignore/
4 | /_book/
5 | 


--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @bassosimone @hellais
2 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # OONI Code of Conduct
2 | 
3 | Please, refer to [ooni/ooni.org's CODE\_OF\_CONDUCT.md](
4 | https://github.com/ooni/ooni.org/blob/master/CODE_OF_CONDUCT.md).
5 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to ooni/spec
 2 | 
 3 | This is an open source project, and contributions are welcome! You are welcome
 4 | to open pull requests. An open pull request will be reviewed by a core
 5 | developer. The review may request you to apply changes. Once the assigned
 6 | reviewer is satisfied, they will merge the pull request.
 7 | 
 8 | ## OONI Software Development Guidelines
 9 | 
10 | Please, make sure you read [OONI Software Development Guidelines](
11 | https://ooni.org/post/ooni-software-development-guidelines/).
12 | 
13 | When you are developing a new nettest (aka experiment) you should also
14 | read [the probe-cli development guidelines](https://github.com/ooni/probe-cli/blob/master/CONTRIBUTING.md).
15 | 
16 | ## Opening issues
17 | 
18 | As regards opening issues, please open issues pertaining to this repository at
19 | https://github.com/ooni/probe/issues. Please, before opening a new issue,
20 | check whether the issue or feature request you want us to consider has not
21 | already been reported by someone else.
22 | 
23 | ## New proposal process
24 | 
25 | If you are interested in having a new experiment be part of OONI, that's great!
26 | 
27 | The process for suggesting a new experiment is as follows:
28 | 
29 | 1. You figure out if you want to specify a technique (it's just an idea or some
30 |    extra data we should collect as part of many tests) or a full-fledged nettest
31 |    (it delivers some new and interesting results or conclusions to end users)
32 | 
33 | 2. You copy the nettest template or create a new technique document
34 | 
35 | 3. File a pull request on GitHub
36 | 
37 | 4. Discussion happens on github and it's merged
38 | 
39 | 5. If it's feasible, you implement it as a test or technique in measurement-kit
40 | 
41 | 6. If it's useful for our users to run it, we ship it in OONI Probe
42 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2012 Open Observatory of Network Interference (OONI), The Tor Project
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are met:
 5 | 
 6 | 1. Redistributions of source code must retain the above copyright notice, this
 7 |    list of conditions and the following disclaimer.
 8 | 
 9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 |    this list of conditions and the following disclaimer in the documentation
11 |    and/or other materials provided with the distribution.
12 | 
13 | 3. Neither the name of the copyright holder nor the names of its contributors
14 |    may be used to endorse or promote products derived from this software
15 |    without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 


--------------------------------------------------------------------------------
/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | ## Checklist
 2 | 
 3 | - [ ] I have read the [contribution guidelines](https://github.com/ooni/spec/blob/master/CONTRIBUTING.md)
 4 | - [ ] reference issue for this pull request: <!-- add URL here -->
 5 | - [ ] related ooni/probe-cli pull request: <!-- add URL here -->
 6 | - [ ] If I changed a spec, I also bumped its version number and/or date
 7 | 
 8 | <!-- Location of the issue tracker: https://github.com/ooni/probe -->
 9 | 
10 | ## Description
11 | 
12 | Please, insert here a more detailed description.
13 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # OONI Spec
 2 | 
 3 | The OONI testing methodology is openly specified as part of OONI Spec. This
 4 | includes all the experiments that we run, the backend components used as part
 5 | of test and the data formats.
 6 | 
 7 | The way in which we do these specifications is similar to the RFC (Request For
 8 | Comments) process.
 9 | 
10 | This makes it possible to have a deep understanding of how OONI tests work,
11 | explains the assumptions we make in them, and allows others to create
12 | independent implementations.
13 | 
14 | To get an introduction of how we operate from the point of view of OONI Probe,
15 | i.e. the software that users can install, please see [the specific
16 | documentation describing that](probe). New developers would ideally want
17 | to be familiar with this document to understand the context.
18 | 
19 | ## Terminology
20 | 
21 | *Nettests (aka experiments)*
22 | 
23 | Are network experiments that deliver some meaningful results to end users. See
24 | the [nettests](nettests) directory.
25 | 
26 | *OONI Probe (aka the Probe)*
27 | 
28 | The software that users run to perform nettests. See the [probe](probe) directory.
29 | 
30 | *Techniques*
31 | 
32 | Are strategies that we follow for implementing nettests, hypotheses we are
33 | interested in testing and implementation details for each target platform. See
34 | the [techniques](techniques) directory.
35 | 
36 | *Data formats*
37 | 
38 | Define how data is presented to end users or analysts. See the
39 | [data-formats](data-formats) directory.
40 | 
41 | *Backends*
42 | 
43 | Are core OONI infrastructure components that are necessary to carry out experiments. See
44 | the [backends](backends) directory.
45 | 
46 | ## Contributing
47 | 
48 | Please, see [CONTRIBUTING.md](CONTRIBUTING.md).
49 | 


--------------------------------------------------------------------------------
/attic/README.md:
--------------------------------------------------------------------------------
1 | This directory contains legacy specifications.
2 | 


--------------------------------------------------------------------------------
/attic/diagrams/mlab-ooni-dataflow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ooni/spec/e8aafe45d5bd15a371a08dfbeb0dc557e1622d04/attic/diagrams/mlab-ooni-dataflow.pdf


--------------------------------------------------------------------------------
/attic/diagrams/ooni-backend-diagram.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ooni/spec/e8aafe45d5bd15a371a08dfbeb0dc557e1622d04/attic/diagrams/ooni-backend-diagram.pdf


--------------------------------------------------------------------------------
/attic/probe-legacy-release-procedure.md:
--------------------------------------------------------------------------------
  1 | # Feature Specification
  2 | 
  3 | All new features shall be specified before they are implemented. When the
  4 | feature is a feature of the client (ooniprobe) it is sufficient to specify it
  5 | inside of a ticket. Features that are to be part of the backend (oonib) shall
  6 | be specified inside of the [ooni backend
  7 | specification](https://github.com/TheTorProject/ooni-spec/blob/master/oonib.md).
  8 | 
  9 | Newly developed tests that constitute part of the core ooniprobe tests shall be
 10 | specified as part of the ooni-spec repository and should follow the [test
 11 | specification template](https://github.com/TheTorProject/ooni-spec/blob/master/test-specs/ts-000-example.md).
 12 | 
 13 | An issue shall then be created for the feature and closed once it has been
 14 | merged into the master branch.
 15 | 
 16 | # Code Review
 17 | 
 18 | When new tests are developed they shall be deployed immediately on non m-lab
 19 | collectors and backend, while they must pass the approval from m-lab before
 20 | they will be fit to run on their platform.
 21 | 
 22 | # Testing
 23 | 
 24 | Code coverage is measured via coveralls and the results for such tests can be
 25 | found [here](https://coveralls.io/r/TheTorProject/ooni-probe).
 26 | 
 27 | Before a release is fit for release it must pass the automated and manual tests
 28 | listed below:
 29 | 
 30 | ## Unit tests
 31 | 
 32 | ```
 33 | sudo trial ooni
 34 | ```
 35 | 
 36 | Must produce no errors.
 37 | 
 38 | ## Build source distribution and install
 39 | 
 40 | It should be possible to build a source distribution of ooniprobe. Install it
 41 | inside of a fresh virtual environment and then run all the manual tests
 42 | needed.
 43 | 
 44 | ```
 45 | python setup.py sdist
 46 | rmvirtualenv ooni-testsdist; mkvirtualenv ooni-testsdist
 47 | pip install dist/ooniprobe-$OONIPROBE_VERSION.tar.gz
 48 | ```
 49 | 
 50 | ## Usage tests
 51 | 
 52 | First cd into a temporary directory so that you don't mess up the code tree and
 53 | you import what you have just installed.
 54 | 
 55 | ```
 56 | cd `mktemp -d 2>/dev/null || mktemp -d -t 'ooni'`
 57 | ```
 58 | 
 59 | ### Running decks
 60 | 
 61 | ```
 62 | ooniprobe -i ~/.virtualenvs/ooni-testsdist/share/ooni/decks/mlab.deck
 63 | ooniprobe -i ~/.virtualenvs/ooni-testsdist/share/ooni/decks/fast_no_root.deck
 64 | ooniprobe -i ~/.virtualenvs/ooni-testsdist/share/ooni/decks/complete_no_root.deck
 65 | ```
 66 | 
 67 | While this is running you may as well open another shell, get into that
 68 | virtualenv and make sure oonireport is working as it should.
 69 | 
 70 | Check to see if by running:
 71 | 
 72 | ```
 73 | oonireport status
 74 | ```
 75 | 
 76 | You see a list of "Reports in progress". There should be at least 4 of them.
 77 | 
 78 | The above commands should not throw any exceptions and you should now see
 79 | inside of your current working directory the following list of files:
 80 | 
 81 | ```
 82 | report-dns_consistency-2014-08-18T134642Z.yamloo
 83 | report-http_header_field_manipulation-2014-08-18T134555Z.yamloo
 84 | report-http_header_field_manipulation-2014-08-18T134625Z.yamloo
 85 | report-http_header_field_manipulation-2014-08-18T134642Z.yamloo
 86 | report-http_host-2014-08-18T134642Z.yamloo
 87 | report-http_invalid_request_line-2014-08-18T134625Z.yamloo
 88 | report-http_invalid_request_line-2014-08-18T134642Z.yamloo
 89 | report-http_requests-2014-08-18T134642Z.yamloo
 90 | ```
 91 | 
 92 | ### Running single tests
 93 | 
 94 | The following command should produce the list of tests installed on your
 95 | system:
 96 | 
 97 | ```
 98 | ooniprobe -s
 99 | ```
100 | 
101 | The following commands should all succeed and produce some measurements:
102 | 
103 | ```
104 | ooniprobe -n manipulation/http_header_field_manipulation
105 | ooniprobe -n blocking/http_requests -f httpo://ihiderha53f36lsd.onion/input/37e60e13536f6afe47a830bfb6b371b5cf65da66d7ad65137344679b24fdccd1
106 | ```
107 | 
108 | ### Testing oonideckgen
109 | 
110 | This command must produce a deck in the current working directory.
111 | 
112 | ```
113 | oonideckgen --country-code IT
114 | ```
115 | 
116 | ### Testing ooniresources
117 | 
118 | This command must update the resources.
119 | 
120 | ```
121 | ooniresources --update-geoip --update-inputs
122 | ```
123 | 
124 | ### Testing vagrant installation
125 | 
126 | To setup and test ooniprobe on a fresh ubuntu installation with
127 | [Vagrant](https://www.vagrantup.com):
128 | 
129 | ```
130 | vagrant destroy -f
131 | vagrant up
132 | vagrant ssh
133 | ```
134 | 
135 | You should now be logged into the vagrant machine with ooniprobe installed.
136 | 
137 | To make sure everything went as expected try running:
138 | 
139 | ```
140 | sudo ooniprobe -i /usr/share/ooni/decks/fast.deck
141 | ```
142 | 
143 | # Tagging and Signing of a release
144 | 
145 | To create a new release you should update the following files with the new
146 | version information:
147 | 
148 |   * ooni/__init__.py: set __version__ = "$OONI_VERSION"
149 | 
150 |   *  Changelog.md: to include a new changelog entry with the introduced
151 |      changes.
152 | 
153 |   * debian/changelog: with a new entry for the new release
154 | 
155 | Once this is done you should commit the changes with a message like this:
156 | 
157 | ```
158 | git commit -a -m 'update ooniprobe to $OONI_VERSION'
159 | ```
160 | 
161 | You can now tag and sign a new release with:
162 | 
163 | ```
164 | git tag -s -a 'v$OONI_VERSION'
165 | ```
166 | 
167 | Then write a reasonable message inside of the tag message body. The format to
168 | follow should be:
169 | 
170 | ```
171 | ooniprobe $OONI_VERSION
172 | 
173 | $SOME_DESCRIPTION_OR_CHANGELOG_OR_WHATEVER
174 | ```
175 | 
176 | All tags should be incremental and they will be signed with either of the
177 | following GPG keys:
178 | 
179 | ```
180 | pub   3072D/702287F4 2015-10-19 [expires: 2018-05-06]
181 |       Key fingerprint = 67EF 3966 5099 86E9 6ACE  E84E 5D67 CD18 7022 87F4
182 | uid       [ultimate] Arturo Filastò <arturo@filasto.net>
183 | uid       [ultimate] Arturo Filastò <art@fuffa.org>
184 | uid       [ultimate] Arturo Filastò <art@torproject.org>
185 | sub   2752g/C58FC4EE 2015-10-19 [expires: 2018-05-06]
186 | ```
187 | 
188 | ## Versioning scheme
189 | 
190 | All OONI tools follow [semantic versioning](http://semver.org/).
191 | 
192 | ## Updating packages
193 | 
194 | To upload a new version to pypi run:
195 | 
196 | ```
197 | make man
198 | git add data/
199 | git commit -a -m 'update manpages'
200 | make sdist
201 | make sign
202 | make upload
203 | ```
204 | 
205 | # Build systems & environments
206 | 
207 | We currently only support packages for debian and ubuntu based systems.
208 | 
209 | # Release announcement
210 | 
211 | Once a new stable release is tagged a new changelog entry shall be written and
212 | the corresponding changelog entry shall be updated.
213 | 
214 | Following a new release we should write an email to the ooni-dev mailing list
215 | announcing the new release.
216 | 
217 | # Notes
218 | 
219 | See txtorcon release procedure:
220 | https://github.com/meejah/txtorcon/blob/master/docs/release-checklist.rst
221 | 


--------------------------------------------------------------------------------
/attic/td-mlab-notes.md:
--------------------------------------------------------------------------------
 1 | This is the list of tests that shall be included inside of the MLAB test deck:
 2 | 
 3 | 
 4 | ## HTTP Invalid Request Line
 5 | 
 6 | This test does some basic fuzzing on the HTTP request line, generating a
 7 | series of invalid HTTP requests between the OONI test client and the M-Lab
 8 | server. The M-Lab server runs a TCP echo test helper, and if the response from
 9 | the server doesn’t match with what was sent, the conclusion is that tampering
10 | is occurring. The assumption driving this methodology is that certain
11 | transparent HTTP proxies may not properly parse the HTTP request line.
12 | 
13 | ## Header Field Manipulation 
14 | 
15 | This test client sends HTTP requests whose headers vary in capitalization
16 | toward an HTTPReturnJSONHeaders test helper backend running on an M-Lab server.
17 | If the headers received by the M-Lab server don’t match those sent, tampering
18 | is inferred.
19 | 
20 | ## Multi-port Traceroute
21 | 
22 | This test performs a multi-port, multi-protocol traceroute from an OONI client
23 | toward an M-Lab server. The goal of such is to determine biases in the paths
24 | based on destination port. Destination ports are 22, 23, 80, 123, 443. Note
25 | that if the user has opted not to include source IP in the report then source
26 | and destination IP will be eliminated from the collected data. Note that while
27 | a user may be able to opt not to eliminate IP address, we will need to
28 | provision an option for those who wish IP not to be collected.
29 | 
30 | ## HTTP Host
31 | 
32 | This test detects the presence of a transparent HTTP proxy and enumerates the
33 | sites it is configured to censor. To do this the test places the hostname of a
34 | probable censored site inside of the Host header field, and communicates this
35 | stream of data between an OONI client and an M-Lab server. If the response from
36 | the server doesn’t match the data sent, the test determines the presence of a
37 | transparent HTTP proxy.
38 | 
39 | ## DNS Consistency
40 | 
41 | This test performs A queries to a set of test resolvers and a validated control
42 | resolver. If the two results do not match it performs a reverse DNS lookup on
43 | the first A record address of both sets of queries, checking that they both
44 | resolve to the same name. NOTE: This test frequently results in false positives
45 | due to GeoIP-based load balancing on major global sites such as Google,
46 | Facebook, and Youtube, etc. This will need to be noted and accounted for.
47 | 
48 | 


--------------------------------------------------------------------------------
/backends/README.md:
--------------------------------------------------------------------------------
 1 | # Backends specification
 2 | 
 3 | This directory contains the specification of OONI backends.
 4 | 
 5 | Most of the specs in this directory are marked as _obsolete_ and
 6 | should be taken with a grain of salt.
 7 | 
 8 | Please, refer to the [automatically-generated API documentation](
 9 | https://api.ooni.io/apidocs/) for the
10 | [OONI API](https://github.com/ooni/api) instead.
11 | 


--------------------------------------------------------------------------------
/backends/bk-004-bouncer.md:
--------------------------------------------------------------------------------
  1 | # OONI bouncer specification
  2 | 
  3 | * version: 2.0.0
  4 | * date: 2019-03-15
  5 | * author: Simone Basso
  6 | * status: _obsolete_
  7 | 
  8 | This document aims at providing a functional specification of the
  9 | OONI bouncer. We keep a description of the _legacy_ bouncer behaviour
 10 | in [bk-001-ooni-backend.md](bk-001-ooni-backend.md).
 11 | 
 12 | # 1.0 System overview
 13 | 
 14 | The bouncer exposes an HTTP API allowing OONI probes to discover
 15 | active collectors and test helpers. A collector is an HTTP endpoint
 16 | used to submit measurements. A test helper is a server that helps
 17 | a specific network test to do its job.
 18 | 
 19 | The bouncer client and the bouncer server MUST NOT assume a keep
 20 | alive semantics for the HTTP connections.
 21 | 
 22 | New implementations MUST properly set `Content-Type`. Server side
 23 | implementations MUST be able to deal with legacy clients that possibly
 24 | do not correctly set the `Content-Type`.
 25 | 
 26 | The bouncer MUST be exposed as an HTTPS service. It MUST also be exposed as
 27 | a Tor onion service as long as legacy OONI probe clients use it. The need
 28 | to expose an onion service will be rediscussed when legacy OONI probe
 29 | clients will no longer be relevant. A [legacy document](
 30 | https://ooni.torproject.org/docs/architecture.html)
 31 | explains why the OONI project originally chose to allow for both HTTPS
 32 | and Tor onion services (henceforth, Onion).
 33 | 
 34 | # 2.0 Threat model
 35 | 
 36 | The bouncer transport MUST guarantee some reasonable level of encryption
 37 | and authentication between the OONI probe and itself. Therefore, a malicious
 38 | lazy actor won't be able to easily see and/or modify the exchanged data.
 39 | 
 40 | It is outside of the scope of the bouncer to provide blocking resistance or
 41 | to conceal from a passive network observer the fact that they are communicating to
 42 | a bouncer. Such properties are to be provided by other software, e.g. Tor.
 43 | 
 44 | Therefore a client implementation of the bouncer protocol SHOULD allow one
 45 | to specify a [SOCKS5](https://tools.ietf.org/html/rfc1928) proxy where the
 46 | name resolution is performed by the circumvention tool (`socks5h`).
 47 | 
 48 | # 3.0 API
 49 | 
 50 | The same API is available via HTTPS and Onion.
 51 | 
 52 | ## 3.1 Legacy API
 53 | 
 54 | As long as we have legacy clients, a bouncer MUST implement the legacy
 55 | API defined in [bk-001-ooni-backend.md](bk-001-ooni-backend.md). In
 56 | this document we only describe the new API.
 57 | 
 58 | ## 3.2 Discovering collectors
 59 | 
 60 | A probe will send the following request:
 61 | 
 62 |     GET /api/v1/collectors
 63 | 
 64 | On success, the bouncer will reply with status `200` and a body
 65 | containing a JSON document following this spec:
 66 | 
 67 |     [{
 68 |       "address":
 69 |         `string` containing the service URL. The semantics depends
 70 |         also on the value of type.
 71 |       
 72 |       "type":
 73 |         `string` indicating the type. One of "https", "cloudfront",
 74 |         or "onion". When type is "https" or "onion", "address" is
 75 |         the HTTPS or Onion URL to use. When it's "cloudfront", the
 76 |         URL hostname is the hostname to pass to the domain fronting
 77 |         service, while the real hostname to connect to is provided
 78 |         in the optional "front" field.
 79 |       
 80 |       "front":
 81 |         (optional) `string` indicating the real host to connect
 82 |         to when using a domain fronting service.
 83 |     }]
 84 | 
 85 | On failure, the bouncer MUST return `5xx`.
 86 | 
 87 | The following example shows what getting the collectors looks like from
 88 | the point of view of a modern bouncer client (where the JSON
 89 | messages have been edited for readability):
 90 | 
 91 | ```
 92 | > GET /api/v1/collectors HTTP/1.1
 93 | > Host: bouncer.ooni.io
 94 | >
 95 | < HTTP/1.1 200 OK
 96 | < Server: nginx
 97 | < Date: Wed, 13 Mar 2019 13:19:42 GMT
 98 | < Content-Type: application/json; charset=utf-8
 99 | < Content-Length: 152
100 | < Connection: keep-alive
101 | < 
102 | < [{
103 | <   "address": "httpo://ihiderha53f36lsd.onion",
104 | <   "type": "onion"
105 | < }, {
106 | <   "address": "https://a.collector.ooni.io:4441",
107 | <   "type": "https"
108 | < }, {
109 | <   "address": "https://das0y2z2ribx3.cloudfront.net",
110 | <   "front": "a0.awsstatic.com",
111 | <   "type": "cloudfront"
112 | < }]
113 | ```
114 | 
115 | ## 3.3 Discovering test helpers
116 | 
117 | A probe will send the following request:
118 | 
119 |     GET /api/v1/test-helpers
120 | 
121 | On success, the bouncer will reply with status `200` and a body
122 | containing a JSON document following this spec:
123 | 
124 |     {
125 |       "<test-helper-name>": [{
126 |         "address":
127 |           `string` containing the service URL or address. The semantics
128 |           depends also on the value of type.
129 |       
130 |         "type":
131 |           `string` indicating the type. One of "legacy", "https", and
132 |           "cloudfront". For "https" and "cloudfront" we use the
133 |           same semantics of the one described for /collectors.
134 | 
135 |           With "legacy" we indicate whatever was the default returned
136 |           by the legacy bouncer as the primary test helper. In this
137 |           case, the meaning of the value returned in "address" depends
138 |           on the type of helper requested. The client is expected to
139 |           already know what is the proper semantic for "address" given
140 |           a specific test helper.
141 |       
142 |         "front":
143 |           (optional) `string` indicating the real host to connect
144 |           to when using a domain fronting service.
145 |     }]}
146 | 
147 | On failure, the bouncer MUST return `5xx`.
148 | 
149 | The following example shows what getting the test helpers looks like from
150 | the point of view of a modern bouncer client (where the JSON
151 | messages have been edited for readability):
152 | 
153 | ```
154 | > GET /api/v1/test-helpers HTTP/1.1
155 | > Host: bouncer.ooni.io
156 | >
157 | < HTTP/1.1 200 OK
158 | < Server: nginx
159 | < Date: Wed, 13 Mar 2019 13:19:42 GMT
160 | < Content-Type: application/json; charset=utf-8
161 | < Content-Length: 152
162 | < Connection: keep-alive
163 | < 
164 | < {
165 | <   "dns": [{"type": "legacy", "address": "213.138.109.232:57004"}],
166 | <   "http-return-json-headers": [{
167 | <     "type": "legacy",
168 | <     "address": "http://38.107.216.10:80"
169 | <   }],
170 | <   "ssl": [{
171 | <     "type": "legacy",
172 | <     "address": "https://213.138.109.232"
173 | <   }],
174 | <   "tcp-echo": [{
175 | <     "type": "legacy",
176 | <     "address": "213.138.109.232"
177 | <   }],
178 | <   "traceroute": [{
179 | <     "type": "legacy",
180 | <     "address": "213.138.109.232"
181 | <   }],
182 | <   "web-connectivity": [{
183 | <     "type": "legacy",
184 | <     "address": "httpo://7jne2rpg5lsaqs6b.onion"
185 | <   }, {
186 | <     "address": "https://a.web-connectivity.th.ooni.io:4442",
187 | <     "type": "https"
188 | <   }, {
189 | <     "address": "https://d2vt18apel48hw.cloudfront.net",
190 | <     "front": "a0.awsstatic.com",
191 | <     "type": "cloudfront"
192 | <   }]
193 | < }
194 | ```
195 | 
196 | # 4.0 Implementation considerations
197 | 
198 | A client side implementation MUST retry any failing bouncer operation
199 | immediately, up to three times, in case there is a DNS or TCP error. This
200 | is to ensure that transient errors do not prevent us from contacting the
201 | bouncer. If all these immediate retries fail, then the client MUST use
202 | a static configuration. The resulting measurement MUST have an annotation
203 | named `"static_configuration"` with value `"true"`.
204 | 


--------------------------------------------------------------------------------
/backends/th-000-example.md:
--------------------------------------------------------------------------------
 1 | # Specification version number
 2 | 
 3 | # Specification name
 4 | 
 5 | # Helper preconditions
 6 | 
 7 | # Expected impact
 8 | 
 9 | # Expected inputs
10 | 
11 | ## Parent data format
12 | 
13 | ## Required input data
14 | 
15 | ## Semantics
16 | 
17 | # Helper description
18 | 
19 | # Expected output
20 | 
21 | ## Parent data format
22 | 
23 | ## Required output data
24 | 
25 | ## Semantics
26 | 
27 | ## Possible conclusions
28 | 


--------------------------------------------------------------------------------
/backends/th-001-tcp-echo.md:
--------------------------------------------------------------------------------
 1 | # Specification version number 
 2 | 
 3 | 2013-09-26-000
 4 | 
 5 | * status: _obsolete_
 6 | 
 7 | # Specification name
 8 | 
 9 | TCP Echo Test Helper
10 | 
11 | # Helper preconditions
12 | 
13 | * An Internet connection
14 | * An Internet-Reachable TCP Port.
15 | 
16 | # Expected impact
17 | 
18 | Ability to help an ooni-probe client determine if TCP payload has been
19 | modified in transit.
20 | 
21 | # Expected inputs
22 | 
23 | Bytes received over TCP.
24 | 
25 | ## Semantics
26 | 
27 | TCP Echo helper listens on a TCP port and writes any bytes received back to
28 | the connected client. The implementation depends on
29 | twisted.application.internet.TCPServer and the TCP and lower protocols are
30 | therefore transparent to the helper.
31 | 
32 | # Helper description
33 | 
34 | A TCP Echo service.
35 | 
36 | # Expected output
37 | 
38 | Bytes received over TCP.
39 | 
40 | ## Required output data
41 |   
42 | All unmodified received bytes.
43 | 
44 | ## Possible conclusions
45 | 
46 | The TCP Echo test helper can be used by ooni-probe tests to determine if
47 | the TCP payload has been modified in transit.
48 | 


--------------------------------------------------------------------------------
/backends/th-002-http-return-json-headers.md:
--------------------------------------------------------------------------------
 1 | # Specification version number
 2 | 
 3 | 2013-09-26-000
 4 | 
 5 | * status: _obsolete_
 6 | 
 7 | # Specification name
 8 | 
 9 | HTTP Return JSON Headers Test Helper
10 | 
11 | # Helper preconditions
12 | 
13 | * An Internet connection
14 | * An Internet-Reachable TCP Port, typically port 80.
15 | 
16 | # Expected impact
17 | 
18 | Ability to help an ooni-probe client determine if the HTTP request headers
19 | have been modified in transit.
20 | 
21 | # Expected inputs
22 | 
23 | An HTTP request.
24 | 
25 | ## Required input data
26 | 
27 | An HTTP Request with Headers.
28 | 
29 | ## Semantics
30 | 
31 | This helper processes the HTTP Request Line and the Request Headers and
32 | returns them in a JSON data structure in the order it received them.  It is
33 | implemented as a twisted.protocols.basic.LineReceiver and expects the first
34 | line to contain the HTTP Request Line, and the following lines to contain the
35 | HTTP Request Headers. Once all of the Request Headers have been received the
36 | response is written and the transport closed. If an invalid header is received,
37 | it is not included in the response. Malformed or invalid requests will time
38 | out after 12 hours.
39 | 
40 | # Helper description
41 | 
42 | An HTTP Request Echo service.
43 | 
44 | # Expected output
45 | 
46 | The HTTP Request and Request Headers as seen by the test helper.
47 | 
48 | ## Required output data
49 | 
50 | The HTTP Request line and HTTP Request Headers.
51 | 
52 | ## Semantics
53 | 
54 | The returned JSON dictionary contains the keys 'request_headers' and
55 | 'request_line'. The value for 'request_line' is a string, and the value for
56 | 'request_headers' is an ordered list of lists, where the sublists follow the
57 | format ['Header-Name', 'Header-Value'].
58 | 
59 | e.g.
60 | 
61 |     {
62 |       'request_headers':
63 |         [['User-Agent', 'IE6'], ['Content-Length', 200]],
64 |       'request_line':
65 |         'GET / HTTP/1.1'
66 |     }
67 | 
68 | ## Possible conclusions
69 | 
70 | The HTTP Return JSON Headers test helper can be used by ooni-probe tests to
71 | determine if the HTTP Headers or HTTP Request Line have been modified in
72 | transit. 
73 | 


--------------------------------------------------------------------------------
/backends/th-003-dns.md:
--------------------------------------------------------------------------------
 1 | # Specification version number
 2 | 
 3 | 2013-09-26-000
 4 | 
 5 | * status: _obsolete_
 6 | 
 7 | # Specification name
 8 | 
 9 | DNS Test Helper
10 | 
11 | # Helper preconditions
12 | 
13 | * An Internet connection.
14 | * An Internet-Reachable UDP Port. (Typically 53)
15 | 
16 | # Expected impact
17 | 
18 | Provides a public recursive DNS resolver with all of the known drawbacks to
19 | doing so.
20 | 
21 | # Expected inputs
22 | 
23 | Any valid DNS Queries
24 | 
25 | ## Required input data
26 | 
27 | A valid DNS Query
28 | 
29 | ## Semantics
30 | 
31 | The implementation depends on twisted.names.server.DNSFactory and forwards
32 | all requests to an upstream resolver. The default resolver is Google public
33 | DNS (8.8.8.8).
34 | 
35 | # Helper description
36 | 
37 | An Open Recursive DNS Resolver.
38 | 
39 | # Expected output
40 | 
41 | A Response to a DNS Query.
42 | 
43 | ## Required output data
44 | 
45 | A Response to a DNS Query.
46 | 
47 | ## Possible conclusions
48 | 
49 | An ooni-probe client can use the DNS Test Helper as a control resolver in
50 | tests that determine if an experiment resolver is tampering with DNS
51 | responses.
52 | 
53 | ## Known Limitations
54 | 
55 | Running an open public recursive resolver is not recommended. Miscreants
56 | scan the Internet looking for recursive resolvers and use them in traffic
57 | reflection attacks to DoS victims.
58 | 


--------------------------------------------------------------------------------
/backends/th-004-raw-tcp-echo.md:
--------------------------------------------------------------------------------
 1 | # Specification version number
 2 | 
 3 | 2014-12-15-000
 4 | 
 5 | * status: _obsolete_
 6 | 
 7 | # Specification name
 8 | 
 9 | Raw TCP Echo Test Helper
10 | 
11 | # Helper description
12 | 
13 | The Raw TCPEcho Test Helper listens on a TCP port for incoming connections.
14 | When a connection is initiated, it accepts the connection, reads a dummy HTTP
15 | request and sends an HTTP reply containing an encoding of the IP packets that
16 | made up the TCP 3 way handshake [[Wikipedia-3WHS][]] which can be compared
17 | against the packets sent by an ooni-probe meter.
18 | 
19 | # Helper preconditions
20 | 
21 | * An Internet connection
22 | * An Internet-Reachable TCP Port (preferably port 80)
23 | * No known middleboxes rewriting IP or TCP headers in unexpected ways between
24 |   the helper and the transit ISP
25 | 
26 | # Expected impact
27 | 
28 | Ability to help an ooni-probe client determine if the IP or TCP headers have
29 | been modified in transit, possibly indicating that the connection is being
30 | transparently proxied.
31 | 
32 | # Expected inputs
33 | 
34 |  * An HTTP 1.1 [[RFC2616][]] request that should not include any "dubious" Host:
35 |    header or keywords that might attract special treatment
36 | 
37 | By emulating an HTTP session, a protocol known to censorship systems and
38 | protocol-enhancing proxies, it will increase the chances of seeing interference
39 | occurring compared to using another protocol that may simply be passed through.
40 | 
41 | # Expected output
42 | 
43 |  * An HTTP 1.1 [[RFC2616][]] reply that includes as its body an encoding of the
44 |    packets that made up the TCP 3 way handshake [[Wikipedia-3WHS][]] from the
45 |    vantage point of the helper
46 | 
47 | The encoding chosen could be simply a binary PCAP trace, a base64 encoded PCAP
48 | trace or even a JSON encoding of a dissector output as long as the dissector is
49 | sufficiently verbose to maximise the usefulness of the helper.
50 | 
51 | MIME types like text/html should be avoided as these have a stronger potential
52 | for being rewritten on the wire.
53 | 
54 | # Possible conclusions
55 | 
56 | Possible conclusions that could be drawn from tests using this helper are:
57 | 
58 |  * The presence of a transparent proxy
59 |  * The presence of network address translation
60 |  * The bleaching of bits in the IP and TCP header by badly implemented middleboxes
61 | 
62 | # Notes
63 | 
64 | An implementation of this test helper is currently being worked on by Iain R.
65 | Learmonth <<irl@fsfe.org>> using Scapy.
66 | 
67 | [RFC2616]: http://tools.ietf.org/html/rfc2616
68 | [Wikipedia-3WHS]: http://en.wikipedia.org/wiki/Transmission_Control_Protocol#Connection_establishment
69 | 


--------------------------------------------------------------------------------
/backends/th-005-raw-udp-echo.md:
--------------------------------------------------------------------------------
 1 | # Specification version number
 2 | 
 3 | 2014-12-15-000
 4 | 
 5 | * status: _obsolete_
 6 | 
 7 | # Specification name
 8 | 
 9 | Raw UDP Echo Test Helper
10 | 
11 | # Helper description
12 | 
13 | The Raw UDP Echo Test Helper listens on a UDP port for incoming packets. When a
14 | packet is received, it sends a UDP packet in reply with the original packet's
15 | IP and UDP header and payload as the reply's payload.
16 | 
17 | # Helper preconditions
18 | 
19 |  * An Internet connection
20 |  * An Internet-Reachable UDP Port
21 |  * No known middleboxes rewriting UDP headers in unexpected ways between
22 |    the helper and the transit ISP
23 | 
24 | # Expected impact
25 | 
26 | Ability to help an ooni-probe client determine if the UDP header is being
27 | rewritten and if packets are being truncated when UDP-lite [[RFC3828][]] is
28 | used with a shorter checksum coverage than the full length of the packet using
29 | the UDP protocol number in the IP header.
30 | 
31 | # Expected inputs
32 | 
33 |  * A UDP packet addressed to the helper
34 |  * The destination port should not be one that might attract special treatment
35 |  * The payload should not contain keywords that might attract special treatment
36 | 
37 | # Expected output
38 | 
39 |  * A UDP packet with the original incoming packet as its payload addressed to
40 |    return to the ooni-probe meter
41 | 
42 | # Possible conclusions
43 | 
44 | Possible conclusions that could be drawn from tests using this helper are:
45 | 
46 |  * Middleboxes are truncating UDP-lite packets when the UDP protocol number is
47 |    used
48 |  * Middleboxes are discarding UDP-lite packets when the checksum does not match
49 |    for the full length of the packet
50 |  * Source and destination ports are being rewritten
51 | 
52 | # Security considerations
53 | 
54 | In order to prevent the possibility of this helper being used to set up a
55 | "loop" where a forged source address causes packets to be sent to another
56 | service that replies to arbitrary UDP packets, such as UDP echo, the first byte
57 | of the payload in the request must have a zero value. Replies will never have a
58 | zero value as this first byte contains the IP protocol version number from the
59 | IP header.
60 | 
61 | The possibility of this helper being used for an amplification attack was
62 | considered, but as the amplification factor is limited to the size of an IP and
63 | UDP header, it was not deemed that mitigation for this was necessary.
64 | 
65 | # Notes
66 | 
67 | An implementation of this test helper is currently being worked on by Iain R.
68 | Learmonth <<irl@fsfe.org>> using Scapy.
69 | 
70 | [RFC3828]: http://tools.ietf.org/html/rfc3828
71 | 


--------------------------------------------------------------------------------
/backends/th-006-reverse-traceroute.md:
--------------------------------------------------------------------------------
 1 | # Specification version number
 2 | 
 3 | 2014-12-15-000
 4 | 
 5 | * status: _obsolete_
 6 | 
 7 | # Specification name
 8 | 
 9 | Reverse Traceroute Test Helper
10 | 
11 | # Helper description
12 | 
13 | The reverse traceroute test helper listens for incoming connections either on
14 | a TCP socket or via a web server. When a connection is initiated, it performs
15 | a reverse traceroute to the connection's source IP address and returns the
16 | results of the traceroute via the same connection.
17 | 
18 | # Helper preconditions
19 | 
20 |  * An Internet connection
21 |  * An Internet-Reachable TCP Port
22 |  * No known middleboxes rewriting packet payloads in unexpected ways between
23 |    the helper and the transit ISP
24 | 
25 | # Expected impact
26 | 
27 | Through cross-referencing results, the ability to determine where on the
28 | network path network interference exists which could indicate whether or not
29 | the interference is localised to an access ISP or being conducted on a national
30 | level.
31 | 
32 | # Expected inputs
33 | 
34 |  * The initiation of a connection.
35 | 
36 | # Expected output
37 | 
38 |  * The results of a traceroute giving the IP address of each hop and, if
39 |    available, the ping times to each hop.
40 | 
41 | The encoding chosen could be JSON, CSV, or another format. It should be
42 | possible to convert between this format and the format used by tests that
43 | perform a forward traceroute in ooni-probe.
44 | 
45 | # Possible conclusions
46 | 
47 | Through cross-referencing of results, it should be possible to determine in
48 | which AS the network interference is occurring.
49 | 
50 | # Notes
51 | 
52 | An implementation of this test helper is currently being worked on by Iain R.
53 | Learmonth <<irl@fsfe.org>> using Scapy.
54 | 
55 | 


--------------------------------------------------------------------------------
/data-formats/README.md:
--------------------------------------------------------------------------------
  1 | # OONI Data Formats
  2 | 
  3 | | Authors    | Arturo Filastò et al. |
  4 | |------------|-----------------------|
  5 | | Version    | 0.2.0                 |
  6 | | Maintainer | Simone Basso          |
  7 | 
  8 | ## Overview
  9 | 
 10 | The output of OONI _experiments_ (also known as _nettests_ or simply _tests_)
 11 | consists of a series of JSON documents separated by newline characters, also known
 12 | as [JSONL](http://jsonlines.org/). Every JSON document within the JSONL MUST be
 13 | a JSON object with a specific toplevel structure, also referred to as the _base data
 14 | format_. Such data format provides for a place where experiments and _test
 15 | templates_ could write their own keys. (A _test template_ is a routine that performs
 16 | functionality common across several OONI experiments, e.g., fetching a web page
 17 | using HTTP). Test templates have their own data format. Experiments have their
 18 | own data format. Thus, the output of any experiment consists of the base data
 19 | format, plus the data format of zero or more test templates, plus zero
 20 | or more fields generated by the experiment itself. That is:
 21 | 
 22 | ```JavaScript
 23 | {
 24 |     "data_format_version": "0.2.0",
 25 |     "test_keys": {}
 26 | }
 27 | ```
 28 | 
 29 | Of course, experiments MUST NOT use `test_keys` that conflict with the test
 30 | keys reserved by the test templates. However, keys starting with `x_` are
 31 | always permitted anywhere. They are experimental and should not be
 32 | relied upon. As a general rule, data consumers MUST be prepared for any
 33 | field being `null` or missing; data producers SHOULD NOT omit fields (or
 34 | emit `null`s) unless this has been explicitly documented in the field description.
 35 | 
 36 | ## Data format version
 37 | 
 38 | The current `data_format_version` is `0.2.0`. This applies only to the keys
 39 | in the external envelope. Since 2020-04-06, the `extensions` top-level key describes
 40 | the data formats contained inside the `test_keys` (see below).
 41 | 
 42 | Between November 2019 and April 2020, experimental versions of OONI probe had
 43 | version numbers ranging from `0.2.1` to `0.4.0`. Since 2020-04-06, the version
 44 | is back again to `0.2.0`. Because such larger version numbers were used by
 45 | experimental versions of OONI, the next major data format version will be `0.5.0`.
 46 | 
 47 | ## Example
 48 | 
 49 | The following is a valid JSON that was edited for brevity.
 50 | 
 51 | ```JSON
 52 | {
 53 |   "annotations": {
 54 |     "platform": "macos"
 55 |   },
 56 |   "data_format_version": "0.2.0",
 57 |   "extensions": {
 58 |     "dnst": 0,
 59 |     "httpt": 0,
 60 |     "tcpconnect": 0
 61 |   },
 62 |   "input": null,
 63 |   "measurement_start_time": "2020-01-10 17:25:19",
 64 |   "probe_asn": "AS30722",
 65 |   "probe_cc": "IT",
 66 |   "probe_ip": "127.0.0.1",
 67 |   "report_id": "20200110T172519Z_AS30722_5UdG13d6rEfOVCTHEdMjuXGah8vF6dpShA0jditnrHCmH10o1K",
 68 |   "resolver_asn": "AS15169",
 69 |   "resolver_ip": "172.217.34.2",
 70 |   "resolver_network_name": "Google LLC",
 71 |   "software_name": "miniooni",
 72 |   "software_version": "0.1.0-dev",
 73 |   "test_keys": {
 74 |     "agent": "redirect",
 75 |     "queries": [
 76 |       {
 77 |         "answers": [
 78 |           {
 79 |             "answer_type": "A",
 80 |             "ipv4": "149.154.167.99",
 81 |             "ttl": null
 82 |           }
 83 |         ],
 84 |         "engine": "system",
 85 |         "failure": null,
 86 |         "hostname": "web.telegram.org",
 87 |         "query_type": "A",
 88 |         "resolver_hostname": null,
 89 |         "resolver_port": null,
 90 |         "resolver_address": ""
 91 |       }
 92 |     ],
 93 |     "requests": [
 94 |       {
 95 |         "failure": null,
 96 |         "request": {
 97 |           "body": "",
 98 |           "body_is_truncated": false,
 99 |           "headers_list": [[
100 |               "Host", "149.154.171.5"
101 |             ], [
102 |               "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
103 |             ], [
104 |               "Content-Length", "0"
105 |             ], [
106 |               "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
107 |             ], [
108 |               "Accept-Language", "en-US;q=0.8,en;q=0.5"
109 |             ]
110 |           ],
111 |           "headers": {
112 |             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
113 |             "Accept-Language": "en-US;q=0.8,en;q=0.5",
114 |             "Content-Length": "0",
115 |             "Host": "149.154.171.5",
116 |             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
117 |           },
118 |           "method": "POST",
119 |           "tor": {
120 |             "exit_ip": null,
121 |             "exit_name": null,
122 |             "is_tor": false
123 |           },
124 |           "url": "http://149.154.171.5/"
125 |         },
126 |         "response": {
127 |           "body": "<html>\r\n<head><title>501 Not Implemented</title></head>\r\n<body bgcolor=\"white\">\r\n<center><h1>501 Not Implemented</h1></center>\r\n<hr><center>nginx/0.3.33</center>\r\n</body>\r\n</html>\r\n",
128 |           "body_is_truncated": false,
129 |           "code": 501,
130 |           "headers_list": [[
131 |               "Content-Length", "181"
132 |             ], [
133 |               "Server", "nginx/0.3.33"
134 |             ], [
135 |               "Date", "Fri, 10 Jan 2020 17:25:20 GMT"
136 |             ], [
137 |               "Content-Type", "text/html"
138 |             ]
139 |           ],
140 |           "headers": {
141 |             "Content-Length": "181",
142 |             "Content-Type": "text/html",
143 |             "Date": "Fri, 10 Jan 2020 17:25:20 GMT",
144 |             "Server": "nginx/0.3.33"
145 |           }
146 |         }
147 |       }
148 |     ],
149 |     "tcp_connect": [
150 |       {
151 |         "ip": "149.154.171.5",
152 |         "port": 80,
153 |         "status": {
154 |           "failure": null,
155 |           "success": true
156 |         }
157 |       }
158 |     ],
159 |     "telegram_http_blocking": false,
160 |     "telegram_tcp_blocking": false,
161 |     "telegram_web_failure": null,
162 |     "telegram_web_status": "ok"
163 |   },
164 |   "test_name": "telegram",
165 |   "test_runtime": 4.426603178,
166 |   "test_start_time": "2020-01-10 17:25:19",
167 |   "test_version": "0.0.4"
168 | }
169 | ```
170 | 
171 | In this example:
172 | 
173 | - all toplevel keys belong to the base data format.
174 | 
175 | - the `agent` and `requests` keys within the `test_keys`
176 | belong to the HTTP data format, which is declared as
177 | `httpt` in the `extensions` map.
178 | 
179 | - the `queries` key within the `test_keys` belongs to
180 | the DNS data format, which is declared as `dnst` in the
181 | `extensions` map.
182 | 
183 | - the `tcp_connect` key within the `test_keys` belongs
184 | to the TCPConnect data format, which is declared as
185 | `tcpconnect` in the `extensions` map.
186 | 
187 | - all the other keys within `test_keys` are generated
188 | by the `telegram` experiment.
189 | 
190 | ## Index
191 | 
192 | This directory contains the specification of
193 | [the base data format](df-000-base.md) as well as
194 | of the following extensions:
195 | 
196 | - [the HTTP data format](df-001-httpt.md)
197 | - [the DNS data format](df-002-dnst.md)
198 | - [the Scapy data format](df-003-scapy.md)
199 | - [the TCPTest data format](df-004-tcpt.md)
200 | - [the TCPConnect data format](df-005-tcpconnect.md)
201 | - [the TLSHandshake data format](df-006-tlshandshake.md)
202 | - [the available failure strings](df-007-errors.md)
203 | - [the network events data format](df-008-netevents.md)
204 | - [the tunnel data format](df-009-tunnel.md)
205 | 
206 | See the [nettests](../nettests) directory for the experiments' specs.
207 | 
208 | ## History
209 | 
210 | - `0.1.0` [2013-02-01]: original YAML format. New code MUST NOT use that.
211 | 
212 | - `0.2.0` [2016-01-27]: the new JSON format. OONI Probe CLI v2.x and OONI
213 | Probe Mobile when using Measurement Kit as the measurement engine.
214 | 
215 | Between `2019-11-11` and `2020-04-06`, experimental versions of OONI have
216 | used the following versions `0.2.1`, `0.3.0`, `0.3.1`, `0.3.2`, `0.3.3`,
217 | and `0.4.0`. Since `2020-04-06`, `0.2.0` is again used by experimental and
218 | stable versions of OONI Probe.
219 | 


--------------------------------------------------------------------------------
/data-formats/df-004-tcpt.md:
--------------------------------------------------------------------------------
 1 | # TCPTest Data Format
 2 | 
 3 | This document describes the keys within `test_keys` that all experiments
 4 | sending and receiving data using the TCP template SHOULD populate, possibly
 5 | using directly the specific template code. See this directory's
 6 | [README](README.md) for the basic concepts.
 7 | 
 8 | ## Specification
 9 | 
10 | ```JSON
11 | {
12 |     "received": [],
13 |     "sent": []
14 | }
15 | ```
16 | 
17 | - `received` (`[]MaybeBinaryData`): list of data received. See the
18 | definition of `MaybeBinaryData` in `df-001-httpt.md`, which in
19 | the common case boils down to a UTF-8 string.
20 | 
21 | - `sent`: like `received` but contains sent data.
22 | 
23 | ## Example
24 | 
25 | In the following example we've omitted all the keys that are
26 | not relevant to the TCP data format:
27 | 
28 | ```JSON
29 | {
30 |     "test_keys": {
31 |         "received": [
32 |             "53LC / HTTP/1.1\n\r",
33 |             "",
34 |             "dNnLP MHIdC s5vst k7Ir2\n\r",
35 |             "yWoBwUKUnlMfZQ1BDFIHJsLAU9PNKUVOzOJ1s7BdXKlhAnALegIOkvEsq8QCHNKqFoMk5ndpSa3bE99hIVXZSK7hCGa6wk770C9WJoL7VLYDeGRYWEhoYF2eg8PERFK4CGYXuuCLkr0ScT3esnj66ypgzpuP85PpCSERP5qc0DNYzHF4edM9RcDxttfMU0X5HyQ0EzKCMX4dcKlB6DLianESEKFKE3VwRht2cwUdLs6IXG5fsUBLEiJUQEHzFENpr40dPvcnk1KoTc4UZr5EP9JlNJ9f7fx6Ps6m2QzxyXkVT8UjPYbx2Rk6EO27nfd21iKtzZyZzUhyHxVQtLS58hzeQumCwAMdOi5FmwiDG6vFS1THTODJdwovw7V0CsaXvFwkJmBagWVvRR3zWQ9o509BnK9bxvBIo5cgFdyBlVHSH3Bbq0kXyarVAoQjaOo0C8Tb8lr5Ug7FFelGnBmdKNmNQj1QmmiobrcUMY22JKdxp81Z0R1AoyjLjeJQ7NNlhpbM6REiHladSmVmgBPChjjFQJn2TGOmSAIQJAvnsREpdMyuRemTA5Vb0QMIUmEVvpIKV8HOhBaniz389ftxglIizaOF3pacUIBycUwLermpolNatVn6BbDSCNJGCwil8NVUBtfKQqTlEgQk1zo3LNfryrKEd5M4PBdOzqIFHb0zhsY8NsSy7geXOZfMnNRNfu0dsMDchiLYHzQD0qPg2heEsJ3w8usyH462eqUcNF5qNOt47tC53rnbChT8Tjktr55LrJQhvKg8QRqWg2HuTnM4eMxSjdF8iCUzxEhDHkoxah5v6iQPmRE7qCUxf2Jwyi404MLX0gGvoawukkrEiVlhcHrQo3yrnAqIRx7mYhp9izzmWw62e35xzpFD3rxhAlrLTBr3bJQPBXvMzkAY62UHt1pAQPCaDojEo1WrHKnb8TMsNUS8u5yYumvbuxsLSJFWIjkrqf2G6rm1aVo95jxx9Uvx665eJ9tWRAT6rD4A1QoXVg34m20ywW1n3voP / HTTP/1.1\n\r"
36 |         ],
37 |         "sent": [
38 |             "53LC / HTTP/1.1\n\r",
39 |             "GET / HTTP/g8z\n\r",
40 |             "dNnLP MHIdC s5vst k7Ir2\n\r",
41 |             "yWoBwUKUnlMfZQ1BDFIHJsLAU9PNKUVOzOJ1s7BdXKlhAnALegIOkvEsq8QCHNKqFoMk5ndpSa3bE99hIVXZSK7hCGa6wk770C9WJoL7VLYDeGRYWEhoYF2eg8PERFK4CGYXuuCLkr0ScT3esnj66ypgzpuP85PpCSERP5qc0DNYzHF4edM9RcDxttfMU0X5HyQ0EzKCMX4dcKlB6DLianESEKFKE3VwRht2cwUdLs6IXG5fsUBLEiJUQEHzFENpr40dPvcnk1KoTc4UZr5EP9JlNJ9f7fx6Ps6m2QzxyXkVT8UjPYbx2Rk6EO27nfd21iKtzZyZzUhyHxVQtLS58hzeQumCwAMdOi5FmwiDG6vFS1THTODJdwovw7V0CsaXvFwkJmBagWVvRR3zWQ9o509BnK9bxvBIo5cgFdyBlVHSH3Bbq0kXyarVAoQjaOo0C8Tb8lr5Ug7FFelGnBmdKNmNQj1QmmiobrcUMY22JKdxp81Z0R1AoyjLjeJQ7NNlhpbM6REiHladSmVmgBPChjjFQJn2TGOmSAIQJAvnsREpdMyuRemTA5Vb0QMIUmEVvpIKV8HOhBaniz389ftxglIizaOF3pacUIBycUwLermpolNatVn6BbDSCNJGCwil8NVUBtfKQqTlEgQk1zo3LNfryrKEd5M4PBdOzqIFHb0zhsY8NsSy7geXOZfMnNRNfu0dsMDchiLYHzQD0qPg2heEsJ3w8usyH462eqUcNF5qNOt47tC53rnbChT8Tjktr55LrJQhvKg8QRqWg2HuTnM4eMxSjdF8iCUzxEhDHkoxah5v6iQPmRE7qCUxf2Jwyi404MLX0gGvoawukkrEiVlhcHrQo3yrnAqIRx7mYhp9izzmWw62e35xzpFD3rxhAlrLTBr3bJQPBXvMzkAY62UHt1pAQPCaDojEo1WrHKnb8TMsNUS8u5yYumvbuxsLSJFWIjkrqf2G6rm1aVo95jxx9Uvx665eJ9tWRAT6rD4A1QoXVg34m20ywW1n3voP / HTTP/1.1\n\r"
42 |         ]
43 |     }
44 | }
45 | ```
46 | 


--------------------------------------------------------------------------------
/data-formats/df-005-tcpconnect.md:
--------------------------------------------------------------------------------
  1 | # TCPConnect Data Format
  2 | 
  3 | This document describes the keys within `test_keys` that all experiments
  4 | performing TCP connects SHOULD populate, possibly using directly the
  5 | specific template code. See this directory's [README](README.md) for the
  6 | basic concepts.
  7 | 
  8 | | Name       | `tcpconnect` |
  9 | |------------|--------------|
 10 | | Version    | 0            |
 11 | 
 12 | ## Specification
 13 | 
 14 | ```JSON
 15 | {
 16 |   "tcp_connect": []
 17 | }
 18 | ```
 19 | 
 20 | - `tcp_connect` (`[]TCPConnect`): list of TCPConnect objects. See below.
 21 | 
 22 | ## TCPConnect
 23 | 
 24 | ```JavaScript
 25 | {
 26 |     // fields currently used as of 2022-09-08
 27 |     "ip": "149.154.171.5",
 28 |     "port": 80,
 29 |     "status": {},
 30 |     "t0": 1.011,
 31 |     "t": 1.114,
 32 |     "tags": [],
 33 |     "transaction_id": 1,
 34 | 
 35 |     // deprecated or unused fields
 36 |     "conn_id": 141,
 37 |     "dial_id": 177171,
 38 | }
 39 | ```
 40 | 
 41 | - `conn_id` (`int`; optional; since 2020-01-11; deprecated): identifier of the connection. See
 42 | the discussion in `df-008-netevents.md`.
 43 | 
 44 | - `dial_id` (`int`; optional; since 2020-01-11; deprecated): identifier of a dialing
 45 | operation (i.e. name resolution followed by connect). See the
 46 | discussion in `df-002-dnst.md`.
 47 | 
 48 | - `ip` (`string`): IP address we're connecting to.
 49 | 
 50 | - `port` (`int`): port we're connecting to.
 51 | 
 52 | - `status` (`Status`): object describing the results.
 53 | 
 54 | - `t0` (`float`): number of seconds elapsed since `measurement_start_time`
 55 | measured in the moment in which we started the operation (`t - t0` gives you
 56 | the amount of time spent performing the operation);
 57 | 
 58 | - `t` (`float`): number of seconds elapsed since `measurement_start_time`
 59 | measured in the moment in which `failure` is determined (`t - t0` gives you
 60 | the amount of time spent performing the operation);
 61 | 
 62 | - `tags` (`[]string`; optional): list of tags for this event. This is useful to
 63 | understand what part of a complex measurement generated an event.
 64 | 
 65 | - `transaction_id` (`int`; optional; since 2020-01-11): the set of operations
 66 | to which this event belongs (typically an HTTP transaction or a DNS
 67 | round trip). A zero or missing value means we don't know the transaction
 68 | to which this event belongs.
 69 | 
 70 | ## Status
 71 | 
 72 | ```JavaScript
 73 | {
 74 |     "blocked": null, // only WebConnectivity, new nettests SHOULD NOT include it
 75 |     "failure": null,
 76 |     "success": true
 77 | }
 78 | ```
 79 | 
 80 | - `blocked` (`string`; nullable; deprecated; optional): field used only by Web
 81 | Connectivity to indicate whether this endpoint is blocked. New experiments
 82 | SHOULD NOT use this field and SHOULD instead use distinct keys to represent
 83 | network observations and the probe's analysis.
 84 | 
 85 | - `failure` (`string`; nullable): if there was an error, this field is
 86 | a string indicating the error, otherwise it MUST be `null`. Some older versions of OONI Probe
 87 | set this to `false` instead of `null`.
 88 | 
 89 | - `success` (`bool`): true if failure is `null`, false otherwise.
 90 | 
 91 | ## Example
 92 | 
 93 | In the following example we've omitted all the keys that are
 94 | not relevant to the TCPConnect data format:
 95 | 
 96 | ```JSON
 97 | {
 98 |   "ip": "93.184.216.34",
 99 |   "port": 443,
100 |   "status": {
101 |     "blocked": false,
102 |     "failure": null,
103 |     "success": true
104 |   },
105 |   "t0": 0.450831,
106 |   "t": 0.595157,
107 |   "transaction_id": 4
108 | }
109 | ```
110 | 


--------------------------------------------------------------------------------
/data-formats/df-007-errors.md:
--------------------------------------------------------------------------------
 1 | # Errors
 2 | 
 3 | This document describes the possible values of the `failure` key that
 4 | occurs in several data formats to indicate a failure. The type of
 5 | this key is `string; nullable`. See also this directory's
 6 | [README](README.md) for the basic concepts.
 7 | 
 8 | To indicate that an error is emitted by ooni/probe-legacy we will flag
 9 | the error using `(PL)`. To indicate that an error is emitted by Measurement
10 | Kit, we will flag it using `(MK)`. To indicate that an error is emitted by
11 | ooni/probe-engine, we will flag it using `(PE)`. When an error is emitted
12 | by more than one product we will write `(product, product)`. When an error
13 | is not flagged, it means that it's used by all the three products.
14 | 
15 | For clarity, we list the errors currently in use separately
16 | from the legacy errors. The following are the errors emitted by
17 | ooni/probe-engine and/or Measurement Kit.
18 | 
19 | 
20 | |            Error               |Flag(s)|Details|
21 | |:-------------------------------|:------|:------|
22 | |`null`                          |PL,MK,PE   |no error has occurred|
23 | |`"android_dns_cache_no_data"`   |  PE   |DNS lookup using `getaddrinfo` failed and we have no way of knowing why (see [ooni/probe#2029](https://github.com/ooni/probe/issues/2029#issuecomment-1140258729))|
24 | |`"connection_aborted"`          |MK, PE |`ECONNABORTED`|
25 | |`"connection_already_closed"`   |  PE   |I/O on socket interrupted by another thread closing the socket itself (you can generally ignore this error)|
26 | |`"connection_refused"`          |MK, PE |`ECONNREFUSED`|
27 | |`"connection_reset"`            |MK, PE |`ECONNRESET`|
28 | |`"dns_bogon_error"`             |  PE   |detected bogon in DNS response|
29 | |`"dns_no_answer"`               |  PE   |successful DNS response but no answer matching the query type (e.g., no `AAAA` record for domain)|
30 | |`"dns_non_recoverable_failure"` |  PE   |non-recoverable DNS lookup failure (mostly when using Windows' `getaddrinfo`)|
31 | |`"dns_nxdomain_error"`          |  PE   |`NXDOMAIN` Rcode in DNS response|
32 | |`"dns_reply_with_wrong_query_id"`| PE   |The DNS response ID doesn't match the query ID|
33 | |`"dns_refused_error"`           |  PE   |`Refused` RCode in DNS response|
34 | |`"dns_server_misbehaving"`      |  PE   |generic error indicating DNS failure without more specific reasons on the kind of failure|
35 | |`"dns_servfail_error"`          |  PE   |`Servfail` RCode in DNS response|
36 | |`"dns_temporary_failure"`       |  PE   |temporary DNS failure (mostly when using Windows' `getaddrinfo`)|
37 | |`"eof_error"`                   |MK, PE |unexpected EOF on connection|
38 | |`"http_invalid_redirect_location_host"` | PE | the HTTP redirect URL has a malformed or empty host component|
39 | |`"http_request_failed"`         |MK, PE |HTTP request did not return a successful response code|
40 | |`"host_unreachable"`            |MK, PE |`EHOSTUNREACH`|
41 | |`"generic_timeout_error"`       |PL,MK,PE |error returned when a timeout expires|
42 | |`"http_unexpected_redirect_url"`|  PE   |we expected a specific redirect URL and instead we saw either no redirect URL or a different redirect URL|
43 | |`"http_unexpected_status_code"` |  PE   |we did not expect to see this status code (e.g. we expected a redirection and saw something else)|
44 | |`"interrupted"`                 |  PE   |the user interrupted us by cancelling the context|
45 | |`"json_parse_error"`            |MK, PE |parsing of a JSON failed|
46 | |`"network_down"`                |MK, PE |`ENETDOWN`|
47 | |`"network_reset"`               |MK, PE |`ENETRESET`|
48 | |`"network_unreachable"`         |MK, PE |`ENETUNREACH`|
49 | |`"quic_incompatible_version"`   |  PE   |QUIC version negotiation failed|
50 | |`"ssl_failed_handshake"`        |  PE   |TLS/QUIC handshake failed for unknown reasons|
51 | |`"ssl_invalid_hostname"`        |MK, PE |certificate not valid for SNI|
52 | |`"ssl_unknown_authority"`       |  PE   |cannot find CA validating certificate|
53 | |`"ssl_invalid_certificate"`     |MK, PE |e.g. certificate expired|
54 | |`"unknown_failure ..."`         |PL,MK,PE |any other error|
55 | 
56 | 
57 | 
58 | The ooni/probe-legacy contains mostly the legacy errors, listed below.
59 | 
60 | 
61 | |              Error                   |Details|
62 | |:-------------------------------------|:------|
63 | |`"socks_error"`                       |Generic error in SOCKS code. <mark>Also an (MK) error</mark>|
64 | |`"address_family_not_supported_error"`|[socket.gaierror](https://docs.python.org/3.8/library/socket.html#socket.gaierror)|
65 | |`"connect_error"`                     |[t.i.e.ConnectError](https://twistedmatrix.com/documents/15.4.0/api/twisted.internet.error.ConnectError.html)|
66 | |`"connection_done"`                   |[t.i.e.ConnectionDone](https://twistedmatrix.com/documents/15.4.0/api/twisted.internet.error.ConnectionDone.html)|
67 | |`"connection_lost_error"`             |[t.i.e.ConnectionLost](https://twistedmatrix.com/documents/15.4.0/api/twisted.internet.error.ConnectionLost.html)|
68 | |`"connection_refused_error"`          |[t.i.e.ConnectionRefusedError](https://twistedmatrix.com/documents/15.4.0/api/twisted.internet.error.ConnectionRefusedError.html)|
69 | |`"deferred_timeout_error"`            |[t.i.d.TimeoutError](https://twistedmatrix.com/documents/15.4.0/api/twisted.internet.defer.TimeoutError.html)|
70 | |`"dns_lookup_error"`                  |[t.i.e.DNSLookupError](https://twistedmatrix.com/documents/15.4.0/api/twisted.internet.error.DNSLookupError.html)|
71 | |`"dns_name_error"`                    |[t.n.e.DNSNameError](https://twistedmatrix.com/documents/15.4.0/api/twisted.names.error.DNSNameError.html)|
72 | |`"dns_name_failure"`                  |[t.n.e.DNSServerError](https://twistedmatrix.com/documents/15.4.0/api/twisted.names.error.DNSServerError.html)|
73 | |`"response_never_received"`           |[t.w._newclient.ResponseNeverReceived](https://twistedmatrix.com/documents/15.4.0/api/twisted.web._newclient.ResponseNeverReceived.html)|
74 | |`"socks_address_not_supported"`       |[txsocksx.errors.AddressNotSupported](https://github.com/habnabit/txsocksx/blob/59ac4e088ea064ae9ee44ac371df3ed46ca3b92f/txsocksx/errors.py)|
75 | |`"socks_command_not_supported"`       |[txsocksx.errors.CommandNotSupported](https://github.com/habnabit/txsocksx/blob/59ac4e088ea064ae9ee44ac371df3ed46ca3b92f/txsocksx/errors.py)|
76 | |`"socks_connection_not_allowed"`      |[txsocksx.errors.ConnectionNotAllowed](https://github.com/habnabit/txsocksx/blob/59ac4e088ea064ae9ee44ac371df3ed46ca3b92f/txsocksx/errors.py)|
77 | |`"socks_connection_refused"`          |[txsocksx.errors.ConnectionRefused](https://github.com/habnabit/txsocksx/blob/59ac4e088ea064ae9ee44ac371df3ed46ca3b92f/txsocksx/errors.py)|
78 | |`"socks_host_unreachable"`            |[txsocksx.errors.HostUnreachable](https://github.com/habnabit/txsocksx/blob/59ac4e088ea064ae9ee44ac371df3ed46ca3b92f/txsocksx/errors.py)|
79 | |`"socks_network_unreachable"`         |[txsocksx.errors.NetworkUnreachable](https://github.com/habnabit/txsocksx/blob/59ac4e088ea064ae9ee44ac371df3ed46ca3b92f/txsocksx/errors.py)|
80 | |`"socks_server_failure"`              |[txsocksx.errors.ServerFailure](https://github.com/habnabit/txsocksx/blob/59ac4e088ea064ae9ee44ac371df3ed46ca3b92f/txsocksx/errors.py)|
81 | |`"socks_ttl_expired"`                 |[txsocksx.errors.TTLExpired](https://github.com/habnabit/txsocksx/blob/59ac4e088ea064ae9ee44ac371df3ed46ca3b92f/txsocksx/errors.py)|
82 | |`"task_timed_out"`                    |a task has timed out|
83 | |`"tcp_timed_out_error"`               |[t.i.e.TCPTimedOutError](https://twistedmatrix.com/documents/15.4.0/api/twisted.internet.error.TCPTimedOutError.html)|
84 | 


--------------------------------------------------------------------------------
/data-formats/df-008-netevents.md:
--------------------------------------------------------------------------------
  1 | # NetEvents Data Format
  2 | 
  3 | This document describes the keys within `test_keys` that experiments
  4 | MAY use to include network-level events. See this directory's
  5 | [README](README.md) for the basic concepts.
  6 | 
  7 | | Name       | `netevents` |
  8 | |------------|-------------|
  9 | | Version    | 0           |
 10 | 
 11 | ## Specification
 12 | 
 13 | ```JSON
 14 | {
 15 |     "network_events": []
 16 | }
 17 | ```
 18 | 
 19 | - `network_events` (`[]Event`): list of events.
 20 | 
 21 | ## Event
 22 | 
 23 | ```JavaScript
 24 | {
 25 |     // fields currently used as of 2022-09-08
 26 |     "address": "1.1.1.1:443",
 27 |     "failure": "connection_reset",
 28 |     "num_bytes": 4114,
 29 |     "operation": "read",
 30 |     "proto": "tcp",
 31 |     "t0": 1.001,
 32 |     "t": 1.174,
 33 |     "tags": [],
 34 |     "transaction_id": 1,
 35 | 
 36 |     // deprecated or unused fields
 37 |     "conn_id": 11,
 38 |     "dial_id": 4,
 39 | }
 40 | ```
 41 | 
 42 | - `address` (`string`; optional): address for `connect`.
 43 | 
 44 | - `conn_id` (`int`; optional; deprecated): identifier of this connection (see below). When
 45 | zero, it means we don't know the conn ID. SHOULD be omitted when zero.
 46 | 
 47 | - `dial_id` (`int`; optional; since 2020-01-11; deprecated): identifier of a dialing
 48 | operation (i.e. name resolution followed by connect). See the
 49 | discussion in `df-002-dnst.md`.
 50 | 
 51 | - `failure` (`string`; nullable): if there was an error, this field is
 52 | a string indicating the error, otherwise it MUST be `null`.
 53 | 
 54 | - `num_bytes` (`int`; optional): number of bytes transferred by
 55 | `read` or `write`.
 56 | 
 57 | - `operation` (`string`): one of `bytes_received_cumulative`, `connect`, `read`,
 58 | `read_from`, `write`, and `write_to`.
 59 | 
 60 | - `proto` (`string`; optional): protocol for `connect` (`tcp` or `udp`).
 61 | 
 62 | - `t0` (`float`): number of seconds elapsed since `measurement_start_time`
 63 | measured in the moment in which we started the operation (`t - t0` gives you
 64 | the amount of time spent performing the operation);
 65 | 
 66 | - `t` (`float`): number of seconds elapsed since `measurement_start_time`
 67 | measured at the end of the operation at hand. If there was an error, this is the
 68 | moment in which `failure` is determined; otherwise it's the moment marked by the successful
 69 | completion of the operation: as an example, consider a blocking call to `read`
 70 | or `write`. In any case, `t - t0` gives you the amount of time spent performing
 71 | the operation;
 72 | 
 73 | - `tags` (`[]string`): list of tags for this event. This is useful to
 74 | understand what part of a complex measurement generated an event.
 75 | 
 76 | - `transaction_id` (`int`; optional; since 2020-01-11): the set of operations
 77 | to which this event belongs (typically an HTTP transaction or a DNS
 78 | round trip). A zero or missing value means we don't know the transaction
 79 | to which this event belongs.
 80 | 
 81 | ## Connection ID and Life Cycle (obsolete and unused)
 82 | 
 83 | When a connection is created you see a `connect` event with a
 84 | specific `conn_id` and no failure. Subsequently you should see
 85 | one or more `read` or `write` with the same `conn_id`.
 86 | 
 87 | If you see another `connect` with the same `conn_id`, this means
 88 | that the implementation is reusing connection IDs, and you should
 89 | henceforth consider such ID as a new connection.
 90 | 
 91 | Note that this is perfectly normal. A probe implementation
 92 | MAY reuse the `conn_id` in the same measurement session.
 93 | 
 94 | ## Example
 95 | 
 96 | In the following example we've omitted all the keys that are
 97 | not relevant to the netevents data format:
 98 | 
 99 | ```JSON
100 | {
101 |   "address": "93.184.216.34:443",
102 |   "failure": null,
103 |   "num_bytes": 99,
104 |   "operation": "read",
105 |   "proto": "tcp",
106 |   "t0": 0.602109,
107 |   "t": 0.746866,
108 |   "transaction_id": 4
109 | }
110 | ```
111 | 


--------------------------------------------------------------------------------
/data-formats/df-009-tunnel.md:
--------------------------------------------------------------------------------
 1 | # Tunnel Data Format
 2 | 
 3 | This document describes the keys within `test_keys` that experiments
 4 | MAY use when they're using a tunnel, e.g., psiphon. See this directory's
 5 | [README](README.md) for the basic concepts.
 6 | 
 7 | | Name       | `tunnel` |
 8 | |------------|----------|
 9 | | Version    | 0        |
10 | 
11 | When an implementation includes `tunnel` in the `extensions` it merely
12 | means that the experiment COULD be using a `tunnel`. To detect whether a
13 | tunnel was used, you need to check the `test_keys.tunnel` field.
14 | 
15 | ## Specification
16 | 
17 | ```JSON
18 | {
19 |     "bootstrap_time": 6.1,
20 |     "failure": null,
21 |     "socksproxy": "127.0.0.1:9050",
22 |     "tunnel": "psiphon"
23 | }
24 | ```
25 | 
26 | - `bootstrap_time` (`float`; optional): number of seconds it took to bootstrap
27 | the tunnel. This field is omitted if there is no tunnel. (See also the
28 | description of `failure` below).
29 | 
30 | - `failure` (`string`; nullable): if there was an error, this field is
31 | a string indicating the error, otherwise it MUST be `null`. Note that this
32 | field is also defined by other specifications. When there is an error in
33 | bootstrapping the tunnel, `bootstrap_time` is present and set to zero, the
34 | `tunnel` field is present, and this value is not `null`. If an error instead
35 | happens after the tunnel bootstrap, the `bootstrap_time` is nonzero.
36 | 
37 | - `socksproxy` (`string`; optional): address of the SOCKS proxy being
38 | used. Omit or set to `null` if no SOCKS proxy is being used. The format
39 | to be used is `1.2.3.4:54321` for IPv4 and `[::1234]:54321` for IPv6.
40 | 
41 | - `tunnel` (`string`; optional): not provided if there is no tunnel, otherwise
42 | one of `"psiphon"`, `"tor"` or `"openvpn"` when there is a tunnel.
43 | 
44 | ## Example
45 | 
46 | In the following example we've omitted all the keys that are
47 | not relevant to the tunnel data format:
48 | 
49 | ```JSON
50 | {
51 |     "test_keys": {
52 |         "bootstrap_time": 6.1,
53 |         "failure": null,
54 |         "socksproxy": "127.0.0.1:9050",
55 |         "tunnel": "psiphon"
56 |     }
57 | }
58 | ```
59 | 


--------------------------------------------------------------------------------
/nettests/README.md:
--------------------------------------------------------------------------------
1 | # Nettests (aka experiments)
2 | 
3 | This directory contains the specification of the nettests we implement.
4 | 


--------------------------------------------------------------------------------
/nettests/ts-000-example.md:
--------------------------------------------------------------------------------
 1 | # Specification version number
 2 | 
 3 | 2013-01-19-000
 4 | 
 5 | # Specification name
 6 | 
 7 | Bob's example quality test spec!
 8 | 
 9 | # Test preconditions
10 | 
11 | Example: an internet connection
12 | Alternate example: An x.25 network connection
13 | 
14 | # Expected impact
15 | 
16 | Example: Ability to detect TCP MITM
17 | 
18 | # Expected inputs
19 | 
20 | ## Import document or import data format
21 | 
22 | ## Data specification version number
23 | 
24 | Question: Do we really need a version number for the data specification?
25 | 
26 | ## Semantics
27 | 
28 | For example:
29 | 
30 | one ip address port combination per line separated by colon (ex.
31 | 127.0.0.1:9050)
32 | 
33 | # Test description
34 | 
35 | Describe what operations are done on the input to produce the output.
36 | Implementation details that could potentially affect the end result should also
37 | be mentioned here.
38 | 
39 | # Expected output
40 | 
41 | ## Parent data format
42 | 
43 | This is the base data format(s) that this test will adhere to (it is
44 | implicit that it will follow df-000-base).
45 | 
46 | ## Required output data
47 | 
48 | This is data that should be part of the base dataformat without which the
49 | test cannot properly be interpreted.
50 | 
51 | ## Data specification version number
52 | 
53 | Question: Isn't this implicit in the test specification number, is there a reason
54 | why we should have two versions one for the data format and one for the
55 | test specification? Would changing the dataformat not imply changing the
56 | test version number?
57 | 
58 | ## Semantics
59 | 
60 | List the extra keys that will be part of the report that are not part of
61 | the parent data format. Be sure not to have keys that clash with the
62 | parent data format.
63 | 
64 | ## Possible conclusions
65 | 
66 | Based on the output data what conclusions can you draw?
67 | 
68 | ## Example output sample
69 | 
70 | ## Expected Post-processing efforts
71 | 
72 | Question: What exactly is meant by this? Is this meaning the possible
73 | difficulties that a person doing post-processing may encounter?
74 | 
75 | # Privacy considerations
76 | 
77 | There are a few!
78 | 
79 | # Packet capture considerations
80 | 
81 | We capture all packets on the interface foo for bar units of time.
82 | 
83 | # Other notes
84 | 
85 | Bikesh{r}ed!
86 | 
87 | 


--------------------------------------------------------------------------------
/nettests/ts-001-bridget.md:
--------------------------------------------------------------------------------
 1 | # Specification version number
 2 | 
 3 | 2013-01-19_000
 4 | 
 5 | * _status_: obsolete
 6 | 
 7 | # Specification name
 8 | 
 9 | bridgeT
10 | 
11 | Note: This test is deprecated
12 | 
13 | # Test preconditions
14 | 
15 |   * Tor installed.
16 |   * If testing pluggable transports, the host used for scanning the bridges
17 |     should have the pluggable transport program.
18 | 
19 | # Expected impact
20 | 
21 |   * Detect whether or not a Tor bridge is reachable from a specific network
22 |     vantage point on a network.
23 |   * Implementations of bridget should not give the location of Tor bridges to
24 |     adversaries, and thus should attempt to accurately categorize tests
25 |     according to such test's probability of providing the adversary
26 |     information or oracles pertaining to the bridge's location.
27 |   * Implementations should be automatable in a safe fashion, meaning that they
28 |     should be capable of dynamically determining as best as possible the
29 |     likelihood of the next iteration, ordered by probability of alerting
30 |     adversaries of the bridge location, of alerting the adversaries. Test
31 |     iterations should also be ceased if a test fails in a manner in which it
32 |     is highly unlikely that any further tests would be capable of contacting
33 |     the bridge. For example, if an ICMP-8 ping and a SYNACK cannot reach the
34 |     bridge's ORPORT, it should be clear that a full connection following the
35 |     Tor handshake protocol would likely be unsuccessful, and therefore
36 |     conducting a bridge reachability test which completes a Tor handshake with
37 |     the bridge should be skipped due to a high reachability information to
38 |     adversarial location discovery ratio.
39 |   * Basic active scanning tests which must be implemented: ICMP-8, TCP SYN,
40 |     TCP SYNACK, TLS HANDSHAKE, FULL TOR PROTOCOL CONNECTION.
41 | 
42 | # Expected inputs
43 | 
44 |   * Import document or import data format:
45 |     * A file with an IP[4/6]:PORT, one per line.
46 |       Example:
47 |             1.2.3.4:2323
48 |             [2006:2000::0098]:2323
49 |     * If the bridge has a pluggable transport enabled, it should be specified
50 |       before the bridge's IP:PORT. Example:
51 |             obfs2 66.66.66.66:443
52 | 
53 | # Expected output
54 | 
55 |   * Should output information in a format which is easily translatable for
56 |     storage in BridgeDB, and parseable for use in metrics.
57 | 
58 | # Privacy considerations
59 | 
60 |   * Bridge location should not be revealed to adversaries.
61 |   * If possible, the fact that a scan is running should be difficult to detect.
62 | 
63 | # Packet capture considerations
64 | 
65 |   * A monitoring interface is temporarily constructed, when permitted, in
66 |     order to listen for responses to packets sent in ICMP, TCP, and TLS based
67 |     tests.
68 |   * Any packet captures should not be written to disk, due to the scanning
69 |     host likely being located in the country of the adversarial party, which
70 |     is assumed to have the ability to obtain access to information stored on
71 |     the disk of the scanning host.
72 |   * Any packet capture taken on the scanning host should be deleted after the
73 |     scan test is completed.
74 | 


--------------------------------------------------------------------------------
/nettests/ts-002-dns-consistency.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 0.2.0
  4 | 
  5 | * _status_: obsolete
  6 | 
  7 | # Specification name
  8 | 
  9 | DNS Consistency
 10 | 
 11 | # Test preconditions
 12 | 
 13 |   * An internet connection
 14 |   * An unfiltered connection to a DNS resolver that is not performing censorship
 15 | 
 16 | For reporting to the backend to work, it must be possible for the probe to
 17 | establish a connection to the Tor network.
 18 | 
 19 | # Expected impact
 20 | 
 21 | Ability to detect if A DNS queries for a certain hostname are being tampered with.
 22 | 
 23 | # Expected inputs
 24 | 
 25 |   * A list of hostnames to be tested for censorship
 26 |   * A list of DNS resolvers to be tested for censorship
 27 |   * A DNS resolver that is not being tampered with (control_resolver)
 28 | 
 29 | The list of hostnames should be in a text file and separated by newline.
 30 | 
 31 | Example:
 32 | 
 33 |     one.example.com
 34 |     two.example.com
 35 |     three.example.com
 36 | 
 37 | The list of DNS resolvers to test for tampering should be in a text file as
 38 | dotted quads separated by a newline:
 39 | 
 40 | Example:
 41 | 
 42 |     1.1.1.1
 43 |     2.2.2.2
 44 |     3.3.3.3
 45 | 
 46 | # Test Description
 47 | 
 48 | For each hostname to be tested we do the following:
 49 | 
 50 | We perform an A DNS query (via UDP) to the control resolver. The answer to such
 51 | query is called the control answer.
 52 | 
 53 | For each DNS resolver to be tested we perform an A DNS query for the domain in
 54 | question. We then compare this answer (experiment answer) with the control
 55 | answer.
 56 | 
 57 | If the two have a common IPV4 address then the hostname is not considered to be
 58 | tampered with by the resolver in question (tampering: False)
 59 | 
 60 | If they do not have commonalities we take the first IPV4 address in the control
 61 | answer and the first IPV4 address in the experiment answer and do a reverse
 62 | lookup. If the two reverse lookups match (the PTR record points to the same
 63 | hostname), we take note of this (tampering: "reverse_match").
 64 | 
 65 | In any other case we mark the result as: tampering: True.
 66 | 
 67 | # Expected output
 68 | 
 69 | ## Parent Data format
 70 | 
 71 | df-002-dnst
 72 | 
 73 | ## Semantics
 74 | 
 75 | The following extra fields will be present in every measurement entry.
 76 | 
 77 | ```
 78 | {
 79 |     "successful": [
 80 |         "The list of addresses of the resolvers that provided a consistent"
 81 |         "answer to our query."
 82 |     ],
 83 |     "failures": [
 84 |         "The list of addresses that failed to resolve the query."
 85 |         "Note: in the case of NXDOMAIN these will turn up as failures."
 86 |     ],
 87 |     "inconsistent": [
 88 |         "The list of addresses that returned an inconsistent result"
 89 |     ],
 90 |     "errors": {
 91 |         "RESOLVER_IP": "error string of the failure"
 92 |     }
 93 | 
 94 | }
 95 | ```
 96 | 
 97 | ## Possible conclusions
 98 | 
 99 | That the DNS resolver in question has provided a false response to a DNS Query.
100 | 
101 | ## Expected post-processing efforts
102 | 
103 | ## Example output sample
104 | 
105 | ```
106 | {
107 |     "bucket_date": "2015-11-23",
108 |     "data_format_version": "0.2.0",
109 |     "id": "e6f6257f-e7c2-48fa-8345-b7ed055ab1d2",
110 |     "input": "198.175.124.185",
111 |     "options": [
112 |         "-f",
113 |         "citizenlab-urls-global.txt",
114 |         "-T",
115 |         "dns-server-jo.txt"
116 |     ],
117 |     "probe_asn": "AS8376",
118 |     "probe_cc": "JO",
119 |     "probe_ip": "127.0.0.1",
120 |     "report_filename": "2015-11-23/20151123T161428Z-JO-AS8376-dns_consistency-F1KI1WusW4c1T6OGyQDgHJjaSQ1bfpqV2G39bpmuHiLAJnse8R1F44vdRuTz6nO4-0.1.0-probe.json",
121 |     "report_id": "F1KI1WusW4c1T6OGyQDgHJjaSQ1bfpqV2G39bpmuHiLAJnse8R1F44vdRuTz6nO4",
122 |     "software_name": "ooniprobe",
123 |     "software_version": "1.3.1",
124 |     "test_helpers": {
125 |         "backend": "8.8.8.8:53"
126 |     },
127 |     "input_hashes": [
128 |         "0055f0881fba857d8b48123017d7aec83014e89f057e44b66107f657ec5e2eab"
129 |     ],
130 |     "probe_city": null,
131 |     "backend_version": "1.1.4",
132 |     "test_keys": {
133 |         "control_resolver": "8.8.8.8:53",
134 |         "errors": {
135 |             "212.118.0.1": "no_answer",
136 |             "212.118.0.2": "no_answer",
137 |             "212.38.128.3": "dns_lookup_error",
138 |             "217.144.6.6": "no_answer",
139 |             "8.8.8.8:53": "no_answer",
140 |             "80.90.160.135": "dns_lookup_error",
141 |             "80.90.160.172": "dns_lookup_error",
142 |             "81.28.112.2": "dns_lookup_error"
143 |         },
144 |         "failed": [
145 |             "80.90.160.172",
146 |             "8.8.8.8:53",
147 |             "212.118.0.2",
148 |             "80.90.160.135",
149 |             "212.38.128.3",
150 |             "212.118.0.1",
151 |             "217.144.6.6",
152 |             "81.28.112.2"
153 |         ],
154 |         "inconsistent": [
155 |         ],
156 |         "queries": [
157 |             {
158 |                 "answers": [],
159 |                 "failure": "no_answer",
160 |                 "hostname": "198.175.124.185",
161 |                 "query_type": "A",
162 |                 "resolver_hostname": "8.8.8.8",
163 |                 "resolver_port": 53
164 |             },
165 |             {
166 |                 "answers": [],
167 |                 "failure": "deferred_timeout_error",
168 |                 "hostname": "198.175.124.185",
169 |                 "query_type": "A",
170 |                 "resolver_hostname": "212.38.128.3",
171 |                 "resolver_port": 53
172 |             },
173 |             {
174 |                 "answers": [],
175 |                 "failure": "no_answer",
176 |                 "hostname": "198.175.124.185",
177 |                 "query_type": "A",
178 |                 "resolver_hostname": "217.144.6.6",
179 |                 "resolver_port": 53
180 |             },
181 |             {
182 |                 "answers": [],
183 |                 "failure": "deferred_timeout_error",
184 |                 "hostname": "198.175.124.185",
185 |                 "query_type": "A",
186 |                 "resolver_hostname": "81.28.112.2",
187 |                 "resolver_port": 53
188 |             },
189 |             {
190 |                 "answers": [],
191 |                 "failure": "deferred_timeout_error",
192 |                 "hostname": "198.175.124.185",
193 |                 "query_type": "A",
194 |                 "resolver_hostname": "80.90.160.135",
195 |                 "resolver_port": 53
196 |             },
197 |             {
198 |                 "answers": [],
199 |                 "failure": "deferred_timeout_error",
200 |                 "hostname": "198.175.124.185",
201 |                 "query_type": "A",
202 |                 "resolver_hostname": "80.90.160.172",
203 |                 "resolver_port": 53
204 |             },
205 |             {
206 |                 "answers": [],
207 |                 "failure": "no_answer",
208 |                 "hostname": "198.175.124.185",
209 |                 "query_type": "A",
210 |                 "resolver_hostname": "212.118.0.1",
211 |                 "resolver_port": 53
212 |             },
213 |             {
214 |                 "answers": [],
215 |                 "failure": "no_answer",
216 |                 "hostname": "198.175.124.185",
217 |                 "query_type": "A",
218 |                 "resolver_hostname": "212.118.0.2",
219 |                 "resolver_port": 53
220 |             }
221 |         ],
222 |         "start_time": 1448291668.0,
223 |         "successful": []
224 |     },
225 |     "test_name": "dns_consistency",
226 |     "test_runtime": 0.0837070942,
227 |     "test_start_time": "2015-11-23 16:14:28",
228 |     "test_version": "0.6"
229 | }
230 | ```
231 | 
232 | # Privacy considerations
233 | 
234 | This test does not inherently risk leaking user information.
235 | 
236 | # Packet capture considerations
237 | 
238 | We do not do any packet capturing, this test only requires to be able to create
239 | UDP sockets.
240 | 
241 | # Notes
242 | 
243 | Sites that do geolocation based load balancing via DNS will report a different
244 | set of IPv4 addresses depending on the source of the DNS request. For this
245 | reason we also do a reverse lookup to check to see if the domain pointers of
246 | the IP addresses match.
247 | This means of weeding out false positives, though, is also not that effective
248 | since in some circumstances also the PTR record will point to a different
249 | domain name.
250 | 


--------------------------------------------------------------------------------
/nettests/ts-004-http-host.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 0.2.0
  4 | 
  5 | * _status_: obsolete
  6 | 
  7 | # Specification name
  8 | 
  9 | HTTP Host test
 10 | 
 11 | # Test preconditions
 12 | 
 13 |   * An internet connection.
 14 |   * That no special treatment is given to the supplied oonib test helper.
 15 | 
 16 | For reporting to the backend to work, it must be possible for the probe to
 17 | establish a connection to the Tor network.
 18 | 
 19 | # Expected impact
 20 | 
 21 |   * Ability to determine that the transparent HTTP proxy is doing censorship
 22 |     based on the HTTP Host header field.
 23 | 
 24 |   * Ability to detect the presence of a Transparent HTTP Proxy
 25 | 
 26 |   * Ability to detect which logic is being used by the Transparent proxy to
 27 |     censor the target sites and if some circumvention strategies are effective.
 28 | 
 29 |   * (optional) if the blockpage is specified if the hostname under analysis is
 30 |     being blocked.
 31 | 
 32 | # Expected inputs
 33 | 
 34 |   * A list of hostnames to be tested
 35 | 
 36 |   * The IP address (or hostname) of an oonib HTTPReturnJSONHeadersHelper test
 37 |     helper running on port 80.
 38 | 
 39 |   * (optional) the content of the blockpage to compare against when processing
 40 |     the response.
 41 | 
 42 | ## Semantics
 43 | 
 44 | One per line a list of hostnames, for example:
 45 | 
 46 |     torproject.org
 47 |     ooni.nu
 48 | 
 49 | # Test description
 50 | 
 51 | For every given hostname we perform the following series of tests. Once every
 52 | test is completed we always perform a fixed set of operations to infer the
 53 | presence of a transparent HTTP proxy and/or censorship.
 54 | 
 55 | We take the response from our request and check to see if it starts with the
 56 | character '{', if it does not we consider that a transparent HTTP proxy is
 57 | present.
 58 | 
 59 | Otherwise we attempt to parse the response as a JSON string, if it does not parse
 60 | we consider that a transparent HTTP proxy is present.
 61 | 
 62 | If the JSON string does parse we look for the following dict keys:
 63 | 
 64 |   * 'request_headers'
 65 | 
 66 |   * 'request_line'
 67 | 
 68 |   * 'headers_dict'
 69 | 
 70 | If all of them are present we consider that no transparent HTTP proxy is
 71 | present.
 72 | 
 73 | If a transparent HTTP proxy is present and the user has specified the content
 74 | of the censorship blockpage we compare the response with the known blockpage
 75 | and check if they match. If they do match then the hostname is marked as
 76 | censored.
 77 | 
 78 | These operations are done once for every one of the following tests:
 79 | 
 80 | ## test_send_host_header
 81 | 
 82 | We connect to the backend test helper on port 80 and perform a HTTP GET request
 83 | with the Host header field set to the target hostname.
 84 | 
 85 | ## test_filtering_via_fuzzy_matching
 86 | 
 87 | The Host header field contains the hostname prefixed by 10 random characters
 88 | and postfixed by 10 random characters.
 89 | 
 90 | The purpose of this is to determine if censorship is being triggered by fuzzy
 91 | matching.
 92 | 
 93 | ## test_filtering_of_subdomain
 94 | 
 95 | The Host header field contains a random 10 character subdomain of the target
 96 | hostname (`ninechars1.example.com`).
 97 | 
 98 | The purpose of this is to determine if also subdomains are being censored.
 99 | 
100 | 
101 | ## test_filtering_add_tab_to_host
102 | 
103 | The Host header field contains the subdomain postfixed by the tab character
104 | `\t`.
105 | 
106 | The purpose of this is to determine if by appending a tab character the filter
107 | is being bypassed.
108 | 
109 | ## test_filtering_prepend_newline_to_method
110 | 
111 | The HTTP Request Line is prefixed with a newline character `\n`.
112 | 
113 | The purpose of this is to determine if this is a valid filter bypassing
114 | strategy.
115 | 
116 | XXX move this to a separate test as it does not have much to do with the HTTP
117 | Host field.
118 | 
119 | # Expected output
120 | 
121 | ## Parent data format
122 | 
123 | df-001-httpt
124 | 
125 | ## Semantics
126 | 
127 | 'filtering_via_fuzzy_matching': true|false|null
128 | 'filtering_prepend_newline_to_method': true|false|null
129 | 'filtering_add_tab_to_host': true|false|null
130 | 'filtering_of_subdomain': true|false|null
131 | 
132 |   If the site supplied as input can be reached by using the evasion technique
133 |   this is set to false.
134 | 
135 |   If the content of the blockpage is specified we make an evaluation of
136 |   censorship or not based on the response matching it or not.
137 | 
138 |   If the response contains the expected JSON dict returned from the oonib test
139 |   helper then we consider censorship to not be happening ('censorship': False).
140 | 
141 |   In all other cases 'censorship' is set to null.
142 | 
143 | 'transparent_http_proxy': true|false
144 | 
145 |   if we have detected the presence of a transparent HTTP proxy or not.
146 | 
147 | ## Possible conclusions
148 | 
149 | We can say that a certain site is blocked or not and looking at the result we
150 | can understand which censorship bypassing strategies have worked and therefore
151 | understand which censorship device the one being analyzed may be.
152 | ## Example output sample
153 | 
154 | ```
155 | {
156 |     "bucket_date": "2015-11-25",
157 |     "data_format_version": "0.2.0",
158 |     "id": "a59899ab-cd33-49db-8ca4-7bf5dd2d317d",
159 |     "input": null,
160 |     "options": [],
161 |     "probe_asn": "AS3269",
162 |     "probe_cc": "IT",
163 |     "probe_ip": "127.0.0.1",
164 |     "report_filename": "2015-11-25/20151125T214003Z-IT-AS3269-http_host-FKByhJTXzseUDZOSvQkPiZur0ji6csV5UCiyarEoCCUCiKicGTgXq06fcJVA6XfA-0.1.0-probe.json",
165 |     "report_id": "FKByhJTXzseUDZOSvQkPiZur0ji6csV5UCiyarEoCCUCiKicGTgXq06fcJVA6XfA",
166 |     "software_name": "ooniprobe",
167 |     "software_version": "1.3.2",
168 |     "test_helpers": {
169 |         "backend": "http://216.156.197.144:80"
170 |     },
171 |     "input_hashes": [],
172 |     "probe_city": null,
173 |     "backend_version": "1.1.4",
174 |     "test_keys": {
175 |         "agent": "agent",
176 |         "filtering_prepend_newline_to_method": false,
177 |         "requests": [
178 |             {
179 |                 "request": {
180 |                     "body": null,
181 |                     "headers": {
182 |                         "Host": null
183 |                     },
184 |                     "method": "GET",
185 |                     "tor": {
186 |                         "exit_ip": false,
187 |                         "exit_name": false,
188 |                         "is_tor": false
189 |                     },
190 |                     "url": "http://216.156.197.144:80"
191 |                 },
192 |                 "response": {
193 |                     "body": "{\"headers_dict\": {\"Connection\": [\"close\"], \"Host\": [\"None\"]}, \"request_line\": \"GET / HTTP/1.1\", \"request_headers\": [[\"Connection\", \"close\"], [\"Host\", \"None\"]]}",
194 |                     "code": 200,
195 |                     "headers": {}
196 |                 },
197 |                 "response_length": null
198 |             },
199 |             {
200 |                 "request": {
201 |                     "body": null,
202 |                     "headers": {
203 |                         "Host": null
204 |                     },
205 |                     "method": "\nGET",
206 |                     "tor": {
207 |                         "exit_ip": false,
208 |                         "exit_name": false,
209 |                         "is_tor": false
210 |                     },
211 |                     "url": "http://216.156.197.144:80"
212 |                 },
213 |                 "response": {
214 |                     "body": "{\"headers_dict\": {\"Connection\": [\"close\"], \"Host\": [\"None\"]}, \"request_line\": \"\\nGET / HTTP/1.1\", \"request_headers\": [[\"Connection\", \"close\"], [\"Host\", \"None\"]]}",
215 |                     "code": 200,
216 |                     "headers": {}
217 |                 },
218 |                 "response_length": null
219 |             }
220 |         ],
221 |         "send_host_header": false,
222 |         "socksproxy": null,
223 |         "start_time": 1448484003.0,
224 |         "transparent_http_proxy": false
225 |     },
226 |     "test_name": "http_host",
227 |     "test_runtime": 0.3475949764,
228 |     "test_start_time": "2015-11-25 21:40:03",
229 |     "test_version": "0.2.4"
230 | }
231 | ```
232 | 
233 | # Privacy considerations
234 | 
235 | If the user is behind a transparent HTTP proxy that sets the X-Forwarded-For
236 | header their IP address will end up being part of the final report.
237 | 


--------------------------------------------------------------------------------
/nettests/ts-005-dns-spoof.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 0.2.0
  4 | 
  5 | * _status_: obsolete
  6 | 
  7 | # Specification name
  8 | 
  9 | DNS Spoof Test
 10 | 
 11 | Note: this test is deprecated.
 12 | 
 13 | # Test preconditions
 14 | 
 15 |   * An Internet connection.
 16 | 
 17 |   * A DNS resolver to test against.
 18 | 
 19 |   * A Hostname to resolve and check for tampering.
 20 | 
 21 | # Expected impact
 22 | 
 23 |   * The ability to detect spoofed DNS responses.
 24 | 
 25 | # Expected inputs
 26 | 
 27 | This test expects the following arguments:
 28 | 
 29 |   * The address and port of the resolver to test.
 30 | 
 31 |   * The hostname to be resolved.
 32 | 
 33 | ## Semantics
 34 | 
 35 | The resolver to test is passed by argument (-r) with ADDRESS:PORT convention.
 36 | A known good backend resolver may also be passed by argument (-b) with the
 37 | same convention.  The default known good backend resolver is Google DNS
 38 | (8.8.8.8:53).
 39 | 
 40 | The hostname to test is passed by argument (-h) as a FQDN, e.g.
 41 | www.google.com
 42 | 
 43 | # Test Description
 44 | 
 45 | We perform an A DNS query (via UDP) to the control resolver. The answer to
 46 | such query is called the control answer.
 47 | 
 48 | We perform an A DNS query (via UDP) to the test resolver. We then compare
 49 | this answer (experiment answer) with the control answer.
 50 | 
 51 | If the DNS payload of the received packets matches identically, then spoofing
 52 | is considered to be occurring.
 53 | 
 54 | # Expected output
 55 | 
 56 | The output report will contain a boolean entry 'spoofing'.  If spoofing is
 57 | occurring, it shall be True. Otherwise, it is False.
 58 | 
 59 | ## Parent data format
 60 | 
 61 | df-003-scapyt
 62 | 
 63 | ## Required output data
 64 | 
 65 |   * Whether or not the DNS payload matches (spoofing).
 66 | 
 67 | ## Semantics
 68 | 
 69 | In addition to the data specified in the parent data format, the following
 70 | field(s) are added to the report:
 71 | 
 72 | Version 0.0.1:
 73 | 
 74 |     spoofing: true|false
 75 | 
 76 | ## Possible conclusions
 77 | 
 78 | Whether or not DNS spoofing is occurring for a particular FQDN.
 79 | 
 80 | ## Example output sample
 81 | 
 82 | ```
 83 | ###########################################
 84 | # OONI Probe Report for dns_spoof (0.0.1)
 85 | # Wed Sep 25 15:39:32 2013
 86 | ###########################################
 87 | ---
 88 | input_hashes: []
 89 | options: [-r, '10.211.0.10:53', -h, google.com]
 90 | probe_asn: AS2819
 91 | probe_cc: CZ
 92 | probe_ip: 127.0.0.1
 93 | software_name: ooniprobe
 94 | software_version: 1.0.0-rc3
 95 | start_time: 1380116372.573729
 96 | test_name: dns_spoof
 97 | test_version: 0.0.1
 98 | ...
 99 | ---
100 | answer_flags: [ipsrc]
101 | answered_packets:
102 | - - raw_packet: !!binary |
103 |       RbgA6OumAAAyEY4+CAgICH8AAAEANQA1ANSshgAAgYAAAQALAAAAAAZnb29nbGUDY29tAAABAAEG
104 |       Z29vZ2xlA2NvbQAAAQABAAAA4AAErcIs5QZnb29nbGUDY29tAAABAAEAAADgAAStwizkBmdvb2ds
105 |       ZQNjb20AAAEAAQAAAOAABK3CLOYGZ29vZ2xlA2NvbQAAAQABAAAA4AAErcIs6QZnb29nbGUDY29t
106 |       AAABAAEAAADgAAStwizoBmdvb2dsZQNjb20AAAEAAQAAAOAABK3CLOcGZ29vZ2xlA2NvbQAAAQAB
107 |       AAAA4AAErcIs4gZnb29nbGUDY29tAAABAAEAAADgAAStwizjBmdvb2dsZQNjb20AAAEAAQAAAOAA
108 |       BK3CLOAGZ29vZ2xlA2NvbQAAAQABAAAA4AAErcIs4QZnb29nbGUDY29tAAABAAEAAADgAAStwizu
109 |     summary: 'IP / UDP / DNS Ans "173.194.44.229" '
110 | - - raw_packet: !!binary |
111 |       RbgA6J0DAABAEdQUCtMACn8AAAEANQA1ANSxxAAAgYAAAQALAAAAAAZnb29nbGUDY29tAAABAAEG
112 |       Z29vZ2xlA2NvbQAAAQABAAAA3wAErcIs5wZnb29nbGUDY29tAAABAAEAAADfAAStwizoBmdvb2ds
113 |       ZQNjb20AAAEAAQAAAN8ABK3CLOkGZ29vZ2xlA2NvbQAAAQABAAAA3wAErcIs5gZnb29nbGUDY29t
114 |       AAABAAEAAADfAAStwizkBmdvb2dsZQNjb20AAAEAAQAAAN8ABK3CLOUGZ29vZ2xlA2NvbQAAAQAB
115 |       AAAA3wAErcIs7gZnb29nbGUDY29tAAABAAEAAADfAAStwizhBmdvb2dsZQNjb20AAAEAAQAAAN8A
116 |       BK3CLOAGZ29vZ2xlA2NvbQAAAQABAAAA3wAErcIs4wZnb29nbGUDY29tAAABAAEAAADfAAStwizi
117 |     summary: 'IP / UDP / DNS Ans "173.194.44.231" '
118 | input: null
119 | sent_packets:
120 | - - raw_packet: !!binary |
121 |       RQAAOAABAABAEeujfwAAAQgICAgANQA1ACRccgAAAQAAAQAAAAAAAAZnb29nbGUDY29tAAABAAE=
122 |     summary: 'IP / UDP / DNS Qry "google.com" '
123 | - - raw_packet: !!binary |
124 |       RQAAOAABAABAEfDWfwAAAQrTAAoANQA1ACRhpQAAAQAAAQAAAAAAAAZnb29nbGUDY29tAAABAAE=
125 |     summary: 'IP / UDP / DNS Qry "google.com" '
126 | spoofing: false
127 | test_a_lookup:
128 |   answered_packets:
129 |   - raw_packet: !!binary |
130 |       RbgA6J0DAABAEdQUCtMACn8AAAEANQA1ANSxxAAAgYAAAQALAAAAAAZnb29nbGUDY29tAAABAAEG
131 |       Z29vZ2xlA2NvbQAAAQABAAAA3wAErcIs5wZnb29nbGUDY29tAAABAAEAAADfAAStwizoBmdvb2ds
132 |       ZQNjb20AAAEAAQAAAN8ABK3CLOkGZ29vZ2xlA2NvbQAAAQABAAAA3wAErcIs5gZnb29nbGUDY29t
133 |       AAABAAEAAADfAAStwizkBmdvb2dsZQNjb20AAAEAAQAAAN8ABK3CLOUGZ29vZ2xlA2NvbQAAAQAB
134 |       AAAA3wAErcIs7gZnb29nbGUDY29tAAABAAEAAADfAAStwizhBmdvb2dsZQNjb20AAAEAAQAAAN8A
135 |       BK3CLOAGZ29vZ2xlA2NvbQAAAQABAAAA3wAErcIs4wZnb29nbGUDY29tAAABAAEAAADfAAStwizi
136 |     summary: 'IP / UDP / DNS Ans "173.194.44.231" '
137 | test_control_a_lookup:
138 |   answered_packets:
139 |   - raw_packet: !!binary |
140 |       RbgA6OumAAAyEY4+CAgICH8AAAEANQA1ANSshgAAgYAAAQALAAAAAAZnb29nbGUDY29tAAABAAEG
141 |       Z29vZ2xlA2NvbQAAAQABAAAA4AAErcIs5QZnb29nbGUDY29tAAABAAEAAADgAAStwizkBmdvb2ds
142 |       ZQNjb20AAAEAAQAAAOAABK3CLOYGZ29vZ2xlA2NvbQAAAQABAAAA4AAErcIs6QZnb29nbGUDY29t
143 |       AAABAAEAAADgAAStwizoBmdvb2dsZQNjb20AAAEAAQAAAOAABK3CLOcGZ29vZ2xlA2NvbQAAAQAB
144 |       AAAA4AAErcIs4gZnb29nbGUDY29tAAABAAEAAADgAAStwizjBmdvb2dsZQNjb20AAAEAAQAAAOAA
145 |       BK3CLOAGZ29vZ2xlA2NvbQAAAQABAAAA4AAErcIs4QZnb29nbGUDY29tAAABAAEAAADgAAStwizu
146 |     summary: 'IP / UDP / DNS Ans "173.194.44.229" '
147 | ...
148 | ```
149 | 
150 | # Privacy considerations
151 | 
152 | As this test inherits from the Scapy template (see the Parent data format),
153 | the same warnings apply. In particular, ICMP error messages may contain the
154 | non anonymized user IP address.
155 | 


--------------------------------------------------------------------------------
/nettests/ts-006-header-field-manipulation.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 0.2.1
  4 | 
  5 | * _status_: current
  6 | 
  7 | # Specification name
  8 | 
  9 | Header Field Manipulation Test
 10 | 
 11 | # Test preconditions
 12 | 
 13 |   * An internet connection
 14 |   * An ooni-backend providing the http-return-json-headers test helper
 15 | 
 16 | For reporting to the backend to work, it must be possible for
 17 | the probe to establish a connection to the Tor network.
 18 | 
 19 | # Expected impact
 20 | 
 21 | The ability to determine if HTTP request headers are being manipulated,
 22 | inferring the existence of HTTP aware middleboxes.
 23 | 
 24 | # Expected inputs
 25 | 
 26 | The address of the http-return-json-headers test helper.
 27 | 
 28 | ## Semantics
 29 | 
 30 | A backend must be passed with option -b, e.g.
 31 | 
 32 |   ooniprobe manipulation/http_header_field_manipulation -b http://12.34.56.78
 33 | 
 34 | Optionally, a yaml file containing request headers may be supplied with option
 35 | -h.
 36 | 
 37 | # Test description
 38 | 
 39 | It performs HTTP requests with request headers that vary capitalization towards
 40 | a backend. If the headers reported by the server differ from the ones we sent,
 41 | then we have detected tampering.
 42 | 
 43 | If the optional headers yaml file is not supplied, the
 44 | headers will be constructed as so:
 45 | 
 46 | ```
 47 | {
 48 |   "User-Agent": [random.choice(net.userAgents)],
 49 |   "Accept":["text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"],
 50 |   "Accept-Encoding": ["gzip,deflate,sdch"],
 51 |   "Accept-Language": ["en-US,en;q=0.8"],
 52 |   "Accept-Charset": ["ISO-8859-1,utf-8;q=0.7,*;q=0.3"],
 53 |   "Host": [randomStr(15)+'.com']
 54 | }
 55 | ```
 56 | 
 57 | The Host header is a random string of 15 characters followed by `.com`.
 58 | The User-Agent header is randomly selected from one of the following:
 59 | 
 60 | ```
 61 | Mozilla/5.0 (iPhone; U; CPU iPhone OS 3 1 2 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Mobile/7D11
 62 | Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 (.NET CLR 3.5.30729))
 63 | Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.2) Gecko/20100115 Firefox/3.6
 64 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7
 65 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6
 66 | Mozilla/5.0 (Windows; U; Windows NT 6.1; de; rv:1.9.2) Gecko/20100115 Firefox/3.6
 67 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7
 68 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6
 69 | ```
 70 | 
 71 | A request is then made towards the backend.
 72 | 
 73 | We have 5 categories of tampering:
 74 | 
 75 | ## total
 76 | 
 77 | The response is not a json object and therefore we were not
 78 | able to reach the ooniprobe test backend
 79 | 
 80 | ## request_line_capitalization
 81 | 
 82 | HTTP Request line (e.g. GET / HTTP/1.1) does not match the capitalization we set.
 83 | 
 84 | ## header_field_number
 85 | 
 86 | The number of headers we sent does not match those the backend received.
 87 | 
 88 | ## header_name_capitalization
 89 | 
 90 | The header field names do not match those that we sent.
 91 | 
 92 | ## header_field_value
 93 | 
 94 | The header field value does not match with the one we transmitted.
 95 | 
 96 | # Expected output
 97 | 
 98 | ## Parent data format
 99 | 
100 | df-001-httpt
101 | 
102 | ## Required output data
103 | 
104 | In addition to the details provided in the parent data format we add to the
105 | report the key 'tampering', which is a dictionary containing the following keys
106 | that correspond to the categories of tampering we detect as well as the
107 | difference between the sent and received headers represented as a list of the
108 | headers not present in both the sent and received headers.
109 | 
110 | ## Semantics
111 | 
112 | ```
113 | {
114 |   "total":
115 |         "true|false if an invalid response was received from the "
116 |         "backend control server.",
117 | 
118 |   "request_line_capitalization":
119 |         "true|false if the capitalisation of the request method was normalised.",
120 | 
121 |   "header_name_capitalization":
122 |         "true|false if the header name capitalisation was normalised.",
123 | 
124 |   "header_field_value":
125 |         "true|false if the value of the headers received by the backend "
126 |         "does not match the value of the headers sent by the probe.",
127 | 
128 |   "header_field_number":
129 |         "true|false if the number of headers received is different than the "
130 |         "number of headers sent.",
131 | 
132 |   "header_name_diff": [
133 |         "The keys of the headers that differ in the request and the response."
134 |    ]
135 | }
136 | ```
137 | 
138 | ## Example output sample
139 | 
140 | 
141 | ```
142 | {
143 |     "bucket_date": "2015-11-29",
144 |     "data_format_version": "0.2.0",
145 |     "id": "3cd95f57-9930-48b9-90b0-67ed20f3adfe",
146 |     "input": null,
147 |     "options": [],
148 |     "probe_asn": "AS12876",
149 |     "probe_cc": "FR",
150 |     "probe_ip": "127.0.0.1",
151 |     "report_filename": "2015-11-29/20151129T230014Z-FR-AS12876-http_header_field_manipulation-no_report_id-0.1.0-probe.json",
152 |     "report_id": null,
153 |     "software_name": "ooniprobe",
154 |     "software_version": "1.3.1",
155 |     "backend_version": "1.1.4",
156 |     "input_hashes": [],
157 |     "probe_city": null,
158 |     "test_helpers": {
159 |         "backend": "http://173.205.4.16:80"
160 |     },
161 |     "test_keys": {
162 |         "agent": "agent",
163 |         "requests": [
164 |             {
165 |                 "request": {
166 |                     "body": null,
167 |                     "headers": {
168 |                         "ACCEpT-cHarSET": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
169 |                         "AccEPT-LAngUAge": "en-US,en;q=0.8",
170 |                         "HOSt": "kFR3mwImawc0ivv.com",
171 |                         "aCCePT": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
172 |                         "acCEpt-encOdING": "gzip,deflate,sdch",
173 |                         "useR-agEnT": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
174 |                     },
175 |                     "method": "get",
176 |                     "tor": {
177 |                         "exit_ip": false,
178 |                         "exit_name": false,
179 |                         "is_tor": false
180 |                     },
181 |                     "url": "http://173.205.4.16:80"
182 |                 },
183 |                 "response": {
184 |                     "body": "{\"headers_dict\": {\"AccEPT-LAngUAge\": [\"en-US,en;q=0.8\"], \"acCEpt-encOdING\": [\"gzip,deflate,sdch\"], \"HOSt\": [\"kFR3mwImawc0ivv.com\"], \"aCCePT\": [\"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\"], \"useR-agEnT\": [\"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6\"], \"ACCEpT-cHarSET\": [\"ISO-8859-1,utf-8;q=0.7,*;q=0.3\"], \"Connection\": [\"close\"]}, \"request_line\": \"get / HTTP/1.1\", \"request_headers\": [[\"Connection\", \"close\"], [\"AccEPT-LAngUAge\", \"en-US,en;q=0.8\"], [\"acCEpt-encOdING\", \"gzip,deflate,sdch\"], [\"aCCePT\", \"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\"], [\"useR-agEnT\", \"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6\"], [\"ACCEpT-cHarSET\", \"ISO-8859-1,utf-8;q=0.7,*;q=0.3\"], [\"HOSt\", \"kFR3mwImawc0ivv.com\"]]}",
185 |                     "code": 200,
186 |                     "headers": {}
187 |                 },
188 |                 "response_length": null
189 |             }
190 |         ],
191 |         "socksproxy": null,
192 |         "start_time": 1448834414.0,
193 |         "tampering": {
194 |             "header_field_name": false,
195 |             "header_field_number": false,
196 |             "header_field_value": false,
197 |             "header_name_capitalization": false,
198 |             "header_name_diff": [],
199 |             "request_line_capitalization": false,
200 |             "total": false
201 |         }
202 |     },
203 |     "test_name": "http_header_field_manipulation",
204 |     "test_runtime": 0.4789488316,
205 |     "test_start_time": "2015-11-29 23:00:14",
206 |     "test_version": "0.1.5"
207 | }
208 | ```
209 | 
210 | # Privacy considerations
211 | 
212 | If the user is behind a transparent HTTP proxy that sets the X-Forwarded-For
213 | header their IP address will end up being part of the final report.
214 | 


--------------------------------------------------------------------------------
/nettests/ts-007-http-invalid-request-line.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 1.0.0
  4 | 
  5 | * _status_: current
  6 | 
  7 | # Specification name
  8 | 
  9 | HTTP Invalid Request Line test
 10 | 
 11 | # Test preconditions
 12 | 
 13 |   * An internet connection.
 14 |   * An ooni-backend providing the tcp-echo test helper.
 15 | 
 16 | # Expected impact
 17 | 
 18 | The goal of this test is to do some very basic and not very
 19 | noisy fuzzing on the HTTP request line. We generate a series
 20 | of requests that are not valid HTTP requests. If the data we get back is not what we have sent, then tampering is occurring.
 21 | 
 22 | # Expected inputs
 23 | 
 24 | An ooni-backend tcp-echo test helper must be provided with
 25 | option -b.
 26 | 
 27 | Optionally, a port other than the default (80) may be
 28 | specified with option -p.
 29 | 
 30 | # Test description
 31 | 
 32 | The goal of this test is to test sending invalid HTTP
 33 | requests in order to trick middleboxes, such as caching
 34 | proxies like squid, into revealing themselves.
 35 | 
 36 | This is for example what squid will return when performing
 37 | such request:
 38 | 
 39 | ```
 40 | HTTP/1.0 400 Bad Request
 41 | Server: squid/2.6.STABLE21
 42 | Date: Sat, 23 Jul 2011 02:22:44 GMT
 43 | Content-Type: text/html
 44 | Content-Length: 1178
 45 | Expires: Sat, 23 Jul 2011 02:22:44 GMT
 46 | X-Squid-Error: ERR_INVALID_REQ 0
 47 | X-Cache: MISS from cache_server
 48 | X-Cache-Lookup: NONE from cache_server:3128
 49 | Via: 1.0 cache_server:3128 (squid/2.6.STABLE21)
 50 | Proxy-Connection: close
 51 | ```
 52 | 
 53 | The various types of invalid request that we do are:
 54 | 
 55 | ## test_random_invalid_method
 56 | 
 57 | Performs a request with a HTTP method that is a random 4
 58 | character string. e.g.
 59 | 
 60 | ```
 61 | request_line = randomSTR(4) + " / HTTP/1.1\n\r"
 62 | ```
 63 | 
 64 | ## test_random_invalid_field_count
 65 | 
 66 | Performs a request with an invalid number of random fields, e.g.
 67 | 
 68 | ```
 69 | request_line = ' '.join(randomStr(5) for x in range(4)) + '\n\r'
 70 | ```
 71 | 
 72 | ## test_random_big_request_method
 73 | 
 74 | Send a 1024 byte request method, e.g.
 75 | 
 76 | ```
 77 | request-line = randomStr(1024) + ' / HTTP/1.1\n\r'
 78 | ```
 79 | 
 80 | ## test_random_invalid_version_number
 81 | 
 82 | Performs a request with an invalid version, e.g.
 83 | 
 84 | ```
 85 | request_line = 'GET / HTTP/' + randomStr(3)
 86 | ```
 87 | 
 88 | Each of these tests may trigger some bugs in the HTTP parsers
 89 | of transparent HTTP proxies.
 90 | 
 91 | If the response is not what we have sent, then tampering is
 92 | occurring.
 93 | 
 94 | ## test_squid_cache_manager
 95 | 
 96 | Performs a request triggering a response from on-path squid
 97 | caching servers, particularly targeting the built-in
 98 | cache management functionality of the proxy.
 99 | 
100 | ```
101 | request_line = 'GET cache_object://localhost/ HTTP/1.0\n\r'
102 | ```
103 | 
104 | If a squid proxy receives such a request it will generally
105 | respond that the request has been denied, or depending
106 | on the configuration may respond with an index of cache
107 | management options, but without revealing sensitive
108 | information about the proxy itself.
109 | 
110 | # Expected output
111 | 
112 | ## Parent data format
113 | 
114 | df-004-tcpt
115 | 
116 | ## Required output data
117 | 
118 | In addition to the parent data format, the key 'tampering' is
119 | added to the report. If the sent data does not match the
120 | received data from the test helper, 'tampering' is set to
121 | True. Otherwise, it is set to False.
122 | 
123 | ## Semantics
124 | 
125 | 'tampering': true|false
126 | 
127 | ## Possible conclusions
128 | 
129 | It may reveal the existence of a middlebox such as a
130 | transparent HTTP proxy in between the ooni-probe and
131 | ooni-backend.
132 | 
133 | ## Example output sample
134 | 
135 | ```
136 | {
137 |     "bucket_date": "2015-12-01",
138 |     "data_format_version": "0.2.0",
139 |     "id": "f116e9a4-a648-48c8-b3b7-cca7bd84d069",
140 |     "input": null,
141 |     "options": [],
142 |     "probe_asn": "AS786",
143 |     "probe_cc": "GB",
144 |     "probe_ip": "127.0.0.1",
145 |     "report_filename": "2015-12-01/20151201T072512Z-GB-AS786-http_invalid_request_line-ZXjsOzoy3gl0f71b35et9kLZsTkc3W1yLYMHSLVLN6buJvnXveywSBM24C2YPtfv-0.1.0-probe.json",
146 |     "report_id": "ZXjsOzoy3gl0f71b35et9kLZsTkc3W1yLYMHSLVLN6buJvnXveywSBM24C2YPtfv",
147 |     "software_name": "ooniprobe",
148 |     "software_version": "1.3.1",
149 |     "test_helpers": {
150 |         "backend": "213.138.109.232"
151 |     },
152 |     "backend_version": "1.1.4",
153 |     "input_hashes": [],
154 |     "probe_city": null,
155 |     "test_keys": {
156 |         "received": [
157 |             "53LC / HTTP/1.1\n\r",
158 |             "",
159 |             "dNnLP MHIdC s5vst k7Ir2\n\r",
160 |             "yWoBwUKUnlMfZQ1BDFIHJsLAU9PNKUVOzOJ1s7BdXKlhAnALegIOkvEsq8QCHNKqFoMk5ndpSa3bE99hIVXZSK7hCGa6wk770C9WJoL7VLYDeGRYWEhoYF2eg8PERFK4CGYXuuCLkr0ScT3esnj66ypgzpuP85PpCSERP5qc0DNYzHF4edM9RcDxttfMU0X5HyQ0EzKCMX4dcKlB6DLianESEKFKE3VwRht2cwUdLs6IXG5fsUBLEiJUQEHzFENpr40dPvcnk1KoTc4UZr5EP9JlNJ9f7fx6Ps6m2QzxyXkVT8UjPYbx2Rk6EO27nfd21iKtzZyZzUhyHxVQtLS58hzeQumCwAMdOi5FmwiDG6vFS1THTODJdwovw7V0CsaXvFwkJmBagWVvRR3zWQ9o509BnK9bxvBIo5cgFdyBlVHSH3Bbq0kXyarVAoQjaOo0C8Tb8lr5Ug7FFelGnBmdKNmNQj1QmmiobrcUMY22JKdxp81Z0R1AoyjLjeJQ7NNlhpbM6REiHladSmVmgBPChjjFQJn2TGOmSAIQJAvnsREpdMyuRemTA5Vb0QMIUmEVvpIKV8HOhBaniz389ftxglIizaOF3pacUIBycUwLermpolNatVn6BbDSCNJGCwil8NVUBtfKQqTlEgQk1zo3LNfryrKEd5M4PBdOzqIFHb0zhsY8NsSy7geXOZfMnNRNfu0dsMDchiLYHzQD0qPg2heEsJ3w8usyH462eqUcNF5qNOt47tC53rnbChT8Tjktr55LrJQhvKg8QRqWg2HuTnM4eMxSjdF8iCUzxEhDHkoxah5v6iQPmRE7qCUxf2Jwyi404MLX0gGvoawukkrEiVlhcHrQo3yrnAqIRx7mYhp9izzmWw62e35xzpFD3rxhAlrLTBr3bJQPBXvMzkAY62UHt1pAQPCaDojEo1WrHKnb8TMsNUS8u5yYumvbuxsLSJFWIjkrqf2G6rm1aVo95jxx9Uvx665eJ9tWRAT6rD4A1QoXVg34m20ywW1n3voP / HTTP/1.1\n\r"
161 |         ],
162 |         "sent": [
163 |             "53LC / HTTP/1.1\n\r",
164 |             "GET / HTTP/g8z\n\r",
165 |             "dNnLP MHIdC s5vst k7Ir2\n\r",
166 |             "yWoBwUKUnlMfZQ1BDFIHJsLAU9PNKUVOzOJ1s7BdXKlhAnALegIOkvEsq8QCHNKqFoMk5ndpSa3bE99hIVXZSK7hCGa6wk770C9WJoL7VLYDeGRYWEhoYF2eg8PERFK4CGYXuuCLkr0ScT3esnj66ypgzpuP85PpCSERP5qc0DNYzHF4edM9RcDxttfMU0X5HyQ0EzKCMX4dcKlB6DLianESEKFKE3VwRht2cwUdLs6IXG5fsUBLEiJUQEHzFENpr40dPvcnk1KoTc4UZr5EP9JlNJ9f7fx6Ps6m2QzxyXkVT8UjPYbx2Rk6EO27nfd21iKtzZyZzUhyHxVQtLS58hzeQumCwAMdOi5FmwiDG6vFS1THTODJdwovw7V0CsaXvFwkJmBagWVvRR3zWQ9o509BnK9bxvBIo5cgFdyBlVHSH3Bbq0kXyarVAoQjaOo0C8Tb8lr5Ug7FFelGnBmdKNmNQj1QmmiobrcUMY22JKdxp81Z0R1AoyjLjeJQ7NNlhpbM6REiHladSmVmgBPChjjFQJn2TGOmSAIQJAvnsREpdMyuRemTA5Vb0QMIUmEVvpIKV8HOhBaniz389ftxglIizaOF3pacUIBycUwLermpolNatVn6BbDSCNJGCwil8NVUBtfKQqTlEgQk1zo3LNfryrKEd5M4PBdOzqIFHb0zhsY8NsSy7geXOZfMnNRNfu0dsMDchiLYHzQD0qPg2heEsJ3w8usyH462eqUcNF5qNOt47tC53rnbChT8Tjktr55LrJQhvKg8QRqWg2HuTnM4eMxSjdF8iCUzxEhDHkoxah5v6iQPmRE7qCUxf2Jwyi404MLX0gGvoawukkrEiVlhcHrQo3yrnAqIRx7mYhp9izzmWw62e35xzpFD3rxhAlrLTBr3bJQPBXvMzkAY62UHt1pAQPCaDojEo1WrHKnb8TMsNUS8u5yYumvbuxsLSJFWIjkrqf2G6rm1aVo95jxx9Uvx665eJ9tWRAT6rD4A1QoXVg34m20ywW1n3voP / HTTP/1.1\n\r"
167 |         ],
168 |         "tampering": true
169 |     },
170 |     "test_name": "http_invalid_request_line",
171 |     "test_runtime": 5.7580039501,
172 |     "test_start_time": "2015-12-01 07:25:12",
173 |     "test_version": "0.2"
174 | }
175 | ```
176 | 
177 | ## UTF-8 considerations
178 | 
179 | A middlebox may respond with a purely or partially binary response that would not
180 | be JSON serializable. If that happens, the related entry in the `received` key must be
181 | encoded using the same format used for binary HTTP response bodies, e.g.:
182 | 
183 | ```json
184 | {
185 |         "received": [
186 |             "",
187 |             "",
188 |             "",
189 |             "",
190 |             {
191 |                 "data": "AQ05bwxG+MIS9g9MCV8tzSk=",
192 |                 "format": "base64"
193 |             }
194 |         ],
195 | }
196 | ```
197 | 
198 | # Privacy considerations
199 | 
200 | A middlebox could reveal the ooni-probe IP address by the
201 | X_FORWARDED_FOR header.
202 | 


--------------------------------------------------------------------------------
/nettests/ts-008-tcp-connect.md:
--------------------------------------------------------------------------------
 1 | # Specification version number
 2 | 
 3 | 0.2.0
 4 | 
 5 | * _status_: obsolete
 6 | 
 7 | # Specification name
 8 | 
 9 | TCP Connect Test
10 | 
11 | # Test preconditions
12 | 
13 |   * An internet connection
14 | 
15 | For reporting to the backend to work, it must be possible for
16 | the probe to establish a connection to the Tor network.
17 | 
18 | # Expected impact
19 | 
20 | Ability to determine if a TCP connection can be successfully established.
21 | 
22 | # Expected inputs
23 | 
24 | ## Import document or import data format
25 | 
26 | A list of URLs to be tested for censorship.
27 | 
28 | ## Data specification version number
29 | 
30 | ## Semantics
31 | 
32 | The input document may contain an http or https URL, an IP:PORT, or a FQDN:PORT per line. e.g.
33 | 
34 | ```
35 | http://www.google.com
36 | google.com:80
37 | 8.8.8.8:53
38 | ```
39 | 
40 | are all valid entries
41 | 
42 | # Test description
43 | 
44 | For every item given as input we perform a TCP connect. If
45 | the connection is successful, we record 'success' for the
46 | test. If the connection fails, we record the reason for the
47 | failure.
48 | 
49 | # Expected output
50 | 
51 | The key 'connection' is added to the report. One report entry is written per line in the input document.
52 | 
53 | ## Required output data
54 | 
55 | The result of the connection attempt, 'success' or failure type.
56 | 
57 | ## Semantics
58 | 
59 | 'success' or a string indicating the reason for the failure.
60 | 
61 | ## Possible conclusions
62 | 
63 | Ability to determine that a specific host:port is blocked.
64 | 
65 | ## Example output sample
66 | 
67 | ```
68 | {
69 |     "bucket_date": "2015-12-29",
70 |     "data_format_version": "0.2.0",
71 |     "id": "6305de45-ce5d-43b5-ba11-fd9019e5e90c",
72 |     "input": "github.com:443\n",
73 |     "input_hashes": [
74 |         "586ded3d64bcb8672fedb475c8ea799331779b446e529b204a2088bedc91e3f0"
75 |     ],
76 |     "options": [
77 |         "-f",
78 |         "hosts.txt"
79 |     ],
80 |     "probe_asn": "AS13703",
81 |     "probe_cc": "US",
82 |     "probe_ip": "127.0.0.1",
83 |     "report_filename": "2015-12-29/20151225T201931Z-US-AS13703-tcp_connect-IHsje0cnGs2uOpRgEmfnkpuZAa8ysrZgdf8YyImTUhtBTSZSoTeXle79RuchYuzi-0.1.0-probe.json",
84 |     "report_id": "IHsje0cnGs2uOpRgEmfnkpuZAa8ysrZgdf8YyImTUhtBTSZSoTeXle79RuchYuzi",
85 |     "software_name": "ooniprobe",
86 |     "software_version": "1.3.1",
87 |     "backend_version": "1.1.4",
88 |     "probe_city": null,
89 |     "test_helpers": {},
90 |     "test_keys": {
91 |         "connection": "success",
92 |     },
93 |     "test_name": "tcp_connect",
94 |     "test_runtime": 0.0218939781,
95 |     "test_start_time": "2015-12-25 20:19:31",
96 |     "test_version": "0.1"
97 | }
98 | ```
99 | 


--------------------------------------------------------------------------------
/nettests/ts-012-dns-injection.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 0.2.0
  4 | 
  5 | * _status_: obsolete
  6 | 
  7 | # Specification name
  8 | 
  9 | DNS Injection test
 10 | 
 11 | # Test preconditions
 12 | 
 13 | * An internet connection
 14 | 
 15 | * An IP address that will traverse a box doing DNS injection triggering an
 16 |   injected DNS response.
 17 | 
 18 | # Expected impact
 19 | 
 20 | Ability to detect the presence of DNS Injection and list of domains that are
 21 | being blocked via DNS injection.
 22 | 
 23 | # Expected inputs
 24 | 
 25 | * List of domains to test for injection.
 26 | 
 27 | * IP address to send DNS queries to. This IP should not have a DNS resolver
 28 |   listening on port 53 UDP or TCP.
 29 | 
 30 | ## Semantics
 31 | 
 32 | one domain name per line
 33 | 
 34 | # Test description
 35 | 
 36 | For every domain in the input list we perform an A query towards the target IP
 37 | address using UDP on port 53.
 38 | 
 39 | We wait for a DNS response until the timeout (by default 3 seconds) is reached.
 40 | 
 41 | If we have received an answer by that time we say that that hostname is being
 42 | injected (since the endpoint is not a DNS resolver the answer we got must have
 43 | been injected by the censorship middle boxes).
 44 | 
 45 | If we don't receive an answer that means everything is as usual and we mark
 46 | that hostname as not being injected.
 47 | 
 48 | # Expected output
 49 | 
 50 | ## Parent data format
 51 | 
 52 | df-002-dnst
 53 | 
 54 | ## Required output data
 55 | 
 56 | * Whether the input domain is being injected or not
 57 | 
 58 | ## Semantics
 59 | 
 60 | injected: true|false
 61 | Indicates if we got an injected response for the domain in question.
 62 | 
 63 | ## Possible conclusions
 64 | 
 65 | If DNS injection is being done and on which domains.
 66 | 
 67 | ## Example output sample
 68 | 
 69 | ```
 70 | ###########################################
 71 | # OONI Probe Report for dns_injection (0.1)
 72 | # Wed Sep 10 09:34:51 2014
 73 | ###########################################
 74 | ---
 75 | input_hashes: [d87e90ead07a7d8ec8f8ca4724807e3fb6c7f1b9471979c934d0dc01b0bd6551]
 76 | options: [-r, 123.58.180.7, -f, domains.txt]
 77 | probe_asn: AS7922
 78 | probe_cc: US
 79 | probe_city: null
 80 | probe_ip: 127.0.0.1
 81 | software_name: ooniprobe
 82 | software_version: 1.1.1
 83 | start_time: 1410356091.339939
 84 | test_name: dns_injection
 85 | test_version: '0.1'
 86 | ...
 87 | ---
 88 | injected: true
 89 | input: facebook.com
 90 | queries:
 91 | - addrs: [173.252.110.27]
 92 |   answers:
 93 |   - [<RR name=facebook.com type=A class=IN ttl=121s auth=False>, <A address=173.252.110.27
 94 |       ttl=121>]
 95 |   query: '[Query(''facebook.com'', 1, 1)]'
 96 |   query_type: A
 97 |   resolver: [123.58.180.7, 53]
 98 | ...
 99 | ---
100 | injected: true
101 | input: www.twitter.com
102 | queries:
103 | - addrs: [199.16.156.198, 199.16.156.230, 199.16.156.6, 199.16.156.102]
104 |   answers:
105 |   - [<RR name=www.twitter.com type=CNAME class=IN ttl=548s auth=False>, <CNAME name=twitter.com
106 |       ttl=548>]
107 |   - [<RR name=twitter.com type=A class=IN ttl=16s auth=False>, <A address=199.16.156.198
108 |       ttl=16>]
109 |   - [<RR name=twitter.com type=A class=IN ttl=16s auth=False>, <A address=199.16.156.230
110 |       ttl=16>]
111 |   - [<RR name=twitter.com type=A class=IN ttl=16s auth=False>, <A address=199.16.156.6
112 |       ttl=16>]
113 |   - [<RR name=twitter.com type=A class=IN ttl=16s auth=False>, <A address=199.16.156.102
114 |       ttl=16>]
115 |   query: '[Query(''www.twitter.com'', 1, 1)]'
116 |   query_type: A
117 |   resolver: [123.58.180.7, 53]
118 | ...
119 | ---
120 | injected: true
121 | input: youtube.com
122 | queries:
123 | - addrs: [173.194.43.35, 173.194.43.36, 173.194.43.37, 173.194.43.38, 173.194.43.39,
124 |     173.194.43.40, 173.194.43.41, 173.194.43.46, 173.194.43.32, 173.194.43.33, 173.194.43.34]
125 |   answers:
126 |   - [<RR name=youtube.com type=A class=IN ttl=300s auth=False>, <A address=173.194.43.35
127 |       ttl=300>]
128 |   - [<RR name=youtube.com type=A class=IN ttl=300s auth=False>, <A address=173.194.43.36
129 |       ttl=300>]
130 |   - [<RR name=youtube.com type=A class=IN ttl=300s auth=False>, <A address=173.194.43.37
131 |       ttl=300>]
132 |   - [<RR name=youtube.com type=A class=IN ttl=300s auth=False>, <A address=173.194.43.38
133 |       ttl=300>]
134 |   - [<RR name=youtube.com type=A class=IN ttl=300s auth=False>, <A address=173.194.43.39
135 |       ttl=300>]
136 |   - [<RR name=youtube.com type=A class=IN ttl=300s auth=False>, <A address=173.194.43.40
137 |       ttl=300>]
138 |   - [<RR name=youtube.com type=A class=IN ttl=300s auth=False>, <A address=173.194.43.41
139 |       ttl=300>]
140 |   - [<RR name=youtube.com type=A class=IN ttl=300s auth=False>, <A address=173.194.43.46
141 |       ttl=300>]
142 |   - [<RR name=youtube.com type=A class=IN ttl=300s auth=False>, <A address=173.194.43.32
143 |       ttl=300>]
144 |   - [<RR name=youtube.com type=A class=IN ttl=300s auth=False>, <A address=173.194.43.33
145 |       ttl=300>]
146 |   - [<RR name=youtube.com type=A class=IN ttl=300s auth=False>, <A address=173.194.43.34
147 |       ttl=300>]
148 |   query: '[Query(''youtube.com'', 1, 1)]'
149 |   query_type: A
150 |   resolver: [123.58.180.7, 53]
151 | ...
152 | ```
153 | 


--------------------------------------------------------------------------------
/nettests/ts-013-lantern.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 0.1.0
  4 | 
  5 | * _status_: obsolete
  6 | 
  7 | # Specification name
  8 | 
  9 | Lantern Test
 10 | 
 11 | # Test preconditions
 12 | 
 13 | The "lantern" binary has been downloaded or compiled, made executable, and is in
 14 | the user's PATH environment variable.
 15 | 
 16 | # Expected impact
 17 | 
 18 | Ability to measure whether Lantern is working from the given network vantage point.
 19 | 
 20 | # Expected inputs
 21 | 
 22 | None
 23 | 
 24 | # Test description
 25 | 
 26 | This test launches Lantern in --headless mode, and parses output to determine
 27 | if it has bootstrapped.  After bootstrap, it fetches a URL using Lantern's HTTP
 28 | proxy interface listening on 127.0.0.1:8787 and checks to see if the response
 29 | body matches the expected result.
 30 | As a URL for testing we use http://www.google.com/humans.txt and look for the
 31 | string "Google is built by a large" in the response body.
 32 | 
 33 | The specific string used to determine bootstrap from Lantern output in version
 34 | "2.0.10" is "Successfully dialed via" from standard output.
 35 | 
 36 | # Expected output
 37 | 
 38 | ## Parent data format
 39 | 
 40 | None.
 41 | 
 42 | ## Required output data
 43 | 
 44 | success:
 45 | **boolean** The bootstrap status of Lantern (success or failure).
 46 | 
 47 | lantern --headless:
 48 | **dictionary** the parent key of Lantern's output that contains the keys stdout and stderr
 49 | 
 50 | stdout:
 51 | **string** Output produced by Lantern's standard output.
 52 | 
 53 | stderr:
 54 | **string** Error produced by Lantern's standard error.
 55 | 
 56 | body:
 57 | **string** The page body of a successful HTTP request.
 58 | 
 59 | failure:
 60 | **string** If failure, then the corresponding failure message.
 61 | 
 62 | 
 63 | ## Data specification version number
 64 | 
 65 | ## Semantics
 66 | 
 67 | 'success' - True or False - whether Lantern has bootstrapped.
 68 | 'body' - http page body if successfully requested.
 69 | 'failure' - optional, present if there is a failure.
 70 | 'lantern --headless':
 71 |   'stdout' - Contents of standard output produced by Lantern.
 72 |   'stderr' - Contents of standard error produced by Lantern.
 73 | 'default_configuration' - True or False - whether it is using the default, sane, configuration or not
 74 | 
 75 | ## Possible conclusions
 76 | 
 77 | We can determine whether or not Lantern is able to bootstrap, according to its output.
 78 | We can determine whether or not a URL is reachable via Lantern.
 79 | 
 80 | ## Example output sample
 81 | ```
 82 | ---
 83 | input_hashes: []
 84 | options: []
 85 | probe_asn: AS1234
 86 | probe_cc: US
 87 | probe_city: null
 88 | probe_ip: 127.0.0.1
 89 | software_name: ooniprobe
 90 | software_version: 1.2.3-rc1
 91 | start_time: 1428344311.0
 92 | test_name: lantern_circumvention_tool_test
 93 | test_version: 0.1.0
 94 | ...
 95 | ---
 96 | body: "Google is built by a large team of engineers, designers, researchers, robots, and others in many different sites across the globe. It is updated continuously, and built with more tools and technologies than we can shake a stick at. If you'd like to help us out, see google.com/careers."
 97 | bootstrapped: true
 98 | default_configuration: true
 99 | input: null
100 | lantern --headless: {exit_reason: process_done, stderr: '', stdout: ''}
101 | ```
102 | 
103 | ## Expected Post-processing efforts
104 | 
105 | You should be aware of the `default_configuration` parameter as the user may
106 | have misconfigured the test leading to inconsistent results.
107 | 
108 | # Privacy considerations
109 | 
110 | Lantern does not seek to provide anonymity. Lantern contains tracking analytics
111 | software and may connect directly to Lantern-provided proxy endpoints, or use
112 | fronted domains via Content Delivery Networks (CDNs) as a data channel.
113 | 
114 | # Packet capture considerations
115 | 
116 | This test does not capture packets by default.
117 | 


--------------------------------------------------------------------------------
/nettests/ts-014-meek-fronted-requests.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 2015-04-01-000
  4 | 
  5 | * _status_: obsolete
  6 | 
  7 | # Specification name
  8 | 
  9 | Meek Fronted Request Test
 10 | 
 11 | # Test preconditions
 12 | 
 13 | * An internet connection
 14 | 
 15 | # Expected impact
 16 | 
 17 | Ability to detect uncensored fronted domains that can transmit requests to the
 18 | "inside" meek-server via an intermediary web service.
 19 | 
 20 | # Expected inputs
 21 | 
 22 | ## Import document or import data format
 23 | 
 24 | A list of domain names (fronted domains) and host headers (meek-server) of the
 25 | intermediary web service.
 26 | 
 27 | ## Semantics
 28 | 
 29 | The input document may contain a domain name and a host header combination per
 30 | line separated by colon in the format:
 31 | 
 32 | 	DomainName:HostHeader
 33 | 
 34 | Example:
 35 | 
 36 | 	www.google.com:meek-reflect.appspot.com
 37 | 	a0.awsstatic.com:d2zfqthxsdq309.cloudfront.net
 38 | 
 39 | # Test description
 40 | 
 41 | Performs an HTTP GET request over TLS (HTTPS) to a list of fronted domains with
 42 | the Host Header of the "inside" meek-server. For diagnostic purposes the
 43 | meek-server handles a GET request and responds with: "I’m just a happy little
 44 | web server.\n". The GET request is sent over TLS to the root of the fronted
 45 | domain with the Host Header option of the desired meek-server host.
 46 | 
 47 | 
 48 | # Expected output
 49 | 
 50 | ## Parent data format
 51 | 
 52 | df-001-httpt-000
 53 | 
 54 | ## Required output data
 55 | 
 56 | * The domain name and host header used in the measurement
 57 | 	(DomainName:HostHeader)
 58 | 
 59 | * The requests that have been made
 60 | 
 61 | * The received responses
 62 | 
 63 | * If the meek server is blocked or unreachable
 64 | 
 65 | ## Semantics
 66 | 
 67 | success:
 68 | 	**boolean** indicates if an HTTPS GET response to the meek server is
 69 | 	successful
 70 | 
 71 | ## Possible conclusions
 72 | 
 73 | If the fronted request/response to the meek server is successful.
 74 | 
 75 | ## Example output sample
 76 | 
 77 | ```
 78 | agent: agent
 79 | input: ajax.aspnetcdn.com:az668014.vo.msecnd.net
 80 | requests:
 81 | - request:
 82 |     body: null
 83 |     headers:
 84 |     - - Host
 85 |       - [az668014.vo.msecnd.net]
 86 |     method: GET
 87 |     tor: {is_tor: false}
 88 |     url: https://ajax.aspnetcdn.com
 89 |   response:
 90 |     body: "I\u2019m just a happy little web server.\n"
 91 |     code: 200
 92 |     headers:
 93 |     - - Content-Length
 94 |       - ['38']
 95 |     - - X-Cache
 96 |       - [HIT]
 97 |     - - X-Powered-By
 98 |       - [ASP.NET]
 99 |     - - Accept-Ranges
100 |       - [bytes]
101 |     - - Server
102 |       - [ECAcc (fcn/40C4)]
103 |     - - Last-Modified
104 |       - ['Wed, 01 Apr 2015 09:25:13 GMT']
105 |     - - Connection
106 |       - [close]
107 |     - - Date
108 |       - ['Wed, 01 Apr 2015 10:01:37 GMT']
109 |     - - Content-Type
110 |       - [text/plain; charset=utf-8]
111 | socksproxy: null
112 | success: true
113 | ```
114 | 


--------------------------------------------------------------------------------
/nettests/ts-016-vanilla-tor.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 2022-05-10
  4 | 
  5 | This version of the specification documents the Go implementation of
  6 | `vanilla_tor`, written on 2022-05-10. The
  7 | [previous](https://github.com/ooni/spec/blob/fc05b9a12f4202d824f2bcdb52b4eacfbb914a4e/nettests/ts-016-vanilla-tor.md)
  8 | (and slightly incompatible!) version of this specification documents the original and
  9 | obsolete Python implementation inside `ooni/probe-legacy`.
 10 | 
 11 | * _status_: experimental
 12 | 
 13 | # Specification name
 14 | 
 15 | `vanilla_tor` (Vanilla Tor)
 16 | 
 17 | # Test preconditions
 18 | 
 19 | * An internet connection
 20 | 
 21 | * The `tor` binary installed on the system
 22 | 
 23 | # Expected impact
 24 | 
 25 | Ability to determine if Tor in its default configuration is able to bootstrap
 26 | or at what point in bootstrapping it fails.
 27 | 
 28 | # Expected inputs
 29 | 
 30 | None
 31 | 
 32 | # Test description
 33 | 
 34 | The test will run the tor executable and collect logs. The bootstrap
 35 | will either succeed or eventually time out.
 36 | 
 37 | # Expected output
 38 | 
 39 | ## Parent data format
 40 | 
 41 | * none
 42 | 
 43 | However, this test tries to produce a data structure as
 44 | similar as possible to `ts-030-torsf.md`.
 45 | 
 46 | ## Semantics
 47 | 
 48 | ```JSON
 49 | {
 50 |     "bootstrap_time": 1.1,
 51 |     "error": null,
 52 |     "failure": null,
 53 |     "success": false,
 54 |     "timeout": 600,
 55 |     "tor_logs": [],
 56 |     "tor_progress": 0,
 57 |     "tor_progress_tag": "",
 58 |     "tor_progress_summary": "",
 59 |     "tor_version": "",
 60 |     "transport_name": "vanilla"
 61 | }
 62 | ```
 63 | 
 64 | where:
 65 | 
 66 | - `bootstrap_time` (`float`) is zero if the bootstrap times out and otherwise is
 67 | the number of seconds it required to bootstrap;
 68 | 
 69 | - `error` (`null | string`) is `null` on success, `timeout-reached` in case of
 70 | timeout, and `unknown-error` otherwise (this field only exists for backwards
 71 | compatibility with the previous version of the `vanilla_tor` spec);
 72 | 
 73 | - `failure` conforms to `df-007-errors`;
 74 | 
 75 | - `success` (`bool`) is set to `true` if we bootstrap, to `false` otherwise (this
 76 | field only exists for backwards compatibility);
 77 | 
 78 | - `timeout` (`float`) is the default timeout for the experiment (in seconds);
 79 | 
 80 | - `tor_logs` (`[]string`) is a list of bootstrap-related logs emitted by
 81 | the tor daemon during the bootstrap;
 82 | 
 83 | - `tor_progress` (`int`) is the progress in the last bootstrap line;
 84 | 
 85 | - `tor_progress_tag` (`string`) is the machine readable tag of the last bootstrap line;
 86 | 
 87 | - `tor_progress_summary` (`string`) is the human readable description of
 88 | the last bootstrap line;
 89 | 
 90 | - `tor_version` (`string`) is the version of `tor` we're using;
 91 | 
 92 | - `transport_name` (`string`) is always set to `"vanilla"`.
 93 | 
 94 | ## Incompatibility with ooni/probe-legacy
 95 | 
 96 | The `ooni/probe-legacy` implementation used different field names and/or data types as
 97 | documented [by the previous version of this spec](
 98 | https://github.com/ooni/spec/blob/fc05b9a12f4202d824f2bcdb52b4eacfbb914a4e/nettests/ts-016-vanilla-tor.md).
 99 | The following table shows which fields changed since the previous implementation:
100 | 
101 | | legacy name            | legacy type     | new name      | new type        |
102 | | ---------------------- | --------------- | ------------- | --------------- |
103 | | `tor_log`              | `string`        | `tor_logs`    | `[]string`      |
104 | | `timeout`              | `integer`       | `timeout`     | `float`         |
105 | 
106 | The _main_ difference between the new and the old implementation is that the new
107 | implementation collects the logs as an array of lines while the old implementation
108 | collects the logs as a single string.
109 | 
110 | The `timeout` field changed only in its type and it should be possible to parse it
111 | using a language such as Python or JavaScript without major issues, since it's still
112 | a numeric value. Also, the timeout we set is always an integral number of seconds,
113 | which means that most JSON emitters (including
114 | Golang's JSON emitter) will emit an integer (i.e., a serialized
115 | number without trailing `.0`).
116 | 
117 | ## Possible conclusions
118 | 
119 | If Tor with the default configuration can successfully bootstrap.
120 | 
121 | ## Example output sample
122 | 
123 | ```JSON
124 | {
125 |   "annotations": {
126 |     "architecture": "arm64",
127 |     "engine_name": "ooniprobe-engine",
128 |     "engine_version": "3.15.0-alpha",
129 |     "platform": "macos"
130 |   },
131 |   "data_format_version": "0.2.0",
132 |   "input": null,
133 |   "measurement_start_time": "2022-05-10 11:31:29",
134 |   "probe_asn": "AS30722",
135 |   "probe_cc": "IT",
136 |   "probe_ip": "127.0.0.1",
137 |   "probe_network_name": "Vodafone Italia S.p.A.",
138 |   "report_id": "",
139 |   "resolver_asn": "AS30722",
140 |   "resolver_ip": "91.80.36.88",
141 |   "resolver_network_name": "Vodafone Italia S.p.A.",
142 |   "software_name": "miniooni",
143 |   "software_version": "3.15.0-alpha",
144 |   "test_keys": {
145 |     "bootstrap_time": 3.620014542,
146 |     "error": null,
147 |     "failure": null,
148 |     "success": true,
149 |     "timeout": 200,
150 |     "tor_logs": [
151 |       "May 10 13:31:26.000 [notice] Bootstrapped 0% (starting): Starting",
152 |       "May 10 13:31:26.000 [notice] Bootstrapped 5% (conn): Connecting to a relay",
153 |       "May 10 13:31:26.000 [notice] Bootstrapped 10% (conn_done): Connected to a relay",
154 |       "May 10 13:31:26.000 [notice] Bootstrapped 14% (handshake): Handshaking with a relay",
155 |       "May 10 13:31:26.000 [notice] Bootstrapped 15% (handshake_done): Handshake with a relay done",
156 |       "May 10 13:31:26.000 [notice] Bootstrapped 20% (onehop_create): Establishing an encrypted directory connection",
157 |       "May 10 13:31:26.000 [notice] Bootstrapped 25% (requesting_status): Asking for networkstatus consensus",
158 |       "May 10 13:31:26.000 [notice] Bootstrapped 30% (loading_status): Loading networkstatus consensus",
159 |       "May 10 13:31:27.000 [notice] Bootstrapped 40% (loading_keys): Loading authority key certs",
160 |       "May 10 13:31:27.000 [notice] Bootstrapped 45% (requesting_descriptors): Asking for relay descriptors",
161 |       "May 10 13:31:28.000 [notice] Bootstrapped 50% (loading_descriptors): Loading relay descriptors",
162 |       "May 10 13:31:29.000 [notice] Bootstrapped 55% (loading_descriptors): Loading relay descriptors",
163 |       "May 10 13:31:29.000 [notice] Bootstrapped 61% (loading_descriptors): Loading relay descriptors",
164 |       "May 10 13:31:29.000 [notice] Bootstrapped 70% (loading_descriptors): Loading relay descriptors",
165 |       "May 10 13:31:29.000 [notice] Bootstrapped 75% (enough_dirinfo): Loaded enough directory info to build circuits",
166 |       "May 10 13:31:29.000 [notice] Bootstrapped 80% (ap_conn): Connecting to a relay to build circuits",
167 |       "May 10 13:31:29.000 [notice] Bootstrapped 85% (ap_conn_done): Connected to a relay to build circuits",
168 |       "May 10 13:31:29.000 [notice] Bootstrapped 89% (ap_handshake): Finishing handshake with a relay to build circuits",
169 |       "May 10 13:31:29.000 [notice] Bootstrapped 90% (ap_handshake_done): Handshake finished with a relay to build circuits",
170 |       "May 10 13:31:29.000 [notice] Bootstrapped 95% (circuit_create): Establishing a Tor circuit",
171 |       "May 10 13:31:29.000 [notice] Bootstrapped 100% (done): Done"
172 |     ],
173 |     "tor_progress": 100,
174 |     "tor_progress_tag": "done",
175 |     "tor_progress_summary": "Done",
176 |     "tor_version": "0.4.7.7",
177 |     "transport_name": "vanilla"
178 |   },
179 |   "test_name": "vanilla_tor",
180 |   "test_runtime": 3.8545842500000003,
181 |   "test_start_time": "2022-05-10 11:31:26",
182 |   "test_version": "0.2.0"
183 | }
184 | ```
185 | 


--------------------------------------------------------------------------------
/nettests/ts-025-stun-reachability.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 2020-11-26-000
  4 | 
  5 | * _status_: experimental
  6 | 
  7 | # Specification name
  8 | 
  9 | STUN reachability (`stunreachability`)
 10 | 
 11 | # Test preconditions
 12 | 
 13 | None
 14 | 
 15 | # Expected impact
 16 | 
 17 | Detect whether [STUN](https://en.wikipedia.org/wiki/STUN) is working in a specific network.
 18 | 
 19 | # Expected inputs
 20 | 
 21 | This nettest will test all the STUN-server endpoints encoded as URLs (i.e. `stun://"<domain>:<port>"`)
 22 | passed as input. If no input is provided, the test fails.
 23 | 
 24 | # Test description
 25 | 
 26 | For each STUN input URL, this nettest sends a binding request to the given URL's
 27 | endpoint and receives the corresponding response. If a valid response is received, then
 28 | the nettest is successful, otherwise it fails.
 29 | 
 30 | # Expected output
 31 | 
 32 | ## Parent data format
 33 | 
 34 | - `df-002-dnst`
 35 | - `df-007-errors`
 36 | - `df-008-netevents`
 37 | 
 38 | ## Required output data
 39 | 
 40 | ```JSON
 41 | {
 42 |   "endpoint": "",
 43 |   "failure": null,
 44 |   "network_events": [],
 45 |   "queries": []
 46 | }
 47 | ```
 48 | 
 49 | - `endpoint` (`string`): STUN endpoint that we are using;
 50 | 
 51 | - `failure` (`string|null`): string indicating the error that occurred
 52 | or `null` if no error occurred (see `df-007-errors`);
 53 | 
 54 | - `network_events` (`[]NetworkEvent`): see `df-008-netevents`;
 55 | 
 56 | -  `queries` (`[]Query`): see `df-002-dnst`.
 57 | 
 58 | ## Example output sample
 59 | 
 60 | ```JSON
 61 | {
 62 |   "annotations": {
 63 |     "assets_version": "20200529153246",
 64 |     "engine_name": "miniooni",
 65 |     "engine_version": "0.12.0",
 66 |     "platform": "macos"
 67 |   },
 68 |   "data_format_version": "0.2.0",
 69 |   "extensions": {
 70 |     "dnst": 0,
 71 |     "netevents": 0
 72 |   },
 73 |   "input": "stun://stun.l.google.com:19302",
 74 |   "measurement_start_time": "2020-06-01 16:51:44",
 75 |   "probe_asn": "AS30722",
 76 |   "probe_cc": "IT",
 77 |   "probe_ip": "127.0.0.1",
 78 |   "probe_network_name": "VODAFONE-IT-ASN Vodafone Italia S.p.A.",
 79 |   "report_id": "20200601T165144Z_AS30722_qwaNcWHznyBuj2Unhq3YpL1fJ1pV1WCs98xZHDjcUL8U6WP3Ot",
 80 |   "resolver_asn": "AS30722",
 81 |   "resolver_ip": "91.80.36.85",
 82 |   "resolver_network_name": "VODAFONE-IT-ASN Vodafone Italia S.p.A.",
 83 |   "software_name": "miniooni",
 84 |   "software_version": "0.12.0",
 85 |   "test_keys": {
 86 |     "endpoint": "stun.l.google.com:19302",
 87 |     "failure": null,
 88 |     "network_events": [
 89 |       {
 90 |         "failure": null,
 91 |         "operation": "resolve_start",
 92 |         "t": 2.336e-05
 93 |       },
 94 |       {
 95 |         "failure": null,
 96 |         "operation": "resolve_done",
 97 |         "t": 0.04526879
 98 |       },
 99 |       {
100 |         "address": "108.177.15.127:19302",
101 |         "failure": null,
102 |         "operation": "connect",
103 |         "proto": "udp",
104 |         "t": 0.045541248
105 |       },
106 |       {
107 |         "failure": null,
108 |         "num_bytes": 20,
109 |         "operation": "write",
110 |         "t": 0.04578789
111 |       },
112 |       {
113 |         "failure": null,
114 |         "num_bytes": 32,
115 |         "operation": "read",
116 |         "t": 0.084153449
117 |       }
118 |     ],
119 |     "queries": [
120 |       {
121 |         "answers": [
122 |           {
123 |             "asn": 15169,
124 |             "as_org_name": "GOOGLE",
125 |             "answer_type": "A",
126 |             "ipv4": "108.177.15.127",
127 |             "ttl": null
128 |           }
129 |         ],
130 |         "engine": "system",
131 |         "failure": null,
132 |         "hostname": "stun.l.google.com",
133 |         "query_type": "A",
134 |         "resolver_hostname": null,
135 |         "resolver_port": null,
136 |         "resolver_address": "",
137 |         "t": 0.04526879
138 |       },
139 |       {
140 |         "answers": [
141 |           {
142 |             "asn": 15169,
143 |             "as_org_name": "GOOGLE",
144 |             "answer_type": "AAAA",
145 |             "ipv6": "2a00:1450:400c:c07::7f",
146 |             "ttl": null
147 |           }
148 |         ],
149 |         "engine": "system",
150 |         "failure": null,
151 |         "hostname": "stun.l.google.com",
152 |         "query_type": "AAAA",
153 |         "resolver_hostname": null,
154 |         "resolver_port": null,
155 |         "resolver_address": "",
156 |         "t": 0.04526879
157 |       }
158 |     ]
159 |   },
160 |   "test_name": "stun_reachability",
161 |   "test_runtime": 0.085115757,
162 |   "test_start_time": "2020-06-01 16:51:44",
163 |   "test_version": "0.0.1"
164 | }
165 | ```
166 | 
167 | # Privacy considerations
168 | 
169 | The STUN server response will contain the user's IP address. For this reason we
170 | should not save the STUN server response into `network_events`.
171 | 
172 | # Packet capture considerations
173 | 
174 | This test does not capture packets by default.
175 | 


--------------------------------------------------------------------------------
/nettests/ts-026-riseupvpn.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 2023-10-17-000
  4 | 
  5 | * _status_: current
  6 | 
  7 | # Specification name
  8 | 
  9 | RiseupVPN
 10 | 
 11 | # Test preconditions
 12 | 
 13 | * An internet connection with functional DNS
 14 | 
 15 | # Expected impact
 16 | 
 17 | The ability to detect both if the RiseupVPN API and its gateways can be reached and connected to.
 18 | 
 19 | # Expected inputs
 20 | 
 21 | ## Import document or import data format
 22 | 
 23 | None. We aim to allow the user to specify a provider in a future version, the default is riseup.net.
 24 | 
 25 | # Test description
 26 | 
 27 | This test will check if a LEAP-platform-based VPN service like RiseupVPN is working as expected. The experiment consists of two parts.
 28 | 
 29 | 1. The RiseupVPN API service
 30 | 
 31 | 2. The advertised gateways (the VPN servers used by RiseupVPN clients)
 32 | 
 33 | ## RiseupVPN API check
 34 | 
 35 | An HTTP GET request to an API endpoint will hand out the self-signed certificate needed to access most other API endpoints. While RiseupVPN clients will perform a fingerprint verification, this test skips that step in order to reduce the complexity.
 36 | 
 37 | The RiseupVPN provider serves a JSON describing which endpoints a client could use (https://example.org/provider.json) for bootstrapping. However, for the sake of simplicity we hardcoded all endpoints that will be tested.
 38 | 
 39 | Using the self-signed certificate, we perform HTTP GET requests to test the reachability of the provider API endpoints. The API check implies a working DNS.
 40 | 
 41 | The locations of RiseupVPNs endpoints are:
 42 | 
 43 | * `https://black.riseup.net/ca.crt` fetched with a GET request, contains the self-signed X.509 certificate used in all subsequent requests.
 44 | 
 45 | * `https://riseup.net/provider.json` fetched with a GET request.
 46 | 
 47 | * `https://api.black.riseup.net:443/3/config/eip-service.json` fetched with GET request, contains gateway addresses, available transports, location, ports, etc. We use version 3 here.
 48 | 
 49 | * `https://api.black.riseup.net:9001/json` (hereafter referred to as geo service) contains the client's IP address and geolocation, and gives a list of the gateways that are closest and/or under less stress (from other users). This list can differ between clients and change over time.
 50 | 
 51 | Since ooniprobe 3.19.0 and version 0.3.0 of the riseupvpn experiment, any failure in accessing any of the above services as well as any failure in using the self-signed X.509 certificate causes the experiment to stop early, without measuring gateways.
 52 | 
 53 | Before ooniprobe 3.19.0, the data format was different as documented by [a previous version of this document](https://github.com/ooni/spec/blob/f9bbaa83541484e3e509ffa56dd87b0c5ce8c31a/nettests/ts-026-riseupvpn.md).
 54 | 
 55 | Since ooniprobe 3.19.0, if all parts of the API are functional and reachable then we write:
 56 | 
 57 | ```JSON
 58 | {
 59 |     "api_failures": null,
 60 |     "ca_cert_status": true
 61 | }
 62 | ```
 63 | 
 64 | In case any API fails, we include its error into the `api_failures` string list, as follows:
 65 | 
 66 | ```JSON
 67 | {
 68 |     "api_failures": ["failure1", "failure2"],
 69 |     "ca_cert_status": true
 70 | }
 71 | ```
 72 | 
 73 | The `ca_cert_status` boolean flag is set to false if we cannot get the self-signed X.509 certificate or the returned bytes are not a valid PEM-encoded certificate.
 74 | 
 75 | ## RiseupVPN gateways test
 76 | 
 77 | If the provider API is reachable, it provides a JSON-file which contains the IP addresses and capabilities of the VPN gateways.
 78 | The reachability of gateways will be tested depending on their capabilities as described by the provider (ports, OpenVPN, obfs4) by performing TCP handshakes. If a TCP handshake fails we assume the corresponding port and transport of that gateway to be blocked.
 79 | 
 80 | Before ooniprobe 3.19.0, the data format was different as documented by [a previous version of this document](https://github.com/ooni/spec/blob/f9bbaa83541484e3e509ffa56dd87b0c5ce8c31a/nettests/ts-026-riseupvpn.md).
 81 | 
 82 | Since ooniprobe 3.19.0, we do not write any toplevel key associated with riseupvpn gateways.
 83 | 
 84 | # Expected output
 85 | 
 86 | ## Parent data format
 87 | 
 88 | * `df-001-httpt`
 89 | * `df-002-dnst`
 90 | * `df-005-tcpconnect`
 91 | * `df-006-tlshandshake`
 92 | * `df-009-tunnel`
 93 | 
 94 | ## Required output data
 95 | 
 96 | JSON fields described above.
 97 | 
 98 | ## Semantics
 99 | 
100 | ```
101 | {
102 |     "api_failures": ["FAILURE STRING"] | null,
103 |     "ca_cert_status": true | false,
104 | }
105 | ```
106 | 
107 | Each string in `api_failures` can be any error string flagged with `(PE)` defined in `df-007-errors` or:
108 | 
109 | * `invalid_ca` in case the fetched ca certificate is invalid
110 | * `invalid_eipservice_response` in case the fetched eip-service.json is invalid
111 | * `invalid_geoservice_response` in case the fetched geo service response is invalid
112 | 
113 | ## Possible conclusions
114 | 
115 | * Users will be able to fetch client certificates and use the RiseupVPN services.
116 | 
117 | ## Example output sample
118 | 
119 | ## Expected Post-processing efforts
120 | 
121 | The providers will be able to learn if, where and which gateways are blocked. Depending on that, they might move or open up new VPN gateways in other locations.
122 | 
123 | # Privacy considerations
124 | 
125 | A network observer will learn that these servers exist and see that you are connecting to some IP addresses.
126 | 
127 | # Status and future directions
128 | 
129 | This test is currently experimental and will be used to further understand the design space. The idea is to broaden this test beyond Riseup: for example, the Calyx Institute offers a service based on the same software.
130 | 


--------------------------------------------------------------------------------
/nettests/ts-032-tcpping.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 2022-06-22
  4 | 
  5 | * _status_: experimental
  6 | 
  7 | # Specification name
  8 | 
  9 | `tcpping` (TCP ping)
 10 | 
 11 | # Test preconditions
 12 | 
 13 | * An internet connection
 14 | 
 15 | # Expected impact
 16 | 
 17 | The possibility of pinging a TCP endpoint.
 18 | 
 19 | # Expected inputs
 20 | 
 21 | A URL like `tcpconnect://<host>:<port>`.
 22 | 
 23 | # Test description
 24 | 
 25 | The experiment will attempt to connect to the given TCP endpoint
 26 | once per second, ten times in total, and return the results.
 27 | 
 28 | # Expected output
 29 | 
 30 | ## Parent data format
 31 | 
 32 | * `df-005-tcpconnect`
 33 | 
 34 | ## Semantics
 35 | 
 36 | ```JSON
 37 | {
 38 |     "pings": []
 39 | }
 40 | ```
 41 | 
 42 | where:
 43 | 
 44 | - `pings` is a list of `SinglePing` instances, each of which looks like:
 45 | 
 46 | ```JSON
 47 | {
 48 | 	"tcp_connect": {}
 49 | }
 50 | ```
 51 | 
 52 | where:
 53 | 
 54 | - `tcp_connect` follows the `df-005-tcpconnect` data format.
 55 | 
 56 | Before 2022-06-22, `tcp_connect` pointed to a *list* of `df-005-tcpconnect`
 57 | data. We changed this because the nettest is still experimental and we want to
 58 | clearly indicate that a single TCP connect per `SinglePing` is possible.
 59 | 
 60 | ## Possible conclusions
 61 | 
 62 | This experiment is an exploratory tool. There is no immediate conclusion
 63 | from its results but it is useful to perform censorship research.
 64 | 
 65 | ## Example output sample
 66 | 
 67 | ```JSON
 68 | {
 69 |   "annotations": {
 70 |     "architecture": "arm64",
 71 |     "engine_name": "ooniprobe-engine",
 72 |     "engine_version": "3.16.0-alpha",
 73 |     "platform": "macos"
 74 |   },
 75 |   "data_format_version": "0.2.0",
 76 |   "input": "tcpconnect://8.8.8.8:443",
 77 |   "measurement_start_time": "2022-06-22 13:32:07",
 78 |   "probe_asn": "AS30722",
 79 |   "probe_cc": "IT",
 80 |   "probe_ip": "127.0.0.1",
 81 |   "probe_network_name": "Vodafone Italia S.p.A.",
 82 |   "report_id": "20220622T133208Z_tcpping_IT_30722_n1_tWfDobdbZfWsdwHG",
 83 |   "resolver_asn": "AS30722",
 84 |   "resolver_ip": "91.80.36.88",
 85 |   "resolver_network_name": "Vodafone Italia S.p.A.",
 86 |   "software_name": "miniooni",
 87 |   "software_version": "3.16.0-alpha",
 88 |   "test_keys": {
 89 |     "pings": [
 90 |       {
 91 |         "tcp_connect": {
 92 |           "ip": "8.8.8.8",
 93 |           "port": 443,
 94 |           "status": {
 95 |             "failure": null,
 96 |             "success": true
 97 |           },
 98 |           "t": 0.020375
 99 |         }
100 |       },
101 |       {
102 |         "tcp_connect": {
103 |           "ip": "8.8.8.8",
104 |           "port": 443,
105 |           "status": {
106 |             "failure": null,
107 |             "success": true
108 |           },
109 |           "t": 1.028173
110 |         }
111 |       },
112 |       {
113 |         "tcp_connect": {
114 |           "ip": "8.8.8.8",
115 |           "port": 443,
116 |           "status": {
117 |             "failure": null,
118 |             "success": true
119 |           },
120 |           "t": 2.02758
121 |         }
122 |       },
123 |       {
124 |         "tcp_connect": {
125 |           "ip": "8.8.8.8",
126 |           "port": 443,
127 |           "status": {
128 |             "failure": null,
129 |             "success": true
130 |           },
131 |           "t": 3.026412
132 |         }
133 |       },
134 |       {
135 |         "tcp_connect": {
136 |           "ip": "8.8.8.8",
137 |           "port": 443,
138 |           "status": {
139 |             "failure": null,
140 |             "success": true
141 |           },
142 |           "t": 4.028897
143 |         }
144 |       },
145 |       {
146 |         "tcp_connect": {
147 |           "ip": "8.8.8.8",
148 |           "port": 443,
149 |           "status": {
150 |             "failure": null,
151 |             "success": true
152 |           },
153 |           "t": 5.026734
154 |         }
155 |       },
156 |       {
157 |         "tcp_connect": {
158 |           "ip": "8.8.8.8",
159 |           "port": 443,
160 |           "status": {
161 |             "failure": null,
162 |             "success": true
163 |           },
164 |           "t": 6.020745
165 |         }
166 |       },
167 |       {
168 |         "tcp_connect": {
169 |           "ip": "8.8.8.8",
170 |           "port": 443,
171 |           "status": {
172 |             "failure": null,
173 |             "success": true
174 |           },
175 |           "t": 7.019039
176 |         }
177 |       },
178 |       {
179 |         "tcp_connect": {
180 |           "ip": "8.8.8.8",
181 |           "port": 443,
182 |           "status": {
183 |             "failure": null,
184 |             "success": true
185 |           },
186 |           "t": 8.027933
187 |         }
188 |       },
189 |       {
190 |         "tcp_connect": {
191 |           "ip": "8.8.8.8",
192 |           "port": 443,
193 |           "status": {
194 |             "failure": null,
195 |             "success": true
196 |           },
197 |           "t": 9.025553
198 |         }
199 |       }
200 |     ]
201 |   },
202 |   "test_name": "tcpping",
203 |   "test_runtime": 9.025986292,
204 |   "test_start_time": "2022-06-22 13:31:58",
205 |   "test_version": "0.2.0"
206 | }
207 | ```
208 | 


--------------------------------------------------------------------------------
/nettests/ts-036-browser_web.md:
--------------------------------------------------------------------------------
  1 | # Specification version number
  2 | 
  3 | 2022-06-01-000
  4 | 
  5 | * _status_: experimental
  6 | 
  7 | # Specification name
  8 | 
  9 | browser_web
 10 | 
 11 | # Test preconditions
 12 | 
 13 | * An internet connection
 14 | 
 15 | # Expected impact
 16 | 
 17 | Determine if a web browser is able to access a particular web resource.
 18 | 
 19 | # Expected inputs
 20 | 
 21 | This experiment takes in input a list of URLs.
 22 | 
 23 | There are some limitations in what kinds of URLs can be measured due to specific
 24 | server-side CORS configuration.
 25 | 
 26 | In particular, if the URL we are trying to measure is setting [CORP headers](https://developer.mozilla.org/en-US/docs/Web/HTTP/Cross-Origin_Resource_Policy_(CORP)),
 27 | it will always show up as a failure, making the measurement unreliable.
 28 | 
 29 | # Test description
 30 | 
 31 | Browser web is a test designed to run inside of a user's web browser. To initiate
 32 | the test a web page is visited and after an informed consent procedure, it is
 33 | possible to run the experiment.
 34 | 
 35 | The test uses the [fetch
 36 | API](https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API) in an attempt
 37 | to measure if certain target URLs are being blocked.
 38 | 
 39 | This test will produce as a final result the time it took to load the resource
 40 | (using the [performance
 41 | API](https://developer.mozilla.org/en-US/docs/Web/API/Performance)) and whether the
 42 | result was successful.
 43 | 
 44 | ## Caveats
 45 | 
 46 | Because the information collected by this test is not very detailed some extra
 47 | precautions need to be taken when interpreting the results.
 48 | 
 49 | If the server hosting the site we intend to measure is setting the
 50 | `Cross-Origin-Resource-Policy` header, since our request will be issued from a
 51 | different `Origin`, it will always result in a failure. For this reason, it's
 52 | advisable to pre-screen the targets for testing, to exclude sites that have this
 53 | header set.
 54 | 
 55 | Another issue is that if the measurement tool is deployed on HTTPS any request
 56 | to HTTP sites will fail due to it being a [mixed content request](https://developer.mozilla.org/en-US/docs/Web/Security/Mixed_content).
 57 | Even if we were to be able to bypass this limitation, since we aren't able to read
 58 | the payload of the response, we are not able to determine if the retrieved
 59 | content is consistent with what we would expect to see. It's therefore advisable
 60 | to only use as testing targets HTTPS websites as an inconsistent DNS response or
 61 | a TLS MITM would be visible as a failure.
 62 | 
 63 | Moreover, if the user has enabled [strict enhanced tracking
 64 | protection](https://support.mozilla.org/en-US/kb/enhanced-tracking-protection-firefox-desktop#w_strict-enhanced-tracking-protection)
 65 | on Firefox, requests to [certain sites](https://github.com/disconnectme/disconnect-tracking-protection/blob/master/services.json) (ex. twitter.com or facebook.com) will
 66 | fail and appear to be a result of blocking.
 67 | Firefox users should be asked, if they have strict enhanced tracking protection
 68 | enabled, to add a whitelist rule for the website hosting the experiment,
 69 | however this is not a guarantee that they will run the test with it on.
 70 | If a site is deemed inaccessible as a result of specific settings the user has in their browser, the `load_time_ms` will generally be significantly lower than a truly blocked website. When interpreting the data this should be taken into account.
 71 | 
 72 | # Expected output
 73 | 
 74 | ## Parent data format
 75 | 
 76 | ## Semantics
 77 | 
 78 | The contents of the `test_keys` field are as follows:
 79 | 
 80 | ```JSON
 81 | {
 82 |   "result": "ok",
 83 |   "load_time_ms": 404.40000000596046
 84 | }
 85 | ```
 86 | 
 87 | where:
 88 | 
 89 | * `result` (`string`) will be `ok` in the case of a successful measurement,
 90 |   while it will be `error` in the case of a failure;
 91 | * `load_time_ms` (`float`) indicates the number of milliseconds it took to load
 92 |   the requested resource.
 93 | * `browser` (`string`) indicates the name of the browser used to run the experiment (ex. `firefox` or `chrome`).
 94 | 
 95 | ## Possible conclusions
 96 | 
 97 | If a certain web resource is accessible or not from the vantage point of the
 98 | user.
 99 | 
100 | ## Example output sample
101 | 
102 | ```JSON
103 | {
104 |   "software_name": "ooniprobe-web",
105 |   "software_version": "0.0.1",
106 |   "test_start_time": "2022-06-02 13:08:21",
107 |   "test_name": "browser_web",
108 |   "test_version": "0.1.0",
109 |   "data_format_version": "0.2.0",
110 |   "report_id": "20220602T130821Z_browserweb_IT_30722_n1_MJoDg0lHFMo9QncM",
111 |   "measurement_start_time": "2022-06-02 13:10:01",
112 |   "probe_asn": "AS30722",
113 |   "probe_cc": "IT",
114 |   "probe_network_name": "Vodafone Italia S.p.A.",
115 |   "input": "https://go-text.me/",
116 |   "test_runtime": 0.4044000000059605,
117 |   "test_keys": {
118 |       "result": "ok",
119 |       "load_time_ms": 404.40000000596046,
120 |       "browser": "chrome"
121 |   }
122 | }
123 | ```
124 | 


--------------------------------------------------------------------------------
/policies/po-001-measurements-and-urls.md:
--------------------------------------------------------------------------------
 1 | # Measurements and URL policy
 2 | 
 3 | * _status_: needs updating (it references the defunct governance roster for OPOS)
 4 | 
 5 | This document explains the policy we follow when considering what URLs and
 6 | Measurements should be scheduled via the OONI Probe Orchestration System (OPOS)
 7 | and/or any other system run by OONI to gather network measurement data.
 8 | 
 9 | 1. To the extent that it is possible we will always do what is best for users of OONI.
10 | 
11 | 2. When in doubt favour user safety.
12 | 
13 | 3. Every action that leads to measurements being triggered by probes must be
14 |    logged (either publicly or privately depending on what is safer for the
15 |    user).
16 | 
17 | 4. Measurements aimed at deanonymizing users shall never be scheduled.
18 | 
19 | 5. A URL must first have been included as part of the
20 |    [citizen-lab testing list](https://github.com/citizenlab/test-lists) repository to be
21 |    considered for testing.
22 | 
23 | 6. Only people in the governance roster can trigger measurements and a
24 |    measurement must be signed off by at least one other person.
25 | 
26 | 7. Anybody in the governance roster can nominate somebody else to join the
27 |    roster. Their inclusion shall be discussed and once there is consensus they
28 |    are added. To be included in the roster you need to:
29 | 
30 |    a. Have a legitimate reason to do so (ex. you do research on censorship)
31 | 
32 |    b. Be a respected member of the OONI community
33 | 
34 | 8. If anybody suspects that any of these rules have not been followed or there
35 |    is suspicion of abuse, please contact anybody in the Governance roster.
36 | 


--------------------------------------------------------------------------------
/probe/README.md:
--------------------------------------------------------------------------------
  1 | # OONI Probe specification
  2 | 
  3 | * version: 202108.20.1147
  4 | * authors: Simone Basso
  5 | 
  6 | This document should serve as an introduction for the developer interested in
  7 | contributing to OONI Probe.
  8 | 
  9 | ## Software architecture
 10 | 
 11 | ![Architecture](architecture.png)
 12 | 
 13 | ### Probe
 14 | 
 15 | The probe is the software running network tests (aka nettests). The probe
 16 | is an app for mobile or desktop. Current implementations are:
 17 | 
 18 | - [github.com/ooni/probe-android](https://github.com/ooni/probe-android):
 19 | app for Android devices, written in Java;
 20 | 
 21 | - [github.com/ooni/probe-ios](https://github.com/ooni/probe-ios):
 22 | app for iOS devices, written in ObjectiveC;
 23 | 
 24 | - [github.com/ooni/probe-cli](https://github.com/ooni/probe-cli):
 25 | command line interface for Desktop (new implementation);
 26 | 
 27 | - [github.com/ooni/probe-desktop](https://github.com/ooni/probe-desktop):
 28 | graphical user interface for Desktop (new implementation),
 29 | based on probe-cli;
 30 | 
 31 | - [github.com/ooni/probe-legacy](https://github.com/ooni/probe-legacy):
 32 | CLI/Desktop (legacy) implementation, written in Python.
 33 | 
 34 | ### Engine
 35 | 
 36 | The engine is the piece of code running nettests. A specific implementation
 37 | of the probe uses an engine. Current implementations are:
 38 | 
 39 | - [github.com/ooni/probe-cli](https://github.com/ooni/probe-cli): private
 40 | engine written in Go along with public mobile API;
 41 | 
 42 | - [github.com/measurement-kit/measurement-kit](
 43 | https://github.com/measurement-kit/measurement-kit): C++ engine
 44 | historically used in the 2016-2020 time period;
 45 | 
 46 | - [github.com/ooni/probe-engine](https://github.com/ooni/probe-engine):
 47 | historical Go engine merged into probe-cli in early 2021;
 48 | 
 49 | - [github.com/ooni/probe-legacy](https://github.com/ooni/probe-legacy):
 50 | historical engine written in Python.
 51 | 
 52 | ### OONI API Client
 53 | 
 54 | Client API for speaking with the OONI backends (aka *probe services*).
 55 | 
 56 | We specify these services in the [backends](../backends) directory.
 57 | 
 58 | ### Geolocate Client
 59 | 
 60 | API to discover the probe's:
 61 | 
 62 | - IP address;
 63 | 
 64 | - ASN (autonomous system number);
 65 | 
 66 | - CC (country code);
 67 | 
 68 | - network name (entity owning the ASN).
 69 | 
 70 | We obtain the ASN, the CC, and the network name from the probe's IP address.
 71 | 
 72 | Modern probes (e.g., probe-cli) also discover:
 73 | 
 74 | - the IP address used by the system resolver;
 75 | 
 76 | - the corresponding ASN;
 77 | 
 78 | - the corresponding network name.
 79 | 
 80 | ### Nettests
 81 | 
 82 | The nettests (aka experiments) are the network measurement experiments run
 83 | by a OONI Probe instance.
 84 | 
 85 | We specify them in the [nettests](../nettests) directory.
 86 | 
 87 | ## Nettest flow
 88 | 
 89 | ![Interaction](interaction.png)
 90 | 
 91 | ### Check-in
 92 | 
 93 | The Probe uses the OONI API Client to obtain the nettest targets and
 94 | configuration from the OONI backends. As part of checking in with the
 95 | backends, the Probe also obtains a *Report ID*. Such an ID will be
 96 | used later to identify related network measurements.
 97 | 
 98 | ### Geolocation
 99 | 
100 | The Probe uses the Geolocate Client to discover its IP address, ASN, CC,
101 | and network name. Modern probes (e.g., probe-cli) also discover the
102 | resolver's IP address, ASN, and network name.
103 | 
104 | ### Measuring
105 | 
106 | The Probe uses the selected nettest API to perform a network measurement. Some
107 | nettests (e.g., website-measuring nettests) take in input a list of targets, others
108 | (e.g., network-performance nettests) do not. If the nettest takes
109 | in input a list of targets, the Probe will perform a measurement for each
110 | target. Otherwise the probe performs a single measurement.
111 | 
112 | ### Measurement submission
113 | 
114 | The Probe uses the OONI API Client and the *Report ID* to submit measurements
115 | to the OONI backends.
116 | 
117 | ### Measurement processing
118 | 
119 | The OONI backends process the submitted measurements, store the raw
120 | data files on S3 and export them using the [OONI API](
121 | https://github.com/ooni/api).
122 | 


--------------------------------------------------------------------------------
/probe/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ooni/spec/e8aafe45d5bd15a371a08dfbeb0dc557e1622d04/probe/architecture.png


--------------------------------------------------------------------------------
/probe/interaction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ooni/spec/e8aafe45d5bd15a371a08dfbeb0dc557e1622d04/probe/interaction.png


--------------------------------------------------------------------------------
/techniques/Readme.md:
--------------------------------------------------------------------------------
1 | # Techniques
2 | 
3 | This directory contains techniques. A technique is a measurement idea that should solve a
4 | specific measurement use case. OONI implements network tests. Each network test may implement
5 | one or more techniques. The same technique can be in multiple tests.
6 | 


--------------------------------------------------------------------------------
/techniques/tq-000-timing-information-collection.md:
--------------------------------------------------------------------------------
 1 | # tq-000 Timing information collection
 2 | 
 3 | All the experiments should collect all possible timing information, every sent
 4 | and incoming byte, every network error of every exchange should be time-stamped
 5 | with a monotonic timer started at the measurement_start_time<sup>[1](#fn1)</sup>. Incoming TCP ACKs
 6 | may be time-stamped with some precision by polling TCP_INFO. L5 and L7 events like
 7 | “ServerCertificate arrived over TLS1.3” and “HTTP header arrived” should also
 8 | be time-stamped when possible.
 9 | 
10 | Bytes come in packets, so packet may be time-stamped for UDP, while byte range may
11 | be time-stamped for TCP.
12 | 
13 | TBD: It’s unclear if `SO_TIMESTAMP` and other alike options are useful.
14 | 
15 | TBD: is `connect()` time the best estimate for SYN-ACK? Is RTT estimate from
16 | `TCP_INFO` after `connect()` also good or, maybe, better? Maybe `BBR_INFO` and
17 | `TCP_CC_INFO`?
18 | 
19 | Having all monotonic timestamps within the measurement rooted at a single wall
20 | clock sample is needed as intra-measurement ordering may be important. For example, imagine
21 | “stateful” DPI that can “learn” a protocol of a service at IP:Port and two
22 | traceroute tests to the endpoint done before and after “teaching” DPI the
23 | protocol.
24 | 
25 | <a name="fn1">1</a>: there is no need to _explicitly_ keep a singleton object of
26 | a reference clock if all participating libraries use the same monotonic clock time
27 | source. Platforms may have several monotonic timescales!
28 | 
29 | ## Examples
30 | - When an RST or TLS Alert packet arrives way faster than the SYN-ACK, it is likely injected
31 | - When a DNS reply for a non-cached “censored” hostname arrives as fast as a reply to “a.root-servers.net.” or “.” served from root, it is likely injected
32 | - Mozilla has [tips on cross-platform monotonic clock](https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/telemetry/data/main-ping.html#sessionlength), Go/1.9+ and C++11 also have monotonic clock support
33 | 


--------------------------------------------------------------------------------
/techniques/tq-001-DNS-UDP-censorship-transparency.md:
--------------------------------------------------------------------------------
 1 | # tq-001 DNS/UDP Censorship Transparency
 2 | 
 3 | DNS testing is fundamental to network measurement to detect censorship as it’s
 4 | a commonly used technique to implement censorship.
 5 | 
 6 | When reasoning about DNS interference, we shall consider the following possibilities:
 7 | - [_DNS hijacking_](https://en.wikipedia.org/wiki/DNS_hijacking#Manipulation_by_ISPs) (or _Policy Based DNS interference_), whereby a DNS resolver run by the ISP, parental control DNS service or government is configured to return specifically altered responses to specific queries. This is usually trivial to circumvent by changing resolver.
 8 | - [_DNS Spoofing_](https://en.wikipedia.org/wiki/DNS_spoofing), whereby there is equipment in the network that listens for DNS queries and sends replies back to the user faster than the legitimate DNS server. This tends to be trickier to circumvent.
 9 | - _DNS Transparent Proxy_, whereby all DNS requests sent by a user are routed through a DNS proxy box regardless of the destination DNS server, and the reply is served by the proxy box. This also tends to be trickier to circumvent.
10 | 
11 | The difference between _DNS Spoofing_ and _DNS Transparent Proxy_ is that in one case the
12 | origin server gets the client query and in the other it doesn't, in other words &mdash; if the
13 | [end-to-end principle](https://en.wikipedia.org/wiki/End-to-end_principle) is
14 | preserved or 100%-violated.  If the origin server gets the query from the
15 | client and the response from origin server is blocked so the client only gets a
16 | single injected response, then it's still spoofing. The following metadata bits are
17 | useful to classify the case as _spoofing_ or _proxy_:
18 | 
19 | - [presence of second DNS reply to single query](./tq-002-second-DNS-reply.md)
20 | - [IP TTL values of the replies](./tq-007-UDP-information-collection.md) got from _different_ resolvers
21 | - difference between probe IP addresses collected over HTTPS, DNS/TCP, DNS/UDP and STUN/UDP
22 | - different handling of hop-by-hop [EDNS(0)](https://tools.ietf.org/html/rfc6891) options
23 | 
24 | E.g. lack of the second DNS reply combined with the injected DNS response
25 | suggests<sup>[1](#fn1)</sup> that censorship equipment is doing an
26 | [_in-path_ attack (aka _man-in-the-middle_)](https://en.wikipedia.org/wiki/Man-in-the-middle_attack),
27 | while the presence of a second DNS reply suggests<sup>[2](#fn2)</sup> that an
28 | [_on-path_ attack (aka _man-on-the-side_)](https://en.wikipedia.org/wiki/Man-on-the-side_attack)
29 | is happening.
30 | 
31 | <a name="fn1">1</a>: _"suggests in-path"_ (not _"proves"_) as the second DNS reply can also _disappear_ due to natural packet loss
32 | 
33 | <a name="fn2">2</a>: _"suggests on-path"_ (not _"proves"_) as in-path equipment may do *all* on-path attacks as well
34 | 
35 | The following techniques may help to distinguish DNS-based censorship from a
36 | malfunctioning network or DNS service:
37 | 
38 | - [more than one reply](./tq-002-second-DNS-reply.md) is received for a single DNS query
39 | - the same resolver gives the same answer with [DNS/TCP, DoT and DoH](./tq-004-DNS-TCP-DoT-DoH-against-same-resolver.md)
40 | - [DNS Resource Record TTL “ticks”](./tq-005-DNS-Resource-Record-TTL-ticks-in-cache.md) on caching Recursive Resolver
41 | - [another “non-existent” domain in same zone gives same error](./tq-006-another-NXDOMAIN-domain-in-same-zone.md)
42 | - timeout can be explained by [5-tuple load balancing going bad](./tq-007-UDP-information-collection.md)
43 | - delegation [chain works from root](./tq-008-DNS-delegation-chain-from-root.md) (dig +trace, drill)
44 | - domain and parent domains have [SOAs and NSes matching with control](./tq-010-SOAs-and-NSes-for-possibly-censored-domain.md) measurement
45 | - TBD :-)
46 | 


--------------------------------------------------------------------------------
/techniques/tq-002-second-DNS-reply.md:
--------------------------------------------------------------------------------
 1 | # tq-002 Second DNS reply
 2 | 
 3 | The client issuing a DNS/UDP query may receive two<sup>[1](#fn1)</sup> DNS responses, one coming
 4 | from the legitimate DNS resolver and one coming from the censor. In order to capture both the first reply and, possibly, a second one, it's suggested to wait 5 seconds for the first response<sup>[2](#fn2)</sup> and an additional 5 seconds<sup>[3](#fn3)</sup> on the same UDP socket after receiving the first reply.
 5 | 
 6 | These two responses should have [TTL](./tq-015-packet-headers-exfiltration-with-BPF.md) and
 7 | [latency](./tq-000-timing-information-collection.md) recorded (if platform
 8 | permits). All the captured IPs from several responses should be passed to
 9 | further tests.
10 | 
11 | <a name="fn1">1</a>: There may be more than one extra response, but that has never been observed so far.
12 | 
13 | <a name="fn2">2</a>: 5 second timeout is default `RES_TIMEOUT` in GNU C library
14 | 2.23. It’s possible to estimate platform `getaddrinfo()` timeout with an
15 | attempt to resolve a domain from non-responding server, but it’ll make code
16 | more complex.
17 | 
18 | <a name="fn3">3</a>: Two separate 5 second windows come from the hypothesis
19 | that the query may be sitting in the TX queue for several seconds, so it would
20 | be silly to get first response in 4.9s and stop waiting just in 0.1s. It is
21 | also reasonable to wait for 10 seconds since query if that makes implementation
22 | easier, but it makes the test a bit more “sleepy” and makes treatment of these
23 | two responses a bit less “fair”.
24 | 
25 | If a network operator also firewalls the DNS packets from leaving or entering
26 | their network, there would not be a second response even though in reality there
27 | is injection happening as opposed to hijacking. This is a technical detail and
28 | does not change the fact that there is DNS interception happening.
29 | 
30 | ## Examples
31 | - [AS41843, CJSC "ER-Telecom Holding" Omsk branch](https://github.com/ooni/probe/issues/647#issuecomment-275999682)
32 | 


--------------------------------------------------------------------------------
/techniques/tq-003-DNS-latency-of-non-cached-domain.md:
--------------------------------------------------------------------------------
 1 | # tq-003 DNS latency of non-cached domain
 2 | 
 3 | To understand if the filtering equipment has a rule for `*.censored.net` or not it 
 4 | can be interesting to measure the latency of looking up `random-subdomain.censored.net` 
 5 | as this would cause recursive resolution when no such rule exists. Conversely if the 
 6 | policy is to block anything containing `censored.net` the response may arrive earlier, 
 7 | because there is no extra latency caused by the recursive resolution.
 8 | 
 9 | There are however some caveats in this approach. Indeed, if the recursor hosts a zone for
10 | `censored.net` it does not have to query the `censored.net` `NS` for the domain. But
11 | it’s not a 100% clear signal as some [DNSSEC versions may leak cacheable ranges](https://blog.cloudflare.com/dnssec-complexities-and-considerations/)
12 | of non-existent domains. Also, NSes responsible for the `censored.net` zone may be
13 | unreachable<sup>[1](#fn1)</sup> and that information may be cached as well, that will also affect
14 | resolution latency.
15 | 
16 | <a name="fn1">1</a>: e.g. some Israeli authoritative DNS servers were dropping requests from Egypt
17 | 


--------------------------------------------------------------------------------
/techniques/tq-004-DNS-TCP-DoT-DoH-against-same-resolver.md:
--------------------------------------------------------------------------------
 1 | # tq-004 DNS/TCP, DoT, DoH against same resolver
 2 | 
 3 | When a Probe assumes MITM between the client and the recursive DNS resolver, it
 4 | is interesting to compare responses obtained via DNS/UDP and responses obtained via
 5 | DNS/TCP, DoT ([DNS over TLS](https://tools.ietf.org/html/rfc7858)) and DoH (DNS
 6 | over HTTPS) as those requests are likely to go through the same network path. Extra
 7 | IPs obtained this way may be added to a set of “Origin IPs” for further testing (TCP,
 8 | HTTP, TLS).
 9 | 
10 | It makes sense both for [public DNS resolvers](https://en.wikipedia.org/wiki/Public_recursive_name_server),
11 | and DNS resolvers run by ISPs.
12 | 
13 | ## Examples
14 | - [AS41843, ER-Telecom, Omsk](https://github.com/ooni/probe/issues/647#issuecomment-275999682): DNS/TCP does not provoke “censored” reply for rutracker.org neither from Google’s 8.8.8.8, nor from ISP’s 5.3.3.3, but DNS/TCP gives “censored” reply from another ISP’s resolver 109.194.112.1
15 | - AS61173, Green Web Samaneh Novin Co Ltd, Iran — “uncensored” reply from 8.8.8.8 for bridges.torproject.org via TCP
16 | 


--------------------------------------------------------------------------------
/techniques/tq-005-DNS-Resource-Record-TTL-ticks-in-cache.md:
--------------------------------------------------------------------------------
 1 | # tq-005 DNS Resource Record TTL “ticks” in cache
 2 | 
 3 | When a Probe assumes that a specific domain name is censored it’s interesting to
 4 | query the same nameserver for the same name again after two seconds and check if the TTL
 5 | field changes. If the TTL field does not change, it’s a clear indicator that at
 6 | least one of the following is likely true:
 7 | 
 8 | - the domain name is served by the caching recursive DNS resolver
 9 | - the domain name has very low TTL so it expires and is re-fetched while Probe sleeps
10 | - the load balancer in front of the DNS servers cluster is not aware of DNS names, so requests to the cache are not “sticky”
11 | - the response is injected with static TTL by an entity that is able to observe the traffic
12 | 
13 | If the TTL changes by an “unexpected” value (e.g. diff is not within 1..3 range
14 | after 2 seconds delay) it may be also a side-effect of qname-unaware load
15 | balancing or some other logic. That was observed at least with 8.8.8.8.
16 | 
17 | ## Examples
18 | 
19 | - [AS41843, ER-Telecom, Omsk](https://github.com/ooni/probe/issues/647#issuecomment-275999682): static TTL=600 for “censored” DNS/UDP reply for `rutracker.org` for any resolver
20 | - [AS41843](https://github.com/ooni/probe/issues/647#issuecomment-275999682): ticking(!) TTL for “censored” DNS/TCP reply for `rutracker.org` from ISP’s resolver 109.194.112.1
21 | - AS61173, Green Web Samaneh Novin Co Ltd, Iran — static TTL=418 from 8.8.8.8 for bridges.torproject.org
22 | 


--------------------------------------------------------------------------------
/techniques/tq-006-another-NXDOMAIN-domain-in-same-zone.md:
--------------------------------------------------------------------------------
 1 | # tq-006 Check another “NXDOMAIN” domain in same zone
 2 | 
 3 | When the suspected-to-be-censored domain returns the `NXDOMAIN` error from the resolver
 4 | it is interesting to attempt a resolution of another domain that we know for sure to not exist. These two
 5 | queries may result in different outcomes that may be a sign of network
 6 | interference.
 7 | 
 8 | ## Examples
 9 | - AS22047, VTR, Chile — “SOA? riseup.net.” sent to ISP’s resolver resulted in an `NXDOMAIN` reply with 0 answers and 0 authority records, while “SOA? riseup-5223-547-27075.net.” also was an `NXDOMAIN` with 0 answers, but had 1 authority record “a.gtld-servers.net. Nstld.verisign-grs.com ...” with valid `Serial` and other fields clearly distinguishing it as a non-existent domain.
10 | 


--------------------------------------------------------------------------------
/techniques/tq-007-UDP-information-collection.md:
--------------------------------------------------------------------------------
 1 | # tq-007 UDP information collection
 2 | 
 3 | A UDP client should `bind()` to a random port for every query sent to the same
 4 | server<sup>[1](#fn1)</sup> and should not reuse the same 5-tuple for retries.
 5 | It should enable `IP_RECVERR` and `IP_RECVTTL` to collect more metadata.
 6 | 
 7 | Using a random UDP source port:
 8 | - allows us to distinguish ICMP errors delivered via `IP_RECVERR` to `MSG_ERRQUEUE` across queries
 9 | - accounts for ECMP-like<sup>[2](#fn2)</sup> broken paths
10 | - [BCP152](https://tools.ietf.org/html/bcp152#section-6.1) suggests this to prevent blind DNS/UDP spoofing
11 | 
12 | <a name="fn1">1</a>: unless doing traceroutes or other alike measurements that
13 | SHOULD preserve path like paris-traceroute does
14 | 
15 | <a name="fn2">2</a>: that accounts for unlikely but possible sources of DNS
16 | timeouts: when the route to the resolver is load-balanced using 5-tuple at any
17 | stage of the network path: ECMP routes, different queues of NIC, etc.
18 | 


--------------------------------------------------------------------------------
/techniques/tq-008-DNS-delegation-chain-from-root.md:
--------------------------------------------------------------------------------
 1 | # tq-008 DNS delegation chain from root
 2 | 
 3 | When a domain is suspected to be censored by a resolver it is interesting to check
 4 | if the delegation chain for the domain works when the client starts from the DNS
 5 | root. It also allows verification that an NS record does not point to a CNAME Resource
 6 | Record, which breaks the BIND software.
 7 | 
 8 | TBD: it’s unclear if the client should try doing both [QNAME minimisation (RFC7816)](https://tools.ietf.org/html/rfc7816)
 9 | and not doing it. Different results may also highlight the presence of a network
10 | anomaly. But [doing it according to the letter of RFC may be problematic](https://ripe72.ripe.net/presentations/120-unbound_qnamemin_ripe72.pdf),
11 | so data format should be verbose enough to capture what was actually done.
12 | 
13 | TBD: it’s unclear if every A/AAAA for NS records should be resolved from root as well. Probably, they should be.
14 | 
15 | TBD: it’s unclear if DNSSEC should be validated. Probably, it should be, but in non-fatal mode following NS-points-to-CNAME logic.
16 | 
17 | ## Examples
18 | 
19 | - some Israeli authoritative DNS servers were dropping requests from Egypt, so domains appeared to be censored by the Egyptian resolver while that was actually not the case
20 | - [delegation misconfiguration for pernambuco.com](https://ooni.torproject.org/post/not-quite-network-censorship/) (NS pointing to CNAME) was not a censorship incident
21 | - AS41843, Russia: sending a query for `rutracker.org` without QNAME minimisation triggers an injected response when the query is sent to root servers
22 | 


--------------------------------------------------------------------------------
/techniques/tq-009-subdomains-of-possibly-censored-domains.md:
--------------------------------------------------------------------------------
 1 | # tq-009 Subdomains of possibly-censored domains
 2 | 
 3 | When a domain is suspected to be censored by a resolver (returns unexpected error
 4 | or unexpected RR for a query) it is interesting to check well-known and random
 5 | subdomains of this domain and compare those measurements to control. They may
 6 | present the same values / errors as the parent domain due to the following reasons:
 7 | 
 8 | - NSes for the domain may be unreachable from the recursive resolver, so `SERVFAIL`
 9 | - domain may have a delegation or DNSSEC misconfiguration, so `SERVFAIL`
10 | - whole domain may be censored via DNS
11 | - domain may have an actual “star” record (or have dynamically generated records)
12 | 
13 | ## Examples
14 | - some Israeli authoritative DNS servers were dropping requests from Egypt
15 | - [delegation misconfiguration for pernambuco.com](https://ooni.torproject.org/post/not-quite-network-censorship/) (NS pointing to CNAME)
16 | - AS61173, Iran — woohoo.torproject.org points to 10.10.34.36
17 | - AS41843, Russia — woo.hoo.rutracker.org points to 5.3.3.17
18 | - *.*.*.*.livejournal.com and *.*.*.*.blogspot.com are CNAMEs to same load balancer
19 | 


--------------------------------------------------------------------------------
/techniques/tq-010-SOAs-and-NSes-for-possibly-censored-domain.md:
--------------------------------------------------------------------------------
 1 | # tq-010 SOAs and NSes for possibly-censored domain
 2 | 
 3 | When a domain is suspected to be censored by a resolver it is interesting to
 4 | capture the resolver’s opinion on NS and SOA records for the domain and every
 5 | parent domain up to TLD<sup>[1](#fn1)</sup>. E.g. login.vk.com needs SOA and NS requests for at least
 6 | login.vk.com and vk.com.
 7 | 
 8 | <a name="fn1">1</a>: It may be easier to query all the domains up to the root as TLD definition is non trivial. See [public suffix list](https://publicsuffix.org/) for inspiration.
 9 | 
10 | TBD: it’s unclear if SOA and NS records for some well-known subdomain are of use or not.
11 | 
12 | ## Examples
13 | - Italy, one ISP was transparent about login.vk.com blocking announcing the ISP in SOA Resource Record for vk.com
14 | - [Russia, Omsk](https://glam-evil.livejournal.com/565583.html), one ISP was blocking youtube.com with an `admin@` email address in SOA record
15 | 


--------------------------------------------------------------------------------
/techniques/tq-011-both-A-and-AAAA-DNS-queries.md:
--------------------------------------------------------------------------------
 1 | # tq-011 Both A and AAAA DNS queries
 2 | 
 3 | It is important to do both A and AAAA DNS queries for all the domains 
 4 | Probes resolve during testing, as sometimes a “censor” will add the second stack (ex. IPv6) for a
 5 | single-stack network endpoint to serve a blockpage.
 6 | 
 7 | ## Examples
 8 | 
 9 | - [AS41843, ER-Telecom, Omsk, Russia](https://github.com/ooni/probe/issues/647#issuecomment-275999682) makes “grani.ru” dual-stack with “5.3.3.17” and “2a02:2698:a002:1::3:17” while it’s “normally” single-stacked with 95.211.178.194
10 | 


--------------------------------------------------------------------------------
/techniques/tq-012-traceroute.md:
--------------------------------------------------------------------------------
 1 | # tq-012 traceroute
 2 | 
 3 | When the IP or IP:Port tuple is suspected to be blocked it is interesting to collect traceroutes to the following endpoints:
 4 | - vary the host within the same /24 subnet keeping port and protocol the same
 5 | - vary the host within a different /24 of the same AS keeping port and protocol the same
 6 | - vary the port amongst the set of well-known<sup>[1](#fn1)</sup> ports keeping the protocol and IP the same
 7 | - vary the port amongst the set of possibly-blacklisted ports (25, 135-139, 445, etc.) keeping the protocol and IP the same
 8 | - vary the protocol comparing TCP to UDP and ICMP
 9 | - vary the network path for UDP<sup>[2](#fn2)</sup> to account for possible ECMP routes
10 | - test a control vantage point<sup>[3](#fn3)</sup> to ensure that the network does not block all the traceroutes
11 | - test a non-routable vantage point to find closest router without default route (likely, having a full view)
12 | 
13 | Traceroutes should capture PTR records for routers generating ICMP errors as
14 | some routers may have RFC1918 IP addresses (so, global PTR record does not make
15 | sense for them and can’t be resolved on ingestion or later).
16 | 
17 | ## (TBD) Privacy considerations
18 | 
19 | - likely we don’t want to store PTR record for the very first hop if it’s RFC1918 address, some OpenWRT routers set it to their hostname and it may have user-supplied PII
20 | - maybe we don’t want to store exact IP address for the very first hop as well
21 | - maybe we don’t want to scrub anything if the user has set their privacy settings to share their IP address
22 | - we likely want to store the IP addresses and PTR record for all the path, that’s valuable information: see APN-Proxy-after-DPI in Uganda, see Tor blocking in Venezuela, see PTRs like “filter-gw.transtelecom...” and “censor-02.obit...” in Russia
23 | - we may want to ask the user to share their IP address if we detect that some case can't properly be bisected on the probe without that information
24 | - we probably don't want to round IPs of whole path down to AS numbers as it kills all PTRs
25 | 
26 | <a name="fn1">1</a>: it may be the set of 80, 443, 22, 110, 143 or other `Safe_ports` from squid, BUT comparing 80 to 443 is not enough. Also, `mtr` uses port 80 by default for TCP.
27 | 
28 | <a name="fn2">2</a>: it’s impossible to do that for TCP without root; there is no ECMP for ICMP
29 | 
30 | <a name="fn3">3</a>: it’s TBD if it should be RIPE Atlas Anchor with well-known network location or some anycasted service like DNS Root
31 | 
32 | ## Examples
33 | - AS8997, Russia blocks www.imperialviolet.org having address 159.203.111.115 due to partial Amazon ban. ICMP traceroute reaches destination, UDP traceroute reaches destination and shows ECMP in action, TCP traceroutes to ports 80 or 443 stop at AS1299 (≈same teleco, different AS), TCP traceroutes to random port and port 22 (ssh) both reach destination
34 | - see proxy notes in report on [Uganda Social Media Tax](https://ooni.torproject.org/post/uganda-social-media-tax/), see also how 443/https and 25/smtp traceroutes differ
35 | - [AS8048, CANTV, Venezuela blocked tor](https://ooni.torproject.org/post/venezuela-internet-censorship/#testing) by IP:Port of ORs on reverse path, “forward” traceroute from the client could not capture that
36 | - see [Leonid’s talk at Chaos Constructions 2017](http://darkk.net.ru/garbage/RIPE-Atlas-OONI-and-CC2017.pdf) for PTR record samples
37 | 


--------------------------------------------------------------------------------
/techniques/tq-013-traceroute-with-payload.md:
--------------------------------------------------------------------------------
 1 | # tq-013 traceroute with payload
 2 | 
 3 | If a TCP<sup>[1](#fn1)</sup> or UDP<sup>[2](#fn2)</sup> request triggers an
 4 | unexpected response it is interesting to do a traceroute with that payload to
 5 | understand if we can point to a network location possibly responsible for the
 6 | unexpected response. It is also interesting to compare the result of the
 7 | experiment to an ordinary traceroute without payload. A UDP experiment should
 8 | account for multi-path and perform several experiments following the logic of
 9 | paris-traceroute<sup>[3](#fn3)</sup>. A TCP experiment can’t do that
10 | without root as it’s impossible to stick to the path (reuse 5-tuple) within
11 | a reasonable timeframe, as it requires waiting for the `TIME_WAIT` timeout (order of
12 | minutes) before performing the next experiment.
13 | 
14 | <a name="fn1">1</a>: DNS/TCP, TLS Client Hello, Tor/TLS, HTTP GET, OpenVPN/TCP and so on
15 | 
16 | <a name="fn2">2</a>: DNS/UDP, OpenVPN/UDP
17 | 
18 | <a name="fn3">3</a>: TBD: does dublin-traceroute provide any useful extension above paris-traceroute?
19 | 
20 | TBD: does `IP_RECVERR` + `IP_TTL` provide enough information for TCP sockets?
21 | 


--------------------------------------------------------------------------------
/techniques/tq-014-TCP-injection-blocking-with-BPF.md:
--------------------------------------------------------------------------------
 1 | # tq-014 TCP injection blocking with BPF
 2 | 
 3 | If a TCP request triggers a "suspected-to-be-injected response" (RST packet, TLS
 4 | Error, HTTP redirect not matching a control) it is interesting to repeat the same
 5 | measurement by blocking the "suspected-to-be-injected response" with a BPF filter<sup>[1](#fn1)</sup> and
 6 | observing if the latency of an error or response changes. The filter can be made to
 7 | match packet data, TCP and IP headers.
 8 | 
 9 | The test may uncover “single-sided on-path injector” if injection blocking
10 | leads to “normal” communication.
11 | 
12 | That is specific to TCP as UDP can wait for another reply to come and both
13 | replies will be delivered to the same socket, UDP does not need special
14 | machinery as it has no L4 state (no connection, no “stream” of bytes to
15 | overwrite). Although, UDP has L3 state (like PMTU), but we haven't 
16 | observed PMTU being used for filtering purposes (yet).
17 | 
18 | It is also possible to attach and detach a drop-all BPF filter using a timer and
19 | try to win the race (drop the injected packets within time window) without
20 | generating BPF code to match content dynamically. It MAY be beneficial to
21 | follow the “race” method when an injected response tries to circumvent the filter
22 | splitting redirect into different TCP segments or IP fragments. We
23 | hope that filters are not going to play _that_ sort of cat-and-mouse game
24 | against an attempt to increase transparency of network filters.
25 | 
26 | See the PoC implementation at [github.com/darkk/rstlss](https://github.com/darkk/rstlss).
27 | 
28 | <a name="fn1">1</a>: `SO_ATTACH_BPF` is available to non-privileged users at least on Linux and Android
29 | 


--------------------------------------------------------------------------------
/techniques/tq-015-packet-headers-exfiltration-with-BPF.md:
--------------------------------------------------------------------------------
 1 | # tq-015 Packet headers exfiltration with BPF
 2 | 
 3 | If UDP or TCP injection can be reliably blocked with a BPF and if the header values
 4 | are static it’s possible to exfiltrate the values through a series of tests
 5 | bisecting binary values for those headers in BPF<sup>[1](#fn1)</sup> filters.
 6 | IPv4 has an 8-bit TTL<sup>[2](#fn2)</sup> and a 16-bit Fragment ID, IPv6 has a Hop
 7 | Limit<sup>[2](#fn2)</sup> (modern pseudonym of TTL).
 8 | 
 9 | TTL exfiltration tricks are not needed for UDP as it has `IP_RECVTTL`.
10 | 
11 | TBD: is IPv6 flow label a useful value? Any IPv6 EHs? What TCP packet bits are useful? Window size? Presence of ACK in RST? TCP Options?
12 | 
13 | TBD: is it possible to exfiltrate the presence of a RST packet _ignored_ by the TCP stack?
14 | 
15 | <a name="fn1">1</a>: unfortunately, eBPF that is [so useful for metadata exfiltration](https://blog.cloudflare.com/epbf_sockets_hop_distance/) is [whitelisted only for the Android System, not for applications](https://github.com/aosp-mirror/platform_bionic/blob/android-9.0.0_r12/libc/SECCOMP_WHITELIST_SYSTEM.TXT)
16 | 
17 | <a name="fn2">2</a>: TTL in an incoming packet is different from the hop distance gathered via a traceroute. Also, “static” TTL may vary a bit due to possible multi-path routes from injecting server.
18 | 
19 | ## Examples
20 | - AS6697, Beltelecom, [Belarus blocks tor](https://ooni.torproject.org/post/belarus-fries-onion/) injecting TCP RST
21 | - Egypt had equipment injecting TCP RST with [static IP Fragment IDs](https://ooni.torproject.org/post/egypt-network-interference/#attempts-to-block-tor)
22 | 


--------------------------------------------------------------------------------
/techniques/tq-016-request-to-blackhole.md:
--------------------------------------------------------------------------------
 1 | # tq-016 Request to blackhole
 2 | 
 3 | If a TCP or UDP request triggers an unexpected response it is interesting to
 4 | send that request to an endpoint that is not expected to reply to that request and/or
 5 | protocol. It may be interesting to preserve the destination port.
 6 | 
 7 | E.g. sending TLS ClientHello to 8.8.8.8:53 TCP and see if it gets a “censored” reply.
 8 | 
 9 | E.g. sending OpenVPN/UDP hello to the IP address that is routable but acts as blackhole.
10 | 
11 | E.g. sending DNS/UDP request to the IP address that is not routed in global BGP table.
12 | 


--------------------------------------------------------------------------------
/techniques/tq-017-request-to-discard-test-helper.md:
--------------------------------------------------------------------------------
 1 | # tq-017 Request to discard test helper
 2 | 
 3 | If a TCP or UDP request triggers an unexpected response it is interesting to send that request to a test helper acting as a discard or dummy server. An unexpected reply highlights possible internet-wide protocol interception. The test helper can do the following tricks:
 4 | 
 5 | - capture TCP/IP metadata<sup>[1](#fn1)</sup> of injected RST or ICMP Unreachable
 6 | - request capture to check for the mere presence of the request<sup>[2](#fn2)</sup> with out of band channel<sup>[3](#fn3)</sup>
 7 | - observe RST injection to distinguish “two-sided” injector from “one-sided”
 8 | - block RST injection together with the client to distinguish “on-path” filter from “in-path”
 9 | - reverse traceroute to the client
10 | - parasitic reverse traceroute with SYN-ACK in case of TCP
11 | - parasitic reverse traceroute with the reply payload
12 | 
13 | <a name="fn1">1</a>: TTL may be the same as the client's or a different one, IP ID, bad Seq/Ack numbers, TCP Window value and so on
14 | 
15 | <a name="fn2">2</a>: the request may be dropped by in-path filter, but on-path filter can't do it
16 | 
17 | <a name="fn3">3</a>: speaking directly to the test-helper may be bad idea when IP is blacklisted for a while after "triggering" the filter
18 | 
19 | ## Example
20 | 
21 | - [Iran blocked www.instagram.com](https://ooni.torproject.org/post/2018-iran-protests-pt2/), parasitic reverse traceroute to the client was different from parasitic reverse traceroute with CommonName=instagram.com ServerCertificate payload
22 | - [AS8048, CANTV, Venezuela blocked tor](https://ooni.torproject.org/post/venezuela-internet-censorship/#testing) by IP:Port of ORs on reverse path, reverse traceroute clearly indicated that
23 | - Rostelecom was banning IP:Port for ~two hours after [MTProto-like handshake](https://github.com/darkk/poormansmtproto/)
24 | - Yota was shaping some connections to ~32 kbit/s with "stateful" shaper that has kept policy for a while even when different service was brought up on that IP:Port (TBD: href to the blog post when published)
25 | 


--------------------------------------------------------------------------------
/techniques/tq-018-request-to-dummy-proxy-test-helper.md:
--------------------------------------------------------------------------------
 1 | # tq-018 Request to dummy proxy test helper
 2 | 
 3 | When we suspect an IP address or domain name to be censored, we can do a
 4 | request to a proxy-like test-helper and see if the request is being blocked. See “request
 5 | to discard test helper” for possible test-helper capabilities. At least the
 6 | following protocols are interesting to test:
 7 | 
 8 | - HTTP proxy call with domain name in `Host` header
 9 | - CONNECT to domain name
10 | - Socks5 connection to domain name
11 | - CONNECT to IP address 
12 | - Socks5 connection to IP address
13 | 
14 | A TCP request triggering a suspected-to-be-injected response should follow if
15 | the client gets a reply from the test-helper. The test helper always gives positive
16 | reply, but never actually relays any traffic. Test helpers should pass “proof of
17 | genuine reply” in headers so a client can distinguish a transparent proxy
18 | intercepting requests to proxies from a genuine dummy proxy.
19 | 
20 | That is slightly different from the “request to discard test helper” technique as
21 | the proxy protocol may be different from a “triggering request”, e.g. it’s possible
22 | to do OpenVPN/TCP hello over CONNECT.
23 | 


--------------------------------------------------------------------------------
/techniques/tq-019-prepend-and-append-to-possibly-censored-domain.md:
--------------------------------------------------------------------------------
 1 | # tq-019 Prepend and append to possibly-censored domain
 2 | 
 3 | When we suspect a domain name to be censored in some protocol request (DNS,
 4 | HTTP, TLS), it is interesting to see if the same protocol request to the same endpoint
 5 | returns a different error when we prepend a possibly-censored domain to some
 6 | nonexistent domain name. It is also interesting to check well-known and random
 7 | subdomains of the possibly-censored domain.
 8 | 
 9 | The logic is to check if the block of evil.co(sic!) observes the same pattern as
10 | **www.**evil.co, **we**evil.co, evil.co**m**, evil.co**.tls-chan.net** and even
11 | **garbag**evil.co**.tls-chan.net**.
12 | 
13 | DNS subdomains (like www.evil.org) are better described in
14 | [tq-009 subdomains of possibly censored domains](./tq-009-subdomains-of-possibly-censored-domains.md).
15 | 


--------------------------------------------------------------------------------
/techniques/tq-020-0x20-hack-for-possibly-censored-domain.md:
--------------------------------------------------------------------------------
 1 | # tq-020 0x20-hack for possibly-censored domain
 2 | 
 3 | When we suspect a domain name to be censored in some protocol request (DNS,
 4 | HTTP, TLS), it is interesting to see if the same protocol request to the same endpoint
 5 | returns different error when we apply the 0x20-hack to the domain.
 6 | 
 7 | TBD: verify that 0x20-hack is not rejected by HTTP and TLS servers.
 8 | 
 9 | ## Examples
10 | - [AS41843, ER-Telecom, Omsk, Russia](https://github.com/ooni/probe/issues/647#issuecomment-275999682) 0x20-hack works for TLS
11 | 


--------------------------------------------------------------------------------
/techniques/tq-021-TLS-cert-recording.md:
--------------------------------------------------------------------------------
 1 | # tq-021 TLS cert recording
 2 | 
 3 | Every TLS connection observing certificate validation anomalies should record
 4 | the full certificate as injection of self-signed certificate is another method for
 5 | transparency of TLS-based protocols censorship. Failure to validate a certificate
 6 | should be recorded, but should not block the connection as some ISPs serve
 7 | blockpages for HTTPS URLs that way.
 8 | 
 9 | OONI Probe may prefer to ship its own CA bundle to avoid trusting “enterprise”
10 | CAs in “enterprise” networks on BYOD devices running OONI Probe.
11 | 
12 | TLS1.3 will require certificate being exported from TLS library as TLS1.3 encrypts it on the wire.
13 | 
14 | TBD: it’s unclear if TLS certificates recording should be always-on feature.
15 | 
16 | TBD: it's unclear if we should or should not submit to [CertificateTransparency](https://www.certificate-transparency.org/how-ct-works)
17 | logs without understanding if the issuing CA is
18 | [Locally-trusted “enterprise” CA](https://chromium.googlesource.com/chromium/src/+/master/net/docs/certificate-transparency.md#Certificate-Transparency-for-Enterprises)
19 | or not. It may be non trivial to implement while shipping our own CA bundle.
20 | 


--------------------------------------------------------------------------------
/techniques/tq-022-test-helper-L7-checks-for-possibly-censored-IPs.md:
--------------------------------------------------------------------------------
1 | # tq-022 Test-helper L7 checks for possibly-censored IPs
2 | 
3 | If a probe gets a set of IPs for the origin servers that it can’t connect to, the
4 | test-helper should do an L5/L7-check for these IPs to see if they serve content for the
5 | domain. The HTTP protocol can do a GET request and fetch the page title / hashsum /
6 | simhash, TLS can issue a ClientHello, fetch the Server Certificate and validate it.
7 | 


--------------------------------------------------------------------------------
/techniques/tq-023-OpenVPN-Control-Hard-Reset.md:
--------------------------------------------------------------------------------
 1 | # tq-023 OpenVPN Control Hard Reset
 2 | 
 3 | It’s possible to use `P_CONTROL_HARD_RESET_CLIENT_V2` packet as “triggering
 4 | request” as there are some DPIs filtering OpenVPN traffic using those packets.
 5 | That is what was observed for both OpenVPN/TCP and OpenVPN/UDP. That is by no means a
 6 | comprehensive OpenVPN test.
 7 | 
 8 | TBD: what does Egypt do? They do something.
 9 | 
10 | ## Example
11 | - OpenVPN notes in report on [Uganda Social Media Tax](https://ooni.torproject.org/post/uganda-social-media-tax/)
12 | 


--------------------------------------------------------------------------------
/techniques/tq-024-TCP-segmentation.md:
--------------------------------------------------------------------------------
 1 | # tq-024 TCP segmentation
 2 | 
 3 | If a TCP request triggers an unexpected response it is interesting to see if the unexpected response persists if the request is segmented into several distinct packets. There are several segmentation strategies:
 4 | 
 5 | - Split the request in the middle of the “badword” (domain, URL, binary fingerprint)
 6 | - Split the request in the beginning of the request to confuse the DPI protocol detection and prevent a reassembly attempt
 7 | - Send the request byte-by-byte with some [packet pacing](https://en.wikipedia.org/wiki/TCP_pacing), maybe considering [LSO/TSO/GSO](https://en.wikipedia.org/wiki/Large_send_offload) 
 8 | - Send the request byte-by-byte waiting for bytes to be ACKed
 9 | - HTTP-specific: some DPI boxes are erratically triggered as soon as they see “Host: censored.org” without seeing “\r\n” (so the request can become “Host: censored.org.ru”)
10 | 
11 | Nagle should be disabled to prevent the kernel from gluing packets together (see TCP_NODELAY).
12 | 
13 | TBD: conduct a series of experiments and collect examples to define reasonable segmentation rules (all of them?)
14 | 
15 | TBD: is `TCP_MAXSEG` useful for this purpose? It may affect segment sizes in both directions as it may be used to announce small MSS to peer.
16 | 
17 | ## Example
18 | - [GoodbyeDPI](https://github.com/ValdikSS/goodbyedpi) and [brdgrd](https://github.com/NullHypothesis/brdgrd) work at different edges of the network with some non-zero success rate for HTTP, HTTPS, Tor/TLS, OpenVPN/TCP
19 | - TBD: Philipp Winter has paper on the bisection search method for discovering the exact fingerprint the censor is using.
20 | 


--------------------------------------------------------------------------------
/techniques/tq-025-saving-bandwidth-in-repeated-HTTP-requests.md:
--------------------------------------------------------------------------------
 1 | # tq-025 Saving bandwidth in repeated HTTP requests
 2 | 
 3 | HTTP tests have a very high-volume response compared to the request size. For example
 4 | response bodies for censored webpages may weigh up to 4MB. If the request is repeated hundreds of times
 5 | due to traceroutes, TCP Segmentation, various HTTP camouflage and other tricks
 6 | it can significantly increase the bill for traffic without a good reason. OONI Probe
 7 | should terminate the connection as soon as it is reasonably sure that the HTTP
 8 | body is not going to differ from the one that it has already recorded.
 9 | 
10 | A limited TCP initcwnd (initial congestion window) at servers ensures that OONI
11 | Probe has one RTT to decide if it wants to continue fetching the data.
12 | 
13 | OONI Probe may also set low `SO_RCVBUF` or `TCP_WINDOW_CLAMP` to do TCP backpressure.
14 | 
15 | TBD: is it possible to drain the kernel buffer without sending an ACK with some
16 | non-zero window? The data that is already in the buffer should probably make its
17 | way into the measurement data.
18 | 


--------------------------------------------------------------------------------
/techniques/tq-026-SNI-free-and-fake-SNI-TLS-ClientHello.md:
--------------------------------------------------------------------------------
 1 | # tq-026 SNI-free and fake-SNI TLS ClientHello
 2 | 
 3 | If a TLS request triggers an unexpected response it is interesting to check the
 4 | role of the [SNI field](https://tools.ietf.org/html/rfc3546#section-3.1) in
 5 | triggering the behavior. SNI may be both “expectedly” absent (e.g.
 6 | https://1.1.1.1) and “expectedly” present (TLS1.2, TLS1.3 without
 7 | [ESNI](https://blog.cloudflare.com/encrypted-sni/)). The outcome of the following
 8 | tricks may be inspected:
 9 | 
10 | - absent SNI may be filled with a related<sup>[1](#fn1)</sup> domain (e.g. “one.one.one.one” for “1.1.1.1”)
11 | - absent SNI may be filled with an unrelated domain (e.g. “example.org”)
12 | - present SNI may be dropped
13 | - present SNI may be replaced with a related<sup>[1](#fn1)</sup> domain that also has a risk to be censored
14 | - present SNI may be replaced with an unrelated domain, but control measurement should check for “expected” server reply in this case
15 | 
16 | <a name="fn1">1</a>: a domain “related” to the IP address may be extracted from a dataset like the [Rapid7 Forward DNS](https://opendata.rapid7.com/sonar.fdns_v2/), some data may also be extracted from the TLS certificate presented via a SNI-free query
17 | 
18 | ## Examples
19 | - AS8997, Russia blocks requests to https://1.1.1.1 without SNI field, but requests with SNI=one.one.one.one pass
20 | - AS8997, Russia blocks requests to https://rutracker.org when SNI field exists, but requests to the same endpoint without an SNI field pass
21 | - [Iran blocked www.instagram.com](https://ooni.torproject.org/post/2018-iran-protests-pt2/) using information both from SNI and unencrypted Server Certificate, these two cases have different timing patterns
22 | 


--------------------------------------------------------------------------------
/techniques/tq-027-stuffed-TLS-ClientHello.md:
--------------------------------------------------------------------------------
 1 | # tq-027 Stuffed TLS ClientHello
 2 | 
 3 | If a TLS request triggers an unexpected response it is interesting to try
 4 | “stuffing” tricks to check if the SNI field placed in a separate packet triggers a
 5 | different behavior. Possible fields for "stuffing" are:
 6 | 
 7 | - Session ID, 256 bytes
 8 | - Cipher Suites, ~16 Kbytes
 9 | - Compression Methods, 256 bytes
10 | - Extensions (especially Padding), ~16 Kbytes
11 | 
12 | The whole ClientHello packet is limited to 16 Kbytes.
13 | 
14 | This technique is different from [tq-024 TCP Segmentation](./tq-024-TCP-segmentation.md)
15 | as it does not produce undersized packets.
16 | 


--------------------------------------------------------------------------------
/techniques/tq-028-stuffed-HTTP-camouflage.md:
--------------------------------------------------------------------------------
 1 | # tq-028 Stuffed HTTP camouflage
 2 | 
 3 | If an HTTP request triggers an unexpected response it is interesting to try
 4 | several “camouflage stuffing” tricks to check if they trigger different
 5 | behavior:
 6 | 
 7 | - append the TAB (“\t”) character to the Host header before “\r\n”
 8 | - prepend a LF (“\n”) to `GET` in the request line
 9 | - append a DOT (“.”) to the domain in the Host header
10 | - an extra SP (“ ”) after `GET` in the request line
11 | - newline-space (“\r\n ”) before the domain in the Host header
12 | - “host” or “hoSt” header instead of “Host” header
13 | - dropping the usual space after the colon in “Host:example.com”
14 | - adding a long “X-Stuff: AAA...AAA” header to push the “Host” header into another packet
15 | - “\n” instead of “\r\n”
16 | - `GET / HTTP/1.1\r\nHost: blocked.com\r\n\r\nHost: allowed.com` from _Yadav-Sinha-Gosain_ paper
17 | - …
18 | 
19 | These tricks may reveal the existence of several different DPI boxes inspecting the
20 | traffic one after another having different HTTP parser flaws.
21 | 
22 | The list of tricks is incomplete and, moreover, may break some HTTP servers.
23 | E.g. “Host:\r\n<SP><SP>example.com\r\n” is valid, but lighttpd-1.4 fails to
24 | parse it and throws “400 Bad Request” while Django built-in HTTP server just
25 | crashes.
26 | 
27 | ## Examples
28 | - [Turkmenistan](https://ooni.torproject.org/post/tab-tab-come-in/) did not handle appended \t and prepended \n properly
29 | - Is there some interesting data from [http_filtering_bypassing.py](https://github.com/ooni/probe-legacy/blob/master/ooni/nettests/experimental/http_filtering_bypassing.py) ?
30 | - Some of these tricks are claimed to be successful by [GoodbyeDPI](https://github.com/ValdikSS/GoodbyeDPI)
31 | - Yadav-Sinha-Gosain &mdash; [Where The Light Gets In: Analyzing Web Censorship Mechanisms in India](https://dl.acm.org/citation.cfm?id=3278555)
32 | 


--------------------------------------------------------------------------------
/techniques/tq-029-staying-alive-HTTP-camouflage.md:
--------------------------------------------------------------------------------
 1 | # tq-029 Staying-alive HTTP camouflage
 2 | 
 3 | If an HTTP request triggers an unexpected response it is interesting to see if
 4 | it’ll trigger a response if it follows a “good” request in the same Keep-Alive
 5 | connection. It’s non-trivial to define a “good” request:
 6 | http://censored.org/robots.txt may still be filtered because of the domain and
 7 | webserver MAY drop connection after the /robots.txt request to the domain that is
 8 | not served, but [Apache and Chrome are okay](https://stackoverflow.com/questions/42717719/http-keep-alive-to-a-different-host) with the trick.
 9 | 
10 | This is related to [tq-018 Request to dummy proxy](./tq-018-request-to-dummy-proxy-test-helper.md).
11 | 


--------------------------------------------------------------------------------
/techniques/tq-030-fingerprinting-requests-for-transparent-proxies.md:
--------------------------------------------------------------------------------
1 | # tq-030 Fingerprinting requests for transparent proxies
2 | 
3 | If we suspect that a transparent proxy is handling a DNS or HTTP request on behalf of the origin server, then we can send several fingerprinting requests to get more knowledge about the specific proxy implementation:
4 | 
5 | - HTTP Squid has “[GET cache_object://localhost/info HTTP/1.1\r\n](https://github.com/ooni/probe-legacy/blob/master/ooni/nettests/experimental/http_trix.py)” request (single “\r\n”!)
6 | - DNS servers may answer `CHAOS` `TXT` queries for “version.bind”, “hostname.bind”, “id.server” and “version.server”, [RIPE Atlas mass-collects that info](https://atlas.ripe.net/docs/built-in/)
7 | 


--------------------------------------------------------------------------------
/techniques/tq-031-attempt-ecn.md:
--------------------------------------------------------------------------------
 1 | # tq-031 Attempt use of Explicit Congestion Notification
 2 | 
 3 | When a transparent HTTP or TLS proxy is in the path between the probe and the
 4 | target server, an attempt to use explicit congestion notification (ECN) may
 5 | reveal its presence in the following ways:
 6 | 
 7 | 1. The connection fails due to unknown TCP flags being set
 8 | 2. The connection fails due to unknown IP flags being set
 9 | 3. ECN is not negotiated where expected, or is negotiated where it is not
10 |    expected
11 | 
12 | ## Methodology
13 | 
14 | 1. Fetch a webpage from a target server using either HTTP or HTTPS, negotiating
15 |    ECN.
16 | 2. If the first connection failed, retry the request without attempting to
17 |    negotiate ECN to confirm that this was responsible for the connectivity
18 |    failure, and not just that the host was down
19 | 
20 | ## Implementation issues
21 | 
22 | - To use ECN on Linux, it is necessary to enable a sysctl switch in the kernel
23 |   which would typically require root access. This also affects *all* new
24 |   connections and so this test should not be scheduled to run in parallel with
25 |   other tests. A userland TCP stack would be able to implement per-connection
26 |   ECN logic, but would require raw sockets.
27 | - To confirm that ECN usage was successful it is necessary to perform a packet
28 |   capture, while connectivity failures can be confirmed from the application
29 |   layer feedback alone
30 | - Analysis of raw packets, and also setting the flags for TCP negotiation,
31 |   could be performed using an eBPF program which could be attached from
32 |   userspace. Some information on this can be found in [an LWN
33 |   article](https://lwn.net/Articles/740157/).
34 | - It would be possible to attempt the use of multiple protocol features on the
35 |   first connection, falling back to trying each individually only if the first
36 |   connection fails. In some rare cases the use of a protocol feature can brick
37 |   the CPE or upstream middleboxes but this should either happen immediately or
38 |   never happen.
39 | 
40 | ## Examples
41 | 
42 | - Censorship infrastructure was discovered using this technique in the EE
43 |   mobile operator network in the United Kingdom. See: I. R. Learmonth, A. Lutu,
44 |   G. Fairhurst, D. Ros and Ö. Alay, "[Path transparency measurements from the
45 |   mobile edge with
46 |   PATHspider](https://iain.learmonth.me/stuff/pubs/PATHspiderMobile2017.pdf),"
47 |   *2017 Network Traffic Measurement and Analysis Conference (TMA)*, Dublin, 2017,
48 |   pp. 1-6. doi:10.23919/TMA.2017.8002922
49 | 
50 | ## References
51 | 
52 | - [RFC3168: The Addition of Explicit Congestion Notification (ECN) to IP](https://tools.ietf.org/html/rfc3168)
53 | 
54 | ## Implementations
55 | 
56 | - An independent implementation exists for this technique in PATHspider's [ECN
57 |   plugin](https://pathspider.readthedocs.io/en/latest/plugins/ecn.html)
58 | 


--------------------------------------------------------------------------------
/techniques/tq-032-attempt-h2-upgrade.md:
--------------------------------------------------------------------------------
 1 | # tq-032 Attempt upgrade to H2
 2 | 
 3 | When a transparent HTTP proxy is in the path between the probe and the target
 4 | server, an attempt to upgrade to H2 may reveal its presence in one of the
 5 | following ways:
 6 | 
 7 | 1. The connection fails due to an unknown header being present
 8 | 2. The header is passed successfully to the server but the connection fails
 9 |    because the reply using h2c was not understood by the proxy
10 | 3. A server that is expected to negotiate h2 or h2c does not, or a server that
11 |    is not expected to negotiate h2 or h2c does
12 | 
13 | When a transparent TLS proxy is in the path, it may reveal its presence in
14 | one of the following ways:
15 | 
16 | 1. Stripping the ALPN option which would cause h2 to not be negotiated where
17 |    expected
18 | 2. The connection fails because the ALPN option is unknown
19 | 
20 | ## Methodology
21 | 
22 | 1. Fetch a webpage from a target server using either HTTP or HTTPS, with an H2
23 |    upgrade mechanism
24 | 2. If the connection fails, retry the request without the use of the upgrade
25 |    mechanism to confirm that this was responsible for the connectivity failure,
26 |    and not just that the host was down
27 | 
28 | ## Implementation issues
29 | 
30 | - It would be possible to attempt the use of multiple protocol features on the
31 |   first connection, falling back to trying each individually only if the first
32 |   connection fails. In some rare cases the use of a protocol feature can brick
33 |   the CPE or upstream middleboxes but this should either happen immediately or
34 |   never happen.
35 | 
36 | ## Examples
37 | 
38 | - None yet
39 | 
40 | ## References
41 | 
42 | - [Protocol upgrade mechanism](https://developer.mozilla.org/en-US/docs/Web/HTTP/Protocol_upgrade_mechanism) at MDN web docs
43 | - [RFC7540: Hypertext Transfer Protocol Version 2 (HTTP/2)](https://tools.ietf.org/html/rfc7540)
44 | 
45 | ## Implementations
46 | 
47 | - An independent implementation exists for this technique in PATHspider's [H2
48 |   plugin](https://pathspider.readthedocs.io/en/latest/plugins/h2.html)
49 | 


--------------------------------------------------------------------------------
/techniques/tq-033-attempt-tfo.md:
--------------------------------------------------------------------------------
 1 | # tq-033 Attempt use of TCP Fast Open
 2 | 
 3 | When a transparent HTTP or TLS proxy is in the path between the probe and the
 4 | target server, an attempt to use TCP fast open may reveal its presence in
 5 | the following ways:
 6 | 
 7 | 1. The connection fails due to an unknown TCP option being present
 8 | 2. The second connection fails due to data being present on the TCP SYN packet
 9 | 3. TCP fast open is not negotiated where expected, or is negotiated where it is
10 |    not expected
11 | 
12 | ## Methodology
13 | 
14 | 1. Fetch a webpage from a target server using either HTTP or HTTPS, using TCP
15 |    fast open to establish a TFO cookie
16 | 2. If successful, fetch a webpage from the same server, again using TCP fast
17 |    open (this time will have data on the SYN)
18 | 3. If the first connection failed, retry the request without the use of the TCP
19 |    fast open to confirm that this was responsible for the connectivity failure,
20 |    and not just that the host was down
21 | 
22 | ## Implementation issues
23 | 
24 | - To confirm that TCP fast open usage was successful (negotiation and use of
25 |   TFO cookie) it is necessary to perform a packet capture, while failures
26 |   can be confirmed from the application layer feedback alone
27 | - Analysis of raw packets could be performed using an eBPF program which could
28 |   be attached from userspace. Some information on this can be found in [an LWN
29 |   article](https://lwn.net/Articles/740157/).
30 | - It would be possible to attempt the use of multiple protocol features on the
31 |   first connection, falling back to trying each individually only if the first
32 |   connection fails. In some rare cases the use of a protocol feature can brick
33 |   the CPE or upstream middleboxes but this should either happen immediately or
34 |   never happen.
35 | 
36 | ## Examples
37 | 
38 | - None yet for TCP fast open, although this technique uses similar principles
39 |   to discover middleboxes as presented in: I. R. Learmonth, A. Lutu, G.
40 |   Fairhurst, D. Ros and Ö. Alay, "[Path transparency measurements from the mobile
41 |   edge with
42 |   PATHspider](https://iain.learmonth.me/stuff/pubs/PATHspiderMobile2017.pdf),"
43 |   *2017 Network Traffic Measurement and Analysis Conference (TMA)*, Dublin, 2017,
44 |   pp. 1-6. doi:10.23919/TMA.2017.8002922
45 | 
46 | ## References
47 | 
48 | - [RFC7413: TCP Fast Open](https://tools.ietf.org/html/rfc7413)
49 | 
50 | ## Implementations
51 | 
52 | - An independent implementation exists for this technique in PATHspider's [TFO
53 |   plugin](https://pathspider.readthedocs.io/en/latest/plugins/tfo.html)
54 | 


--------------------------------------------------------------------------------
/techniques/tq-034-block-udp.md:
--------------------------------------------------------------------------------
  1 | # tq-034 UDP blocking and manipulation
  2 | 
  3 | UDP is one of the two most widely used data transport protocols.
  4 | Unfortunately, some ISPs or local network firewalls may decide to completely block UDP.
  5 | By definition this is against [network neutrality](https://en.wikipedia.org/wiki/Net_neutrality).
  6 | In addition, with the upcoming [HTTP/3](https://en.wikipedia.org/wiki/HTTP/3)
  7 | protocol being based on [QUIC protocol](https://quicwg.org/) which itself is
  8 | built on top of UDP, it is really important that UDP works.
  9 | 
 10 | ## Measurements
 11 | 
 12 | The questions of interest, described further in this document, are whether
 13 | 
 14 | * UDP works on any random port,
 15 | * UDP works on port `443`,
 16 | * UDP datagrams were not modified,
 17 | * UDP datagrams were not duplicated.
 18 | 
 19 | ## Methodology
 20 | 
 21 | 1. Run a UDP server.
 22 | 2. Send UDP datagrams to the server.
 23 | 3. Server echoes received datagrams.
 24 | 4. Client validates the response.
 25 | 
 26 | NOTE, the tests MUST be excluded on port `53` which is used for DNS queries.
 27 | DNS by itself might be manipulated in different ways and deserves a separate
 28 | technique of its own. Hence it is out of scope of this document.
 29 | 
 30 | ### UDP Server
 31 | 
 32 | The same UDP server MAY be used for all measurements. UDP server is expected to
 33 | be listening on
 34 | 
 35 | 1. multiple random ports from unprivileged port space - `1025..65535`. Say
 36 |    3 arbitrarily chosen ports;
 37 | 2. port `443`. HTTP/3 will be using UDP port `443`. So it is important to
 38 |    specifically test, if this port is not blocked.
 39 | 
 40 | All the server SHOULD do is echo received datagrams to the client that sent
 41 | them.
 42 | 
 43 | ### UDP client
 44 | 
 45 | The UDP test flow is as follows
 46 | 
 47 | 1. For each port in `[443, rand_port1, rand_port2, rand_port3]` send
 48 |    multiple datagrams. One datagram is not enough because UDP
 49 |    datagrams might be naturally lost on their way to the
 50 |    destination.
 51 | 2. Generate random datagram of size <= 1400 bytes - no more than the usual
 52 |    [MTU](https://en.wikipedia.org/wiki/Maximum_transmission_unit#IP_MTUs_for_common_media)
 53 |    size. Each datagram must be random (unique), otherwise we won't be able to
 54 |    detect UDP datagram duplication.
 55 | 3. Send the generated datagram to the server. Store the hashes of sent data
 56 |    to specific endpoints (IP:port pair).
 57 | 4. Wait for up to 10 seconds for the response before declaring datagram as lost.
 58 | 5. In the meantime, keep receiving incoming responses and validate them by
 59 |    hashing the payload of the datagram:
 60 | 5.1. check if echoed datagram was already sent, if it was not, that means
 61 |      the tested network is modifying UDP datagrams;
 62 | 5.2. if it was, increase the count of received datagrams by its hash;
 63 | 6. Finally, traverse the collected data to form an appropriate measurement.
 64 |    The resulting measurement MUST always separate `443` from random ports, e.g.:
 65 | ```json
 66 | {
 67 |   "443": "timeout",
 68 |   "random": "ok",
 69 | }
 70 | ```
 71 | 
 72 | Pseudocode:
 73 | 
 74 | ```python
 75 | sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
 76 | # Keeps track of sent/received datagrams.
 77 | sent_to = {}
 78 | 
 79 | # Send multiple datagrams to different ports
 80 | for port in [443, rand_port1, rand_port2, rand_port3]:
 81 |     server_addr = (server_ip, port)
 82 |     sent_to[server_addr] = {}
 83 | 
 84 |     # Send 5 different datagrams to the same port
 85 |     for _ in range(5):
 86 |         data = rand_data(of_size=1400)
 87 | 
 88 |         sock.sendto(data, server_addr)
 89 |         # Initial result assigned to every request in case no response is
 90 |         # received.
 91 |         sent_to[server_addr][hash(data)] = Result.TIMEOUT
 92 | 
 93 | while not timeout(10 secs):
 94 |     (data, sender_addr) = sock.recvfrom()
 95 |     if hash(data) in sent_to[sender_addr]:
 96 |         if sent_to[sender_addr][hash(data)] == Result.TIMEOUT:
 97 |             sent_to[sender_addr][hash(data)] = Result.RECEIVED
 98 |         else:
 99 |             sent_to[sender_addr][hash(data)] = Result.DUPLICATE
100 |     else:
101 |         # We did not send this datagram to this endpoint
102 |         sent_to[sender_addr][hash(data)] = Result.MODIFIED
103 | 
104 | # Traverse sent_to and format the result
105 | ```
106 | 


--------------------------------------------------------------------------------
/techniques/tq-034-request-dnssec-records.md:
--------------------------------------------------------------------------------
 1 | tq-034 Request DNSSEC-related records from resolver
 2 | ===================================================
 3 | 
 4 | When a network operator provides a DNS resolver for clients either explicitly
 5 | or through the use of injection or hijacking, it may also prevent the request
 6 | of DNSSEC-related records. These include DS, DNSKEY, NSEC, and RRSIG. This
 7 | would prevent the client's ability to verify records served by the operator's
 8 | resolver and leave the client vulnerable to spoofed records.
 9 | 
10 | A resolver may support DNSSEC while a middlebox that alters responses does not
11 | which would result in the signature not being valid.
12 | 
13 | Methodology
14 | -----------
15 | 
16 | 1. Attempt to retrieve the full chain of DNSSEC signatures back to the root
17 |    zone for a hostname that is known to have valid DNSSEC signatures set up
18 | 2. If it was possible to retrieve the full chain of signatures, validate them
19 | 
20 | Implementation issues
21 | ---------------------
22 | 
23 | This test would require performing a "stub" resolution, where the requests
24 | that would have been made to the authoritative servers would instead be
25 | made to the resolver.
26 | 
27 | Examples
28 | --------
29 | 
30 | - None as seen in the wild for deliberate censorship reasons
31 | - Older resolvers, especially appliances that may now be end-of-life, may not
32 |   have support for these records
33 | 
34 | References
35 | ----------
36 | 
37 | [RFC4034: Resource Records for the DNS Security Extensions](https://www.ietf.org/rfc/rfc4034.txt)
38 | 


--------------------------------------------------------------------------------
/techniques/tq-035-block-quic.md:
--------------------------------------------------------------------------------
 1 | # tq-035 Blocking QUIC
 2 | 
 3 | [QUIC](https://quicwg.org/) is a new transport protocol built on UDP. QUIC is
 4 | a stream based protocol that provides congestion control, connection
 5 | multiplexing, IP migration, data encryption, etc. In addition, it is expected
 6 | to improve human rights on the Internet<sup>[1](#fn1)</sup>.
 7 | HTTP is the fundamental Web protocol and QUIC is going to be used for the next
 8 | version of HTTP - [HTTP/3](https://en.wikipedia.org/wiki/HTTP/3).
 9 | To prepare for the future of the Web it is useful to understand
10 | how accessible QUIC is today. Unfortunately, there already exist discussions
11 | on blocking it among system administrators and ISPs <sup>[2](#fn2)</sup>
12 | <sup>[3](#fn3)</sup> <sup>[4](#fn4)</sup>.
13 | 
14 | ## Methodology
15 | 
16 | 1. Run a QUIC server listening on 443 and some random port(s).
17 | 2. Send some random data over QUIC to all ports.
18 | 3. Make the server simply echo the data.
19 | 4. Wait for the data from the server and check if it matches the originally
20 |    sent one.
21 | 
22 | ## References
23 | 
24 | 1. <a name="fn1">https://tools.ietf.org/html/draft-martini-hrpc-quichr-00</a>
25 | 2. <a name="fn2">https://www.reddit.com/r/paloaltonetworks/comments/6yqpjf/anyone_blocking_quic</a>
26 | 3. <a name="fn3">https://www.reddit.com/r/networking/comments/9wriid/http3quic_and_the_yawning_abysmal_division</a>
27 | 4. <a name="fn4">https://www.reddit.com/r/k12sysadmin/comments/9w9jdr/quic_protocol_to_block_or_not_to_block</a>
28 | 


--------------------------------------------------------------------------------
/techniques/tq-999-backlog.md:
--------------------------------------------------------------------------------
 1 | # tq-999 Backlog
 2 | 
 3 | The following techniques are out of scope of the currently described techniques as they’re hard to ship to our current userbase.
 4 | 
 5 | These tricks require non-trivial network setup or elevated privileges on users’ system:
 6 | 
 7 | - IP fragmentation (both for UDP and TCP). It needs root, but it is fun for sure: http://www.dtic.mil/dtic/tr/fulltext/u2/a391565.pdf, https://monkey.org/~dugsong/fragroute/
 8 | - TCP reordering
 9 | - overlapping TCP segments
10 | - exfiltration of protocol metadata from packet capture
11 | 
12 | These tricks are considered too labor-intensive for us to properly implement currently:
13 | 
14 | - Good ESNI support. ESNI is just rolled out, censors’ move is unclear
15 | - Ancient SSL test to see if a middlebox “allows” a handshake with pre-historical version of TLS protocol unlike the “expected” server behind same IP address
16 | - TLS1.3 / TLS1.2 contrast to check if ServerCertificate triggers “censors” (TLS1.3 encrypts ServerCertificate on the wire)
17 | - [SNI Proxy test helper](https://github.com/dlundquist/sniproxy). While it's useful to have a test-helper on an "unrelated" IP address returning valid (proxied) or self-signed certificate for any domain we want, the usefulness of this data will be declining quite rapidly during TLS1.3 rollout in Browsers and Webservers as TLS1.3 encrypts ServerCertificate on the wire and needs no additional configuration unlike ESNI. Also, we’ve not seen (yet) a DPI doing both ServerCertificate-based filtering and Certificate chain validation, so that’s not a useful bit of data (yet).
18 | - Throttling detection for HTTP and HTTPS: one way to measure it is to find some web asset that has a reasonable size and measure the bandwidth with precise timing information while downloading that web asset, the baseline for bandwidth may be NDT
19 | - Analysis of collected TCP_INFO samples
20 | - Analysis of collected NDT and DASH samples
21 | - Detection of protocol-based blocking and throttling of UDP- and TCP-based VPNs using userspace TCP/IP stack (instead of tap/tun)
22 | - Detection of protocol-based blocking and throttling of UDP- and TCP-based VPNs using rootless Android helper application establishing a true VPN tunnel to a specific IPv6 subnet
23 | - Handling “stateful” filter that “learns” network endpoints (like one of ISPs in China blocking access to all websites after visiting “bad” URL, like one of Russian ISPs banning IP:Port temporarily after MTProto-like handshake, like one of ISPs in Turkmenistan blocking residential connection for a while after an attempt to use VPN)
24 | 
25 | - EICAR request & [`Server: EICAR`](https://twitter.com/__phw/status/1039596771993776128) HTTP header are fun and may trigger antivirus middleboxes, but it’s unclear if those are middleboxes OONI is looking for
26 | - [ReQrypt](https://reqrypt.org/reqrypt.html)-like tricks passing requests out-of-band to see if reply is censored
27 | - all the stuff that [Netalyzr](https://trac.torproject.org/projects/tor/wiki/doc/OONI/CensorshipDetectionTools/Netalyzr) does
28 | 
29 | These tricks have unclear value, their value should be confirmed by examples (experiments):
30 | - DNS “whoami” test-helper responding with requester’s IP address to every A / AAAA / TXT(?) query. It may help to capture transparent DNS proxies. It’s unclear if it is **significantly** better than publicly available “o-o.myaddr.l.google.com”, “whoami.akamai.net” and “myip.opendns.com” available via resolver{1,2}.opendns.com.
31 | 


--------------------------------------------------------------------------------