├── README.md ├── config.sh ├── count_retransmissions.py ├── find_global_ipid ├── analyse_sequence.c └── ipid_prober.sh ├── get_relays.sh ├── log.sh ├── plotting ├── icons │ ├── destination_icon.png │ ├── hybrid_icon.png │ └── source_icon.png ├── plot_scan_data.py └── pygmaps.py ├── probe_host.sh ├── probing_wrapper.sh ├── rstscan.sh ├── synscan.sh ├── traceroute.sh └── traceroute_host.sh /README.md: -------------------------------------------------------------------------------- 1 | Overview 2 | ======== 3 | 4 | This repository contains a set of scripts which implement a number of TCP-based 5 | network measurement tests. These tests were written for a [research 6 | project analysing the Great Firewall of 7 | China](http://cs.unm.edu/~royaen/projects/gfw/). 8 | In particular, the following tests are supported. 9 | 10 | * TCP backlog scan which probes a Linux machine's SYN backlog in order to 11 | learn how many half-open TCP connections it currently has. This is 12 | implemented by `synscan.sh` and `rstscan.sh`. 13 | * Traceroute script which runs a number of traceroutes to a given host. This 14 | is implemented by `traceroute.sh` and `traceroute_host.sh`. 15 | * All tests are wrapped by the script `probing_wrapper.sh` which invokes 16 | `probe_host.sh`. 17 | 18 | Feedback 19 | ======== 20 | 21 | Contact: Philipp Winter 22 | OpenPGP fingerprint: `B369 E7A2 18FE CEAD EB96 8C73 CF70 89E3 D7FD C0D0` 23 | -------------------------------------------------------------------------------- /config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2014 Philipp Winter 4 | 5 | # Should be "uncensored" for the machine outside the GFW. 6 | prober_type="censored" 7 | 8 | # The (spoofed) IP address of the censored machine behind the GFW. 9 | spoofed_addr="" 10 | -------------------------------------------------------------------------------- /count_retransmissions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright 2013, 2014 by Philipp Winter 4 | # 5 | # Reads the given .pcap file and determines the subsequent SYN/ACK 6 | # retransmissions after a SYN segment was sent to a service. 7 | 8 | import sys 9 | 10 | from scapy.all import * 11 | from scapy.utils import rdpcap 12 | 13 | class Connection( object ): 14 | 15 | """ 16 | Represents a TCP connection attempt. 17 | """ 18 | 19 | def __init__( self, syn): 20 | self.syn = syn 21 | self.syn_acks = [] 22 | 23 | def add_syn_ack( self, syn_ack ): 24 | self.syn_acks.append(syn_ack) 25 | 26 | def get_isn( self ): 27 | return self.syn[TCP].seq 28 | 29 | def extract_connections( pkts ): 30 | """ 31 | Iterate over .pcap and create Connection objects. 32 | """ 33 | 34 | SYN = 2 35 | SYN_ACK = 18 36 | 37 | # Maps ISNs to Connection objects. 38 | connections = {} 39 | 40 | for pkt in pkts: 41 | 42 | flags = pkt[TCP].flags 43 | 44 | # Add a new SYN segment to our hash table. 45 | if flags == SYN: 46 | connections[pkt[TCP].seq] = Connection(pkt) 47 | 48 | # Add a SYN/ACK response to the respective SYN segment. 49 | elif flags == SYN_ACK: 50 | # If the key doesn't exist, the SYN/ACK is unsolicited. 51 | if connections.has_key(pkt[TCP].ack - 1): 52 | conn = connections[pkt[TCP].ack - 1] 53 | conn.add_syn_ack(pkt) 54 | 55 | return connections 56 | 57 | def has_exponential_backoff( connection ): 58 | """ 59 | Check if the given connection used exponential backoff for retransmissions. 
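    With Linux's default of five SYN/ACK retransmissions and exponential
    backoff between them, the SYN/ACKs for an unanswered SYN arrive roughly
    0, 1, 3, 7, 15 and 31 seconds after the SYN was sent.  Each interval in
    backoff_model below brackets one of these offsets with a little slack; a
    connection passes the check if every observed SYN/ACK falls into one of
    the intervals.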
60 | """ 61 | 62 | backoff_model = ((0, 1.125), (1, 2.27), (3, 4.5), (7, 9), (15, 17), (31, 33)) 63 | 64 | backoffs = [] 65 | 66 | start_time = connection.syn.time 67 | 68 | good = True 69 | 70 | # Iterate over all SYN/ACKs inside a connection. 71 | for syn_ack in connection.syn_acks: 72 | 73 | time_diff = syn_ack.time - start_time 74 | backoffs.append(time_diff) 75 | 76 | fit = [upper <= time_diff <= lower for upper, lower in backoff_model] 77 | if not sum(fit): 78 | good = False 79 | 80 | if not good: 81 | print backoffs 82 | return False 83 | 84 | return True 85 | 86 | def extract_retransmissions( connections ): 87 | 88 | start_time = None 89 | 90 | # Amount of SYN/ACK retransmissions when backlog is < 50% full. 91 | orig_retrans = [] 92 | 93 | # Amount of SYN/ACK retransmissions when backlog is > 50% full. 94 | scan_retrans = [] 95 | 96 | # Time between backlog scan and backlog size estimation. 97 | TIME_THRESHOLD = 1.5 98 | 99 | # Now sort the dictionary based on the timestamps. 100 | sorted_connections = sorted(connections.items(), 101 | key = lambda pkt: pkt[1].syn.time) 102 | 103 | i = 1 104 | syns = synacks = max_synacks = 0 105 | 106 | # Extract SYN/ACK retransmissions for every connection. 107 | for (_, conn) in sorted_connections: 108 | 109 | # When was the first SYN sent? 110 | if start_time is None: 111 | start_time = conn.syn.time 112 | 113 | syn_ack_count = len(conn.syn_acks) 114 | if conn.syn.time > (start_time + TIME_THRESHOLD): 115 | orig_retrans.append(syn_ack_count) 116 | else: 117 | scan_retrans.append(syn_ack_count) 118 | 119 | print "[%.4f] SYN segment #%d received %d SYN/ACKs." % \ 120 | (conn.syn.time, i, syn_ack_count) 121 | 122 | synacks += syn_ack_count 123 | if syn_ack_count == 6: 124 | max_synacks += 1 125 | 126 | syns += 1 127 | i += 1 128 | 129 | return (orig_retrans, scan_retrans) 130 | 131 | def analyse_retransmissions( orig_retrans, scan_retrans ): 132 | """ 133 | Print high-level scan statistics used to filter and analyse the data. 134 | """ 135 | 136 | # Calculate average number of SYN/ACK retransmissions during backlog scan. 137 | syn_ack_mean = (0 if len(scan_retrans) == 0 \ 138 | else sum(scan_retrans) / float(len(scan_retrans))) 139 | print "On average, we received %.3f SYN/ACKs for every SYN." % syn_ack_mean 140 | 141 | # Machines whose original backlog is not as expected have to be dumped. 142 | if len(orig_retrans): 143 | print "Max original backlog: %d (%s)." % (max(orig_retrans), 144 | orig_retrans) 145 | 146 | # Machine was probably offline. 147 | if syn_ack_mean == 0: 148 | verdict = "ERR" 149 | 150 | # 3.5 is our threshold. 151 | elif (syn_ack_mean < 3.5) and (3 in scan_retrans): 152 | verdict = "!RST" if "rst" in sys.argv[1] else "SYN" 153 | else: 154 | verdict = "RST" if "rst" in sys.argv[1] else "!SYN" 155 | 156 | print "Verdict: %s" % verdict 157 | print scan_retrans 158 | 159 | def process_file( file_name ): 160 | 161 | connections = extract_connections(rdpcap(file_name)) 162 | 163 | orig_retrans, scan_retrans = extract_retransmissions(connections) 164 | 165 | for connection in connections.values(): 166 | if not has_exponential_backoff(connection): 167 | print "Connections don't follow exponential backoff." 
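    # Note that a connection failing the backoff check is only reported here;
    # it is not excluded from the retransmission statistics computed above.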
168 | 169 | analyse_retransmissions(orig_retrans, scan_retrans) 170 | 171 | return 0 172 | 173 | if __name__ == "__main__": 174 | 175 | if len(sys.argv) != 2: 176 | print >> sys.stderr, "\nUsage: %s PCAP_FILE\n" % sys.argv[0] 177 | exit(1) 178 | 179 | exit(process_file(sys.argv[1])) 180 | -------------------------------------------------------------------------------- /find_global_ipid/analyse_sequence.c: -------------------------------------------------------------------------------- 1 | /* This tool tells you whether a given sequence of consecutive IPID values is 2 | * likely to come from a global IPID machine (return code 0) or not (return 3 | * code 1). 4 | * 5 | * You can compile the tool as follows: 6 | * $ gcc -o analyse_sequence analyse_sequence.c 7 | * 8 | * The input is read from stdin. Every line must be an integer in the 9 | * range 0 <= n <= 65535. Examples: 10 | * 11 | * $ echo -e "10\n11\n12\n13" | ./analyse_sequence 12 | * Given IPID sequence likely to be global. 13 | * $ echo $? 14 | * 0 15 | * 16 | * $ echo -e "10\n11\n12\n50" | ./analyse_sequence 17 | * Given IPID sequence probably *not* global. 18 | * $ echo $? 19 | * 1 20 | * 21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #define BUF_SIZE 4096 29 | 30 | /* Threshold between two consecutive IPIDs above which a machine is no longer 31 | * considered to have a global IPID. 32 | */ 33 | #define IPID_DIFF_THRESHOLD ((uint16_t) 10) 34 | 35 | inline int is_sequential( uint16_t ipid0, uint16_t ipid1 ) { 36 | 37 | if (((uint16_t) (ipid1 - ipid0)) > IPID_DIFF_THRESHOLD) { 38 | return 0; 39 | } else if (((uint16_t) (ipid1 - ipid0)) == 0) { 40 | return 0; 41 | } else { 42 | return 1; 43 | } 44 | } 45 | 46 | int main( void ) { 47 | 48 | uint16_t crnt_val = 0; 49 | uint16_t prev_val = 0; 50 | ssize_t len = 0; 51 | char *buf = NULL; 52 | size_t n = BUF_SIZE; 53 | int ipids = 0; 54 | 55 | buf = (char *) calloc(1, (size_t) BUF_SIZE); 56 | if (buf == NULL) { 57 | fprintf(stderr, "error: malloc() failed\n"); 58 | return 1; 59 | } 60 | 61 | while ((len = getline(&buf, &n, stdin)) != -1) { 62 | 63 | if (strncmp(buf, "\n", 1) == 0) { 64 | printf("error: no input\n"); 65 | return 2; 66 | } 67 | 68 | crnt_val = (uint16_t) atoi(buf); 69 | 70 | printf("%hu, ", crnt_val); 71 | 72 | if (ipids && !is_sequential(prev_val, crnt_val)) { 73 | printf("non-global\n"); 74 | return 3; 75 | } 76 | 77 | prev_val = crnt_val; 78 | ipids++; 79 | } 80 | 81 | if (ipids > 1) { 82 | printf("global\n"); 83 | return 0; 84 | } else if (ipids == 1) { 85 | printf("error: not enough ipids\n"); 86 | return 4; 87 | } else { 88 | printf("error: no input\n"); 89 | return 5; 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /find_global_ipid/ipid_prober.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Start the script as follows: 4 | # $ sudo ./ipid_prober.sh IP_ADDRESS_LIST 5 | # 6 | # The script expects one IP address per line in the given input file. It does 7 | # not know how to deal with CIDR notation. Use another tool to convert CIDR 8 | # notation to IP addresses first. 9 | 10 | if [ "$#" -gt 1 ] 11 | then 12 | echo 13 | echo "Usage: $0 IP_ADDRESS_FILE" 14 | echo 15 | exit 1 16 | fi 17 | 18 | # Change this to your needs. 
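# Replies to the spoofed probes below are sent to this address instead of to
# us, so we never see them directly.  If the target keeps a single, global
# IPID counter, those replies still advance the counter, which is what the
# interleaved unspoofed probes (and analyse_sequence) pick up.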
19 | spoofed_addr="1.2.3.4" 20 | 21 | probe_real() { 22 | local ip_addr="$1" 23 | 24 | hping3 -n -s $(($RANDOM % 65535)) -p 80 -S -c 1 "$ip_addr" | \ 25 | grep 'id=' | \ 26 | sed 's/.*id=\([^ ]\+\).*/\1/' 27 | } 28 | 29 | probe_spoof() { 30 | local ip_addr="$1" 31 | 32 | hping3 -a "$spoofed_addr" -n -s $(($RANDOM % 65535)) -p 80 -S -c 1 "$ip_addr" | \ 33 | grep 'id=' | \ 34 | sed 's/.*id=\([^ ]\+\).*/\1/' 35 | } 36 | 37 | while read ip_addr 38 | do 39 | 40 | echo -n "Probing ${ip_addr}: " 41 | 42 | ( probe_real "$ip_addr" 43 | probe_spoof "$ip_addr" 44 | probe_real "$ip_addr" 45 | probe_spoof "$ip_addr" 46 | probe_real "$ip_addr" 47 | ) 2>/dev/null | ./analyse_sequence 48 | 49 | done < "${1:-/proc/${$}/fd/0}" 50 | -------------------------------------------------------------------------------- /get_relays.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | outfile="relays.txt" 4 | 5 | if [ ! -e consensus ] 6 | then 7 | # Download the current consensus from moria. 8 | wget http://128.31.0.39:9131/tor/status-vote/current/consensus 9 | fi 10 | 11 | grep '^r' consensus | \ 12 | grep -oE '((1?[0-9][0-9]?|2[0-4][0-9]|25[0-5])\.){3}(1?[0-9][0-9]?|2[0-4][0-9]|25[0-5]) ([0-9]{1,5})' | \ 13 | sed 's/ /:/g' > $outfile 14 | 15 | echo "[+] Wrote relays to \"${outfile}\"." 16 | -------------------------------------------------------------------------------- /log.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2014 Philipp Winter 4 | 5 | log() { 6 | local msg="$1" 7 | printf "[$(date -u --rfc-3339=ns)] ${msg}\n" | tee -a probing-stdout.log 8 | } 9 | 10 | err() { 11 | local msg="$1" 12 | printf "[$(date -u --rfc-3339=ns)] ${msg}\n" | tee -a probing-stderr.log >&2 13 | } 14 | -------------------------------------------------------------------------------- /plotting/icons/destination_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NullHypothesis/backlogscans/21a7bb8439999fd031faafcabd34d95e3315c292/plotting/icons/destination_icon.png -------------------------------------------------------------------------------- /plotting/icons/hybrid_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NullHypothesis/backlogscans/21a7bb8439999fd031faafcabd34d95e3315c292/plotting/icons/hybrid_icon.png -------------------------------------------------------------------------------- /plotting/icons/source_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NullHypothesis/backlogscans/21a7bb8439999fd031faafcabd34d95e3315c292/plotting/icons/source_icon.png -------------------------------------------------------------------------------- /plotting/plot_scan_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright 2014 Philipp Winter 4 | 5 | import csv 6 | import sys 7 | import os 8 | import argparse 9 | import logging 10 | import datetime 11 | import time 12 | 13 | import pygmaps 14 | 15 | LAT_LON_CSV = "gsIPs-lat-long.csv" 16 | 17 | handler = logging.StreamHandler() 18 | handler.setFormatter(logging.Formatter(fmt="%(asctime)s [%(levelname)s]: " 19 | "%(message)s")) 20 | 21 | logger = logging.getLogger() 22 | logger.addHandler(handler) 23 | logger.setLevel(logging.DEBUG) 24 | 25 | icon_paths = { 26 | 
1: "https://raw.githubusercontent.com/NullHypothesis/backlogscans/master/plotting/icons/source_icon.png", 27 | 2: "https://raw.githubusercontent.com/NullHypothesis/backlogscans/master/plotting/icons/destination_icon.png", 28 | 3: "https://raw.githubusercontent.com/NullHypothesis/backlogscans/master/plotting/icons/hybrid_icon.png" 29 | } 30 | 31 | path_colours = { 32 | 1: "#000000", # Error. 33 | 2: "#FF0000", # Server-to-client drop. 34 | 3: "#00FF00", # No drop. 35 | 4: "#FF8800" # Client-to-server drop. 36 | } 37 | 38 | class Machine( object ): 39 | 40 | """Represents a machine which is part of a scan.""" 41 | 42 | def __init__( self, ip_addr, latitude, longitude, region=None, 43 | machine_type=None ): 44 | 45 | self.ip_addr = ip_addr 46 | self.latitude = latitude 47 | self.longitude = longitude 48 | self.region = region 49 | self.machine_type = machine_type 50 | 51 | def get_coordinates( self ): 52 | 53 | return (self.latitude, self.longitude) 54 | 55 | def __str__( self ): 56 | 57 | s = "%s (%.5f:%.5f, %s, %s)" % (self.ip_addr, 58 | self.latitude, 59 | self.longitude, 60 | self.region, 61 | self.machine_type) 62 | return s 63 | 64 | class Scan( object ): 65 | 66 | """Represents a scan between two machines.""" 67 | 68 | def __init__( self, scan_verdict, src_host, dst_host, hour ): 69 | 70 | self.scan_verdict = scan_verdict 71 | self.src_host = src_host 72 | self.dst_host = dst_host 73 | self.hour = hour 74 | 75 | def get_hosts( self ): 76 | 77 | return (self.src_host, self.dst_host) 78 | 79 | def get_ip_addresses( self ): 80 | 81 | return (self.src_host.ip_addr, 82 | self.dst_host.ip_addr) 83 | 84 | def __str__( self ): 85 | 86 | s = "Type %d at hour %d: %s --> %s" % (self.scan_verdict, 87 | self.hour, 88 | self.src_host, 89 | self.dst_host) 90 | return s 91 | 92 | class Cluster( object ): 93 | 94 | def __init__( self, cluster_id ): 95 | 96 | self.cluster_id = str(cluster_id) 97 | self.machines = [] 98 | 99 | def add_machine( self, machine ): 100 | 101 | self.machines.append(machine) 102 | 103 | def __iter__( self ): 104 | return iter(self.machines) 105 | 106 | def __str__( self ): 107 | 108 | return self.cluster_id + "\n\t" + \ 109 | "\n\t".join([str(m) for m in self.machines]) 110 | 111 | def get_lat_long( loc_data, ip_addr ): 112 | 113 | fd = open(loc_data, 'r') 114 | reader = csv.DictReader(fd, 115 | fieldnames = ['ip', 'lat', 'lon', 're', 'ipt'], 116 | delimiter = ',', 117 | quotechar = '"', 118 | skipinitialspace=True) 119 | 120 | loc_data = list(reader) 121 | 122 | for lats in loc_data: 123 | if ip_addr in lats['ip']: 124 | return [lats['lat'], lats['lon'], lats['re'], lats['ipt']] 125 | 126 | return None 127 | 128 | def print_clusters( clusters, file_name ): 129 | """ 130 | Analyse all clusters and write a map to the given file. 131 | """ 132 | 133 | # arg1: start latitude 134 | # arg2: start longitude 135 | # arg3: default zoom level (must be in {0..20}) 136 | 137 | my_map = pygmaps.maps(0, 0, 2) 138 | 139 | # First, parse all scans and create dictionaries for the markers/points as 140 | # well as paths. 141 | 142 | colors = ["#ff0000", "#00ff00", "#0000ff", "#ffff00", "#00ffff", 143 | "#ff00ff", "#000000", "#ffffff", 144 | "#770000", "#007700", "#000077", "#777700", "#007777", 145 | "#770077"] 146 | 147 | for cluster in clusters: 148 | 149 | points = {} 150 | 151 | # Determine cluster color. 
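        # The palette above holds only 14 colours; with more clusters than
        # that, pop(0) raises an IndexError.  Cycling through the list would
        # be one way to handle larger inputs.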
152 | 153 | color = colors.pop(0) 154 | 155 | # points key=coordinates, value=ip address(es) 156 | 157 | for machine in cluster.machines: 158 | if points.has_key(machine.get_coordinates()): 159 | points[machine.get_coordinates()].append(machine.ip_addr) 160 | else: 161 | points[machine.get_coordinates()] = [machine.ip_addr] 162 | 163 | for latitude, longitude in points: 164 | my_map.addpoint(latitude, longitude, 165 | color=color, 166 | title=",".join(points[(latitude, longitude)])) 167 | 168 | logger.info("Writing output to \"%s\"." % file_name) 169 | my_map.draw(file_name) 170 | 171 | def print_map( scans, file_name ): 172 | """ 173 | Analyse all scans and write a map to the given file. 174 | """ 175 | 176 | global icon_paths 177 | global path_colours 178 | 179 | # arg1: start latitude 180 | # arg2: start longitude 181 | # arg3: default zoom level (must be in {0..20}) 182 | 183 | my_map = pygmaps.maps(0, 0, 2) 184 | 185 | # First, parse all scans and create dictionaries for the markers/points as 186 | # well as paths. 187 | 188 | points = {} 189 | paths = {} 190 | 191 | for scan in scans: 192 | 193 | src_host, dst_host = scan.get_hosts() 194 | path = (src_host.get_coordinates(), dst_host.get_coordinates()) 195 | 196 | # Paths can overlap but for performance reasons, we plot them only 197 | # once. So for every overlapping path, store the scan result (block, 198 | # error, unblocked) in a bitmap. 199 | 200 | if paths.has_key(path): 201 | paths[path] |= (scan.scan_verdict + 1) 202 | else: 203 | paths[path] = (scan.scan_verdict + 1) 204 | 205 | # We also plot overlapping points only once. We store the point type 206 | # (scan source or destination) in a bitmap. 207 | 208 | src_coordinates = src_host.get_coordinates() 209 | if points.has_key(src_coordinates): 210 | points[src_coordinates] |= 1 211 | else: 212 | points[src_coordinates] = 1 213 | 214 | dst_coordinates = dst_host.get_coordinates() 215 | if points.has_key(dst_coordinates): 216 | points[dst_coordinates] |= 2 217 | else: 218 | points[dst_coordinates] = 2 219 | 220 | # Now that we have all our points, plot them. Depending on the determined 221 | # bitmap, we plot the source, destination, or hybrid icon for the point. 222 | 223 | for latitude, longitude in points: 224 | 225 | icon_path = icon_paths[points[(latitude, longitude)]] 226 | my_map.addpoint(latitude, longitude, "#FFFFFF", icon=icon_path) 227 | 228 | # Finally, plot the paths between the points. Again, depending on the scan 229 | # verdict, we plot the path in different colours. 230 | 231 | for src_coordinates, dst_coordinates in paths: 232 | 233 | scan_verdict = paths[(src_coordinates, dst_coordinates)] 234 | 235 | if path_colours.has_key(scan_verdict): 236 | colour = path_colours[scan_verdict] 237 | else: 238 | colour = "#0000FF" 239 | 240 | my_map.addpath([src_coordinates, dst_coordinates], color=colour) 241 | 242 | logger.info("Writing output to \"%s\"." % file_name) 243 | my_map.draw(file_name) 244 | 245 | def parse_file( file_name ): 246 | """ 247 | Read the entire given file and create scan objects out of the data. 
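    Every line must contain twelve space-separated fields:

      verdict src_ip src_lat src_lon dst_ip dst_lat dst_lon \
              src_region src_type dst_region dst_type hour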
248 | """ 249 | 250 | scans = [] 251 | fd = open(file_name, 'r') 252 | 253 | while True: 254 | line = fd.readline() 255 | if not line: 256 | break 257 | line = line.strip() 258 | 259 | values = line.split(' ') 260 | src_host = Machine(values[1], float(values[2]), float(values[3]), 261 | values[7], values[8]) 262 | dst_host = Machine(values[4], float(values[5]), float(values[6]), 263 | values[9], values[10]) 264 | 265 | scans.append(Scan(int(values[0]), src_host, dst_host, int(values[11]))) 266 | 267 | logger.info("Read %d idle scans from file `%s'." % 268 | (len(scans), file_name)) 269 | 270 | return scans 271 | 272 | def parse_clusters( file_name ): 273 | """ 274 | Read the entire given file and create cluster objects out of the data. 275 | """ 276 | 277 | clusters = [] 278 | fd = open(file_name, 'r') 279 | cluster_id = 0 280 | 281 | while True: 282 | line = fd.readline() 283 | if not line: 284 | break 285 | 286 | # Format: , , ..., 287 | 288 | ip_addrs = [ip_addr.strip() for ip_addr in line.split(',')] 289 | cluster = Cluster(cluster_id) 290 | cluster_id += 1 291 | 292 | for ip_addr in ip_addrs: 293 | 294 | ret = get_lat_long(LAT_LON_CSV, ip_addr) 295 | if not ret: 296 | logger.error("No location information for IP address %s." % 297 | ip_addr) 298 | continue 299 | else: 300 | lat, lon = ret[:2] 301 | 302 | cluster.add_machine(Machine(ip_addr, float(lat), float(lon))) 303 | 304 | clusters.append(cluster) 305 | 306 | return clusters 307 | 308 | def parse_arguments( args ): 309 | 310 | parser = argparse.ArgumentParser(description="Plot and filter idle " 311 | "scan results on a clustered Google map.") 312 | 313 | parser.add_argument("datafile", metavar="IDLE_SCAN_FILE", 314 | help="Parse and plot the given file.") 315 | 316 | parser.add_argument("-w", "--write", metavar="OUTPUT_FILE", 317 | type=str, default="idle_scan_map.html", 318 | help="Write HTML output to the given file.") 319 | 320 | parser.add_argument("-d", "--directory", metavar="OUTPUT_DIR", 321 | type=str, default=None, 322 | help="Where to write all analysis files to.") 323 | 324 | parser.add_argument("-r", "--region", metavar="REGION", 325 | type=str, help="Region information of source or " 326 | "destination machine (e.g.: CN_R7).") 327 | 328 | parser.add_argument("-H", "--hour", metavar="HOUR", 329 | type=int, help="Scan hour (e.g.: 0-23).") 330 | 331 | parser.add_argument("-t", "--type", metavar="TYPE", type=str, 332 | help="Type of source or destination machine (e.g.: " 333 | "Tor_Relay, Tor_Dir, Web_Server, GIP).") 334 | 335 | parser.add_argument("-v", "--verdict", metavar="VERDICT", type=int, 336 | help="The scan's verdict (e.g.: 0, 1, 2, 3).") 337 | 338 | parser.add_argument("-a", "--address", metavar="ADDRESS", type=str, 339 | help="IP address of source or destination machine " 340 | "(e.g.: 1.2.3.4).") 341 | 342 | parser.add_argument("-i", "--inspect", 343 | action="store_true", 344 | help="Only display search result without printing " 345 | "HTML/JavaScript. Useful for manual analysis.") 346 | 347 | parser.add_argument("-c", "--cluster", metavar="CLUSTER_FILE", 348 | type=str, help="Use the given cluster file.") 349 | 350 | return parser.parse_args() 351 | 352 | def mkdir_analysis( dirname ): 353 | """ 354 | Create and return a directory where all created data is stored. 
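    If no directory name is given, one is derived from the current time, e.g.
    "2014-05-20_13:37:00".  The program exits if the directory cannot be
    created.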
355 | """ 356 | 357 | if not dirname: 358 | dt = datetime.datetime.fromtimestamp(time.time()) 359 | dirname = dt.strftime("%Y-%m-%d_%H:%M:%S") 360 | 361 | try: 362 | os.mkdir(dirname) 363 | except OSError as err: 364 | logger.error("Could not create directory: %s" % err) 365 | exit(1) 366 | 367 | return dirname 368 | 369 | def main( ): 370 | """ 371 | The tool's entry point. 372 | """ 373 | 374 | args = parse_arguments(sys.argv[0:]) 375 | 376 | dirname = mkdir_analysis(args.directory) 377 | 378 | if args.cluster: 379 | logger.debug("Parsing IP address cluster file `%s'." % args.cluster) 380 | clusters = parse_clusters(args.cluster) 381 | print_clusters(clusters, "%s/ip_addr_clusters.html" % dirname) 382 | 383 | logger.debug("Parsing idle scan file `%s'." % args.datafile) 384 | scans = parse_file(args.datafile) 385 | 386 | # Filter scans based on the user's parameters. 387 | 388 | logger.debug("Filtering idle scan data.") 389 | 390 | if args.region is not None: 391 | scans = filter(lambda scan: scan.src_host.region == args.region or 392 | scan.dst_host.region == args.region, scans) 393 | 394 | if args.hour is not None: 395 | scans = filter(lambda scan: scan.hour == args.hour, scans) 396 | 397 | if args.type is not None: 398 | scans = filter(lambda scan: scan.src_host.machine_type == args.type or 399 | scan.dst_host.machine_type == args.type, 400 | scans) 401 | 402 | if args.verdict is not None: 403 | scans = filter(lambda scan: scan.scan_verdict == args.verdict, scans) 404 | 405 | if args.address is not None: 406 | scans = filter(lambda scan: scan.src_host.ip_addr == args.address or 407 | scan.dst_host.ip_addr == args.address, 408 | scans) 409 | 410 | # Depending on what user wants, print object representations or 411 | # browser-ready HTML/JavaScript. 412 | 413 | if not scans: 414 | logger.warning("No scan data after filtering steps.") 415 | else: 416 | logger.info("%d idle scans remain after filtering step." % len(scans)) 417 | 418 | if args.inspect: 419 | for scan in scans: 420 | print scan 421 | else: 422 | print_map(scans, "%s/%s" % (dirname, args.write)) 423 | logger.info("Wrote HTML data to `%s'." % args.write) 424 | 425 | # Create cluster-specific scan maps. Every scan in all scan maps contains 426 | # an IP address which is part of a cluster. 
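    # Without a cluster file there are no clusters to iterate over (the
    # variable `clusters' is only assigned when --cluster is given), so we
    # are done at this point.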
427 | 428 | if not args.cluster: 429 | return 0 430 | 431 | for cluster in clusters: 432 | 433 | cluster_scans = [] 434 | 435 | for machine in cluster: 436 | for scan in scans: 437 | if machine.ip_addr in scan.get_ip_addresses(): 438 | cluster_scans.append(scan) 439 | 440 | print_map(cluster_scans, "%s/cluster_%s.html" % (dirname, 441 | cluster.cluster_id)) 442 | 443 | return 0 444 | 445 | if __name__ == "__main__": 446 | exit(main()) 447 | -------------------------------------------------------------------------------- /plotting/pygmaps.py: -------------------------------------------------------------------------------- 1 | import math 2 | ########################################################### 3 | ## Google map python wrapper V0.1 4 | ## 5 | ############################################################ 6 | 7 | class maps: 8 | 9 | def __init__(self, centerLat, centerLng, zoom ): 10 | self.center = (float(centerLat),float(centerLng)) 11 | self.zoom = int(zoom) 12 | self.grids = None 13 | self.paths = [] 14 | self.points = [] 15 | self.radpoints = [] 16 | self.gridsetting = None 17 | self.coloricon = 'http://chart.apis.google.com/chart?cht=mm&chs=12x16&chco=FFFFFF,XXXXXX,000000&ext=.png' 18 | 19 | def setgrids(self,slat,elat,latin,slng,elng,lngin): 20 | self.gridsetting = [slat,elat,latin,slng,elng,lngin] 21 | 22 | def addpoint(self, lat, lng, color = '#FF0000', icon=None, title="n/a"): 23 | self.points.append((lat,lng,color[1:],icon,title)) 24 | 25 | #def addpointcoord(self, coord): 26 | # self.points.append((coord[0],coord[1])) 27 | 28 | def addradpoint(self, lat,lng,rad,color = '#0000FF'): 29 | self.radpoints.append((lat,lng,rad,color)) 30 | 31 | def addpath(self,path,color = '#FF0000'): 32 | path.append(color) 33 | self.paths.append(path) 34 | 35 | #create the html file which inlcude one google map and all points and paths 36 | def draw(self, htmlfile): 37 | f = open(htmlfile,'w') 38 | f.write('\n') 39 | f.write('\n') 40 | f.write('\n') 41 | f.write('\n') 42 | f.write('Google Maps - pygmaps \n') 43 | f.write('\n') 44 | f.write('\n') 53 | f.write('\n') 54 | f.write('\n') 55 | f.write('\t
\n') 56 | f.write('\n') 57 | f.write('\n') 58 | f.close() 59 | 60 | def drawgrids(self, f): 61 | if self.gridsetting == None: 62 | return 63 | slat = self.gridsetting[0] 64 | elat = self.gridsetting[1] 65 | latin = self.gridsetting[2] 66 | slng = self.gridsetting[3] 67 | elng = self.gridsetting[4] 68 | lngin = self.gridsetting[5] 69 | self.grids = [] 70 | 71 | r = [slat+float(x)*latin for x in range(0, int((elat-slat)/latin))] 72 | for lat in r: 73 | self.grids.append([(lat+latin/2.0,slng+lngin/2.0),(lat+latin/2.0,elng+lngin/2.0)]) 74 | 75 | r = [slng+float(x)*lngin for x in range(0, int((elng-slng)/lngin))] 76 | for lng in r: 77 | self.grids.append([(slat+latin/2.0,lng+lngin/2.0),(elat+latin/2.0,lng+lngin/2.0)]) 78 | 79 | for line in self.grids: 80 | self.drawPolyline(f,line,strokeColor = "#000000") 81 | def drawpoints(self,f): 82 | for point in self.points: 83 | self.drawpoint(f,point[0],point[1],point[2],point[3],point[4]) 84 | 85 | def drawradpoints(self, f): 86 | for rpoint in self.radpoints: 87 | path = self.getcycle(rpoint[0:3]) 88 | self.drawPolygon(f,path,strokeColor = rpoint[3]) 89 | 90 | def getcycle(self,rpoint): 91 | cycle = [] 92 | lat = rpoint[0] 93 | lng = rpoint[1] 94 | rad = rpoint[2] #unit: meter 95 | d = (rad/1000.0)/6378.8; 96 | lat1 = (math.pi/180.0)* lat 97 | lng1 = (math.pi/180.0)* lng 98 | 99 | r = [x*30 for x in range(12)] 100 | for a in r: 101 | tc = (math.pi/180.0)*a; 102 | y = math.asin(math.sin(lat1)*math.cos(d)+math.cos(lat1)*math.sin(d)*math.cos(tc)) 103 | dlng = math.atan2(math.sin(tc)*math.sin(d)*math.cos(lat1),math.cos(d)-math.sin(lat1)*math.sin(y)) 104 | x = ((lng1-dlng+math.pi) % (2.0*math.pi)) - math.pi 105 | cycle.append( ( float(y*(180.0/math.pi)),float(x*(180.0/math.pi)) ) ) 106 | return cycle 107 | 108 | def drawpaths(self, f, paths): 109 | for path in paths: 110 | #print path 111 | self.drawPolyline(f,path[:-1], strokeColor = path[-1]) 112 | 113 | ############################################# 114 | # # # # # # Low level Map Drawing # # # # # # 115 | ############################################# 116 | def drawmap(self, f): 117 | f.write('\t\tvar centerlatlng = new google.maps.LatLng(%f, %f);\n' % (self.center[0],self.center[1])) 118 | f.write('\t\tvar myOptions = {\n') 119 | f.write('\t\t\tzoom: %d,\n' % (self.zoom)) 120 | f.write('\t\t\tcenter: centerlatlng,\n') 121 | f.write('\t\t\tmapTypeId: google.maps.MapTypeId.ROADMAP\n') 122 | f.write('\t\t};\n') 123 | f.write('\t\tvar map = new google.maps.Map(document.getElementById("map_canvas"), myOptions);\n') 124 | f.write('\n') 125 | 126 | 127 | 128 | def drawpoint(self,f,lat,lon,color,icon,title): 129 | f.write('\t\tvar latlng = new google.maps.LatLng(%f, %f);\n'%(lat,lon)) 130 | f.write('\t\tvar img = new google.maps.MarkerImage(\'%s\');\n' % (self.coloricon.replace('XXXXXX',color))) 131 | f.write('\t\tvar marker = new google.maps.Marker({\n') 132 | f.write('\t\ttitle: "%s",\n' % title) 133 | f.write('\t\ticon: img,\n') 134 | if icon: 135 | f.write('\t\ticon: "%s",\n' % icon) 136 | f.write('\t\tposition: latlng\n') 137 | f.write('\t\t});\n') 138 | f.write('\t\tmarker.setMap(map);\n') 139 | f.write('\n') 140 | 141 | def drawPolyline(self,f,path,\ 142 | clickable = False, \ 143 | geodesic = True,\ 144 | strokeColor = "#FF0000",\ 145 | strokeOpacity = 0.5,\ 146 | strokeWeight = 1 147 | ): 148 | f.write('var PolylineCoordinates = [\n') 149 | for coordinate in path: 150 | f.write('new google.maps.LatLng(%f, %f),\n' % (coordinate[0],coordinate[1])) 151 | f.write('];\n') 152 | f.write('\n') 153 | 154 | 
f.write('var Path = new google.maps.Polyline({\n') 155 | f.write('clickable: %s,\n' % (str(clickable).lower())) 156 | f.write('geodesic: %s,\n' % (str(geodesic).lower())) 157 | f.write('path: PolylineCoordinates,\n') 158 | f.write('strokeColor: "%s",\n' %(strokeColor)) 159 | f.write('strokeOpacity: %f,\n' % (strokeOpacity)) 160 | f.write('strokeWeight: %d\n' % (strokeWeight)) 161 | f.write('});\n') 162 | f.write('\n') 163 | f.write('Path.setMap(map);\n') 164 | f.write('\n\n') 165 | 166 | def drawPolygon(self,f,path,\ 167 | clickable = False, \ 168 | geodesic = True,\ 169 | fillColor = "#000000",\ 170 | fillOpacity = 0.0,\ 171 | strokeColor = "#FF0000",\ 172 | strokeOpacity = 1.0,\ 173 | strokeWeight = 1 174 | ): 175 | f.write('var coords = [\n') 176 | for coordinate in path: 177 | f.write('new google.maps.LatLng(%f, %f),\n' % (coordinate[0],coordinate[1])) 178 | f.write('];\n') 179 | f.write('\n') 180 | 181 | f.write('var polygon = new google.maps.Polygon({\n') 182 | f.write('clickable: %s,\n' % (str(clickable).lower())) 183 | f.write('geodesic: %s,\n' % (str(geodesic).lower())) 184 | f.write('fillColor: "%s",\n' %(fillColor)) 185 | f.write('fillOpacity: %f,\n' % (fillOpacity)) 186 | f.write('paths: coords,\n') 187 | f.write('strokeColor: "%s",\n' %(strokeColor)) 188 | f.write('strokeOpacity: %f,\n' % (strokeOpacity)) 189 | f.write('strokeWeight: %d\n' % (strokeWeight)) 190 | f.write('});\n') 191 | f.write('\n') 192 | f.write('polygon.setMap(map);\n') 193 | f.write('\n\n') 194 | 195 | if __name__ == "__main__": 196 | 197 | ########## CONSTRUCTOR: pygmaps(latitude, longitude, zoom) ############################## 198 | # DESC: initialize a map with latitude and longitude of center point 199 | # and map zoom level "15" 200 | # PARAMETER1: latitude (float) latittude of map center point 201 | # PARAMETER2: longitude (float) latittude of map center point 202 | # PARAMETER3: zoom (int) map zoom level 0~20 203 | # RETURN: the instant of pygmaps 204 | #======================================================================================== 205 | mymap = pygmaps(37.428, -122.145, 16) 206 | 207 | 208 | ########## FUNCTION: setgrids(start-Lat, end-Lat, Lat-interval, start-Lng, end-Lng, Lng-interval) ###### 209 | # DESC: set grids on map 210 | # PARAMETER1: start-Lat (float), start (minimum) latittude of the grids 211 | # PARAMETER2: end-Lat (float), end (maximum) latittude of the grids 212 | # PARAMETER3: Lat-interval (float) grid size in latitude 213 | # PARAMETER4: start-Lng (float), start (minimum) longitude of the grids 214 | # PARAMETER5: end-Lng (float), end (maximum) longitude of the grids 215 | # PARAMETER6: Lng-interval (float) grid size in longitude 216 | # RETURN: no returns 217 | #======================================================================================== 218 | mymap.setgrids(37.42, 37.43, 0.001, -122.15, -122.14, 0.001) 219 | 220 | 221 | ########## FUNCTION: addpoint(latitude, longitude, [color])############################# 222 | # DESC: add a point into a map and dispaly it, color is optional default is red 223 | # PARAMETER1: latitude (float) latitude of the point 224 | # PARAMETER2: longitude (float) longitude of the point 225 | # PARAMETER3: color (string) color of the point showed in map, using HTML color code 226 | # HTML COLOR CODE: http://www.computerhope.com/htmcolor.htm 227 | # e.g. 
red "#FF0000", Blue "#0000FF", Green "#00FF00" 228 | # RETURN: no return 229 | #======================================================================================== 230 | mymap.addpoint(37.427, -122.145, "#0000FF") 231 | 232 | 233 | ########## FUNCTION: addradpoint(latitude, longitude, radius, [color])################## 234 | # DESC: add a point with a radius (Meter) - Draw cycle 235 | # PARAMETER1: latitude (float) latitude of the point 236 | # PARAMETER2: longitude (float) longitude of the point 237 | # PARAMETER3: radius (float), radius in meter 238 | # PARAMETER4: color (string) color of the point showed in map, using HTML color code 239 | # HTML COLOR CODE: http://www.computerhope.com/htmcolor.htm 240 | # e.g. red "#FF0000", Blue "#0000FF", Green "#00FF00" 241 | # RETURN: no return 242 | #======================================================================================== 243 | mymap.addradpoint(37.429, -122.145, 95, "#FF0000") 244 | 245 | 246 | ########## FUNCTION: addpath(path,[color])############################################## 247 | # DESC: add a path into map, the data struceture of Path is a list of points 248 | # PARAMETER1: path (list of coordinates) e.g. [(lat1,lng1),(lat2,lng2),...] 249 | # PARAMETER2: color (string) color of the point showed in map, using HTML color code 250 | # HTML COLOR CODE: http://www.computerhope.com/htmcolor.htm 251 | # e.g. red "#FF0000", Blue "#0000FF", Green "#00FF00" 252 | # RETURN: no return 253 | #======================================================================================== 254 | path = [(37.429, -122.145),(37.428, -122.145),(37.427, -122.145),(37.427, -122.146),(37.427, -122.146)] 255 | mymap.addpath(path,"#00FF00") 256 | 257 | ########## FUNCTION: addpath(file)###################################################### 258 | # DESC: create the html map file (.html) 259 | # PARAMETER1: file (string) the map path and file 260 | # RETURN: no return, generate html file in specified directory 261 | #======================================================================================== 262 | mymap.draw('./mymap.html') 263 | 264 | 265 | -------------------------------------------------------------------------------- /probe_host.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2014 Philipp Winter 4 | # 5 | # Probes a given host using traceroutes, SYN scans and RST scans. All data is 6 | # written to the given directory. Probing should happen in sync with the 7 | # censored machine behind the GFW. 8 | 9 | source log.sh 10 | source config.sh 11 | 12 | PATH=$PATH:"." 13 | 14 | traceroute="traceroute_host.sh" 15 | synscan="synscan.sh" 16 | rstscan="rstscan.sh" 17 | 18 | if [ "$#" -ne 3 ] 19 | then 20 | echo 21 | echo "Usage: $0 DST_ADDR DST_PORT OUTPUT_DIR" 22 | echo 23 | exit 1 24 | fi 25 | 26 | ip_addr="$1" 27 | port="$2" 28 | outdir="$3" 29 | 30 | log "0. Now probing host ${ip_addr}:${port}." 31 | 32 | # Traceroutes are only run by the censored machine. We want to make sure that 33 | # the route doesn't change during the scan. To be (reasonably) sure, we run 34 | # traceroutes before, during, and after the scan. 35 | if [ $prober_type = "censored" ] 36 | then 37 | log "1. Running TCP and ICMP-based traceroutes to ${ip_addr}:${port}." 38 | "$traceroute" "$ip_addr" "$port" "$outdir" & 39 | fi 40 | 41 | sleep 5 42 | 43 | 44 | log "2. Running SYN scan to determine if SYN or SYN/ACK segments are dropped." 
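# The timestamped pcap produced by synscan.sh can later be fed to
# count_retransmissions.py, which counts the SYN/ACK retransmissions per SYN.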
45 | "$synscan" "$ip_addr" "$port" "${outdir}/$(date -u +'%F.%T')_synscan.pcap" 46 | 47 | 48 | if [ $prober_type = "censored" ] 49 | then 50 | log "3. Running TCP and ICMP-based traceroutes to ${ip_addr}:${port}." 51 | "$traceroute" "$ip_addr" "$port" "$outdir" & 52 | fi 53 | 54 | sleep 5 55 | 56 | 57 | log "4. Running RST scan to determine if RST segments are dropped." 58 | "$rstscan" "$ip_addr" "$port" "$spoofed_addr" "${outdir}/$(date -u +'%F.%T')_rstscan.pcap" 59 | 60 | 61 | if [ $prober_type = "censored" ] 62 | then 63 | log "5. Running TCP and ICMP-based traceroutes to ${ip_addr}${port}." 64 | "$traceroute" "$ip_addr" "$port" "$outdir" & 65 | fi 66 | 67 | sleep 5 68 | -------------------------------------------------------------------------------- /probing_wrapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2014 Philipp Winter 4 | # 5 | # This script is meant to be executed using watch(1), e.g.: 6 | # $ watch -n 70 -p -x ./probing_wrapper.sh 7 | 8 | source log.sh 9 | 10 | PATH=$PATH:. 11 | 12 | # Path to the script which probes a single host. 13 | probe="probe_host.sh" 14 | 15 | if [ "$#" -lt 2 ] 16 | then 17 | echo 18 | echo "Usage: $0 HOSTS_FILE OUTPUT_DIR" 19 | echo 20 | exit 1 21 | fi 22 | 23 | hosts_file="$1" 24 | outdir="$2" 25 | 26 | if [ ! -f "$hosts_file" ] 27 | then 28 | err "File \"${hosts_file}\" does not exist." 29 | exit 1 30 | fi 31 | 32 | if [ ! -d "$outdir" ] 33 | then 34 | mkdir -p "$outdir" 35 | if [ $? != 0 ] 36 | then 37 | err "Could not create directory \"${outdir}\"." 38 | exit 1 39 | fi 40 | fi 41 | 42 | if [ ! -f "$probe" ] 43 | then 44 | err "Script \"${probe}\" does not exist." 45 | exit 1 46 | fi 47 | 48 | host=$(head -1 "$hosts_file") 49 | if [ -z "$host" ] 50 | then 51 | err "No more hosts in file \"${hosts_file}\"." 52 | else 53 | # Extract IP address and port from the IP:port tuple. 54 | array=(${host//:/ }) 55 | ip_addr=${array[0]} 56 | port=${array[1]} 57 | 58 | # Probe the host. 59 | mkdir -p "${outdir}/${host}" 60 | "$probe" "${ip_addr}" "${port}" "${outdir}/${host}" 61 | 62 | # Drain the first relay and write back the remaining relays. 63 | log "Draining first relay from file \"${hosts_file}\"." 64 | tmpfile=$(mktemp '/tmp/relay_list-XXXXXXXXXX') 65 | tail -n +2 "$hosts_file" > "$tmpfile" 66 | mv "$tmpfile" "$hosts_file" 67 | fi 68 | -------------------------------------------------------------------------------- /rstscan.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2014 Philipp Winter 4 | 5 | source log.sh 6 | source config.sh 7 | 8 | # The amount of (unspoofed) TCP SYNs used to estimate the destination's backlog 9 | # size. 10 | control_syns=10 11 | 12 | # The amount of TCP SYNs which are sent to fill the destination's SYN backlog 13 | # more than 50%. 14 | probing_syns=145 15 | 16 | # How long we should wait for SYN/ACKs after sending data. 65 is a reasonable 17 | # value given 5 SYN/ACK retransmissions and exponential backoff in between 18 | # segments. After 65 seconds, our SYNs should no longer be in the destinations 19 | # backlog. 20 | timeout=65 21 | 22 | if [ "$#" -lt 3 ] 23 | then 24 | echo 25 | echo "Usage: $0 DST_ADDRESS DST_PORT SPOOFED_ADDRESS [OUTPUT_FILE]" 26 | echo 27 | exit 1 28 | fi 29 | 30 | dst_addr="$1" 31 | port="$2" 32 | spoofed_addr="$3" 33 | 34 | if [ ! 
-z "$4" ] 35 | then 36 | outfile="$4" 37 | else 38 | outfile="$(mktemp '/tmp/rstscan-XXXXXX.pcap')" 39 | fi 40 | 41 | log "Beginning RST probing." 42 | 43 | log "Invoking tcpdump(8) to capture network data." 44 | tcpdump -i any -n "host ${dst_addr}" -w "${outfile}" & 45 | pid=$! 46 | 47 | # Give tcpdump some time to start. 48 | sleep 2 49 | 50 | if [ $prober_type = "uncensored" ] 51 | then 52 | log "Setting iptables rules to ignore RST segments." 53 | iptables -A OUTPUT -d ${dst_addr} -p tcp --tcp-flags RST RST -j DROP 54 | 55 | log "Sending ${control_syns} control TCP SYN segments to ${dst_addr}:${port}." 56 | timeout 5 hping3-custom -n -c $control_syns -i u1300 -q -S -L 0 -s 20000 -p ${port} ${dst_addr} & 57 | 58 | # VPS must start sending SYNs before us. 59 | sleep 1 60 | 61 | # 6,000 usec means ~166.6 SYNs a second. 62 | log "Sending ${probing_syns} spoofed TCP SYN segments to ${dst_addr}." 63 | timeout 5 hping3-custom -n -c $probing_syns -a $spoofed_addr -i u6000 -q -S -L 0 -M 1000000 -s 30000 -p ${port} ${dst_addr} & 64 | else 65 | # 6,000 usec means ~166.6 SYNs a second. 66 | log "Sending ${probing_syns} TCP SYN segments to ${dst_addr}." 67 | timeout 5 hping3-custom -n -c $probing_syns -i u6000 -q -S -L 0 -M 1000000 -s 30000 -p ${port} ${dst_addr} & 68 | 69 | # Wait a while to have some certainty that the SYNs made it to the target. 70 | sleep 2 71 | 72 | # Send RSTs which should reach the destination even though SYN/ACKs are blocked. 73 | log "Sending ${probing_syns} TCP RST segments to ${dst_addr}." 74 | timeout 5 hping3-custom -n -c $probing_syns -i u13000 -q -R -L 0 -M 1000001 -s 30000 -p ${port} ${dst_addr} & 75 | fi 76 | 77 | log "Done transmitting but waiting ${timeout}s for final SYN/ACKs to arrive." 78 | sleep "$timeout" 79 | 80 | if [ $prober_type = "uncensored" ] 81 | then 82 | log "Removing iptables rule." 83 | iptables -D OUTPUT -d ${dst_addr} -p tcp --tcp-flags RST RST -j DROP 84 | fi 85 | 86 | log "Terminating tcpdump." 87 | if [ ! -z "$pid" ] 88 | then 89 | kill "$pid" 90 | log "Sent SIGTERM to tcpdump's PID ${pid}." 91 | fi 92 | 93 | log "Experimental results written to: ${outfile}" 94 | -------------------------------------------------------------------------------- /synscan.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2013, 2014 Philipp Winter 4 | # 5 | # This script probes a remote TCP service by sending a specific amount of TCP 6 | # SYN segments and capturing the replies it gets. Note that it requires a 7 | # modified version of hping3(8) as the tool has a global counter which is 8 | # incremented with outgoing *and* incoming packets whereas we are only 9 | # interested in outgoing packets. 10 | 11 | source log.sh 12 | source config.sh 13 | 14 | # The amount of TCP SYNs used to estimate the destination's backlog size. 15 | if [ $prober_type = "censored" ] 16 | then 17 | control_syns=145 18 | else 19 | control_syns=10 20 | fi 21 | 22 | # How long we should wait for SYN/ACKs after sending data. 65 is a reasonable 23 | # value given 5 SYN/ACK retransmissions and exponential backoff in between 24 | # segments. After 65 seconds, our SYNs should no longer be in the destinations 25 | # backlog. 26 | timeout=65 27 | 28 | if [ "$#" -lt 2 ] 29 | then 30 | echo 31 | echo "Usage: $0 DST_ADDRESS DST_PORT [OUTPUT_FILE]" 32 | echo 33 | exit 1 34 | fi 35 | 36 | dst_addr="$1" 37 | port="$2" 38 | 39 | if [ ! 
-z "$3" ] 40 | then 41 | outfile="$3" 42 | else 43 | outfile="$(mktemp '/tmp/synscan-XXXXXX.pcap')" 44 | fi 45 | 46 | log "Beginning SYN probing." 47 | log "Setting iptables rules to ignore RST segments." 48 | iptables -A OUTPUT -d ${dst_addr} -p tcp --tcp-flags RST RST -j DROP 49 | 50 | log "Invoking tcpdump(8) to capture network data." 51 | tcpdump -i any -n "host ${dst_addr} and port ${port}" -w "${outfile}" & 52 | pid=$! 53 | 54 | # Give tcpdump some time to start. 55 | sleep 2 56 | 57 | # 15,000 usec means ~66.7 SYNs a second. 58 | log "Sending ${control_syns} TCP SYN segments to ${dst_addr}:${port} in the background." 59 | timeout 5 hping3-custom -n -c "$control_syns" -i u13000 -q -S -s 10000 -p ${port} ${dst_addr} & 60 | 61 | log "Now waiting ${timeout}s for final SYN/ACKs to arrive." 62 | if [ $prober_type = "censored" ] 63 | then 64 | sleep "$timeout" 65 | else 66 | sleep 2 67 | log "Sending 3 control SYNs to estimate destination's original backlog size." 68 | timeout 5 hping3-custom -n -c 3 -i u13000 -q -S -s 20000 -p ${port} ${dst_addr} & 69 | sleep "$(($timeout - 2))" 70 | fi 71 | 72 | log "Removing iptables rule." 73 | iptables -D OUTPUT -d ${dst_addr} -p tcp --tcp-flags RST RST -j DROP 74 | 75 | log "Terminating tcpdump." 76 | if [ ! -z "$pid" ] 77 | then 78 | kill "$pid" 79 | log "Sent SIGTERM to PID ${pid}." 80 | fi 81 | 82 | log "Experimental results written to: ${outfile}" 83 | -------------------------------------------------------------------------------- /traceroute.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2013, 2014 Philipp Winter 4 | 5 | source log.sh 6 | 7 | if [ "$#" -lt 1 ] 8 | then 9 | echo 10 | echo "Usage: $0 RELAY_LIST [OUTPUT_DIR]" 11 | echo 12 | echo "The file \"RELAY_LIST\" must contain one IP:port tuple on every line." 13 | echo 14 | exit 1 15 | fi 16 | 17 | relaylist="$1" 18 | # Check if optional argument is given. 19 | if [ ! -z "$2" ] 20 | then 21 | outdir="$2" 22 | if [ ! -d $outdir ] 23 | then 24 | log "Creating directory \"${outdir}\"." 25 | mkdir -p $outdir 26 | fi 27 | else 28 | outdir="$(mktemp -d '/tmp/traceroutes-XXXXXX')" 29 | fi 30 | count=1 31 | all=$(wc -l $relaylist) 32 | all=(${all// / }) 33 | all=${all[0]} 34 | 35 | timestamp() { 36 | local file="$1" 37 | echo "Date in UTC: $(date -u --rfc-3339=ns)" >> $file 38 | } 39 | 40 | for relay in $(cat $relaylist) 41 | do 42 | # Parse the input which is in the format of IP:port. 43 | tuple=(${relay//:/ }) 44 | ip=${tuple[0]} 45 | port=${tuple[1]} 46 | mkdir -p "${outdir}/${ip}:${port}" 47 | 48 | filebase="${outdir}/${ip}:${port}/$(date -u +'%F.%T')_traceroute_nontor" 49 | 50 | log "Beginning traceroutes ${count} of ${all} to ${ip}:${port}." 51 | count=$((${count} + 1)) 52 | 53 | log "Running TCP traceroutes to ${ip}:${port} in the background." 54 | timestamp "${filebase}_tcp" 55 | traceroute -T -O ack -n -w 3 -p $port $ip >> "${filebase}_tcp" 2>&1 & 56 | 57 | log "Running ICMP traceroutes to ${ip}:${port} in the background." 58 | timestamp "${filebase}_icmp" 59 | traceroute -I -n -w 3 $ip >> "${filebase}_icmp" 2>&1 & 60 | 61 | log "Writing results to \"${filebase}_{tcp,icmp}\"." 
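    # Both traceroutes run in the background, so the loop immediately moves on
    # to the next relay without waiting for them to finish.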
62 | done 63 | -------------------------------------------------------------------------------- /traceroute_host.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2013, 2014 Philipp Winter 4 | 5 | source log.sh 6 | 7 | if [ "$#" -lt 2 ] 8 | then 9 | echo 10 | echo "Usage: $0 DST_ADDR DST_PORT [OUTPUT_DIR]" 11 | echo 12 | exit 1 13 | fi 14 | 15 | ip_addr="$1" 16 | port="$2" 17 | 18 | # Check if optional argument is given. 19 | if [ ! -z "$3" ] 20 | then 21 | outdir="$3" 22 | if [ ! -d $outdir ] 23 | then 24 | log "Creating directory \"${outdir}\"." 25 | mkdir -p $outdir 26 | fi 27 | else 28 | outdir="$(mktemp -d '/tmp/traceroutes-XXXXXX')" 29 | fi 30 | 31 | timestamp() { 32 | local file="$1" 33 | printf "Date in UTC: $(date -u --rfc-3339=ns)\n" >> $file 34 | } 35 | 36 | # Parse the input which is in the format of IP:port. 37 | filebase="${outdir}/$(date -u +'%F.%T')_traceroute" 38 | 39 | log "Running TCP traceroute to ${ip_addr}:${port} in the background." 40 | timestamp "${filebase}_tcp" 41 | timeout 10 traceroute -T -O ack -n -w 3 -p $port $ip_addr >> "${filebase}_tcp" 2>&1 & 42 | 43 | log "Running ICMP traceroute to ${ip_addr} in the background." 44 | timestamp "${filebase}_icmp" 45 | timeout 10 traceroute -I -n -w 3 $ip_addr >> "${filebase}_icmp" 2>&1 & 46 | 47 | log "Writing results to \"${filebase}_{tcp,icmp}\"." 48 | --------------------------------------------------------------------------------