├── ark-tools ├── rb-asfinder-0.10.1.gem ├── rb-wartslib-1.4.2.gem ├── simple_warts.rb └── warts-aspaths ├── .gitignore ├── requirements.txt ├── by_country.sql ├── by_monitor.sql ├── fetch.sh ├── README.md ├── parse_trace.py ├── process.py └── ark-monitors-20160322.txt /ark-tools/rb-asfinder-0.10.1.gem: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quartz/ark/master/ark-tools/rb-asfinder-0.10.1.gem -------------------------------------------------------------------------------- /ark-tools/rb-wartslib-1.4.2.gem: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quartz/ark/master/ark-tools/rb-wartslib-1.4.2.gem -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | .DS_Store 3 | ark 4 | routing 5 | GeoLite2-City.mmdb 6 | data.caida.org 7 | trace.csv 8 | asnames.txt 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | envoy>=0.0.3 2 | psycopg2>=2.6.1 3 | maxminddb>=1.2.0 4 | git+https://github.com/jsommers/pytricia.git#egg=pytricia 5 | -------------------------------------------------------------------------------- /by_country.sql: -------------------------------------------------------------------------------- 1 | select 2 | country, 3 | count(*), 4 | round(avg(rtt)::numeric, 1) as avg_rtt, 5 | round(avg(ip_hops), 1) as avg_ip_hops, 6 | round(avg(as_hops), 1) as avg_as_hops 7 | from traces 8 | group by country 9 | order by avg_as_hops desc 10 | -------------------------------------------------------------------------------- /by_monitor.sql: -------------------------------------------------------------------------------- 1 | select name, location, org_class count_Traces, avg_rtt from monitors 2 | right join ( 3 | select monitor_name, count(*) as count_traces, avg(rtt) as avg_rtt 4 | from traces 5 | group by monitor_name 6 | ) traces on traces.monitor_name = monitors.name 7 | order by avg_rtt desc 8 | 9 | -------------------------------------------------------------------------------- /fetch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in $(seq -f "%02g" 1 31); 4 | do 5 | for t in $(seq 1 3); 6 | do 7 | wget -r --no-parent -nc -R "index.html*" http://data.caida.org/datasets/topology/ark/ipv4/probe-data/team-$t/2014/cycle-201403$i/ 8 | done 9 | done 10 | 11 | wget -r --no-parent -nc -R "index.html*" http://data.caida.org/datasets/routing/routeviews-prefix2as/2014/03/ 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ark 2 | 3 | Tools for processing traceroute data from CAIDA's [Ark project](http://www.caida.org/projects/ark/). 4 | 5 | Tools in the `ark-tools` folder were provided by CAIDA's [Young Hyun](http://www.caida.org/~youngh/). 6 | 7 | ## Setup 8 | 9 | ``` 10 | mkvirtualenv ark 11 | pip install -r requirements.txt 12 | 13 | cd ark-tools 14 | gem install rb-asfinder-0.10.1.gem rb-wartslib-1.4.2.gem 15 | cd .. 16 | ``` 17 | 18 | This project also requires a running, local instance of Postgres with a no-password user named `ark` who owns a geo-enabled database named `ark`: 19 | 20 | ``` 21 | createdb -O ark ark 22 | psql -q ark -c "CREATE EXTENSION postgis;" 23 | ``` 24 | 25 | ## Sourcing the data 26 | 27 | This script uses data from CAIDA's [Ark IPv4 Routed /24 Topology Dataset](http://www.caida.org/data/active/ipv4_routed_24_topology_dataset.xml). The following script will will download all data, for all three monitoring teams, for every day in March of 2014. Caution: **This is 87GB of data**. The script can be stopped and started without starting over. 28 | 29 | ``` 30 | ./fetch.sh 31 | ``` 32 | 33 | You will also need to download the following files to the root project directory: 34 | 35 | * [MaxMind GeoLite2 Country database](http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz) (unzip it) 36 | * [BGP Reports mapping of AS numbers to names](http://bgp.potaroo.net/as6447/asnames.txt) 37 | 38 | ## Building the database 39 | 40 | Warning: This script will run for around a half hour **per day of data**. If you're loading a month of data it could easily take a full day. (Assuming you even have the disk space to hold it all.) 41 | 42 | ``` 43 | ./process.py 44 | ``` 45 | 46 | ## Running queries 47 | 48 | ``` 49 | cat by_country.sql | psql -q ark 50 | cat by_monitor.sql | psql -q ark 51 | ``` 52 | 53 | ## Analyzing a trace path 54 | 55 | You can pass a `trace` from the database into `parse_trace.py` to generate detailed path data in CSV format: 56 | 57 | ``` 58 | ./parse_trace.py "216.66.30.102:6939,216.66.30.101:6939,213.248.67.125:1299,213.155.130.34:1299,157.130.60.13:701,:q,:r,108.51.141.48:701" > nyc_to_dc.csv 59 | ``` 60 | -------------------------------------------------------------------------------- /parse_trace.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import csv 4 | import sys 5 | 6 | import maxminddb 7 | 8 | 9 | MAXMIND = maxminddb.open_database('GeoLite2-City.mmdb') 10 | 11 | 12 | def main(): 13 | as_lookup = load_asnames() 14 | 15 | trace = sys.argv[1] 16 | rows = [] 17 | 18 | for pair in trace.split(','): 19 | row = {} 20 | 21 | row['ip'], row['asn'] = pair.split(':') 22 | 23 | if row['asn'] in ['r', 'q']: 24 | row['asn_name'], row['asn_country'] = None, None 25 | else: 26 | try: 27 | row['asn_name'], row['asn_country'] = as_lookup[row['asn']] 28 | except: 29 | print('No ASN data for %s' % row['asn']) 30 | row['asn_name'], row['asn_country'] = None, None 31 | 32 | if row['ip']: 33 | row.update(geocode(row['ip'])) 34 | 35 | rows.append(row) 36 | 37 | with open('trace.csv', 'w') as f: 38 | writer = csv.DictWriter(sys.stdout, fieldnames=['ip', 'country', 'subdivision', 'city', 'lat', 'lng', 'asn', 'asn_name', 'asn_country']) 39 | writer.writeheader() 40 | writer.writerows(rows) 41 | 42 | 43 | def load_asnames(): 44 | """ 45 | Format: 46 | 47 | AS0 -Reserved AS-, ZZ 48 | AS1 LVLT-1 - Level 3 Communications, Inc., US 49 | """ 50 | as_lookup = {} 51 | 52 | with open('asnames.txt', encoding='latin-1') as f: 53 | for line in f: 54 | if '--No Registry Entry--' in line: 55 | continue 56 | 57 | asn = line[:14].strip().replace('AS', '') 58 | name, country = line[14:].strip().rsplit(',', 1) 59 | 60 | as_lookup[asn] = (name, country) 61 | 62 | return as_lookup 63 | 64 | 65 | def geocode(ip): 66 | loc = MAXMIND.get(ip) 67 | data = {} 68 | 69 | if loc: 70 | if 'country' in loc: 71 | data['country'] = loc['country']['names']['en'] 72 | 73 | if 'subdivisions' in loc: 74 | data['subdivision'] = loc['subdivisions'][0]['names']['en'] 75 | 76 | if 'city' in loc: 77 | data['city'] = loc['city']['names']['en'] 78 | 79 | if 'location' in loc: 80 | data['lat'] = loc['location']['latitude'] 81 | data['lng'] = loc['location']['longitude'] 82 | 83 | return data 84 | 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /process.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import csv 4 | from datetime import date 5 | import os 6 | import psycopg2 7 | 8 | import envoy 9 | import maxminddb 10 | 11 | 12 | BIN_PATH = 'ark-tools/simple_warts.rb' 13 | MAXMIND = maxminddb.open_database('GeoLite2-City.mmdb') 14 | 15 | 16 | def main(): 17 | """ 18 | Parse everything! Write results to a single output file. 19 | """ 20 | db = psycopg2.connect(dbname='ark', user='ark') 21 | cursor = db.cursor() 22 | 23 | cursor.execute('DROP TABLE IF EXISTS monitors CASCADE;') 24 | cursor.execute('''CREATE TABLE monitors ( 25 | name char(8) primary key, 26 | ip char(15), 27 | location varchar, 28 | lat real, 29 | lng real, 30 | asn char(12), 31 | org_class varchar, 32 | org_name varchar, 33 | geom GEOMETRY(Point, 4326) 34 | ); 35 | ''') 36 | 37 | cursor.execute('''CREATE INDEX monitors_gix 38 | ON monitors 39 | USING GIST (geom); 40 | ''') 41 | 42 | # REFERENCES monitors (name) 43 | cursor.execute('DROP TABLE IF EXISTS traces;') 44 | cursor.execute('''CREATE TABLE traces ( 45 | id serial primary key, 46 | probe_date date, 47 | monitor_name char(8), 48 | monitor_ip char(15), 49 | monitor_as varchar, 50 | dest_ip char(15), 51 | dest_as varchar, 52 | country varchar, 53 | subdivision varchar, 54 | city varchar, 55 | lat real, 56 | lng real, 57 | rtt real, 58 | ip_hops integer, 59 | as_hops integer, 60 | trace varchar, 61 | geom GEOMETRY(Point, 4326) 62 | ); 63 | ''') 64 | 65 | cursor.execute('''CREATE INDEX traces_gix 66 | ON traces 67 | USING GIST (geom); 68 | ''') 69 | 70 | load_monitors(db) 71 | 72 | for day in range(1, 7): 73 | d = { 74 | 'year': 2014, 75 | 'month': 3, 76 | 'day': day 77 | } 78 | 79 | print(d) 80 | 81 | parse_date(d, db) 82 | 83 | db.close() 84 | 85 | 86 | def load_monitors(db): 87 | """ 88 | Parse data on Ark monitors. 89 | """ 90 | cursor = db.cursor() 91 | 92 | with open('ark-monitors-20160322.txt') as f: 93 | reader = csv.reader(f, delimiter='|') 94 | next(reader) 95 | 96 | for row in reader: 97 | data = ', '.join(['\'%s\'' % r for r in row]) 98 | data += ', ST_GeomFromText(\'POINT(%s %s)\', 4326)' % (row[4], row[3]) 99 | 100 | cursor.execute(''' 101 | INSERT INTO monitors 102 | VALUES (%s)''' % data) 103 | 104 | db.commit() 105 | 106 | 107 | def parse_date(d, db): 108 | """ 109 | Parse all Ark files for a single day. 110 | """ 111 | routing_path = 'data.caida.org/datasets/routing/routeviews-prefix2as/%(year)d/%(month)02d/routeviews-rv2-%(year)d%(month)02d%(day)02d-1200.pfx2as.gz' % d 112 | 113 | for team in range(1, 4): 114 | print('team-%i' % team) 115 | 116 | d['team'] = team 117 | 118 | ark_root = 'data.caida.org/datasets/topology/ark/ipv4/probe-data/team-%(team)i/2014/cycle-%(year)d%(month)02d%(day)02d/' % d 119 | 120 | if not os.path.exists(ark_root): 121 | continue 122 | 123 | for filename in os.listdir(ark_root): 124 | print(filename) 125 | 126 | ark_path = os.path.join(ark_root, filename) 127 | monitor_name = filename.split('.')[-3].strip() 128 | 129 | cmd = '%(bin)s %(routes)s %(warts)s' % { 130 | 'bin': BIN_PATH, 131 | 'routes': routing_path, 132 | 'warts': ark_path 133 | } 134 | 135 | r = envoy.run(cmd) 136 | 137 | parse_ark(monitor_name, date(d['year'], d['month'], d['day']), r.std_out, db) 138 | 139 | 140 | def parse_ark(monitor_name, probe_date, ark_text, db): 141 | """ 142 | Parse Ark text format and stream results into a CSV writer. 143 | """ 144 | cursor = db.cursor() 145 | 146 | monitors = [] 147 | 148 | for line in ark_text.splitlines(): 149 | fields = line.strip().split('\t') 150 | 151 | row = { 152 | 'monitor_name': monitor_name, 153 | 'probe_date': probe_date, 154 | 'monitor_ip': fields[0], 155 | 'monitor_as': fields[1], 156 | 'dest_ip': fields[2], 157 | 'dest_as': fields[3], 158 | 'country': None, 159 | 'subdivision': None, 160 | 'city': None, 161 | 'lat': None, 162 | 'lng': None, 163 | 'rtt': fields[4], 164 | 'ip_hops': 0, 165 | 'as_hops': 0, 166 | 'trace': ','.join(fields[5:]), 167 | 'geom': None 168 | } 169 | 170 | last_asn = None 171 | 172 | for field in fields[5:]: 173 | ip, asn = field.split(':') 174 | row['ip_hops'] += 1 175 | 176 | if asn in ['q', 'r', last_asn]: 177 | continue 178 | 179 | row['as_hops'] += 1 180 | last_asn = asn 181 | 182 | loc = MAXMIND.get(row['dest_ip']) 183 | 184 | if loc: 185 | if 'country' in loc: 186 | row['country'] = loc['country']['names']['en'] 187 | 188 | if 'subdivisions' in loc: 189 | row['subdivision'] = loc['subdivisions'][0]['names']['en'] 190 | 191 | if 'city' in loc: 192 | row['city'] = loc['city']['names']['en'] 193 | 194 | if 'location' in loc: 195 | row['lat'] = loc['location']['latitude'] 196 | row['lng'] = loc['location']['longitude'] 197 | row['geom'] = 'POINT(%s %s)' % (row['lng'], row['lat']) if row['lng'] and row['lat'] else None 198 | 199 | cursor.execute(''' 200 | INSERT INTO traces (probe_date, monitor_name, monitor_ip, monitor_as, dest_ip, dest_as, country, subdivision, city, lat, lng, rtt, ip_hops, as_hops, trace, geom) 201 | VALUES (%(probe_date)s, %(monitor_name)s, %(monitor_ip)s, %(monitor_as)s, %(dest_ip)s, %(dest_as)s, %(country)s, %(subdivision)s, %(city)s, %(lat)s, %(lng)s, %(rtt)s, %(ip_hops)s, %(as_hops)s, %(trace)s, ST_GeomFromText(%(geom)s, 4326));''' 202 | , row) 203 | 204 | db.commit() 205 | 206 | if __name__ == '__main__': 207 | main() 208 | -------------------------------------------------------------------------------- /ark-tools/simple_warts.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'rubygems' 4 | require 'ostruct' 5 | require 'optparse' 6 | 7 | require 'wartslib' 8 | require 'asfinder' 9 | 10 | def main 11 | $asfinder = CAIDA::ASFinder.new ARGV[0] 12 | 13 | file = Warts::File.open ARGV[1] 14 | file.add_filters Warts::TRACE 15 | 16 | file.read do |trace| 17 | next unless trace.dest_responded? 18 | 19 | ippath = extract_ippath trace 20 | pairs = [] 21 | 22 | ippath.each do |ip| 23 | if ip == nil 24 | pairs << ['', 'q'] 25 | end 26 | 27 | asn = find_all_ases(ip) 28 | 29 | asn = 'r' if asn == nil 30 | asn = 'm' if asn.instance_of?(Array) 31 | 32 | pairs << [ip, asn] 33 | end 34 | 35 | monitor_ip = ippath[0][0] 36 | monitor_as = ippath[0][1] 37 | dest_ip = trace.dst 38 | dest_asn, pfx, len = $asfinder.get_as(dest_ip) 39 | rtt = trace.dest_rtt 40 | 41 | line = "#{monitor_ip}\t#{monitor_as}\t#{dest_ip}\t#{dest_asn}\t#{rtt}" 42 | 43 | pairs.each do |ip, asn| 44 | line.concat("\t#{ip}:#{asn}") 45 | end 46 | 47 | puts line 48 | end 49 | end 50 | 51 | # This tries to behave like skitter_as_links for edge cases. For example, 52 | # if there is a response from the destination, then this only returns IP 53 | # hops up to the hop position of the response from the destination. (Yes, 54 | # strange enough, there can be hops past the hop position of the 55 | # destination.) This includes all responses at the hop position of the 56 | # destination, even if the responses weren't from the destination. 57 | # 58 | # This doesn't return the trailing gap. So you can assume that if you 59 | # encounter a nil element before reaching the end of the result array, then 60 | # there will always be a non-nil element before reaching the end of the 61 | # array. 62 | # 63 | # This method may differ from skitter_as_links in the handling of traces with 64 | # loops. It's possible skitter_as_links simply ignores traces with loops 65 | # (I need to investigate more), but this script doesn't. However, in order 66 | # to properly generate AS links from traces with loops, we have to ignore the 67 | # hops in the loop. For example, suppose the IP path looks like the 68 | # following: 69 | # 70 | # 1 2 3 L 4 5 L 71 | # 72 | # Then the loop hops are 'L 4 5 L'. We should ignore all hops after 73 | # the first occurrence of L, since the 'L 4' IP link is most likely false 74 | # (and the remaining IP links in the loop are most likely redundant anyway). 75 | # 76 | # -------------------------------------------------------------------------- 77 | # 78 | # This returns an array with hop addresses at the corresponding array 79 | # positions (the source is at index 0). If there is more than one address 80 | # at a given hop position, then this will use a subarray to hold all the 81 | # addresses. Otherwise, this stores a single address directly as a 82 | # dotted-decimal string. If there isn't a response at a given hop position, 83 | # then the corresponding array location will contain a nil. 84 | def extract_ippath(trace) 85 | retval = [trace.src] 86 | 87 | dest_response = trace.find_dest_response 88 | trace.each_hop_and_response do |hop, response, exists| 89 | next unless exists 90 | break if dest_response && hop > dest_response[0] 91 | 92 | index = hop + 1 93 | hop_addr = trace.hop_addr hop, response 94 | 95 | if retval[index] 96 | if retval[index].instance_of? Array 97 | retval[index] << hop_addr 98 | else 99 | retval[index] = [retval[index], hop_addr] 100 | end 101 | else 102 | retval[index] = hop_addr 103 | end 104 | end 105 | 106 | if trace.loops > 1 || trace.stop_reason == Warts::Trace::STOP_LOOP 107 | stopping_loop = find_stopping_loop(find_all_loops(retval)) 108 | if stopping_loop 109 | start_index = stopping_loop[0] 110 | truncated_length = start_index + 1 111 | retval.pop while retval.length > truncated_length 112 | 113 | # maintain invariant that path will not end in any nil's 114 | retval.pop while !retval.empty? && retval[-1].nil? 115 | end 116 | end 117 | 118 | retval 119 | end 120 | 121 | # Returns an array of all loops found in the given IP path. 122 | # 123 | # You can apply this method to any trace, including traces without loops. 124 | # This returns an empty array if there are no loops. Otherwise, each array 125 | # element specifies [ start_index, length, address ]. 126 | # 127 | # The start index is the 0-based starting position of a loop. 128 | # A loop that appears in adjacent hops (e.g., 'B B' in 'A B B C') has 129 | # length 1. A loop like 'B D B' has length 2. 130 | # 131 | # A path like 'A B B B' has two loops, both of length 1, starting at 132 | # indexes 1 and 2; that is, [1, 1, B] and [2, 1, B]. 133 | # 134 | # Multiple responses at a hop don't affect the determination of loops in 135 | # a path. That is, for the purposes of determining loops, it's as if there 136 | # were only one instance of each address at any given hop. 137 | def find_all_loops(ippath) 138 | retval = [] # [ [ start_index, length, address ] ] 139 | 140 | last_index = {} # IP address => index of last occurrence of the address 141 | ippath.each_with_index do |addresses, index| 142 | next unless addresses 143 | 144 | (addresses.instance_of?(Array) ? addresses.uniq : [addresses]) 145 | .each do |address| 146 | if last_index[address] 147 | length = index - last_index[address] 148 | retval << [last_index[address], length, address] 149 | end 150 | last_index[address] = index 151 | end 152 | end 153 | 154 | retval.sort # mainly to order by starting index 155 | end 156 | 157 | # Finds the loop, if any, that would have caused a trace to stop for 158 | # 'scamper -L 1 -l 1'. This permits at most 1 loop of length 1, and no 159 | # loops of any longer length. 160 | # 161 | # You can apply this method to any trace, including traces without loops. 162 | # Returns nil if there is no stopping loop. 163 | def find_stopping_loop(loops) 164 | saw_length1_loop = false 165 | loops.each do |loop| 166 | start_index, length, address = loop 167 | if length == 1 168 | return loop if saw_length1_loop 169 | saw_length1_loop = true 170 | else 171 | return loop 172 | end 173 | end 174 | nil 175 | end 176 | 177 | # Uses ASFinder to find the AS(es) for the given address or array of 178 | # addresses. 179 | # 180 | # If a single address is given to this method and that address doesn't have 181 | # a matching AS, then this returns nil. If an array of addresses is given 182 | # to this method and none of the addresses has a matching AS, then this 183 | # returns nil. If an array of addresses is given and an AS could be found 184 | # for at least one address, then this returns an array of only the ASes 185 | # that could be found (that is, this won't return an array containing nil's). 186 | # 187 | # NOTE: For some strange reason, a small fraction of collected traces have 188 | # hops with a large number of responses (hundreds or, very rarely, 189 | # thousands) from either the same IP address or different IP 190 | # addresses (that may map to different ASes). In order to avoid 191 | # slowdowns in subsequent processing of AS paths, this returns only 192 | # the unique AS(es). 193 | def find_all_ases(addresses) 194 | if addresses.instance_of? Array 195 | retval = [] 196 | addresses.each_with_index do |address, _index| 197 | as = find_as(address) 198 | retval << as if as 199 | end 200 | 201 | retval.uniq! 202 | return nil if retval.empty? 203 | return (retval.length == 1 ? retval[0] : retval) 204 | else 205 | return find_as(addresses) 206 | end 207 | end 208 | 209 | def find_as(address) 210 | unless $special_asf.nil? 211 | as, prefix, len = $special_asf.get_as address 212 | return 's' unless as.nil? 213 | end 214 | as, prefix, len = $asfinder.get_as address 215 | as 216 | end 217 | 218 | main 219 | -------------------------------------------------------------------------------- /ark-monitors-20160322.txt: -------------------------------------------------------------------------------- 1 | # monitor|ip_address|geographic_location|geographic_latitude|geographic_longitude|as_number|organization_classification|organization_name 2 | aal-dk|62.107.66.33|Aalborg, DK|57.0488195|9.921747|197288|residential|Residential (STOFA) 3 | acc-gh|196.49.14.12|Accra, GH|5.6037168|-0.1869644|30997|infrastructure|Ghana Internet eXchange (GIX) 4 | adl-au|203.122.229.71|Adelaide, AU|-34.9286212|138.5999594|4739|residential|Residential (Internode) 5 | aep-ar|157.92.44.99|Buenos Aires, AR|-34.6036844|-58.3815591|3449|educational|Universidad de Buenos Aires 6 | ams-nl|192.42.115.98|Amsterdam, NL|52.3702157|4.8951679|1103|research|SURFnet 7 | ams2-nl|91.200.16.102|Amsterdam, NL|52.3702157|4.8951679|1200|infrastructure|AMS-IX 8 | ams5-nl|213.46.228.229|Amsterdam, NL|52.3702157|4.8951679|6830|commercial|Liberty Global 9 | ams6-nl|94.213.38.151|Amsterdam, NL|52.3702157|4.8951679|9143|residential|Residential (Ziggo) 10 | amw-us|129.186.1.240|Ames, IA, US|42.02335|-93.625622|2698|educational|Iowa State University 11 | anc-us|65.74.16.163|Anchorage, AK, US|61.2180556|-149.9002778|8047|commercial|GCI 12 | arn-se|194.68.13.6|Stockholm, SE|59.3327881|18.0644881|2603|research|NORDUnet 13 | ath-gr|77.49.101.239|Peania, GR|37.9553334|23.8522263|1241|residential|Residential (Forthnet) 14 | atl-us|65.50.110.191|Atlanta, GA, US|33.7489954|-84.3879824|31939|residential|Residential (DirectTV) 15 | avl-us|68.115.154.254|Lake Junaluska, NC, US|35.5278815|-82.9595811|20115|residential|Residential (Charter Business DSL) 16 | aza-us|184.98.79.143|Gilbert, AZ, US|33.3528264|-111.789027|209|residential|Residential (Century Link DSL) 17 | bbu-ro|5.2.151.201|Bucharest, RO|44.4267674|26.1025384|8708|residential|Residential (RCS/RDS)) 18 | bcn-es|84.88.81.122|Barcelona, ES|41.387917|2.1699187|13041|educational|Universitat Politecnica de Catalunya 19 | bed-us|50.138.170.121|Concord, MA, US|42.4602778|-71.3494444|7922|residential|Residential (Comcast Cable) 20 | bed2-us|74.104.191.187|Groton, MA, US|42.6112018|-71.5745152|701|residential|Residential (Verizon Fios) 21 | bed3-us|146.115.72.2|Lexington, MA, US|42.4430372|-71.2289641|6079|residential|Residential (RCN) 22 | beg-rs|193.105.163.184|Belgrade, RS|44.786568|20.4489216|13004|infrastructure|Serbian Open eXchange (SOX) 23 | bfi-us|75.165.54.183|Seattle, WA, US|47.6062095|-122.3320708|209|residential|Residential (Centurylink GPON) 24 | bjc-us|209.245.28.50|Broomfield, CO, US|39.9205411|-105.0866504|3356|commercial|Level 3 Communications 25 | bjl-gm|196.46.233.22|Serrekunda, GM|13.438611|-16.6816667|37309|business|QCell 26 | blr-in|122.166.213.105|Bangaluru, IN|12.9715987|77.5945627|24560|residential|Residential (Airtel) 27 | bma-se|195.206.248.254|Kista, SE|59.4024802|17.9443237|49770|business|Acreo 28 | bma2-se|46.39.112.232|Stockholm, SE|59.3293235|18.0685808|50821|residential|Residential (Stockholms Stadsnat) 29 | bre-de|77.22.2.165|Bremen, DE|53.0792962|8.8016937|31334|residential|Residential (Kabel Deutschland) 30 | bre2-de|212.201.44.87|Bremen, DE|53.0792962|8.8016937|680|educational|Jacobs University Bremen 31 | bre3-de|95.90.200.110|Bremen, DE|53.0792962|8.8016937|31334|residential|Residential (Kabel Deutschland) 32 | bud2-hu|157.181.172.123|Budapest, HU|47.497912|19.040235|2012|educational|Eötvös Loránd University (ELTE) 33 | bwi-us|204.235.64.14|Aberdeen, MD, US|39.5095556|-76.1641197|668|research|US Army Research Lab 34 | cbg-uk|128.232.97.9|Cambridge, UK|52.2025441|0.1312368|786|educational|University of Cambridge 35 | cbr-au|203.16.208.142|Red Hill, AU|-38.378544|145.0320635|4739|residential|Residential (Internode) 36 | cdg-fr|85.31.196.71|Boulogne Billancourt, FR|48.84325|2.237803|30781|commercial|Jaguar Network 37 | cgk-id|202.180.0.200|Jakarta, ID|-6.211544|106.845172|7587|research|Indonesian IPv6 Task Force 38 | cjj-kr|150.183.95.135|Daejeon, KR|36.3504119|127.3845475|1237|research|KREONet2 39 | cld2-us|76.238.237.79|San Diego, CA, US|32.715738|-117.1610838|7018|residential|Residential (ATT U-verse) 40 | cmn-ma|196.200.131.131|Casablanca, MA|33.605381|-7.631949|30983|research|CNRST 41 | cph-dk|94.126.177.65|Ballerup, DK|55.724508|12.354765|59469|commercial|Solido Networks ApS 42 | dac-bd|119.40.82.245|Dhaka, BD|23.709921|90.407143|24122|commercial|BDCOM Online Limited at BD-IX 43 | dar-tz|196.49.5.66|Dar es Salaam, TZ|-6.792354|39.2083284|33791|infrastructure|Tanzania Internet Exchange (TIX) 44 | dfw-us|129.119.99.169|Dallas, TX, US|32.802955|-96.769923|1832|educational|Southern Methodist University 45 | dkr-sn|196.1.95.8|Dakar, SN|14.75|-17.3333333|8346|educational|UCAD 46 | dub-ie|193.1.193.136|Dublin, IE|53.34156|-6.257347|1213|research|HEANet 47 | eug-us|128.223.157.8|Eugene, OR, US|44.0520691|-123.0867536|3582|educational|University of Oregon 48 | fnl-us|129.82.138.43|Fort Collins, CO, US|40.5852602|-105.084423|12145|educational|Colorado State University 49 | gai-us|73.132.73.177|Rockville, MD, US|39.0839973|-77.1527578|7922|residential|Residential (Comcast) 50 | gig-br|200.159.255.29|Rio de Janeiro, BR|-22.9035393|-43.2095869|1916|research|RNP 51 | gva-ch|46.20.241.26|Geneva, CH|46.1983922|6.1422961|25091|commercial|IP-Max SA 52 | hel-fi|195.148.124.66|Espoo, FI|60.2052352|24.6540814|1741|educational|TKK 53 | her-gr|139.91.90.6|Heraklion, Crete, GR|35.329162|25.138526|8522|research|Foundation for Research and Technology - Hellas (FORTH) 54 | hkg-cn|213.254.227.26|Hong Kong, CN|22.2478599|114.2033843|3257|commercial|Tinet 55 | hkg2-cn|158.132.255.50|Hong Kong, CN|22.2783151|114.174695|4616|educational|Hong Kong Polytechnic University 56 | hkg3-cn|124.244.239.36|Hong Kong, CN|22.2783151|114.174695|9269|residential|Residential (Hong Kong Broadband Network, HKBN) 57 | hla-za|168.210.11.119|Johannesburg, ZA|-26.2041028|28.0473051|3741|residential|Residential 58 | hlz-nz|130.217.77.6|Hamilton, NZ|-37.7870012|175.279253|681|educational|University of Waikato 59 | hnl-us|205.166.205.222|Honolulu, HI, US|21.3069444|-157.8583333|6360|educational|University of Hawaii 60 | iad-us|192.149.252.140|Chantilly, VA, US|38.8942786|-77.4310992|10745|infrastructure|ARIN 61 | igx-us|104.169.173.127|Durham, NC, US|35.9940329|-78.898619|5650|residential|Residential (Frontier DSL) 62 | ith-us|74.79.78.80|Ithaca, NY, US|42.4439614|-76.5018807|11351|residential|Residential (Time Warner Cable) 63 | jfk-us|216.66.30.102|New York, NY, US|40.7143528|-74.0059731|6939|commercial|Hurricane Electric 64 | jlp-fr|78.246.179.195|Juan Les Pins, FR|43.5691905|7.1123854|12322|residential|Residential (Bouygues Telecom) 65 | jnb-za|196.10.53.6|Johannesburg, ZA|-26.2041028|28.0473051|37474|infrastructure|Johannesburg Internet Exchange (JINX; IXP-ZA) 66 | kgl-rw|197.243.51.140|Kigali, RW|-1.950106|30.058769|37228|educational|Carnegie Mellon University in Rwanda 67 | kna-cl|200.104.79.143|Vina del Mar, CL|-33.024527|-71.55234|22047|residential|Residential (VTR broadband) 68 | ktm-np|202.52.0.24|Kathmandu, NP|27.7|85.3333333|45170|research|Nepal Research And Education Network 69 | lax-us|137.164.84.50|Los Angeles, CA, US|34.0522342|-118.2436849|2152|research|CENIC 70 | lax3-us|128.9.160.226|Marina del Rey, CA, US|33.9802893|-118.4517449|4|educational|Information Sciences Inst., USC 71 | lej-de|139.18.1.244|Leipzig, DE|51.3396731|12.3713639|680|educational|Universitat Leipzig 72 | lex-us|96.29.182.38|Lexington, KY, US|38.0405837|-84.5037164|10796|residential|Residential (Time Warner Cable) 73 | los-ng|197.211.51.180|Lagos, NG|6.5243793|3.3792057|37148|infrastructure|IXP of Nigeria (IXPN) 74 | lpi2-se|85.228.193.7|Linkoping, SE|58.410807|15.6213727|2119|residential|Residential (Bredbandsbolaget) 75 | mel-au|128.250.97.84|Melbourne, AU|-37.8131869|144.9629796|10148|educational|University of Melbourne 76 | mmx-se|212.85.88.39|Lund, SE|55.7046601|13.1910073|8473|residential|Residential (Bahnhof) 77 | mmx2-se|93.182.182.221|Lund, SE|55.7046601|13.1910073|47155|commercial|(colo at) ViaEuropa 78 | mnl-ph|202.90.158.5|Quezon City, PH|14.6760413|121.0437003|9821|research|ASTI 79 | mnz-us|173.79.11.131|Manassas, VA, US|38.7509488|-77.4752667|701|residential|Residential (Verizon Fios) 80 | mry-us|70.90.166.170|Carmel, CA, US|36.5552386|-121.9232879|7922|residential|Residential (Comcast Business Broadband) 81 | msy-us|98.164.70.214|New Orleans, LA, US|29.9667|-90.05|22773|residential|Residential (Cox Broadband) 82 | mty-mx|200.94.183.154|Monterrey, MX|25.673211|-100.309201|22894|infrastructure|NIC Mexico 83 | muc-de|131.159.14.83|Munich, DE|48.1351253|11.5819806|12816|educational|Technical University of Munich 84 | nap-it|143.225.229.226|Napoli, IT|40.8400969|14.2516357|137|educational|University of Napoli 85 | nbo-ke|80.240.194.133|Nairobi, KE|-1.2920659|36.8219462|30844|commercial|Kenia IXP (KIXP; Liquid Telecom) 86 | nce-fr|88.183.56.195|Biot, FR|43.626851|7.09808|12322|residential|Residential (Free.fr) 87 | nce2-fr|193.253.230.214|Biot, FR|43.626851|7.09808|3215|commercial|Eurocom 88 | nic-cy|91.184.204.124|Nicosia, CY|35.1666667|33.3666667|35432|commercial|Cablenet Communication Systems 89 | nrt-jp|203.181.248.51|Tokyo, JP|35.6894875|139.6917064|7660|research|APAN 90 | nrt2-jp|219.119.53.9|Tokyo, JP|35.6894875|139.6917064|2497|residential|Residential (Plala NTT broadband) 91 | oak-us|192.150.187.132|Berkeley, CA, US|37.8715926|-122.272747|25|research|International Computer Science Institute (ICSI) 92 | oak2-us|172.56.30.137|Berkeley, CA, US|37.8715926|-122.272747|21928|residential|Residential (T-mobile 3G) 93 | oak3-us|99.38.248.96|Berkeley, CA, US|37.8715926|-122.272747|7018|residential|Residential (ATT U-verse) 94 | ord-us|140.192.218.138|Chicago, IL, US|41.8781136|-87.6297982|20130_54728|educational|DePaul University 95 | ory-fr|82.230.229.30|Paris, FR|48.856614|2.3522219|12322|residential|Residential (Iliad/Free France) 96 | osl-no|128.39.36.95|Lysaker, NO|59.9131287|10.640499|224|research|Simula Research Laboratory 97 | osl2-no|109.189.94.222|Oslo, NO|59.9138688|10.7522454|2119|residential|Residential (Canal Digital) 98 | oua-bf|196.28.246.227|Ouagadougou, BF|12.3714277|-1.5196603|25543|infrastructure|Onatel 99 | pao-us|73.222.19.29|Palo Alto, CA, US|37.4418834|-122.1430195|33651|residential|Residential (Comcast) 100 | pek-cn|218.241.107.98|Beijing, CN|39.904214|116.407413|24151|infrastructure|CNNIC 101 | per-au|113.197.9.170|Perth, AU|-31.9528536|115.8573389|7575|research|AARNet 102 | pna-es|130.206.158.142|Pamplona, ES|42.8179879|-1.6441835|766|educational|Public University of Navarra 103 | pry-za|196.216.3.6|Pretoria, ZA|-25.73134|28.21837|33764|infrastructure|AFRINIC 104 | psa-it|82.58.148.121|La Spezia, IT|44.1024504|9.8240826|12874|residential|Residential (Telecom Italia) 105 | psa2-it|2.231.28.121|Pisa, IT|43.7228386|10.4016888|12874|residential|Residential (Fastweb S.p.A.) 106 | rno-us|134.197.113.5|Reno, NV, US|39.5296329|-119.8138027|3851|educational|University of Nevada at Reno 107 | san-us|192.172.226.247|San Diego, CA, US|32.7153292|-117.1572551|1909|research|CAIDA 108 | san2-us|174.65.116.24|San Diego, CA, US|32.7153292|-117.1572551|22773|residential|Residential (Cox Broadband) 109 | san3-us|184.184.112.254|San Diego, CA, US|32.7153292|-117.1572551|22773|business|HB Networks 110 | sao-br|200.160.7.159|Sao Paulo, BR|-23.5489433|-46.6388182|22548|infrastructure|Registro.br 111 | sao2-br|200.136.34.2|Sao Paulo, BR|-23.5489433|-46.6388182|1251|research|Rede ANSP / Projeto NARA 112 | scl-cl|200.27.115.62|Santiago, CL|-33.4166667|-70.55|27678|infrastructure|NIC Chile 113 | scl2-cl|200.83.2.200|Santiago, CL|-33.4691199|-70.641997|22047|commercial|VTR 114 | sea-us|128.208.4.133|Seattle, WA, US|47.6062095|-122.3320708|73|educational|University of Washington 115 | she-cn|202.118.7.140|Shenyang, CN|41.80572|123.43147|4538|educational|Northeastern University 116 | sin-sg|203.123.48.15|Singapore, SG|1.352083|103.819836|37989|business|DCS1 Pte Ltd 117 | sin2-sg|202.6.102.35|Singapore, SG|1.352083|103.819836|45494|commercial|Openmirrors.asia - at Equinix SG1 118 | sjc2-us|64.71.191.54|San Jose, CA, US|37.3393857|-121.8949555|6939|commercial|Hurricane Electric 119 | sju-pr|70.45.95.170|San Juan, PR|18.4663338|-66.1057217|36423|commercial|Liberty Global (San Juan Cable) 120 | snn-ie|193.1.101.205|Limerick, IE|52.6680204|-8.6304975|1213|educational|University of Limerick 121 | sof-bg|87.121.150.68|Sofia, BG|42.6977082|23.3218675|34224|commercial|Neterra Ltd 122 | sof2-bg|93.123.23.254|Sofia, BG|42.6977082|23.3218675|57463|infrastructure|NetIX 123 | sql-us|149.20.4.11|Redwood City, CA, US|37.4852152|-122.2363548|1280|business|Internet Systems Consortium 124 | stn-uk|86.149.128.217|Ipswich, UK|52.056736|1.14822|2856|residential|Residential (British Telecom) 125 | syd-au|202.158.196.137|Sydney, AU|-33.873651|151.2068896|7575|research|AARNet 126 | tij-mx|199.48.225.18|Tijuana, MX|32.5149469|-117.0382471|174|infrastructure|Mexican Internet Exchange (MIE) 127 | tpe-tw|211.79.61.157|Hsinchu, TW|24.8039455|120.9646866|7539|research|TWAREN 128 | tul-us|70.184.37.54|Tulsa, OK, US|36.1539816|-95.992775|22773|commercial|True Digital Security (Cox Communication) 129 | tul2-us|99.28.101.40|Tulsa, OK, US|36.1539816|-95.992775|7018|residential|Residential (ATT U-verse) 130 | vie-at|78.41.116.2|Vienna, AT|48.2081743|16.3738189|35492|commercial|FunkFeuer 131 | wbu-us|192.43.244.202|Boulder, CO, US|40.0149856|-105.2705456|194|research|NCAR 132 | wbu2-us|71.237.71.12|Boulder, CO, US|40.0149856|-105.2705456|7922|residential|Residential (Comcast Broadband) 133 | wlg-nz|121.73.231.114|Wellington, NZ|-41.2864603|174.776236|4768|residential|Residential (Vodafone NZ, High Speed Cable) 134 | wlg2-nz|219.89.126.240|Belmont, NZ|-41.1942599|174.9228953|4771|residential|Residential (Spark DSL) 135 | wlg3-nz|202.46.176.66|Wellington, NZ|-41.2864603|174.776236|38037|infrastructure|.nz Registry 136 | wvi-us|142.254.101.194|Santa Cruz, CA, US|36.9741171|-122.0307963|46375|residential|Residential (Sonic.net) 137 | ylk-ca|198.73.133.10|Barrie, ON,CA|44.3893556|-79.6903316|19764|educational|Georgian College 138 | yow-ca|192.231.228.5|Ottawa, ON, CA|45.4215296|-75.6971931|10570|infrastructure|Ottawa Internet Exchange 139 | yto-ca|205.189.33.78|Ottawa, CA|45.4215296|-75.6971931|53904|research|Canarie 140 | yul-ca|24.225.143.13|Montreal, CA|45.5086699|-73.5539925|5769|commercial|Residential (Videotron) 141 | yyz-ca|206.108.0.41|Toronto, ON, CA|43.66|-79.3|11670|infrastructure|TorIX 142 | zrh-ch|192.41.136.226|Zurich, CH|47.367347|8.5500025|559|educational|University of Zurich 143 | zrh2-ch|81.94.127.244|Zug, CH|47.1745887|8.513854|34288|educational|Kantonsschule Zug 144 | -------------------------------------------------------------------------------- /ark-tools/warts-aspaths: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | ############################################################################# 4 | ## Generates AS paths from warts traces. 5 | ## 6 | ## Output format: 7 | ## 8 | ## P monitor_index dest_RTT dest_ip dest_prefix dest_AS AS_path[0] AS_path[1] ... 9 | ## 10 | ## where AS_path[i] is an element of the AS path in the format 11 | ## 12 | ## AS_number:IP_hop_count 13 | ## 14 | ## For example, "1234:2" means 2 consecutive IP hops mapped to AS 1234. 15 | ## The "AS_number" is "q" when the underlying traceroute path didn't have a 16 | ## responding address at a given hop, and "r" when there was a responding 17 | ## address that couldn't be mapped to any AS using the supplied 18 | ## prefix-to-AS mapping file(s). 19 | ## 20 | ## -------------------------------------------------------------------------- 21 | ## Copyright (C) 2007-2012 The Regents of the University of California. 22 | ## 23 | ## This program is free software; you can redistribute it and/or modify 24 | ## it under the terms of the GNU General Public License as published by 25 | ## the Free Software Foundation; either version 2 of the License, or 26 | ## (at your option) any later version. 27 | ## 28 | ## This program is distributed in the hope that it will be useful, 29 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 30 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 31 | ## GNU General Public License for more details. 32 | ## 33 | ## You should have received a copy of the GNU General Public License 34 | ## along with this program; if not, write to the Free Software 35 | ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 36 | ## 37 | ## $Id: warts-aspaths,v 1.6 2016/04/08 00:18:18 youngh Exp $ 38 | ############################################################################# 39 | 40 | require 'rubygems' 41 | require 'ostruct' 42 | require 'optparse' 43 | 44 | require 'wartslib' 45 | require 'asfinder' 46 | 47 | $options = OpenStruct.new 48 | $asfinder = nil 49 | $dst_asf = nil 50 | $special_asf = nil 51 | 52 | opts = OptionParser.new 53 | opts.banner = "Usage: warts-aspaths [options] ..." 54 | 55 | opts.on("-A", "--asfinder", "=FILE", 56 | "IP to prefix/AS mapping file in ASFinder format") do |v| 57 | $options.asfinder = v 58 | end 59 | 60 | opts.on("-D", "--dst-pfx2as", "=FILE", 61 | "IP to prefix/AS mapping file in ASFinder format for trace.dst") do |v| 62 | $options.dst_asf = v 63 | end 64 | 65 | opts.on("-S", "--special-pfx2as", "=FILE", 66 | "IP to prefix mapping file to identify use of special addresses") do |v| 67 | $options.special_asf = v 68 | end 69 | 70 | opts.on("-v", "--[no-]verbose", TrueClass, "show detailed progress") do |v| 71 | $options.verbose = v 72 | end 73 | 74 | begin 75 | ARGV.replace opts.parse(*ARGV) 76 | rescue OptionParser::ParseError 77 | $stderr.puts "ERROR: " + $!.to_s 78 | $stderr.puts opts 79 | exit 1 80 | end 81 | 82 | unless $options.test 83 | unless $options.asfinder 84 | $stderr.puts "ERROR: missing --asfinder argument" 85 | $stderr.puts opts 86 | exit 1 87 | end 88 | end 89 | 90 | #=========================================================================== 91 | #=========================================================================== 92 | 93 | class ASPaths 94 | 95 | attr_accessor :trace_count 96 | 97 | def initialize 98 | @monitor_info = {} # index to "IP_addr\tAS_number\tindex" 99 | @trace_count = 0 100 | @timestamp_min = nil 101 | @timestamp_max = nil 102 | @paths = {} 103 | end 104 | 105 | def prefix_cmp(a, b) 106 | aa = a.split(/\s+/) 107 | ba = b.split(/\s+/) 108 | ap = aa[1].split(/\//) 109 | bp = ba[1].split(/\//) 110 | ai = ap[0].split(/\./) 111 | bi = bp[0].split(/\./) 112 | for i in 0 .. 3 113 | return -1 if(ai[i].to_i < bi[i].to_i) 114 | return 1 if(ai[i].to_i > bi[i].to_i) 115 | end 116 | return -1 if(ap[1].to_i < bp[1].to_i) 117 | return 1 if(ap[1].to_i > bp[1].to_i) 118 | return -1 if(aa[0].to_i < ba[0].to_i) 119 | return 1 if(aa[0].to_i > ba[0].to_i) 120 | return 0 121 | end 122 | 123 | def print_all 124 | @monitor_info.each do |info, v| 125 | printf "M\t%s\t%d\n", info, v 126 | end 127 | 128 | @paths.each do |line, v| 129 | # @paths.keys.sort { |a, b| prefix_cmp(a, b) }.each do |line| 130 | printf "P\t%s\n", line 131 | end 132 | end 133 | 134 | def add_timestamp(timestamp) 135 | @trace_count += 1 136 | 137 | if @timestamp_min == nil || @timestamp_min > timestamp 138 | @timestamp_min = timestamp 139 | end 140 | 141 | if @timestamp_max == nil || @timestamp_max < timestamp 142 | @timestamp_max = timestamp 143 | end 144 | end 145 | 146 | def monitor_key(address, as) 147 | as_value = (as ? as : "UNKNOWN") 148 | key = "#{address}\t#{as_value}" 149 | 150 | id = @monitor_info[key] 151 | 152 | if id == nil 153 | @monitor_info[key] = @monitor_info.length 154 | end 155 | 156 | @monitor_info[key] 157 | end 158 | 159 | def add_path monitor, rtt, dst, prefix, dstas, aspath 160 | line = "#{monitor}\t#{rtt}\t#{dst}\t#{prefix}\t#{dstas}" 161 | 162 | for i in 0..aspath.length 163 | line.concat("\t#{aspath[i]}"); 164 | end 165 | 166 | @paths[line] = 1 167 | end 168 | 169 | end 170 | 171 | #=========================================================================== 172 | #=========================================================================== 173 | 174 | # This tries to behave like skitter_as_links for edge cases. For example, 175 | # if there is a response from the destination, then this only returns IP 176 | # hops up to the hop position of the response from the destination. (Yes, 177 | # strange enough, there can be hops past the hop position of the 178 | # destination.) This includes all responses at the hop position of the 179 | # destination, even if the responses weren't from the destination. 180 | # 181 | # This doesn't return the trailing gap. So you can assume that if you 182 | # encounter a nil element before reaching the end of the result array, then 183 | # there will always be a non-nil element before reaching the end of the 184 | # array. 185 | # 186 | # This method may differ from skitter_as_links in the handling of traces with 187 | # loops. It's possible skitter_as_links simply ignores traces with loops 188 | # (I need to investigate more), but this script doesn't. However, in order 189 | # to properly generate AS links from traces with loops, we have to ignore the 190 | # hops in the loop. For example, suppose the IP path looks like the 191 | # following: 192 | # 193 | # 1 2 3 L 4 5 L 194 | # 195 | # Then the loop hops are 'L 4 5 L'. We should ignore all hops after 196 | # the first occurrence of L, since the 'L 4' IP link is most likely false 197 | # (and the remaining IP links in the loop are most likely redundant anyway). 198 | # 199 | # -------------------------------------------------------------------------- 200 | # 201 | # This returns an array with hop addresses at the corresponding array 202 | # positions (the source is at index 0). If there is more than one address 203 | # at a given hop position, then this will use a subarray to hold all the 204 | # addresses. Otherwise, this stores a single address directly as a 205 | # dotted-decimal string. If there isn't a response at a given hop position, 206 | # then the corresponding array location will contain a nil. 207 | def extract_ippath(trace) 208 | retval = [ trace.src ] 209 | 210 | dest_response = trace.find_dest_response 211 | trace.each_hop_and_response do |hop, response, exists| 212 | next unless exists 213 | break if dest_response && hop > dest_response[0] 214 | 215 | index = hop + 1 216 | hop_addr = trace.hop_addr hop, response 217 | 218 | if retval[index] 219 | if retval[index].instance_of? Array 220 | retval[index] << hop_addr 221 | else 222 | retval[index] = [ retval[index], hop_addr ] 223 | end 224 | else 225 | retval[index] = hop_addr 226 | end 227 | end 228 | 229 | if trace.loops > 1 || trace.stop_reason == Warts::Trace::STOP_LOOP 230 | stopping_loop = find_stopping_loop(find_all_loops(retval)) 231 | if stopping_loop 232 | start_index = stopping_loop[0] 233 | truncated_length = start_index + 1 234 | retval.pop while retval.length > truncated_length 235 | 236 | # maintain invariant that path will not end in any nil's 237 | retval.pop while retval.length > 0 && retval[-1] == nil 238 | end 239 | end 240 | 241 | retval 242 | end 243 | 244 | 245 | # Returns an array of all loops found in the given IP path. 246 | # 247 | # You can apply this method to any trace, including traces without loops. 248 | # This returns an empty array if there are no loops. Otherwise, each array 249 | # element specifies [ start_index, length, address ]. 250 | # 251 | # The start index is the 0-based starting position of a loop. 252 | # A loop that appears in adjacent hops (e.g., 'B B' in 'A B B C') has 253 | # length 1. A loop like 'B D B' has length 2. 254 | # 255 | # A path like 'A B B B' has two loops, both of length 1, starting at 256 | # indexes 1 and 2; that is, [1, 1, B] and [2, 1, B]. 257 | # 258 | # Multiple responses at a hop don't affect the determination of loops in 259 | # a path. That is, for the purposes of determining loops, it's as if there 260 | # were only one instance of each address at any given hop. 261 | def find_all_loops(ippath) 262 | retval = [] # [ [ start_index, length, address ] ] 263 | 264 | last_index = {} # IP address => index of last occurrence of the address 265 | ippath.each_with_index do |addresses, index| 266 | next unless addresses 267 | 268 | ( addresses.instance_of?(Array) ? addresses.uniq : [ addresses ] ). 269 | each do |address| 270 | if last_index[address] 271 | length = index - last_index[address] 272 | retval << [ last_index[address], length, address ] 273 | end 274 | last_index[address] = index 275 | end 276 | end 277 | 278 | if $options.verbose 279 | printf "\nIPPATH>> %p\n", ippath 280 | printf "\nLOOPS>> %p\n", retval 281 | end 282 | 283 | retval.sort # mainly to order by starting index 284 | end 285 | 286 | 287 | # Finds the loop, if any, that would have caused a trace to stop for 288 | # 'scamper -L 1 -l 1'. This permits at most 1 loop of length 1, and no 289 | # loops of any longer length. 290 | # 291 | # You can apply this method to any trace, including traces without loops. 292 | # Returns nil if there is no stopping loop. 293 | def find_stopping_loop(loops) 294 | saw_length1_loop = false 295 | loops.each do |loop| 296 | start_index, length, address = loop 297 | if length == 1 298 | return loop if saw_length1_loop 299 | saw_length1_loop = true 300 | else 301 | return loop 302 | end 303 | end 304 | nil 305 | end 306 | 307 | def process_ippath(aspaths, monitor, src_as, ippath, dst, dst_rtt) 308 | aspath_in = generate_aspath src_as, ippath 309 | return if aspath_in.length < 2 310 | 311 | # look up details about the destination IP in BGP 312 | dst_as = nil 313 | dst_pfx = nil 314 | dst_len = nil 315 | if $dst_asf != nil 316 | dst_as, dst_pfx, dst_len = $dst_asf.get_as dst 317 | else 318 | dst_as, dst_pfx, dst_len = $asfinder.get_as dst 319 | end 320 | 321 | # if we don't know anything about it 322 | return if dst_as == nil 323 | dst_pfx = "#{dst_pfx}/#{dst_len}" 324 | 325 | hops = 0 326 | for i in 0 .. aspath_in.length-1 327 | if aspath_in[i] != nil 328 | hops += 1 329 | else 330 | if ippath[i] == nil 331 | aspath_in[i] = "q" 332 | else 333 | aspath_in[i] = "r" 334 | end 335 | end 336 | end 337 | return if hops == 0 338 | 339 | print "\n[#{monitor} #{dst_pfx} #{dst_as}" if $options.verbose 340 | 341 | aspath = [] 342 | asp_index = 0 343 | cur_as = aspath_in[0] 344 | cur_cn = 1 345 | index = 1 346 | 347 | while index < aspath_in.length 348 | if aspath_in[index] != cur_as 349 | aspath[asp_index] = "#{cur_as}:#{cur_cn}" 350 | printf " %s", aspath[asp_index] if $options.verbose 351 | asp_index += 1 352 | cur_as = aspath_in[index] 353 | cur_cn = 1 354 | else 355 | cur_cn += 1 356 | end 357 | index += 1 358 | end 359 | 360 | if(cur_as != nil) 361 | aspath[asp_index] = "#{cur_as}:#{cur_cn}" 362 | printf " %s", aspath[asp_index] if $options.verbose 363 | end 364 | 365 | print "]\n" if $options.verbose 366 | 367 | return if aspath.length < 2 368 | aspaths.add_path monitor, dst_rtt, dst, dst_pfx, dst_as, aspath 369 | end 370 | 371 | # Extracts all AS links from the given AS path and stores the links into 372 | # {aslinks}. 373 | def extract_all_aslinks(aslinks, monitor, aspath) 374 | return if aspath.length < 2 375 | 376 | # Handle the case of nil components at the beginning of the AS path, 377 | # including a nil value for the AS of the traceroute source. 378 | src_index = 0 379 | while src_index < aspath.length - 1 && aspath[src_index] == nil 380 | src_index += 1 381 | end 382 | 383 | dest_index = src_index + 1 384 | while src_index < aspath.length - 1 385 | while dest_index < aspath.length && aspath[dest_index] == nil 386 | dest_index += 1 387 | end 388 | return unless dest_index < aspath.length 389 | 390 | src_as = aspath[src_index] 391 | dest_as = aspath[dest_index] 392 | gap_length = dest_index - src_index - 1 393 | 394 | printf "index: src=%d, dest=%d; src_as=%p, dest_as=%p\n", 395 | src_index, dest_index, src_as, dest_as if $options.verbose 396 | 397 | extract_aslinks aslinks, monitor, src_as, dest_as, gap_length 398 | 399 | src_index = dest_index 400 | dest_index += 1 401 | end 402 | end 403 | 404 | 405 | # Extracts all possible AS links (that is, the cross product) from the 406 | # given pair of AS path components that are separated by {gap_length} gaps 407 | # (0 if the components are adjacent). The AS path components, which must 408 | # not be nil (since nil's represent gaps), can be either a single AS or an 409 | # array of ASes. 410 | def extract_aslinks(aslinks, monitor, src_as, dest_as, gap_length) 411 | simple_src = !src_as.instance_of?(Array) 412 | simple_dest = !dest_as.instance_of?(Array) 413 | 414 | if simple_src && simple_dest 415 | aslinks.add_link monitor, src_as, dest_as, gap_length 416 | elsif simple_src 417 | dest_as.each do |dest| 418 | aslinks.add_link monitor, src_as, dest, gap_length 419 | end 420 | elsif simple_dest 421 | src_as.each do |src| 422 | aslinks.add_link monitor, src, dest_as, gap_length 423 | end 424 | else 425 | src_as.each do |src| 426 | dest_as.each do |dest| 427 | aslinks.add_link monitor, src, dest, gap_length 428 | end 429 | end 430 | end 431 | end 432 | 433 | 434 | # Generates an AS path from the given IP path. 435 | def generate_aspath(src_as, ippath) 436 | print "\n[" if $options.verbose 437 | 438 | retval = [] 439 | ippath.each_with_index do |addresses, index| 440 | unless addresses 441 | print " *" if $options.verbose 442 | next 443 | end 444 | 445 | if index == 0 446 | printf " %s", (src_as ? src_as : "?") if $options.verbose 447 | retval[index] = src_as 448 | else 449 | retval[index] = find_all_ases(addresses) 450 | end 451 | end 452 | 453 | if $options.verbose 454 | printf " ]\n" 455 | p retval 456 | end 457 | retval 458 | end 459 | 460 | 461 | # Uses ASFinder to find the AS(es) for the given address or array of 462 | # addresses. 463 | # 464 | # If a single address is given to this method and that address doesn't have 465 | # a matching AS, then this returns nil. If an array of addresses is given 466 | # to this method and none of the addresses has a matching AS, then this 467 | # returns nil. If an array of addresses is given and an AS could be found 468 | # for at least one address, then this returns an array of only the ASes 469 | # that could be found (that is, this won't return an array containing nil's). 470 | # 471 | # NOTE: For some strange reason, a small fraction of collected traces have 472 | # hops with a large number of responses (hundreds or, very rarely, 473 | # thousands) from either the same IP address or different IP 474 | # addresses (that may map to different ASes). In order to avoid 475 | # slowdowns in subsequent processing of AS paths, this returns only 476 | # the unique AS(es). 477 | def find_all_ases(addresses) 478 | if addresses.instance_of? Array 479 | print " [" if $options.verbose 480 | retval = [] 481 | addresses.each_with_index do |address, index| 482 | print "," if $options.verbose && index > 0 483 | as = find_as(address) 484 | retval << as if as 485 | end 486 | print " ]" if $options.verbose 487 | 488 | retval.uniq! 489 | return nil if retval.empty? 490 | return (retval.length == 1 ? retval[0] : retval) 491 | else 492 | return find_as(addresses) 493 | end 494 | end 495 | 496 | 497 | def find_as(address) 498 | if $special_asf != nil 499 | as, prefix, len = $special_asf.get_as address 500 | return "s" if(as != nil) 501 | end 502 | as, prefix, len = $asfinder.get_as address 503 | as 504 | end 505 | 506 | 507 | def find_prefix(asfinder, address) 508 | as, prefix, len = asfinder.get_as address 509 | retval = "#{prefix}/#{len}" 510 | retval 511 | end 512 | 513 | 514 | ############################################################################# 515 | # Main 516 | ############################################################################# 517 | 518 | unless $options.test 519 | aspaths = ASPaths.new 520 | $asfinder = CAIDA::ASFinder.new $options.asfinder 521 | 522 | if $options.dst_asf != nil 523 | $dst_asf = CAIDA::ASFinder.new $options.dst_asf; 524 | end 525 | 526 | if $options.special_asf != nil 527 | $special_asf = CAIDA::ASFinder.new $options.special_asf; 528 | end 529 | 530 | ARGV.each do |path| 531 | file = Warts::File.open path 532 | unless file 533 | $stderr.puts "ERROR: couldn't open '#{path}'; skipping" 534 | next 535 | end 536 | 537 | src_ip = nil 538 | src_as = nil 539 | monitor = nil 540 | 541 | file.add_filters Warts::TRACE 542 | file.read do |trace| 543 | next unless trace.dest_responded? 544 | 545 | if $options.verbose 546 | puts "-" * 78 547 | puts trace.dump 548 | end 549 | 550 | aspaths.add_timestamp trace.start 551 | 552 | ippath = extract_ippath trace 553 | if $options.verbose 554 | puts 555 | p ippath 556 | end 557 | 558 | current_src_ip = ippath[0] 559 | unless current_src_ip == src_ip 560 | src_ip = current_src_ip 561 | results = $asfinder.get_as src_ip 562 | src_as = (results ? results[0] : nil) 563 | monitor = aspaths.monitor_key src_ip, src_as 564 | end 565 | 566 | process_ippath aspaths, monitor, src_as, ippath, trace.dst, trace.dest_rtt 567 | 568 | end 569 | end 570 | 571 | #========================================================================= 572 | 573 | now = Time.now 574 | printf "#INFO: generation_tool=%s\n", "$Id: warts-aspaths,v 1.6 2016/04/08 00:18:18 youngh Exp $" 575 | printf "#INFO: generation_timestamp=%d (%s)\n", now.to_i, now.to_s 576 | printf "#INFO: trace_count=%d\n", aspaths.trace_count 577 | printf "#INFO: bgp_table=%s\n", $options.asfinder 578 | printf "#INFO: dst_bgp_table=%s\n", $options.dst_asf if $options.dst_asf != nil 579 | 580 | ARGV.each do |path| 581 | printf "#INFO: input_file=%s\n", path 582 | end 583 | 584 | aspaths.print_all 585 | exit 0 586 | end 587 | 588 | exit 0 589 | --------------------------------------------------------------------------------