├── README
└── tormap.py
/README:
--------------------------------------------------------------------------------
1 | The following steps are performed by the Python script:
2 |
3 | 1. Extract information about Tor relays from cached consensus and descriptor files.
4 | 2. Use MaxMind's free GeoIP database to map their IPs to cities.
5 | 3. Generate an XML-based KML file.
6 |
7 | http://moblog.wiredwings.com/archives/20101213/Visualization-Tor-nodes-on-Google-Maps-and-Google-Earth.html
--------------------------------------------------------------------------------
/tormap.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | '''
5 | quick and dirty hack Moritz Bartl moritz@torservers.net
6 | 13.12.2010
7 |
8 | let me know and send me your changes if you improve anything
9 |
10 | requires:
11 | - pygeoip, http://code.google.com/p/pygeoip/
12 | - geoIP city database, eg. http://www.maxmind.com/app/geolitecity
13 |
14 | This program is free software: you can redistribute it and/or modify
15 | it under the terms of the GNU Lesser General Public License (LGPL)
16 | as published by the Free Software Foundation, either version 3 of the
17 | License, or any later version.
18 |
19 | This program is distributed in the hope that it will be useful,
20 | but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 | GNU Lesser General Public License for more details.
23 |
24 | http://www.gnu.org/licenses/
25 | '''
26 |
27 | FAST = 2000000
28 |
29 | import base64, shelve, pygeoip, cgi, re
30 | from operator import attrgetter, itemgetter
31 | from string import Template
32 |
cachedRelays = dict()
currentRouter = dict()

# Parse cached-descriptors to extract uptime and announced bandwidth.
#
# Fields are accumulated in currentRouter while a descriptor is being read.
# The 'router-signature' line closes the descriptor: the dict is then filed
# in cachedRelays under its fingerprint (spaces removed, lower-cased) and a
# fresh dict is started for the next descriptor.
with open('cached-descriptors') as f:
    for line in f:
        line = line.strip()
        if line.startswith('router '):
            [nil, name, ip, orport, socksport, dirport] = line.split()
            currentRouter['name'] = name
            currentRouter['ip'] = ip
            currentRouter['orport'] = orport
            currentRouter['socksport'] = socksport
            currentRouter['dirport'] = dirport
        elif line.startswith('platform '):
            currentRouter['version'] = line[9:]
        elif line.startswith('fingerprint '):
            fingerprint = line[12:]
            currentRouter['fingerprint'] = fingerprint.replace(' ', '').lower()
        elif line.startswith('opt fingerprint'):
            # same field carrying an 'opt ' prefix
            fingerprint = line[16:]
            currentRouter['fingerprint'] = fingerprint.replace(' ', '').lower()
        elif line.startswith('uptime '):
            currentRouter['uptime'] = line[7:]
        elif line.startswith('bandwidth '):
            currentRouter['bandwidth'] = line[10:]
            try:
                # the fourth token of the line is stored as the observed bandwidth
                currentRouter['bw-observed'] = int(line.split()[3])
            except (IndexError, ValueError):
                # Best effort: a short or non-numeric bandwidth line simply
                # leaves 'bw-observed' unset for this relay.
                pass
        elif line.startswith('contact '):
            # escaped here because the value is later embedded in markup
            currentRouter['contact'] = cgi.escape(line[8:])
        elif line == 'router-signature':
            # A descriptor without a fingerprint line cannot be indexed;
            # drop it instead of aborting the whole run with a KeyError.
            if 'fingerprint' in currentRouter:
                fingerprint = currentRouter['fingerprint']
                cachedRelays[fingerprint] = currentRouter
            currentRouter = dict()
70 |
# Parse cached-consensus for flags and correlate to descriptors.
#
# An 'r ' line starts a relay entry; it reuses the dict collected from
# cached-descriptors when the fingerprint is known there. The 's ' line
# carries the flags and decides which category dict the relay is filed in.

badRelays = dict()         # 'Bad' appears in the flags, eg. BadExit, BadDirectory
exitFastRelays = dict()    # Exit flag, observed bandwidth above FAST
exitRelays = dict()        # Exit flag, all remaining exits
stableFastRelays = dict()  # Stable flag but not Exit, observed bandwidth above FAST
stableRelays = dict()      # Stable flag but not Exit, all remaining ones
otherRelays = dict()       # neither Bad, Exit nor Stable

count = 0
with open('cached-consensus') as f:
    for line in f:
        line = line.strip()
        if line.startswith('r '):
            [nil, name, identity, digest, date, time, ip, orport, dirport] = line.split()
            identity = identity.strip()
            # pad the base64 identity, decode it and hex-encode the result
            # php: unpack('H*',decode_base64($identity))
            fingerprint = base64.decodestring(identity + '=\n').encode('hex')
            currentRouter = cachedRelays.get(fingerprint, dict())
            # trust consensus more than cached-descriptors, replace info
            currentRouter.update(
                fingerprint=fingerprint,
                name=name,
                ip=ip,
                orport=orport,
                dirport=dirport,
            )
        elif line.startswith('p '):
            currentRouter['policy'] = line[2:]
        elif line.startswith('s '):
            flags = line[2:]
            currentRouter['flags'] = flags
            isFast = ('bw-observed' in currentRouter
                      and currentRouter['bw-observed'] > FAST)
            # 'Bad' is tested first, so a BadExit relay never counts as an exit
            if 'Bad' in flags:
                category = badRelays
            elif 'Exit' in flags:
                category = exitFastRelays if isFast else exitRelays
            elif 'Stable' in flags:
                category = stableFastRelays if isFast else stableRelays
            else:
                category = otherRelays
            category[fingerprint] = currentRouter
117 |
118 | print 'Bad:', len(badRelays)
119 | print 'Exit:', len(exitRelays)
120 | print 'Fast exit:', len(exitFastRelays)
121 | print 'Non-exit stable:', len(stableRelays)
122 | print 'Fast non-exit stable:', len(stableFastRelays)
123 | print 'Other:', len(otherRelays)
124 |
125 | inConsensus = len(badRelays)+len(exitRelays)+len(stableRelays)+len(otherRelays)
126 | print '[ in consensus:', inConsensus, ']'
127 | notInConsensus = len(cachedRelays)-len(badRelays)-len(exitRelays)-len(stableRelays)-len(otherRelays)
128 | print '[ cached descriptors not in consensus:', notInConsensus, ']'
129 |
# Gather every relay that gets plotted into a single dict for the geoIP
# pass. badRelays is not part of the merge.
allRelays = dict()
for category in (exitRelays, exitFastRelays, stableRelays,
                 stableFastRelays, otherRelays):
    allRelays.update(category)
137 |
# geoIP: attach location data to every relay in allRelays.
#
# Results are kept in the 'geoip-cache' shelve keyed by IP address, so the
# GeoLiteCity database is only opened on the first cache miss. Lookups that
# return None are cached too and leave the relay without location keys.
geoIPcache = shelve.open('geoip-cache')
geoIPdb = None

try:
    for relay in allRelays.values():
        ip = relay['ip']
        if ip in geoIPcache:
            info = geoIPcache[ip]
        else:
            if geoIPdb is None:
                geoIPdb = pygeoip.GeoIP('GeoLiteCity.dat')
            info = geoIPdb.record_by_addr(ip)
            geoIPcache[ip] = info
        if info is not None:
            relay['location'] = info
            relay['latitude'] = info['latitude']
            relay['longitude'] = info['longitude']
finally:
    # Close the shelve even if the database is missing or a lookup raises,
    # so entries cached so far are flushed to disk.
    geoIPcache.close()
157 |
158 | # generate KML
159 |
160 | placemarkTemplate = Template ('
Bandwidth: $bandwidth
\n\ 166 |Flags: $flags
\n\ 167 |Uptime: $uptime
\n\ 168 |Contact: $contact
\n\ 169 |Policy: $policy
\n\ 170 |Fingerprint: $prettyFingerprint
\n\ 171 |Version: $version
\n\ 172 | ]]>\n\ 173 | \n\ 174 |