├── Makefile ├── birdparse.py ├── buildmap.py ├── demo.dat.xz ├── demo.map ├── demo.random.dat ├── ipv4.dump.xz ├── ipv6.dump.xz ├── remote_dumps ├── README.md ├── download_dumps.py ├── prepare.sh ├── quagga_aggregate.py ├── quagga_parse.sh └── setup.sh └── testmap.py /Makefile: -------------------------------------------------------------------------------- 1 | ipv4.dump: ipv4.dump.xz 2 | xz -d ipv4.dump 3 | 4 | ipv6.dump: ipv6.dump.xz 5 | xz -d ipv6.dump 6 | 7 | demo.dat.xz: ipv4.dump ipv6.dump birdparse.py 8 | python3 birdparse.py ipv4.dump ipv6.dump | xz -9e >demo.dat.xz 9 | 10 | demo.map: demo.dat.xz buildmap.py 11 | xz -d demo.map 12 | 13 | test: demo.map 14 | python3 testmap.py 15 | -------------------------------------------------------------------------------- /birdparse.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | import ipaddress 4 | 5 | IPV4_PREFIX = bytes([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff]) 6 | 7 | def AddEntry(netmask, asn, fnam, linenum, entries): 8 | loc = "%s:%i" % (fnam, linenum) 9 | network = ipaddress.ip_network(netmask, True) 10 | if asn is None: 11 | print("[WARNING] %s: no ASN for %s" % (loc, netmask), file=sys.stderr) 12 | return 13 | if not network: 14 | print("[WARNING] %s: cannot parse netmask %s for AS%i" % (loc, netmask, asn), file=sys.stderr) 15 | return 16 | if network.is_multicast: 17 | print("[WARNING] %s: multicast address %s for AS%i" % (loc, netmask, asn), file=sys.stderr) 18 | return 19 | if network.is_private: 20 | print("[WARNING] %s: private address %s for AS%i" % (loc, netmask, asn), file=sys.stderr) 21 | return 22 | if network.is_unspecified: 23 | print("[WARNING] %s: address from unspecified range %s for AS%i" % (loc, netmask, asn), file=sys.stderr) 24 | return 25 | if network.is_reserved: 26 | print("[WARNING] %s: reserved address %s for AS%i" % (loc, netmask, asn), file=sys.stderr) 27 | return 28 | if network.is_loopback: 29 | print("[WARNING] %s: loopback address %s for AS%i" % (loc, netmask, asn), file=sys.stderr) 30 | return 31 | if asn == 0 or asn == 65535 or (asn >= 65552 and asn <= 131072) or asn == 4294967295: 32 | print("[WARNING] %s: prefix %s has reserved AS%i (RFC1930)" % (loc, netmask, asn), file=sys.stderr) 33 | return 34 | if asn == 23456: 35 | print("[WARNING] %s: prefix %s has transition AS%i (RFC6793)" % (loc, netmask, asn), file=sys.stderr) 36 | return 37 | if (asn >= 64496 and asn <= 64511) or (asn >= 65536 and asn <= 65551): 38 | print("[WARNING] %s: prefix %s has documentation AS%i (RFC4893,RFC5398)" % (loc, netmask, asn), file=sys.stderr) 39 | return 40 | if (asn >= 64512 and asn <= 65534) or (asn >= 4200000000 and asn <= 4294967294): 41 | print("[WARNING] %s: prefix %s has private AS%i (RFC5398,RFC6996)" % (loc, netmask, asn), file=sys.stderr) 42 | return 43 | if isinstance(network, ipaddress.IPv4Network): 44 | entries.append((IPV4_PREFIX + network.network_address.packed, "%s AS%i # %s:%i" % (network.compressed, asn, fnam, linenum))) 45 | elif isinstance(network, ipaddress.IPv6Network): 46 | entries.append((network.network_address.packed, "%s AS%i # %s:%i" % (network.compressed, asn, fnam, linenum))) 47 | else: 48 | raise AssertionError("Unknown network type for %s" % netmask) 49 | 50 | def ParseDump(fnam, entries): 51 | RE_INITIAL = re.compile(r"^BIRD .* ready.$") 52 | RE_TABLE = re.compile(r"^Table master(4|6):$") 53 | RE_HEADER = re.compile(r"^(([0-9.]+|[0-9a-f:]+)/\d+) +unicast +\[.*\] +\* +\(\d+\)( +\[(AS(\d+))?[ie?]?\])?$") 54 | RE_PATH = re.compile(r"^[\t]BGP\.as_path:(.*)$") 55 | RE_PATH_DECOMPOSE = re.compile(r"^[0-9 ]*?(\d+)( +\{[0-9 ]*?(\d+)\})?$") 56 | RE_INNER = re.compile(r"^[\t]") 57 | RE_INNER_ADDR = re.compile(r"^ +unicast") 58 | netmask = None 59 | asn = None 60 | aslevel = 0 61 | maskline = None 62 | with open(fnam) as f: 63 | linenum = 0 64 | for line in f: 65 | linenum += 1 66 | line = line.rstrip("\n\r") 67 | if RE_INITIAL.match(line): 68 | continue 69 | if RE_TABLE.match(line): 70 | continue 71 | match = RE_HEADER.match(line) 72 | if match: 73 | if netmask: 74 | AddEntry(netmask, asn, fnam, maskline, entries) 75 | netmask = match[1] 76 | maskline = linenum 77 | if not match[3] or not match[4]: 78 | asn = None 79 | aslevel = 0 80 | else: 81 | asn = int(match[5]) 82 | aslevel = 1 83 | continue 84 | match = RE_PATH.match(line) 85 | if match: 86 | if aslevel < 2: 87 | decomp = RE_PATH_DECOMPOSE.match(match[1]) 88 | if not decomp: 89 | print("[WARNING] %s:%i: cannot parse as_path %s" % (fnam, linenum, match[1])) 90 | asn = int(decomp[1]) 91 | aslevel = 2 92 | match = RE_INNER.match(line) 93 | if match: 94 | continue 95 | match = RE_INNER_ADDR.match(line) 96 | if match: 97 | continue 98 | print("[WARNING] %s:%i: cannot parse %s" % (fnam, linenum, line), file=sys.stderr) 99 | if netmask: 100 | AddEntry(netmask, asn, fnam, maskline, entries) 101 | 102 | entries = [] 103 | for fnam in sys.argv[1:]: 104 | print("[INFO] Parsing %s" % fnam, file=sys.stderr) 105 | ParseDump(fnam, entries) 106 | print("[INFO] Parsed %i prefixes" % len(entries), file=sys.stderr) 107 | entries.sort() 108 | for _, s in entries: 109 | print(s) 110 | -------------------------------------------------------------------------------- /buildmap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Intake a map of IP prefixes -> AS numbers and output instructions that will 3 | allow a decoder to match an IP address to an ASN by following a sequence 4 | of instructions. 5 | 6 | The instructions describe a prefix tree that can be navigated using the bits of 7 | an IP address (i.e. 0 for left child, 1 for right child, leaf nodes 8 | corresponding to a given ASN). The types of instructions are denoted by the 9 | *Type() functions defined below. Once an IP address specifies a bit for which 10 | there is no path in the tree (i.e. the part of its address more specific than 11 | any known network prefix), the tree returns a "default" ASN value that has been 12 | set based on the last valid location in the tree. 13 | 14 | See `testmap.py:Interpret` for an illustration of how the decoding process 15 | works. 16 | 17 | Before the prefix tree is encoded into instructions using bits, it is compacted 18 | (e.g. duplicate subtrees removed) and annotated with which default ASN values 19 | should be set for particular regions of the tree. 20 | """ 21 | import sys 22 | import ipaddress 23 | from collections import namedtuple 24 | 25 | 26 | def Parse(entries: list): 27 | """ 28 | Read in a file of the format 29 | 30 | 1.0.0.0/24 AS13335 # ipv4.dump:4856343 31 | 1.0.4.0/22 AS56203 # ipv4.dump:2759291 32 | ... 33 | 34 | Ignoring comments following '#'. Creates an Entry object for each line. 35 | Maps IPv4 networks into IPv6 space. 36 | 37 | Args: 38 | entries: modified in place with the new Entrys. 39 | """ 40 | for line in sys.stdin: 41 | line = line.split('#')[0].lstrip(' ').rstrip(' \r\n') 42 | prefix, asn = line.split(' ') 43 | assert(len(asn) > 2 and asn[:2] == "AS") 44 | network = ipaddress.ip_network(prefix) 45 | 46 | prefix_len = network.prefixlen 47 | net_addr = int.from_bytes(network.network_address.packed, 'big') 48 | 49 | # Map an IPv4 prefix into IPv6 space. 50 | if isinstance(network, ipaddress.IPv4Network): 51 | prefix_len += 96 52 | net_addr += 0xffff00000000 53 | 54 | entries.append(Entry(prefix_len, net_addr, int(asn[2:]))) 55 | 56 | 57 | Entry = namedtuple('Entry', ( 58 | # The length of the network prefix in bits. E.g. '26' for 255.255.0.0/26. 59 | 'prefix_len', 60 | 61 | # An int containing the bits of the network address. 62 | 'net_addr', 63 | 64 | # An int for the autonomous system (AS) number. 65 | 'asn', 66 | )) 67 | 68 | 69 | def UpdateTree(gtree, addrlen: int, entries: [Entry]): 70 | """ 71 | Returns a prefix tree such that following a path down through the 72 | tree based on the bits of a network prefix (in order of most significant 73 | bit) leads to an ASN. 74 | 75 | Args: 76 | gtree: tree structure to encode the mappings into. Modified in-place. 77 | addrlen: The maximum number of bits in a network address. 78 | This is 128 for IPv6 (16 bytes). 79 | entries: The network prefix -> ASN mappings to encode. 80 | """ 81 | for prefix, val, asn in sorted(entries): 82 | tree = gtree 83 | default = None 84 | 85 | # Iterate through each bit in the network prefix, starting with the 86 | # most significant bit. 87 | for i in range(prefix): 88 | bit = (val >> (addrlen - 1 - i)) & 1 89 | 90 | # If we have passed the end of the network prefix, all entries 91 | # under subsequent bits will be associated with the same ASN. 92 | needs_inner = i < prefix - 1 93 | if tree[bit] is None: 94 | if needs_inner: 95 | tree[bit] = [default, default] 96 | tree = tree[bit] 97 | continue 98 | else: 99 | tree[bit] = asn 100 | break 101 | if isinstance(tree[bit], list): 102 | assert(needs_inner) 103 | tree = tree[bit] 104 | continue 105 | assert(isinstance(tree[bit], int)) 106 | if tree[bit] == asn: 107 | break 108 | if not needs_inner: 109 | tree[bit] = asn 110 | break 111 | default = tree[bit] 112 | tree[bit] = [default, default] 113 | tree = tree[bit] 114 | return gtree 115 | 116 | 117 | def CompactTree(tree, approx=True) -> (list, set): 118 | """ 119 | Remove redundancy from a tree. 120 | 121 | E.g. if all nodes in a subtree point to the same ASN, compact the subtree 122 | into a single int. 123 | 124 | Returns: 125 | (the compacted tree, a set of all ASNs in the tree) 126 | 127 | Args: 128 | approx: if True, unassigned ranges may get reassigned to arbitrary ASNs. 129 | """ 130 | num = 0 131 | if tree is None: 132 | return (tree, set()) 133 | if isinstance(tree, int): 134 | return (tree, set([tree])) 135 | tree[0], leftas = CompactTree(tree[0], approx) 136 | tree[1], rightas = CompactTree(tree[1], approx) 137 | allas = leftas | rightas 138 | if len(allas) == 0: 139 | return (None, allas) 140 | if approx and len(allas) == 1: 141 | return (list(allas)[0], allas) 142 | if isinstance(tree[0], int) and isinstance(tree[1], int) and tree[0] == tree[1]: 143 | return tree[0], set([tree[0]]) 144 | return (tree, allas) 145 | 146 | 147 | def PropTree(tree, approx=True) -> (list, Counter, bool): 148 | """ 149 | Annotate internal nodes in the tree with the most common leafs below it. 150 | The binary serialization later uses this. 151 | 152 | This changes the shape of the `tree` datastructure from 153 | `[left_child, right_child]` to `[lc, rc, max_ASN_in_tree]`. 154 | 155 | Returns: 156 | (tree, Counter of ASNs in tree, whether or not tree is empty) 157 | """ 158 | if tree is None: 159 | return (tree, Counter(), True) 160 | if isinstance(tree, int): 161 | return (tree, Counter({tree: 1}), False) 162 | tree[0], leftcnt, leftnone = PropTree(tree[0], approx) 163 | tree[1], rightcnt, rightnone = PropTree(tree[1], approx) 164 | allcnt = leftcnt + rightcnt 165 | allnone = leftnone | rightnone 166 | maxasn, maxcount = allcnt.most_common(1)[0] 167 | if maxcount is not None and maxcount >= 2 and (approx or not allnone): 168 | return ([tree[0], tree[1], maxasn], Counter({maxasn: 1}), allnone) 169 | return (tree, allcnt, allnone) 170 | 171 | 172 | def EncodeBits(val, minval, bit_sizes) -> [int]: 173 | """ 174 | Perform a variable-length encoding of a value to bits, least significant 175 | bit first. 176 | 177 | For each `bit_sizes` passed, attempt to encode the value with that number 178 | of bits + 1. Normalize the encoded value by `minval` to potentially save 179 | bits - the value will be corrected during decoding. 180 | 181 | Returns: 182 | a list of bits representing the value to encode. 183 | """ 184 | val -= minval 185 | ret = [] 186 | for pos in range(len(bit_sizes)): 187 | bit_size = bit_sizes[pos] 188 | 189 | # If the value will not fit in `bit_size` bits, absorb the largest 190 | # value for this bitsize and continue to the next smallest size. 191 | if val >= (1 << bit_size): 192 | val -= (1 << bit_size) 193 | ret += [1] 194 | else: 195 | # If we aren't encoding the largest possible value per the largest 196 | # bitsize... 197 | if (pos + 1 < len(bit_sizes)): 198 | ret += [0] 199 | 200 | # Use remaining bits to encode the rest of val. 201 | for b in range(bit_size): 202 | ret += [(val >> (bit_size - 1 - b)) & 1] 203 | return ret 204 | 205 | # Couldn't fit val into any of the bit_sizes 206 | assert(False) 207 | 208 | def MatchType() -> [int]: 209 | """ 210 | The match instruction descends into the tree based on a bit path. If at any 211 | point the match fails to hit a valid path through the tree, it will fail 212 | and return the current default ASN (which changes as we move through the 213 | tree). 214 | """ 215 | return EncodeType(2) 216 | 217 | def JumpType() -> [int]: 218 | """ 219 | The jump instruction allows us to quickly seek to one side of the tree 220 | or the other. By encoding the length of the left child, we can skip over 221 | it to the right child if need be. 222 | """ 223 | return EncodeType(1) 224 | 225 | def LeafType() -> [int]: 226 | """The leaf instruction encodes an ASN at the end of a bit path.""" 227 | return EncodeType(0) 228 | 229 | def SetNewDefaultType() -> [int]: 230 | """ 231 | This instruction establishes a new default ASN to return should we fail 232 | while traversing this path. 233 | """ 234 | return EncodeType(3) 235 | 236 | def EncodeType(v) -> [int]: 237 | return EncodeBits(v, 0, [0, 0, 1]) 238 | 239 | def EncodeASN(v) -> [int]: 240 | # It's reasonable to ask why "15" (indicating 16 bits) is the minimum size 241 | # we might try to pack an ASN into, given there are many ASNs below 2**16. 242 | # 243 | # The reason that we start at 15 here is because we want the first bitsize 244 | # we specify to contain ~50% of the values we are trying to encode - this 245 | # is because each separate bitsize we try will add a digit to our encoded 246 | # values, so we simultaneously want to minimize the number of bitsizes we 247 | # allow while also minimizing the bit length of the encoded data, which 248 | # is a trade-off. 249 | return EncodeBits(v, 1, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24]) 250 | 251 | def EncodeMatch(v) -> [int]: 252 | return EncodeBits(v, 2, [1, 2, 3, 4, 5, 6, 7, 8]) 253 | 254 | def EncodeJump(v) -> [int]: 255 | return EncodeBits(v, 17, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]) 256 | 257 | def EncodeBytes(bits) -> [int]: 258 | """Encode a sequence of bits as a sequence of bytes.""" 259 | val = 0 260 | nbits = 0 261 | bytes = [] 262 | for bit in bits: 263 | val += (bit << nbits) 264 | nbits += 1 265 | if (nbits == 8): 266 | bytes += [val] 267 | val = 0 268 | nbits = 0 269 | if nbits: 270 | bytes += [val] 271 | return bytes 272 | 273 | def TreeSer(tree, default): 274 | match = 1 275 | assert(tree is not None) 276 | assert(not (isinstance(tree, int) and tree == default)) 277 | 278 | # If one side of the tree is empty (i.e. represents a path without 279 | # choices), encode a match instruction up to 8 bits. 280 | while isinstance(tree, list) and match <= 0xFF: 281 | if tree[0] is None or tree[0] == default: 282 | match = (match << 1) + 1 283 | tree = tree[1] 284 | elif tree[1] is None or tree[1] == default: 285 | match = (match << 1) + 0 286 | tree = tree[0] 287 | else: 288 | break 289 | if match >= 2: 290 | return MatchType() + EncodeMatch(match) + TreeSer(tree, default) 291 | 292 | # Leaf node: return the ASN. 293 | if isinstance(tree, int): 294 | return LeafType() + EncodeASN(tree) 295 | 296 | # Return the tree along with a new "default" ASN value should we fail to 297 | # match while along this path. 298 | if len(tree) > 2 and tree[2] != default: 299 | return SetNewDefaultType() + EncodeASN(tree[2]) + TreeSer(tree, tree[2]) 300 | 301 | left = TreeSer(tree[0], default) 302 | right = TreeSer(tree[1], default) 303 | 304 | # Start the program by specifying a possible jump to either child of the 305 | # first node. 306 | return JumpType() + EncodeJump(len(left)) + left + right 307 | 308 | def BuildTree(entries, approx=True): 309 | tree = [None, None] 310 | tree = UpdateTree(tree, 128, entries) 311 | return tree 312 | 313 | entries: [Entry] = [] 314 | print("[INFO] Loading", file=sys.stderr) 315 | Parse(entries) 316 | print("[INFO] Read %i prefixes" % len(entries), file=sys.stderr) 317 | print("[INFO] Constructing trie", file=sys.stderr) 318 | tree = BuildTree(entries) 319 | print("[INFO] Compacting tree", file=sys.stderr) 320 | tree, _ = CompactTree(tree, True) 321 | print("[INFO] Computing inner prefixes", file=sys.stderr) 322 | tree, _, _ = PropTree(tree, True) 323 | 324 | ser = TreeSer(tree, None) 325 | print("[INFO] Total bits: %i" % (len(ser)), file=sys.stderr) 326 | sys.stdout.buffer.write(bytes(EncodeBytes(ser))) 327 | -------------------------------------------------------------------------------- /demo.dat.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sipa/asmap/e01badc2658210cf86e261d0ba557893aa9f5fc0/demo.dat.xz -------------------------------------------------------------------------------- /demo.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sipa/asmap/e01badc2658210cf86e261d0ba557893aa9f5fc0/demo.map -------------------------------------------------------------------------------- /demo.random.dat: -------------------------------------------------------------------------------- 1 | 185.246.138.0/23 AS9009 # ipv4.dump:14068018 2 | 208.91.107.0/24 AS35913 # ipv4.dump:5300217 3 | 186.249.165.0/24 AS53037 # ipv4.dump:7983539 4 | 200.54.108.0/24 AS52310 # ipv4.dump:5501190 5 | 209.71.160.0/20 AS46339 # ipv4.dump:17825415 6 | 142.52.146.0/24 AS852 # ipv4.dump:488513 7 | 212.128.116.0/23 AS200521 # ipv4.dump:13356568 8 | 186.178.15.0/24 AS28006 # ipv4.dump:9201526 9 | 2a01:9700:13a7::/48 AS8376 # ipv6.dump:1037828 10 | 172.105.134.0/23 AS63949 # ipv4.dump:15412112 11 | 78.130.228.0/24 AS9070 # ipv4.dump:12119994 12 | 118.126.140.0/23 AS23724 # ipv4.dump:7123714 13 | 24.94.32.0/19 AS11351 # ipv4.dump:1434597 14 | 142.234.32.0/21 AS7979 # ipv4.dump:7022111 15 | 2405:4800:2140::/46 AS18403 # ipv6.dump:2326297 16 | 200.113.215.0/24 AS27653 # ipv4.dump:3279416 17 | 45.167.190.0/23 AS268046 # ipv4.dump:2584658 18 | 198.137.70.0/24 AS10264 # ipv4.dump:3336467 19 | 200.13.36.0/24 AS28400 # ipv4.dump:3882782 20 | 64.69.220.0/24 AS55002 # ipv4.dump:18960871 21 | 209.49.229.0/24 AS395626 # ipv4.dump:14398115 22 | 87.249.76.0/22 AS15641 # ipv4.dump:9319161 23 | 2001:67c:206c::/48 AS49788 # ipv6.dump:1800452 24 | 2604:2d80:8000::/48 AS30036 # ipv6.dump:1982081 25 | 177.53.12.0/24 AS52989 # ipv4.dump:2090089 26 | 85.192.36.0/22 AS12695 # ipv4.dump:17606664 27 | 212.126.108.0/24 AS39216 # ipv4.dump:18478672 28 | 66.13.132.0/24 AS5650 # ipv4.dump:19204708 29 | 2400:3800:6000::/37 AS9617 # ipv6.dump:2204768 30 | 5.123.160.0/20 AS44244 # ipv4.dump:16376955 31 | 2600:2100:1b::/48 AS54858 # ipv6.dump:1944131 32 | 150.242.174.0/24 AS132453 # ipv4.dump:9808415 33 | 62.221.134.0/23 AS13124 # ipv4.dump:3496079 34 | 213.153.170.0/23 AS34984 # ipv4.dump:15953301 35 | 197.149.72.0/24 AS35074 # ipv4.dump:18530955 36 | 132.97.0.0/16 AS306 # ipv4.dump:19630592 37 | 212.138.180.0/24 AS209464 # ipv4.dump:6079946 38 | 103.251.30.0/24 AS58984 # ipv4.dump:18032160 39 | 74.221.64.0/20 AS29979 # ipv4.dump:20136686 40 | 185.116.176.0/22 AS204033 # ipv4.dump:19959133 41 | 40.248.252.0/24 AS4249 # ipv4.dump:9393294 42 | 103.78.50.0/24 AS135655 # ipv4.dump:12363962 43 | 67.209.219.0/24 AS20356 # ipv4.dump:2756256 44 | 214.72.0.0/24 AS27064 # ipv4.dump:6228044 45 | 188.38.127.0/24 AS15897 # ipv4.dump:827001 46 | 2a03:f080:1000::/48 AS42685 # ipv6.dump:1301509 47 | 95.140.24.0/21 AS48739 # ipv4.dump:10034325 48 | 84.39.111.0/24 AS48200 # ipv4.dump:20443864 49 | 63.163.108.0/23 AS26724 # ipv4.dump:19309425 50 | 24.197.96.0/24 AS20115 # ipv4.dump:14008635 51 | 184.181.24.0/21 AS22773 # ipv4.dump:3568675 52 | 156.0.224.0/21 AS328220 # ipv4.dump:9584679 53 | 146.120.20.0/22 AS57901 # ipv4.dump:19852902 54 | 149.20.24.0/24 AS1280 # ipv4.dump:7015284 55 | 219.65.104.0/23 AS4755 # ipv4.dump:20183203 56 | 92.42.64.0/21 AS44764 # ipv4.dump:15718454 57 | 45.160.90.0/24 AS268414 # ipv4.dump:11064350 58 | 187.149.8.0/21 AS8151 # ipv4.dump:7827459 59 | 208.80.116.0/22 AS32354 # ipv4.dump:17877462 60 | 64.129.144.0/24 AS20251 # ipv4.dump:15149238 61 | 205.147.108.0/24 AS17439 # ipv4.dump:6170532 62 | 65.5.88.0/21 AS6389 # ipv4.dump:11859089 63 | 116.241.76.0/22 AS131596 # ipv4.dump:3725486 64 | 98.143.211.0/24 AS22639 # ipv4.dump:17293905 65 | 138.136.68.0/23 AS5972 # ipv4.dump:2055175 66 | 170.80.99.0/24 AS264829 # ipv4.dump:2425897 67 | 186.159.164.0/22 AS52228 # ipv4.dump:15901760 68 | 202.60.124.0/23 AS37970 # ipv4.dump:6011975 69 | 92.36.192.0/20 AS9146 # ipv4.dump:13920556 70 | 182.73.4.0/24 AS9498 # ipv4.dump:19297810 71 | 198.73.210.0/24 AS393304 # ipv4.dump:5060133 72 | 74.204.128.0/21 AS35986 # ipv4.dump:14710033 73 | 185.129.83.0/24 AS203616 # ipv4.dump:8719643 74 | 79.99.56.0/21 AS47212 # ipv4.dump:12465762 75 | 77.92.123.0/24 AS25145 # ipv4.dump:16610816 76 | 82.141.192.0/18 AS5466 # ipv4.dump:9919239 77 | 171.159.60.0/24 AS10794 # ipv4.dump:6186705 78 | 52.216.4.0/24 AS16509 # ipv4.dump:11150294 79 | 196.16.92.0/22 AS19969 # ipv4.dump:4553797 80 | 188.187.246.0/24 AS41668 # ipv4.dump:3501609 81 | 198.228.131.0/24 AS701 # ipv4.dump:10060201 82 | 168.77.75.0/24 AS3551 # ipv4.dump:6537912 83 | 201.197.64.0/23 AS11830 # ipv4.dump:12945306 84 | 105.235.103.0/24 AS36974 # ipv4.dump:20415098 85 | 217.72.60.0/23 AS45011 # ipv4.dump:7429652 86 | 185.118.72.0/22 AS47406 # ipv4.dump:11380081 87 | 109.70.187.0/24 AS44391 # ipv4.dump:6097074 88 | 116.50.78.0/23 AS38529 # ipv4.dump:15525205 89 | 177.152.66.0/24 AS262773 # ipv4.dump:9918217 90 | 116.93.48.0/23 AS23930 # ipv4.dump:15072801 91 | 172.110.58.0/24 AS396191 # ipv4.dump:13625296 92 | 182.50.255.0/24 AS45786 # ipv4.dump:20783094 93 | 199.48.177.0/24 AS20473 # ipv4.dump:9553840 94 | 85.91.120.0/23 AS28809 # ipv4.dump:12403000 95 | 213.172.0.0/19 AS20632 # ipv4.dump:4621070 96 | 150.196.85.0/24 AS747 # ipv4.dump:21064826 97 | 201.151.243.0/24 AS11172 # ipv4.dump:3121096 98 | 123.51.9.0/24 AS45510 # ipv4.dump:11847527 99 | 138.118.109.0/24 AS264257 # ipv4.dump:7184483 100 | 83.228.128.0/17 AS12350 # ipv4.dump:8421526 101 | -------------------------------------------------------------------------------- /ipv4.dump.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sipa/asmap/e01badc2658210cf86e261d0ba557893aa9f5fc0/ipv4.dump.xz -------------------------------------------------------------------------------- /ipv6.dump.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sipa/asmap/e01badc2658210cf86e261d0ba557893aa9f5fc0/ipv6.dump.xz -------------------------------------------------------------------------------- /remote_dumps/README.md: -------------------------------------------------------------------------------- 1 | This set of scripts allows to download, parse and aggregate BGP announcement dumps from open repositories to be used in asmap construction. 2 | 3 | ### Pre-reqs 4 | 5 | ``./setup.sh`` 6 | 7 | ### Use 8 | 9 | 0. ``./prepare.sh`` deletes old data. 10 | 1. ``./download_dumps.py`` downloads RIPE dumps for a selected date (configured in the file) to the `dumps` folder. 11 | 2. ``./quagga_parse.sh`` reads dumps from the `dumps` folder and 12 | writes the human readable interpretation to the `paths` folder. 13 | 3. ``./quagga_aggregate.py`` goes through the interpreted dumps in ``paths`` folder, aggregates paths and assigns every IP prefix to the first element of the common suffix of the asn path. 14 | 15 | Resulting ``prefix_asns.out`` can be fed to ``../buildmap.py``. 16 | 17 | ### Rationale 18 | 19 | Consider the following scenario: 20 | 1.2.3.4: A -> B -> C -> X 21 | 1.2.3.4: A -> F -> C -> X 22 | 23 | In this case, {C, X} is the common suffix, and we will map 1.2.3.4 to C, because C represents the single infrastructure required to reach that IP address. 24 | 25 | Note that diversifying by C would implicitly diversify by X too. 26 | -------------------------------------------------------------------------------- /remote_dumps/download_dumps.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import urllib.request 4 | import datetime 5 | 6 | providers = range(1, 24) 7 | date = datetime.date.today() 8 | 9 | dumps_dir = "dumps/" 10 | 11 | for provider in providers: 12 | provider = ("{:02d}".format(provider)) 13 | link = "http://data.ris.ripe.net/rrc{0}/latest-bview.gz".format(provider) 14 | dump_name = "dump_{0}_{1}.gz".format(provider, date) 15 | print(link) 16 | try: 17 | dump = urllib.request.urlopen(link) 18 | except Exception: 19 | print('Failed to download: ' + link) 20 | continue 21 | with open(dumps_dir + dump_name,'wb+') as output: 22 | output.write(dump.read()) -------------------------------------------------------------------------------- /remote_dumps/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm dumps/* 4 | rm paths/* 5 | rm prefix_asns.out -------------------------------------------------------------------------------- /remote_dumps/quagga_aggregate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | import re 6 | 7 | PARSED_DUMPS_DIR = 'paths/' 8 | RESULT_OUTPUT = 'prefix_asns.out' 9 | first_octet = re.compile(r"^[^.|:]*") 10 | 11 | 12 | # Remove duplicate asns in a row 13 | # [1, 1, 2, 3, 3, 3] -> [1, 2, 3] 14 | def dedup(asn_path): 15 | i = len(asn_path) - 2 16 | while i > 0: 17 | if asn_path[i] == asn_path[i - 1]: 18 | asn_path = asn_path[0:i] + asn_path[i+1:] 19 | i -= 1 20 | return asn_path 21 | 22 | def find_common_suffixes(prefix_asn_paths): 23 | common_asn_suffix = dict() 24 | for prefix, asn_lists in prefix_asn_paths.items(): 25 | asn_lists = [dedup(asn_list.split(' ')) for asn_list in asn_lists] # preprocess 26 | asn_lists = [asn_list for asn_list in asn_lists if asn_list != [] and set(asn_list) != ['']] # this very rarely happens in dumps 27 | asn_lists.sort(key = len) 28 | cur_asn_suffix = asn_lists[0] # represents the common sub-path (from the end) of asns to a prefix 29 | for asn_list in asn_lists[1:]: 30 | if cur_asn_suffix == asn_list: 31 | continue 32 | if cur_asn_suffix[-1] != asn_list[-1]: # multi-homed 33 | break 34 | cur_asn_suffix_len = len(cur_asn_suffix) 35 | for i in range(1, cur_asn_suffix_len): # position from the end 36 | if cur_asn_suffix[len(cur_asn_suffix) - i - 1] != asn_list[len(asn_list) - i - 1]: 37 | cur_asn_suffix = cur_asn_suffix[len(cur_asn_suffix) - i:] 38 | break 39 | common_asn_suffix[prefix] = cur_asn_suffix 40 | return common_asn_suffix 41 | 42 | 43 | last_read_line = dict() # per file to track chunk processing 44 | FILES = os.listdir(PARSED_DUMPS_DIR) 45 | for file_name in FILES: 46 | last_read_line[file_name] = 0 47 | 48 | 49 | def process_chunk(current_chunk_start, step, end, processing_ipv4): 50 | print(("Working on chunk %i %i" % (current_chunk_start, current_chunk_start + step)), flush=True) 51 | announcements = dict() 52 | for file_name in FILES: 53 | print('Reading file: ', file_name, flush=True) 54 | with open(PARSED_DUMPS_DIR + file_name, "r") as file: 55 | for _ in range(last_read_line[file_name]): 56 | next(file) 57 | line_number = last_read_line[file_name] 58 | for line in file: 59 | line_number += 1 60 | announcement_data = re.sub(r'{[^>]+}', ' ', line.strip()) # removes {} sets in AS path 61 | announcement_data = announcement_data.split('|') 62 | prefix = announcement_data[0] 63 | first_oc = re.search(first_octet, prefix).group(0) 64 | asns = announcement_data[1] 65 | is_ipv4 = prefix.count(':') == 0 66 | if processing_ipv4 != is_ipv4: 67 | continue 68 | 69 | if first_oc == '' and processing_ipv6: # for ipv6 70 | first_oc = 0 71 | 72 | if int(first_oc) > current_chunk_start + step: # passed current chunk 73 | last_read_line[file_name] = line_number 74 | break 75 | # if int(first_oc) < i: # current chunk is ahead 76 | # continue 77 | announcements.setdefault(prefix, set()).add(asns) 78 | res = find_common_suffixes(announcements) 79 | announcements.clear() 80 | dump_result(res) 81 | res.clear() 82 | 83 | MAX_IPv4 = 2 << 7 84 | MAX_IPv6 = 2 << 16 85 | def process_files(): 86 | SMALL_STEP = 2 << 4 # for ips with the first octet less than MAX_IPv4 (all ipv4 and some ipv6) 87 | BIG_STEP = 2 << 12 # for the rest of ipv6 88 | # The assumption is that the records are ordered by ip, but ipv6 can appear here and there 89 | for i in range(0, MAX_IPv4, SMALL_STEP): # process ip range chunks so that memory is not filled 90 | process_chunk(i, SMALL_STEP, MAX_IPv4, True) 91 | 92 | for i in range(0, MAX_IPv6, BIG_STEP): # process ip range chunks so that memory is not filled 93 | process_chunk(i, BIG_STEP, MAX_IPv6, False) 94 | 95 | def dump_result(prefix_unique_asn_suffixes): 96 | with open(RESULT_OUTPUT, 'a') as file: 97 | for prefix, unique_asn_suffix in prefix_unique_asn_suffixes.items(): 98 | if unique_asn_suffix[0] == '': 99 | print(unique_asn_suffix) 100 | assert(False) 101 | file.write("%s AS%s\n" % (prefix, unique_asn_suffix[0])) 102 | 103 | res = process_files() 104 | # dump_result(res) 105 | -------------------------------------------------------------------------------- /remote_dumps/quagga_parse.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for mrt in `ls dumps`; do 4 | /bin/echo -n "processing $mrt... " 5 | OUT=$mrt 6 | /usr/local/bin/bgpdump -vm dumps/$mrt | cut -d '|' -f '6,7' > paths/$OUT 7 | done 8 | 9 | -------------------------------------------------------------------------------- /remote_dumps/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir dumps 4 | mkdir paths 5 | 6 | wget http://ris.ripe.net/source/bgpdump/libbgpdump-1.6.0.tgz 7 | tar zxvf libbgpdump-1.6.0.tgz 8 | rm libbgpdump-1.6.0.tgz 9 | cd libbgpdump-1.6.0 10 | ./bootstrap.sh 11 | make install 12 | cd .. -------------------------------------------------------------------------------- /testmap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import random 4 | import ipaddress 5 | 6 | # Convert a byte array to a bit array 7 | def DecodeBytes(byts): 8 | bits = [] 9 | for byt in byts: 10 | for i in range(8): 11 | bits += [(byt >> i) & 1] 12 | return bits 13 | 14 | def DecodeBits(stream, bitpos, minval, bit_sizes): 15 | val = minval 16 | for pos in range(len(bit_sizes)): 17 | bit_size = bit_sizes[pos] 18 | if pos + 1 < len(bit_sizes): 19 | bit = stream[bitpos] 20 | bitpos += 1 21 | else: 22 | bit = 0 23 | if bit: 24 | val += (1 << bit_size) 25 | else: 26 | for b in range(bit_size): 27 | bit = stream[bitpos] 28 | bitpos += 1 29 | val += bit << (bit_size - 1 - b) 30 | return (val, bitpos) 31 | assert(False) 32 | 33 | def DecodeType(stream, bitpos): 34 | return DecodeBits(stream, bitpos, 0, [0, 0, 1]) 35 | 36 | def DecodeASN(stream, bitpos): 37 | return DecodeBits(stream, bitpos, 1, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24]) 38 | 39 | def DecodeMatch(stream, bitpos): 40 | return DecodeBits(stream, bitpos, 2, [1, 2, 3, 4, 5, 6, 7, 8]) 41 | 42 | def DecodeJump(stream, bitpos): 43 | return DecodeBits(stream, bitpos, 17, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]) 44 | 45 | def Interpret(asmap, num, bits): 46 | pos = 0 47 | default = None 48 | while True: 49 | assert(len(asmap) >= pos + 1) 50 | (opcode, pos) = DecodeType(asmap, pos) 51 | if opcode == 0: 52 | (asn, pos) = DecodeASN(asmap, pos) 53 | return asn 54 | elif opcode == 1: 55 | (jump, pos) = DecodeJump(asmap, pos) 56 | if (num >> (bits - 1)) & 1: 57 | pos += jump 58 | bits -= 1 59 | elif opcode == 2: 60 | (match, pos) = DecodeMatch(asmap, pos) 61 | matchlen = match.bit_length() - 1 62 | for bit in range(matchlen): 63 | if ((num >> (bits - 1)) & 1) != ((match >> (matchlen - 1 - bit)) & 1): 64 | return default 65 | bits -= 1 66 | elif opcode == 3: 67 | (default, pos) = DecodeASN(asmap, pos) 68 | else: 69 | assert(False) 70 | 71 | 72 | 73 | def decode_ip(ip: str) -> int: 74 | addr = ipaddress.ip_address(ip) 75 | if isinstance(addr, ipaddress.IPv4Address): 76 | return int.from_bytes(addr.packed, 'big') + 0xffff00000000 77 | elif isinstance(addr, ipaddress.IPv6Address): 78 | return int.from_bytes(addr.packed, 'big') 79 | 80 | 81 | if __name__ == '__main__': 82 | no_args = len(sys.argv) == 1 83 | 84 | if no_args: 85 | filename = './demo.map' 86 | else: 87 | filename = sys.argv[1] 88 | 89 | with open(filename, "rb") as f: 90 | asmap = DecodeBytes(f.read()) 91 | 92 | # If no arguments are passed, run a test on a random selection from 93 | # demo.dat. 94 | if no_args: 95 | expected = [ 96 | ('8.8.8.8', 15169), 97 | ] 98 | failed = False 99 | 100 | with open('./demo.random.dat', 'r') as f: 101 | for line in f: 102 | (ip, asn) = line.split()[:2] 103 | ip = ip.split('/')[0] 104 | 105 | assert(asn[:2] == 'AS') 106 | asn = int(asn[2:]) 107 | 108 | # Make the IP concrete and randomize it somewhat within the 109 | # subnet. 110 | if ':' not in ip: 111 | ip = '.'.join(ip.split('.')[:3]) + '.{}'.format( 112 | random.randint(0, 16)) 113 | 114 | expected.append((ip, asn)) 115 | 116 | for ip, asn in expected: 117 | got = Interpret(asmap, decode_ip(ip), 128) 118 | 119 | if got != asn: 120 | failed = True 121 | print("{} failed! Got {}, expected {}".format( 122 | ip, got, asn), file=sys.stderr) 123 | else: 124 | print("{} passed".format(ip)) 125 | 126 | sys.exit(1 if failed else 0) 127 | 128 | else: 129 | ret = Interpret(asmap, decode_ip(sys.argv[2]), 128) 130 | if ret: 131 | print("AS%i" % ret) 132 | --------------------------------------------------------------------------------