├── requirements.txt
├── query_ripe_db.sh
├── .gitignore
├── download_dumps.sh
├── db
│   ├── helper.py
│   └── model.py
├── Readme.md
└── create_ripe_db.py

/requirements.txt:
--------------------------------------------------------------------------------
netaddr==0.7.18
psycopg2==2.6.2
SQLAlchemy==1.0.15
--------------------------------------------------------------------------------
/query_ripe_db.sh:
--------------------------------------------------------------------------------
#!/bin/sh

psql -q -A -t -c "SELECT block.inetnum, block.country, block.description FROM block WHERE block.inetnum >> '$1' ORDER BY block.inetnum DESC LIMIT 1;" ripe
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.idea/
*.pyc
afrinic.db.gz
apnic.db.inet6num.gz
apnic.db.inetnum.gz
arin.db
delegated-lacnic-extended-latest
ripe.db.inetnum.gz
ripe.db.inet6num.gz
env/
--------------------------------------------------------------------------------
/download_dumps.sh:
--------------------------------------------------------------------------------
#!/bin/sh

wget ftp://ftp.afrinic.net/pub/dbase/afrinic.db.gz

wget ftp://ftp.apnic.net/pub/apnic/whois/apnic.db.inetnum.gz
wget ftp://ftp.apnic.net/pub/apnic/whois/apnic.db.inet6num.gz

wget ftp://ftp.arin.net/pub/rr/arin.db

wget ftp://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest

wget ftp://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz
wget ftp://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz
--------------------------------------------------------------------------------
/db/helper.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()


def get_base():
    return Base


def setup_connection(create_db=False):
    engine = create_postgres_pool()
    session = sessionmaker()
    session.configure(bind=engine)

    if create_db:
        Base.metadata.drop_all(engine)
        Base.metadata.create_all(engine)

    return session()


def create_postgres_pool():
    engine = create_engine('postgresql://ripe:ripe@localhost/ripe')
    return engine
--------------------------------------------------------------------------------
/db/model.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from sqlalchemy import Column, ForeignKey, Integer, String, DateTime
from sqlalchemy.orm import relationship
from db.helper import get_base
from sqlalchemy.dialects import postgresql

Base = get_base()


class Block(Base):
    __tablename__ = 'block'
    id = Column(Integer, primary_key=True)
    inetnum = Column(postgresql.CIDR, nullable=False, index=True)
    netname = Column(String, nullable=True, index=True)
    description = Column(String, index=True)
    country = Column(String, index=True)
    maintained_by = Column(String, index=True)
    created = Column(DateTime, index=True)
    last_modified = Column(DateTime, index=True)

    def __str__(self):
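        # One-line, human-readable summary of the block; __repr__ below delegates to this.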
        return 'inetnum: {}, netname: {}, desc: {}, country: {}, maintained: {}, created: {}, updated: {}'.format(
            self.inetnum, self.netname, self.description, self.country,
            self.maintained_by, self.created, self.last_modified)

    def __repr__(self):
        return self.__str__()
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
# Ripe Database Parser

This script parses the ARIN/APNIC/LACNIC/AfriNIC/RIPE databases into a local PostgreSQL database.

Install the required packages (example for Ubuntu 16.04):
```sh
apt install postgresql python3 python3-netaddr python3-psycopg2 python3-sqlalchemy

- or -

apt install postgresql python3 python3-pip
pip3 install -r requirements.txt
```

Create the PostgreSQL database (use "ripe" as the password):
```sh
sudo -u postgres createuser --pwprompt --createdb ripe
sudo -u postgres createdb --owner=ripe ripe
```

Before starting the script, you need to download the database dumps from the following URLs and place them in this directory:
```sh
wget ftp://ftp.afrinic.net/pub/dbase/afrinic.db.gz

wget ftp://ftp.apnic.net/pub/apnic/whois/apnic.db.inetnum.gz
wget ftp://ftp.apnic.net/pub/apnic/whois/apnic.db.inet6num.gz

wget ftp://ftp.arin.net/pub/rr/arin.db

wget ftp://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest

wget ftp://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz
wget ftp://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz

- or simply -

./download_dumps.sh
```

After importing, you can look up an IP address like this:

```sql
SELECT block.inetnum, block.country, block.description FROM block WHERE block.inetnum >> '2001:db8::1' ORDER BY block.inetnum DESC LIMIT 1;

- or simply -

./query_ripe_db.sh 192.0.2.1
```

TO-DO:
* The ARIN DB does not seem to be very complete
* The LACNIC DB is missing owner information
--------------------------------------------------------------------------------
/create_ripe_db.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import gzip
import time
from multiprocessing import cpu_count, Queue, Process, current_process
import logging

import re
import os.path
from db.model import Block
from db.helper import setup_connection
from netaddr import iprange_to_cidrs
import math

FILELIST = ['afrinic.db.gz', 'apnic.db.inet6num.gz', 'apnic.db.inetnum.gz', 'arin.db', 'delegated-lacnic-extended-latest', 'ripe.db.inetnum.gz', 'ripe.db.inet6num.gz']
NUM_WORKERS = cpu_count()
LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(processName)s - %(message)s'
COMMIT_COUNT = 10000
NUM_BLOCKS = 0

logger = logging.getLogger('create_ripe_db')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(LOG_FORMAT)
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
stream_handler.setLevel(logging.DEBUG)
logger.addHandler(stream_handler)


def parse_property(block: str, name: str):
    match = re.findall(r'^{0:s}:\s*(.*)$'.format(name), block, re.MULTILINE)
    if match:
        return " ".join(match)
    else:
        return None

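# Extract the address range from a block and return it in CIDR notation.
# For an IPv4 range such as 'inetnum: 192.0.2.0 - 192.0.2.255' (an assumed example),
# iprange_to_cidrs() yields [IPNetwork('192.0.2.0/24')]; only the first CIDR of the
# range is stored.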
def parse_property_inetnum(block: str):
    # IPv4
    match = re.findall(r'^inetnum:[\s]*((?:\d{1,3}\.){3}\d{1,3}[\s]*-[\s]*(?:\d{1,3}\.){3}\d{1,3})', block, re.MULTILINE)
    if match:
        ip_start = re.findall(r'^inetnum:[\s]*((?:\d{1,3}\.){3}\d{1,3})[\s]*-[\s]*(?:\d{1,3}\.){3}\d{1,3}', block, re.MULTILINE)[0]
        ip_end = re.findall(r'^inetnum:[\s]*(?:\d{1,3}\.){3}\d{1,3}[\s]*-[\s]*((?:\d{1,3}\.){3}\d{1,3})', block, re.MULTILINE)[0]
        cidrs = iprange_to_cidrs(ip_start, ip_end)
        return '{}'.format(cidrs[0])
    # IPv6
    else:
        match = re.findall(r'^inet6num:[\s]*([0-9a-fA-F:\/]{1,43})', block, re.MULTILINE)
        if match:
            return match[0]
        # LACNIC translation for IPv4
        else:
            match = re.findall(r'^inet4num:[\s]*((?:\d{1,3}\.){3}\d{1,3}/\d{1,2})', block, re.MULTILINE)
            if match:
                return match[0]
            else:
                return None


def read_blocks(filename: str) -> list:
    if filename.endswith('.gz'):
        f = gzip.open(filename, mode='rt', encoding='ISO-8859-1')
    else:
        f = open(filename, mode='rt', encoding='ISO-8859-1')
    single_block = ''
    blocks = []

    # Translation for the LACNIC DB (delegated extended statistics format)
    if filename == 'delegated-lacnic-extended-latest':
        for line in f:
            if line.startswith('lacnic'):
                elements = line.split('|')
                if len(elements) >= 7:
                    single_block = ''
                    if elements[2] == 'ipv4':
                        single_block += 'inet4num: ' + elements[3] + '/' + str(int(math.log(4294967296 / int(elements[4]), 2))) + '\n'
                    elif elements[2] == 'ipv6':
                        single_block += 'inet6num: ' + elements[3] + '/' + elements[4] + '\n'
                    else:
                        continue
                    if len(elements[1]) > 1:
                        single_block += 'country: ' + elements[1] + '\n'
                    if elements[5].isnumeric():
                        single_block += 'last-modified: ' + elements[5] + '\n'
                    single_block += 'descr: ' + elements[6] + '\n'
                    blocks.append(single_block)

    # All other DBs go here
    else:
        for line in f:
            if line.startswith('%') or line.startswith('#') or line.startswith('remarks:') or line.startswith(' '):
                continue
            # block end
            if line.strip() == '':
                if single_block.startswith('inetnum:') or single_block.startswith('inet6num:'):
                    blocks.append(single_block)
                    single_block = ''
                    # uncomment to only parse the first x blocks
                    # if len(blocks) == 100:
                    #     break
                else:
                    single_block = ''
            else:
                single_block += line

    f.close()
    logger.info('Got {} blocks'.format(len(blocks)))
    global NUM_BLOCKS
    NUM_BLOCKS = len(blocks)
    return blocks


def parse_blocks(jobs: Queue):
    session = setup_connection()

    counter = 0
    BLOCKS_DONE = 0

    start_time = time.time()
    while True:
        block = jobs.get()
        if block is None:
            break

        inetnum = parse_property_inetnum(block)
        netname = parse_property(block, 'netname')
        description = parse_property(block, 'descr')
        country = parse_property(block, 'country')
        maintained_by = parse_property(block, 'mnt-by')
        created = parse_property(block, 'created')
        last_modified = parse_property(block, 'last-modified')

        b = Block(inetnum=inetnum, netname=netname, description=description, country=country,
                  maintained_by=maintained_by, created=created, last_modified=last_modified)

        session.add(b)
        counter += 1
        BLOCKS_DONE += 1
        if counter % COMMIT_COUNT == 0:
            session.commit()
            session.close()
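            # start a fresh session for the next batch of blocks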
            session = setup_connection()
            logger.debug('committed {} blocks ({} seconds) {:.1f}% done.'.format(counter, round(time.time() - start_time, 2), BLOCKS_DONE * NUM_WORKERS * 100 / NUM_BLOCKS))
            counter = 0
            start_time = time.time()
    session.commit()
    logger.debug('committed last blocks')
    session.close()
    logger.debug('{} finished'.format(current_process().name))


def main():
    overall_start_time = time.time()

    session = setup_connection(create_db=True)

    for FILENAME in FILELIST:
        if os.path.exists(FILENAME):
            logger.info('parsing database file: {}'.format(FILENAME))
            start_time = time.time()
            blocks = read_blocks(FILENAME)
            logger.info('database parsing finished: {} seconds'.format(round(time.time() - start_time, 2)))

            logger.info('parsing blocks')
            start_time = time.time()

            jobs = Queue()

            workers = []
            # start workers
            logger.debug('starting {} processes'.format(NUM_WORKERS))
            for w in range(NUM_WORKERS):
                p = Process(target=parse_blocks, args=(jobs,))
                p.start()
                workers.append(p)

            # add tasks
            for b in blocks:
                jobs.put(b)
            # one None per worker signals the workers to stop
            for i in range(NUM_WORKERS):
                jobs.put(None)

            # wait to finish
            for p in workers:
                p.join()

            logger.info('block parsing finished: {} seconds'.format(round(time.time() - start_time, 2)))
        else:
            logger.info('File {} not found. Please download using download_dumps.sh'.format(FILENAME))

    logger.info('script finished: {} seconds'.format(round(time.time() - overall_start_time, 2)))


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------