├── .dockerignore ├── .github ├── dependabot.yml └── workflows │ ├── auto-merge-dependabot.yml │ └── docker.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── Readme.md ├── bin ├── export_to_gzip ├── network_info ├── network_info-dev ├── psql └── query ├── create_db.py ├── db ├── helper.py └── model.py ├── docker-compose.override.yml ├── docker-compose.yml ├── docker-entrypoint.sh ├── download_dumps.sh ├── query_db.sh └── requirements.txt /.dockerignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | env/ 3 | bin/ 4 | databases/ 5 | TODO 6 | query_db.sh 7 | .* 8 | **/.DS_STORE 9 | **/*.md 10 | **/*.swp 11 | **/*.pyc 12 | Dockerfile 13 | docker-compose.yml 14 | block_dump_* 15 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "github-actions" 9 | directory: "/" 10 | schedule: 11 | # Check for updates to GitHub Actions every weekday 12 | interval: "daily" 13 | 14 | - package-ecosystem: "pip" 15 | directory: "/" 16 | schedule: 17 | interval: "weekly" 18 | 19 | - package-ecosystem: "Docker" 20 | directory: "/" 21 | schedule: 22 | interval: "weekly" 23 | -------------------------------------------------------------------------------- /.github/workflows/auto-merge-dependabot.yml: -------------------------------------------------------------------------------- 1 | name: Auto-merge dependabot updates 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | 7 | permissions: 8 | pull-requests: write 9 | contents: write 10 | 11 | jobs: 12 | 13 | dependabot-merge: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | if: ${{ github.actor == 'dependabot[bot]' }} 18 | 19 | steps: 20 | - name: Dependabot metadata 21 | id: metadata 22 | uses: dependabot/fetch-metadata@v2.4.0 23 | with: 24 | github-token: "${{ secrets.GITHUB_TOKEN }}" 25 | 26 | - name: Enable auto-merge for Dependabot PRs 27 | # Only if version bump is not a major version change 28 | if: ${{steps.metadata.outputs.update-type != 'version-update:semver-major'}} 29 | run: gh pr merge --auto --merge "$PR_URL" 30 | env: 31 | PR_URL: ${{github.event.pull_request.html_url}} 32 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 33 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker Images 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | workflow_dispatch: 8 | schedule: 9 | - cron: '0 0 * * *' 10 | 11 | jobs: 12 | Dockerhub: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: checkout sources 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up Docker Buildx 20 | uses: docker/setup-buildx-action@v3 21 | 22 | - name: Login to Docker Hub 23 | uses: docker/login-action@v3.4.0 24 | with: 25 | username: ${{ secrets.DOCKERHUB_USERNAME }} 26 | password: ${{ secrets.DOCKERHUB_TOKEN }} 27 | 28 | - name: Build and push 29 | uses: docker/build-push-action@v6 30 | with: 31 | push: true 32 | tags: firefart/network_info:latest 33 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | *.pyc
3 | *.swp
4 | databases/
5 | env/
6 | .bash_history
7 | .cache/
8 | block_dump_*
9 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3-alpine
2 | LABEL maintainer="Christian Mehlmauer "
3 | 
4 | RUN adduser -h /app -g app -D app
5 | 
6 | WORKDIR /app
7 | 
8 | COPY requirements.txt /app
9 | 
10 | RUN apk add --no-cache bash postgresql-libs \
11 | && apk add --no-cache --virtual .builddeps build-base postgresql-dev \
12 | && pip install -r requirements.txt \
13 | && apk del --no-cache .builddeps
14 | 
15 | COPY . /app
16 | RUN chown -R app:app /app
17 | USER app
18 | 
19 | RUN mkdir -p databases
20 | 
21 | ENTRYPOINT ["./docker-entrypoint.sh"]
22 | CMD ["--help"]
23 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 Christian Mehlmauer
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
1 | # Network Info Parser
2 | 
3 | This script parses the ARIN/APNIC/LACNIC/AfriNIC/RIPE databases into a local PostgreSQL database.
4 | After the parsing has finished you can look up the information for any IPv4 or IPv6 address by querying the database.
5 | 
6 | This project was used to analyse some data dumps and cross-reference the IPs with their networks.
7 | It can also be used to easily search for netranges assigned to a company of interest.
8 | 
9 | I recommend using the Docker setup because it removes the hassle of installing everything manually.
10 | 
11 | Hint: The database can grow quickly, so make sure you have enough disk space. With Docker, my PostgreSQL database uses 4.066 GB of space.
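
For programmatic lookups from Python (instead of the shell helpers described below), the `block` table can be queried directly with psycopg, the driver pinned in `requirements.txt`. The following is only a minimal sketch: the connection string is an assumption based on the docker-compose defaults, and the PostgreSQL port has to be reachable from wherever the script runs.

```python
# Minimal sketch: find all blocks covering a single address via psycopg (v3).
# The connection string is an assumption (docker-compose defaults on localhost);
# adjust host, port and credentials to your environment.
import psycopg

QUERY = """
    SELECT inetnum, netname, country, description, maintained_by, created, last_modified, source
    FROM block
    WHERE inetnum >> %s::inet
    ORDER BY inetnum DESC;
"""

with psycopg.connect("postgresql://network_info:network_info@localhost:5432/network_info") as conn:
    with conn.cursor() as cur:
        cur.execute(QUERY, ("8.8.8.8",))
        for row in cur.fetchall():
            print(row)
```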
12 | 
13 | # Requirements
14 | 
15 | - Python 3 >= 3.3
16 | - postgresql
17 | - python3-netaddr
18 | - python3-psycopg
19 | - python3-sqlalchemy
20 | 
21 | # Docker
22 | 
23 | You can simply pull the image from Docker Hub and connect it to a local database via
24 | 
25 | ```sh
26 | docker pull firefart/network_info
27 | docker run --rm firefart/network_info -c postgresql://user:pass@db:5432/network_info
28 | ```
29 | 
30 | Or you can connect the Docker container to another database container.
31 | 
32 | ```sh
33 | docker run --name network_info_db -e POSTGRES_DB=network_info -e POSTGRES_USER=network_info -e POSTGRES_PASSWORD=network_info -d postgres:9-alpine
34 | docker run --rm --link network_info_db:db firefart/network_info -c postgresql://network_info:network_info@db:5432/network_info
35 | ```
36 | 
37 | If you have checked out the Git repo you can run the script via `docker compose`.
38 | I included some binstubs so you don't have to deal with all the Docker commands.
39 | 
40 | If you run
41 | 
42 | ```sh
43 | ./bin/network_info
44 | ```
45 | 
46 | the image will be built, a PostgreSQL database will be started, the dump files will be downloaded and the parsing will begin.
47 | The database stays up after the run (you can see it via `docker ps`) so you can connect to it from your own scripts.
48 | 
49 | For a one-shot query you can run
50 | 
51 | ```
52 | ./bin/query IPv4
53 | ```
54 | 
55 | or
56 | 
57 | ```
58 | ./bin/query IPv6
59 | ```
60 | 
61 | Or for a psql prompt
62 | 
63 | ```
64 | ./bin/psql
65 | ```
66 | 
67 | # Manual Installation
68 | 
69 | Installation of the needed packages (example on Ubuntu 16.04):
70 | 
71 | ```sh
72 | apt install postgresql python3 python3-netaddr python3-psycopg2 python3-sqlalchemy
73 | ```
74 | 
75 | or
76 | 
77 | ```sh
78 | apt install postgresql python3 python3-pip
79 | pip3 install -r requirements.txt
80 | ```
81 | 
82 | Create the PostgreSQL database (use "network_info" as the password):
83 | 
84 | ```sh
85 | sudo -u postgres createuser --pwprompt --createdb network_info
86 | sudo -u postgres createdb --owner=network_info network_info
87 | ```
88 | 
89 | Prior to starting this script you need to download the database dumps by executing:
90 | 
91 | ```sh
92 | ./download_dumps.sh
93 | ```
94 | 
95 | After importing you can look up an IP address like this:
96 | 
97 | ```sql
98 | SELECT block.inetnum, block.netname, block.country, block.description, block.maintained_by, block.created, block.last_modified, block.source FROM block WHERE block.inetnum >> '2001:db8::1' ORDER BY block.inetnum DESC;
99 | SELECT block.inetnum, block.netname, block.country, block.description, block.maintained_by, block.created, block.last_modified, block.source FROM block WHERE block.inetnum >> '8.8.8.8' ORDER BY block.inetnum DESC;
100 | ```
101 | 
102 | or
103 | 
104 | ```bash
105 | ./query_db.sh 192.0.2.1
106 | ```
107 | 
108 | # Sample run (docker compose)
109 | 
110 | ```
111 | $ ./bin/network_info
112 | Creating network "ripe_default" with the default driver
113 | Creating volume "ripe_pg_data" with local driver
114 | Creating ripe_db_1
115 | Downloading afrinic.db.gz...
116 | Connecting to ftp.afrinic.net (196.216.2.24:21)
117 | afrinic.db.gz 100% |****************************************************************************************************************************| 5419k 0:00:00 ETA
118 | Downloading apnic.db.inetnum.gz...
119 | Connecting to ftp.apnic.net (202.12.29.205:21) 120 | apnic.db.inetnum.gz 100% |****************************************************************************************************************************| 37065k 0:00:00 ETA 121 | Downloading apnic.db.inet6num.gz... 122 | Connecting to ftp.apnic.net (202.12.29.205:21) 123 | apnic.db.inet6num.gz 100% |****************************************************************************************************************************| 1113k 0:00:00 ETA 124 | Downloading arin.db... 125 | Connecting to ftp.arin.net (199.71.0.151:21) 126 | arin.db 100% |****************************************************************************************************************************| 12314k 0:00:00 ETA 127 | Downloading delegated-lacnic-extended-latest... 128 | Connecting to ftp.lacnic.net (200.3.14.11:21) 129 | delegated-lacnic-ext 100% |****************************************************************************************************************************| 2161k 0:00:00 ETA 130 | Downloading ripe.db.inetnum.gz... 131 | Connecting to ftp.ripe.net (193.0.6.140:21) 132 | ripe.db.inetnum.gz 100% |****************************************************************************************************************************| 228M 0:00:00 ETA 133 | Downloading ripe.db.inet6num.gz... 134 | Connecting to ftp.ripe.net (193.0.6.140:21) 135 | ripe.db.inet6num.gz 100% |****************************************************************************************************************************| 24589k 0:00:00 ETA 136 | 2020-06-23 20:21:11,836 - create_db - INFO - MainProcess - afrinic.db.gz - parsing database file: ./databases/afrinic.db.gz 137 | 2020-06-23 20:21:20,688 - create_db - INFO - MainProcess - afrinic.db.gz - Got 171337 blocks 138 | 2020-06-23 20:21:20,688 - create_db - INFO - MainProcess - afrinic.db.gz - database parsing finished: 8.85 seconds 139 | 2020-06-23 20:21:20,688 - create_db - INFO - MainProcess - afrinic.db.gz - parsing blocks 140 | 2020-06-23 20:21:40,904 - create_db - INFO - MainProcess - afrinic.db.gz - block parsing finished: 20.22 seconds 141 | 2020-06-23 20:21:40,906 - create_db - INFO - MainProcess - apnic.db.inet6num.gz - parsing database file: ./databases/apnic.db.inet6num.gz 142 | 2020-06-23 20:21:43,364 - create_db - INFO - MainProcess - apnic.db.inet6num.gz - Got 73190 blocks 143 | 2020-06-23 20:21:43,376 - create_db - INFO - MainProcess - apnic.db.inet6num.gz - database parsing finished: 2.47 seconds 144 | 2020-06-23 20:21:43,376 - create_db - INFO - MainProcess - apnic.db.inet6num.gz - parsing blocks 145 | 2020-06-23 20:21:52,059 - create_db - INFO - MainProcess - apnic.db.inet6num.gz - block parsing finished: 8.68 seconds 146 | 2020-06-23 20:21:52,061 - create_db - INFO - MainProcess - apnic.db.inetnum.gz - parsing database file: ./databases/apnic.db.inetnum.gz 147 | 2020-06-23 20:22:21,802 - create_db - INFO - MainProcess - apnic.db.inetnum.gz - Got 1079011 blocks 148 | 2020-06-23 20:22:21,809 - create_db - INFO - MainProcess - apnic.db.inetnum.gz - database parsing finished: 29.75 seconds 149 | 2020-06-23 20:22:21,809 - create_db - INFO - MainProcess - apnic.db.inetnum.gz - parsing blocks 150 | 2020-06-23 20:24:43,940 - create_db - INFO - MainProcess - apnic.db.inetnum.gz - block parsing finished: 142.13 seconds 151 | 2020-06-23 20:24:43,942 - create_db - INFO - MainProcess - arin.db - parsing database file: ./databases/arin.db 152 | 2020-06-23 20:24:43,946 - create_db - INFO - MainProcess - arin.db - Got 0 blocks 153 | 
2020-06-23 20:24:44,008 - create_db - INFO - MainProcess - arin.db - database parsing finished: 0.07 seconds 154 | 2020-06-23 20:24:44,008 - create_db - INFO - MainProcess - arin.db - parsing blocks 155 | 2020-06-23 20:24:44,030 - create_db - INFO - MainProcess - arin.db - block parsing finished: 0.02 seconds 156 | 2020-06-23 20:24:44,032 - create_db - INFO - MainProcess - delegated-lacnic-extended-latest - parsing database file: ./databases/delegated-lacnic-extended-latest 157 | 2020-06-23 20:24:44,034 - create_db - WARNING - MainProcess - delegated-lacnic-extended-latest - line does not start with lacnic: b'2.3|lacnic|20200621|73188|19870101|20200619|-0300' 158 | 2020-06-23 20:24:44,035 - create_db - WARNING - MainProcess - delegated-lacnic-extended-latest - Invalid line: b'lacnic|*|ipv4|*|18136|summary' 159 | 2020-06-23 20:24:44,035 - create_db - WARNING - MainProcess - delegated-lacnic-extended-latest - Invalid line: b'lacnic|*|ipv6|*|43276|summary' 160 | 2020-06-23 20:24:44,035 - create_db - WARNING - MainProcess - delegated-lacnic-extended-latest - Invalid line: b'lacnic|*|asn|*|11776|summary' 161 | 2020-06-23 20:24:44,489 - create_db - INFO - MainProcess - delegated-lacnic-extended-latest - Got 61412 blocks 162 | 2020-06-23 20:24:44,489 - create_db - INFO - MainProcess - delegated-lacnic-extended-latest - database parsing finished: 0.46 seconds 163 | 2020-06-23 20:24:44,489 - create_db - INFO - MainProcess - delegated-lacnic-extended-latest - parsing blocks 164 | 2020-06-23 20:24:51,207 - create_db - INFO - MainProcess - delegated-lacnic-extended-latest - block parsing finished: 6.72 seconds 165 | 2020-06-23 20:24:51,209 - create_db - INFO - MainProcess - ripe.db.inetnum.gz - parsing database file: ./databases/ripe.db.inetnum.gz 166 | 2020-06-23 20:27:46,474 - create_db - INFO - MainProcess - ripe.db.inetnum.gz - Got 4191047 blocks 167 | 2020-06-23 20:27:46,476 - create_db - INFO - MainProcess - ripe.db.inetnum.gz - database parsing finished: 175.27 seconds 168 | 2020-06-23 20:27:46,476 - create_db - INFO - MainProcess - ripe.db.inetnum.gz - parsing blocks 169 | 2020-06-23 20:37:17,835 - create_db - INFO - MainProcess - ripe.db.inetnum.gz - block parsing finished: 571.36 seconds 170 | 2020-06-23 20:37:17,837 - create_db - INFO - MainProcess - ripe.db.inet6num.gz - parsing database file: ./databases/ripe.db.inet6num.gz 171 | 2020-06-23 20:37:57,369 - create_db - INFO - MainProcess - ripe.db.inet6num.gz - Got 1081478 blocks 172 | 2020-06-23 20:37:57,706 - create_db - INFO - MainProcess - ripe.db.inet6num.gz - database parsing finished: 39.87 seconds 173 | 2020-06-23 20:37:57,706 - create_db - INFO - MainProcess - ripe.db.inet6num.gz - parsing blocks 174 | 2020-06-23 20:40:21,534 - create_db - INFO - MainProcess - ripe.db.inet6num.gz - block parsing finished: 143.83 seconds 175 | 2020-06-23 20:40:21,534 - create_db - INFO - MainProcess - empty - script finished: 1149.83 seconds 176 | 177 | $ ./bin/query 8.8.8.8 178 | SELECT block.inetnum, block.netname, block.country, block.description, block.maintained_by, block.created, block.last_modified, block.source FROM block WHERE block.inetnum >> '8.8.8.8' ORDER BY block.inetnum DESC; 179 | -[ RECORD 1 ]-+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 180 | inetnum | 
8.0.0.0/8 181 | netname | IANA-NETBLOCK-8 182 | country | AU 183 | description | This network range is not allocated to APNIC. If your whois search has returned this message, then you have searched the APNIC whois database for an address that is allocated by another Regional Internet Registry (RIR). Please search the other RIRs at whois.arin.net or whois.ripe.net for more information about that range. 184 | maintained_by | MAINT-APNIC-AP 185 | created | 186 | last_modified | 2008-09-04 06:51:28 187 | source | apnic 188 | -[ RECORD 2 ]-+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 189 | inetnum | 8.0.0.0/6 190 | netname | NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK 191 | country | EU # Country is really world wide 192 | description | IPv4 address block not managed by the RIPE NCC 193 | maintained_by | RIPE-NCC-HM-MNT 194 | created | 2019-01-07 10:49:33 195 | last_modified | 2019-01-07 10:49:33 196 | source | ripe 197 | -[ RECORD 3 ]-+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 198 | inetnum | 0.0.0.0/0 199 | netname | IANA-BLK 200 | country | EU # Country is really world wide 201 | description | The whole IPv4 address space 202 | maintained_by | AFRINIC-HM-MNT 203 | created | 204 | last_modified | 205 | source | afrinic 206 | -[ RECORD 4 ]-+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 207 | inetnum | 0.0.0.0/0 208 | netname | IANA-BLK 209 | country | EU # Country field is actually all countries in the world and not just EU countries 210 | description | The whole IPv4 address space 211 | maintained_by | RIPE-NCC-HM-MNT 212 | created | 2002-06-25 14:19:09 213 | last_modified | 2018-11-23 10:30:34 214 | source | ripe 215 | -[ RECORD 5 ]-+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 216 | inetnum | 0.0.0.0/0 217 | netname | IANA-BLOCK 218 | country | AU 219 | description | General placeholder reference for all IPv4 addresses 220 | maintained_by | MAINT-APNIC-AP 221 | created | 222 | last_modified | 2008-09-04 06:51:49 223 | source | apnic 224 | ``` 225 | 226 | # Export block table 227 | 228 | If you need to export data from PG to another source (Clickhouse, Elasticsearch, etc.) you can use: 229 | ``` 230 | ./bin/export_to_gzip 231 | ``` 232 | A compressed CSV file will be created. 
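
The dump is a semicolon-delimited CSV with a header row (see `bin/export_to_gzip`), so any tool that reads CSV can consume it. Below is a minimal Python sketch for streaming the file; the file name is only an example of the `block_dump_<date>.csv.gz` pattern the script produces.

```python
# Minimal sketch: stream the gzipped CSV dump created by ./bin/export_to_gzip.
# The file name below is an example; the script writes block_dump_<date>.csv.gz.
import csv
import gzip

with gzip.open("block_dump_2020-06-23.csv.gz", mode="rt", newline="") as f:
    reader = csv.DictReader(f, delimiter=";")
    for row in reader:
        # every row contains the columns of the block table
        print(row["inetnum"], row["netname"], row["country"])
```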
233 | -------------------------------------------------------------------------------- /bin/export_to_gzip: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euf -o pipefail 4 | 5 | docker-compose run --rm -e PGPASSWORD=network_info --name dumper --entrypoint=psql db -h db -U network_info -e -q -x -c "copy block TO stdout DELIMITER ';' CSV HEADER ;" network_info | gzip > ./block_dump_`date +"%Y-%m-%d"`.csv.gz 6 | -------------------------------------------------------------------------------- /bin/network_info: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SOURCE="${BASH_SOURCE[0]}" 4 | while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink 5 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 6 | SOURCE="$(readlink "$SOURCE")" 7 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located 8 | done 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | 11 | cd $DIR/../ 12 | if [[ -n "$REBUILD" ]]; then 13 | docker pull python:3-alpine 14 | docker pull postgres:13-alpine 15 | docker compose build 16 | fi 17 | 18 | docker compose -f docker-compose.yml run --rm --service-ports network_info "$@" 19 | -------------------------------------------------------------------------------- /bin/network_info-dev: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SOURCE="${BASH_SOURCE[0]}" 4 | while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink 5 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 6 | SOURCE="$(readlink "$SOURCE")" 7 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located 8 | done 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | 11 | cd $DIR/../ 12 | if [[ -n "$REBUILD" ]]; then 13 | docker pull python:3-alpine 14 | docker pull postgres:13-alpine 15 | docker compose build 16 | fi 17 | # mount directory as volume and skip downloading of images by setting a new entrypoint 18 | docker compose -f docker-compose.yml -f docker-compose.override.yml run --rm --service-ports network_info "$@" 19 | -------------------------------------------------------------------------------- /bin/psql: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SOURCE="${BASH_SOURCE[0]}" 4 | while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink 5 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 6 | SOURCE="$(readlink "$SOURCE")" 7 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located 8 | done 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | 11 | cd $DIR/../ 12 | 13 | docker compose run -e PGPASSWORD=network_info --entrypoint=psql db -h db -U network_info -x network_info 14 | -------------------------------------------------------------------------------- /bin/query: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SOURCE="${BASH_SOURCE[0]}" 4 | while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink 5 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 6 | SOURCE="$(readlink "$SOURCE")" 7 | [[ $SOURCE 
!= /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located 8 | done 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | 11 | cd $DIR/../ 12 | 13 | docker compose run -e PGPASSWORD=network_info --entrypoint=psql db -h db -U network_info -e -q -x -c "SELECT block.inetnum, block.netname, block.country, block.description, block.maintained_by, block.created, block.last_modified, block.source FROM block WHERE block.inetnum >> '$1' ORDER BY block.inetnum DESC;" network_info 14 | -------------------------------------------------------------------------------- /create_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import gzip 6 | import time 7 | from multiprocessing import cpu_count, Queue, Process, current_process 8 | import logging 9 | 10 | import re 11 | import os.path 12 | from db.model import Block 13 | from db.helper import setup_connection 14 | from netaddr import iprange_to_cidrs 15 | 16 | VERSION = '2.0' 17 | FILELIST = ['afrinic.db.gz', 'apnic.db.inet6num.gz', 'apnic.db.inetnum.gz', 'arin.db.gz', 18 | 'lacnic.db.gz', 'ripe.db.inetnum.gz', 'ripe.db.inet6num.gz'] 19 | NUM_WORKERS = cpu_count() 20 | LOG_FORMAT = '%(asctime)-15s - %(name)-9s - %(levelname)-8s - %(processName)-11s - %(filename)s - %(message)s' 21 | COMMIT_COUNT = 10000 22 | NUM_BLOCKS = 0 23 | CURRENT_FILENAME = "empty" 24 | 25 | 26 | class ContextFilter(logging.Filter): 27 | def filter(self, record): 28 | record.filename = CURRENT_FILENAME 29 | return True 30 | 31 | 32 | logger = logging.getLogger('create_db') 33 | logger.setLevel(logging.INFO) 34 | f = ContextFilter() 35 | logger.addFilter(f) 36 | formatter = logging.Formatter(LOG_FORMAT) 37 | stream_handler = logging.StreamHandler() 38 | stream_handler.setFormatter(formatter) 39 | logger.addHandler(stream_handler) 40 | 41 | 42 | def get_source(filename: str): 43 | if filename.startswith('afrinic'): 44 | return b'afrinic' 45 | elif filename.startswith('apnic'): 46 | return b'apnic' 47 | elif filename.startswith('arin'): 48 | return b'arin' 49 | elif 'lacnic' in filename: 50 | return b'lacnic' 51 | elif filename.startswith('ripe'): 52 | return b'ripe' 53 | else: 54 | logger.error(f"Can not determine source for {filename}") 55 | return None 56 | 57 | 58 | def parse_property(block: str, name: str) -> str: 59 | match = re.findall(b'^%s:\s?(.+)$' % (name), block, re.MULTILINE) 60 | if match: 61 | # remove empty lines and remove multiple names 62 | x = b' '.join(list(filter(None, (x.strip().replace( 63 | b"%s: " % name, b'').replace(b"%s: " % name, b'') for x in match)))) 64 | # remove multiple whitespaces by using a split hack 65 | # decode to latin-1 so it can be inserted in the database 66 | return ' '.join(x.decode('latin-1').split()) 67 | else: 68 | return None 69 | 70 | 71 | def parse_property_inetnum(block: str): 72 | # IPv4 73 | match = re.findall( 74 | rb'^inetnum:[\s]*((?:\d{1,3}\.){3}\d{1,3})[\s]*-[\s]*((?:\d{1,3}\.){3}\d{1,3})', block, re.MULTILINE) 75 | if match: 76 | # netaddr can only handle strings, not bytes 77 | ip_start = match[0][0].decode('utf-8') 78 | ip_end = match[0][1].decode('utf-8') 79 | cidrs = iprange_to_cidrs(ip_start, ip_end) 80 | return cidrs 81 | # direct CIDR in lacnic db 82 | match = re.findall(rb'^inetnum:[\s]*((?:\d{1,3}\.){3}\d{1,3}/\d+)', block, re.MULTILINE) 83 | if match: 84 | return match[0] 85 | # lacnic with wrong ip 86 | 
# inetnum: 177.46.7/24 87 | match = re.findall(rb'^inetnum:[\s]*((?:\d{1,3}\.){2}\d{1,3}/\d+)', block, re.MULTILINE) 88 | if match: 89 | tmp = match[0].split(b"/") 90 | return f"{tmp[0].decode('utf-8')}.0/{tmp[1].decode('utf-8')}".encode("utf-8") 91 | # inetnum: 148.204/16 92 | match = re.findall(rb'^inetnum:[\s]*((?:\d{1,3}\.){1}\d{1,3}/\d+)', block, re.MULTILINE) 93 | if match: 94 | tmp = match[0].split(b"/") 95 | return f"{tmp[0].decode('utf-8')}.0.0/{tmp[1].decode('utf-8')}".encode("utf-8") 96 | # IPv6 97 | match = re.findall( 98 | rb'^inet6num:[\s]*([0-9a-fA-F:\/]{1,43})', block, re.MULTILINE) 99 | if match: 100 | return match[0] 101 | # ARIN route IPv4 102 | match = re.findall( 103 | rb'^route:[\s]*((?:\d{1,3}\.){3}\d{1,3}/\d{1,2})', block, re.MULTILINE) 104 | if match: 105 | return match[0] 106 | # ARIN route6 IPv6 107 | match = re.findall( 108 | rb'^route6:[\s]*([0-9a-fA-F:\/]{1,43})', block, re.MULTILINE) 109 | if match: 110 | return match[0] 111 | return None 112 | 113 | 114 | def read_blocks(filename: str) -> list: 115 | if filename.endswith('.gz'): 116 | opemethod = gzip.open 117 | else: 118 | opemethod = open 119 | cust_source = get_source(filename.split('/')[-1]) 120 | single_block = b'' 121 | blocks = [] 122 | 123 | with opemethod(filename, mode='rb') as f: 124 | for line in f: 125 | # skip comments 126 | if line.startswith(b'%') or line.startswith(b'#') or line.startswith(b'remarks:'): 127 | continue 128 | # block end 129 | if line.strip() == b'': 130 | if single_block.startswith(b'inetnum:') or single_block.startswith(b'inet6num:') or single_block.startswith(b'route:') or single_block.startswith(b'route6:'): 131 | # add source 132 | single_block += b"cust_source: %s" % (cust_source) 133 | blocks.append(single_block) 134 | if len(blocks) % 1000 == 0: 135 | logger.debug( 136 | f"parsed another 1000 blocks ({len(blocks)} so far)") 137 | single_block = b'' 138 | # comment out to only parse x blocks 139 | # if len(blocks) == 100: 140 | # break 141 | else: 142 | single_block = b'' 143 | else: 144 | single_block += line 145 | logger.info(f"Got {len(blocks)} blocks") 146 | global NUM_BLOCKS 147 | NUM_BLOCKS = len(blocks) 148 | return blocks 149 | 150 | 151 | def parse_blocks(jobs: Queue, connection_string: str): 152 | session = setup_connection(connection_string) 153 | 154 | counter = 0 155 | BLOCKS_DONE = 0 156 | 157 | start_time = time.time() 158 | while True: 159 | block = jobs.get() 160 | if block is None: 161 | break 162 | 163 | inetnum = parse_property_inetnum(block) 164 | if not inetnum: 165 | # invalid entry, do not parse 166 | logger.warning(f"Could not parse inetnum on block {block}. skipping") 167 | continue 168 | netname = parse_property(block, b'netname') 169 | # No netname field in ARIN block, try origin 170 | if not netname: 171 | netname = parse_property(block, b'origin') 172 | description = parse_property(block, b'descr') 173 | country = parse_property(block, b'country') 174 | # if we have a city object, append it to the country 175 | city = parse_property(block, b'city') 176 | if city: 177 | country = f"{country} - {city}" 178 | maintained_by = parse_property(block, b'mnt-by') 179 | created = parse_property(block, b'created') 180 | last_modified = parse_property(block, b'last-modified') 181 | if not last_modified: 182 | changed = parse_property(block, b'changed') 183 | # ***@ripe.net 19960624 184 | # a.c@domain.com 20060331 185 | # maybe repeated multiple times, we only take the first 186 | if re.match(r'^.+?@.+? 
\d+', changed): 187 | date = changed.split(" ")[1].strip() 188 | if len(date) == 8: 189 | year = int(date[0:4]) 190 | month = int(date[4:6]) 191 | day = int(date[6:8]) 192 | # some sanity checks for dates 193 | if month >= 1 and month <=12 and day >= 1 and day <= 31: 194 | last_modified = f"{year}-{month}-{day}" 195 | else: 196 | logger.debug(f"ignoring invalid changed date {date}") 197 | else: 198 | logger.debug(f"ignoring invalid changed date {date}") 199 | elif "@" in changed: 200 | # email in changed field without date 201 | logger.debug(f"ignoring invalid changed date {changed}") 202 | else: 203 | last_modified = changed 204 | status = parse_property(block, b'status') 205 | source = parse_property(block, b'cust_source') 206 | 207 | if isinstance(inetnum, list): 208 | for cidr in inetnum: 209 | b = Block(inetnum=str(cidr), netname=netname, description=description, country=country, 210 | maintained_by=maintained_by, created=created, last_modified=last_modified, source=source, status=status) 211 | session.add(b) 212 | else: 213 | b = Block(inetnum=inetnum.decode('utf-8'), netname=netname, description=description, country=country, 214 | maintained_by=maintained_by, created=created, last_modified=last_modified, source=source, status=status) 215 | session.add(b) 216 | 217 | counter += 1 218 | BLOCKS_DONE += 1 219 | if counter % COMMIT_COUNT == 0: 220 | session.commit() 221 | session.close() 222 | session = setup_connection(connection_string) 223 | # not really accurate at the moment 224 | percent = (BLOCKS_DONE * NUM_WORKERS * 100) / NUM_BLOCKS 225 | if percent > 100: 226 | percent = 100 227 | logger.debug('committed {} blocks ({} seconds) {:.1f}% done.'.format( 228 | counter, round(time.time() - start_time, 2), percent)) 229 | counter = 0 230 | start_time = time.time() 231 | session.commit() 232 | logger.debug('committed last blocks') 233 | session.close() 234 | logger.debug(f"{current_process().name} finished") 235 | 236 | 237 | def main(connection_string): 238 | overall_start_time = time.time() 239 | # reset database 240 | setup_connection(connection_string, create_db=True) 241 | 242 | for entry in FILELIST: 243 | global CURRENT_FILENAME 244 | CURRENT_FILENAME = entry 245 | f_name = f"./databases/{entry}" 246 | if os.path.exists(f_name): 247 | logger.info(f"parsing database file: {f_name}") 248 | start_time = time.time() 249 | blocks = read_blocks(f_name) 250 | logger.info(f"database parsing finished: {round(time.time() - start_time, 2)} seconds") 251 | 252 | logger.info('parsing blocks') 253 | start_time = time.time() 254 | 255 | jobs = Queue() 256 | 257 | workers = [] 258 | # start workers 259 | logger.debug(f"starting {NUM_WORKERS} processes") 260 | for _ in range(NUM_WORKERS): 261 | p = Process(target=parse_blocks, args=( 262 | jobs, connection_string,), daemon=True) 263 | p.start() 264 | workers.append(p) 265 | 266 | # add tasks 267 | for b in blocks: 268 | jobs.put(b) 269 | for _ in range(NUM_WORKERS): 270 | jobs.put(None) 271 | jobs.close() 272 | jobs.join_thread() 273 | 274 | # wait to finish 275 | for p in workers: 276 | p.join() 277 | 278 | logger.info( 279 | f"block parsing finished: {round(time.time() - start_time, 2)} seconds") 280 | else: 281 | logger.info( 282 | f"File {f_name} not found. 
Please download using download_dumps.sh") 283 | 284 | CURRENT_FILENAME = "empty" 285 | logger.info( 286 | f"script finished: {round(time.time() - overall_start_time, 2)} seconds") 287 | 288 | 289 | if __name__ == '__main__': 290 | parser = argparse.ArgumentParser(description='Create DB') 291 | parser.add_argument('-c', dest='connection_string', type=str, 292 | required=True, help="Connection string to the postgres database") 293 | parser.add_argument("-d", "--debug", action="store_true", 294 | help="set loglevel to DEBUG") 295 | parser.add_argument('--version', action='version', 296 | version=f"%(prog)s {VERSION}") 297 | args = parser.parse_args() 298 | if args.debug: 299 | logger.setLevel(logging.DEBUG) 300 | main(args.connection_string) 301 | -------------------------------------------------------------------------------- /db/helper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- ® 3 | 4 | from sqlalchemy import create_engine 5 | from sqlalchemy.ext.declarative import declarative_base 6 | from sqlalchemy.orm import sessionmaker 7 | 8 | Base = declarative_base() 9 | 10 | 11 | def get_base(): 12 | return Base 13 | 14 | 15 | def setup_connection(connection_string, create_db=False): 16 | engine = create_postgres_pool(connection_string) 17 | session = sessionmaker() 18 | session.configure(bind=engine) 19 | 20 | if create_db: 21 | Base.metadata.drop_all(engine) 22 | Base.metadata.create_all(engine) 23 | 24 | return session() 25 | 26 | 27 | def create_postgres_pool(connection_string): 28 | engine = create_engine(connection_string) 29 | return engine 30 | -------------------------------------------------------------------------------- /db/model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- ® 3 | 4 | from sqlalchemy import Column, Integer, String, DateTime, Index 5 | from sqlalchemy import literal_column 6 | from db.helper import get_base 7 | from sqlalchemy.dialects import postgresql 8 | from sqlalchemy.sql import func 9 | 10 | Base = get_base() 11 | 12 | 13 | class Block(Base): 14 | __tablename__ = 'block' 15 | id = Column(Integer, primary_key=True) 16 | inetnum = Column(postgresql.CIDR, nullable=False, index=True) 17 | netname = Column(String, nullable=True, index=True) 18 | description = Column(String) 19 | country = Column(String, index=True) 20 | maintained_by = Column(String, index=True) 21 | created = Column(DateTime, index=True) 22 | last_modified = Column(DateTime, index=True) 23 | source = Column(String, index=True) 24 | status = Column(String, index=True) 25 | 26 | __table_args__ = ( 27 | Index('ix_block_description', func.to_tsvector(literal_column("'english'"), description), postgresql_using="gin"), ) 28 | 29 | def __str__(self): 30 | return f'inetnum: {self.inetnum}, netname: {self.netname}, desc: {self.description}, status: {self.status}, country: {self.country}, maintained: {self.maintained_by}, created: {self.created}, updated: {self.last_modified}, source: {self.source}' 31 | 32 | def __repr__(self): 33 | return self.__str__() 34 | -------------------------------------------------------------------------------- /docker-compose.override.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | network_info: 4 | entrypoint: /app/create_db.py 5 | command: -c postgresql+psycopg://network_info:network_info@db:5432/network_info -d 6 | 
volumes: 7 | - .:/app 8 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | network_info: 5 | image: network_info 6 | build: 7 | context: . 8 | command: -c postgresql+psycopg://network_info:network_info@db:5432/network_info 9 | volumes: 10 | - /etc/localtime:/etc/localtime:ro 11 | depends_on: 12 | db: 13 | condition: service_healthy 14 | restart: true 15 | 16 | db: 17 | image: postgres:13-alpine 18 | environment: 19 | POSTGRES_DB: network_info 20 | POSTGRES_USER: network_info 21 | POSTGRES_PASSWORD: network_info 22 | healthcheck: 23 | test: ["CMD-SHELL", "pg_isready"] 24 | interval: 30s 25 | timeout: 10s 26 | retries: 5 27 | start_period: 60s 28 | volumes: 29 | - pg_data:/var/lib/postgresql/data 30 | 31 | volumes: 32 | pg_data: 33 | driver: local 34 | -------------------------------------------------------------------------------- /docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./download_dumps.sh 4 | 5 | /app/create_db.py "$@" 6 | -------------------------------------------------------------------------------- /download_dumps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DOWNLOAD_DIR="./databases" 4 | mkdir -p $DOWNLOAD_DIR 5 | 6 | function download { 7 | name=$(echo $1 |awk -F "/" '{print $NF}') 8 | echo "Downloading $name..." 9 | wget -O "$DOWNLOAD_DIR/$name" "$1" 10 | } 11 | 12 | download "https://ftp.afrinic.net/pub/dbase/afrinic.db.gz" 13 | 14 | download "https://ftp.apnic.net/pub/apnic/whois/apnic.db.inetnum.gz" 15 | download "https://ftp.apnic.net/pub/apnic/whois/apnic.db.inet6num.gz" 16 | 17 | download "https://ftp.arin.net/pub/rr/arin.db.gz" 18 | 19 | download "https://ftp.lacnic.net/lacnic/dbase/lacnic.db.gz" 20 | 21 | download "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz" 22 | download "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz" 23 | -------------------------------------------------------------------------------- /query_db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | psql -e -q -x -c "SELECT block.inetnum, block.netname, block.country, block.description, block.maintained_by, block.created, block.last_modified, block.source FROM block WHERE block.inetnum >> '$1' ORDER BY block.inetnum DESC;" network_info 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | netaddr==1.3.0 2 | psycopg==3.2.9 3 | psycopg-c==3.2.9 4 | psycopg-pool==3.2.6 5 | SQLAlchemy==2.0.41 6 | --------------------------------------------------------------------------------