├── .github
├── pull_request_template.md
└── workflows
│ └── ci.yml
├── .gitignore
├── LICENSE
├── README.md
├── examples
├── __init__.py
├── csv_to_mmdb.py
└── fake_ip_info.csv
├── mmdb_writer.py
├── pyproject.toml
├── tests
├── __init__.py
├── clients
│ ├── go
│ │ ├── go.mod
│ │ ├── go.sum
│ │ └── main.go
│ └── java
│ │ ├── .gitignore
│ │ ├── pom.xml
│ │ └── src
│ │ └── main
│ │ └── java
│ │ └── Main.java
├── record.py
├── test_api.py
└── test_clients.py
└── tox.ini
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ## Which issue does this PR close?
2 |
3 |
6 |
7 | Closes #.
8 |
9 | ## Rationale for this change
10 |
11 |
15 |
16 | ## What changes are included in this PR?
17 |
18 |
21 |
22 | ## Are these changes tested?
23 |
24 |
31 |
32 | ## Are there any user-facing changes?
33 |
34 |
37 |
38 |
41 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches: [ "master", "ci/*" ]
6 | pull_request:
7 | branches: [ "master" ]
8 |
9 | permissions:
10 | contents: read
11 |
12 | jobs:
13 | test:
14 | name: ci ${{ matrix.python-version }}
15 | runs-on: ubuntu-latest
16 | strategy:
17 | matrix:
18 | python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
19 | steps:
20 | - uses: actions/checkout@v4
21 | - name: Setup Go environment
22 | uses: actions/setup-go@v5
23 | with:
24 | go-version: '1.22'
25 | cache-dependency-path: tests/clients/go/go.sum
26 | - name: Setup Java JDK
27 | uses: actions/setup-java@v4
28 | with:
29 | distribution: temurin
30 | java-version: 22
31 | cache: maven
32 | cache-dependency-path: tests/clients/java/pom.xml
33 | - name: Set up Python
34 | uses: actions/setup-python@v5
35 | with:
36 | python-version: ${{ matrix.python-version }}
37 | allow-prereleases: true
38 | check-latest: true
39 | - name: Install dependencies
40 | run: |
41 | sudo apt install libmaxminddb0 libmaxminddb-dev
42 | python -m pip install --upgrade pip
43 | pip install tox
44 | - name: Test
45 | run: tox -e py
46 | - name: Lint
47 | if: matrix.python-version == '3.12'
48 | run: tox -e lint
49 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # pycharm
132 | .idea/
133 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 VimT
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | - [MaxMind-DB-Writer-python](#maxmind-db-writer-python)
2 | * [Install](#install)
3 | * [Usage](#usage)
4 | * [Examples](#examples)
5 | * [Using the Java Client](#using-the-java-client)
6 | + [TLDR](#tldr)
7 | + [Underlying Principles](#underlying-principles)
8 | * [Type Enforcement](#type-enforcement)
9 | * [Reference:](#reference-)
10 |
11 | # MaxMind-DB-Writer-python
12 |
13 | Create `mmdb`-format IP database files that can be read by the [
14 | official `maxmind` language readers](https://dev.maxmind.com/geoip/geoip2/downloadable/).
15 |
16 | ~~[The official perl writer](https://github.com/maxmind/MaxMind-DB-Writer-perl) was written in perl,
17 | which was difficult to customize.
18 | So I implemented the `MaxmindDB format` ip library in python language.~~
19 |
20 | MaxMind has now released an official Go version of the MMDB writer.
21 | If you prefer using Go, you can check out the official Go
22 | implementation [mmdbwriter](https://github.com/maxmind/mmdbwriter).
23 | This project still provides a Python alternative for those who need it.
24 |
25 | ## Install
26 |
27 | ```shell script
28 | pip install -U mmdb_writer
29 | ```
30 |
31 | ## Usage
32 |
33 | ```python
34 | from netaddr import IPSet
35 |
36 | from mmdb_writer import MMDBWriter
37 |
38 | writer = MMDBWriter()
39 |
40 | writer.insert_network(IPSet(['1.1.0.0/24', '1.1.1.0/24']), {'country': 'COUNTRY', 'isp': 'ISP'})
41 | writer.to_db_file('test.mmdb')
42 |
43 | import maxminddb
44 |
45 | m = maxminddb.open_database('test.mmdb')
46 | r = m.get('1.1.1.1')
47 | assert r == {'country': 'COUNTRY', 'isp': 'ISP'}
48 | ```
49 |
50 | ## Examples
51 |
52 | See [csv_to_mmdb.py](./examples/csv_to_mmdb.py).
53 |
54 |
55 | ## Using the Java Client
56 |
57 | If you are using the Java client, you need to be careful to set the `int_type` parameter so that Java correctly
58 | recognizes the integer type in the MMDB file.
59 |
60 | Example:
61 |
62 | ```python
63 | from mmdb_writer import MMDBWriter
64 |
65 | writer = MMDBWriter(int_type='i32')
66 | ```
67 |
68 | Alternatively, you can explicitly specify data types using the [Type Enforcement](#type-enforcement) section.
69 |
70 | ### Underlying Principles
71 |
72 | In Java, when deserializing to a structure, the numeric types will use the original MMDB numeric types. The specific
73 | conversion relationships are as follows:
74 |
75 | | mmdb type | java type |
76 | |-----------|------------|
77 | | float | Float |
78 | | double | Double |
79 | | int32 | Integer |
80 | | uint16 | Integer |
81 | | uint32 | Long |
82 | | uint64 | BigInteger |
83 | | uint128 | BigInteger |
84 |
85 | When using the Python writer to generate an MMDB file, by default, it converts integers to the corresponding MMDB type
86 | based on the size of the `int`. For instance, `int(1)` would convert to `uint16`, and `int(2**16+1)` would convert
87 | to `uint32`. This may cause deserialization failures in Java clients. Therefore, it is necessary to specify
88 | the `int_type` parameter when generating MMDB files to define the numeric type accurately.
89 |
90 | ## Type Enforcement
91 |
92 | MMDB supports a variety of numeric types such as `int32`, `uint16`, `uint32`, `uint64`, `uint128` for integers,
93 | and `f32`, `f64` for floating points, while Python only has one integer type and one float type (actually `f64`).
94 |
95 | Therefore, when generating an MMDB file, you need to specify the `int_type` parameter to define the numeric type of the
96 | MMDB file. The behaviors for different `int_type` settings are:
97 |
98 | | int_type | Behavior |
99 | |----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
100 | auto (default) | Automatically selects the MMDB numeric type based on the value size.<br>Rules:<br>`int32` for value < 0<br>`uint16` for 0 <= value < 2^16<br>`uint32` for 2^16 <= value < 2^32<br>`uint64` for 2^32 <= value < 2^64<br>`uint128` for value >= 2^64. |
101 | | i32 | Stores all integer types as `int32`. |
102 | | u16 | Stores all integer types as `uint16`. |
103 | | u32 | Stores all integer types as `uint32`. |
104 | | u64 | Stores all integer types as `uint64`. |
105 | | u128 | Stores all integer types as `uint128`. |
106 |
107 | If you want to use different int types for different scenarios, you can use type wrapping:
108 |
109 | ```python
110 | from mmdb_writer import MMDBWriter, MmdbI32, MmdbF32
111 |
112 | writer = MMDBWriter()
113 | # the value of field "i32" will be stored as int32 type
114 | writer.insert_network(IPSet(["1.0.0.0/24"]), {"i32": MmdbI32(128), "f32": MmdbF32(1.22)})
115 | ```
116 |
117 | ## Reference:
118 |
119 | - [MaxmindDB format](http://maxmind.github.io/MaxMind-DB/)
120 | - [geoip-mmdb](https://github.com/i-rinat/geoip-mmdb)
121 |
--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vimt/MaxMind-DB-Writer-python/70f8f2ad6b91db91e9d7b977268a914a888f089a/examples/__init__.py
--------------------------------------------------------------------------------
/examples/csv_to_mmdb.py:
--------------------------------------------------------------------------------
1 | import csv
2 | from collections import defaultdict
3 |
4 | from netaddr import IPNetwork, IPSet
5 |
6 | from mmdb_writer import MMDBWriter
7 |
8 |
def main():
    """Build ``fake_ip_library.mmdb`` from ``fake_ip_info.csv``.

    Rows that share the same (country, isp) pair are collected together so
    that each pair is inserted once, as a single ``IPSet`` of its networks.
    Both file names are resolved relative to the current working directory.
    """
    writer = MMDBWriter(
        4, "Test.GeoIP", languages=["EN"], description="Test IP library"
    )
    networks_by_owner = defaultdict(list)

    # merge cidr
    with open("fake_ip_info.csv") as csv_file:
        for row in csv.DictReader(csv_file):
            owner = (row["country"], row["isp"])
            networks_by_owner[owner].append(
                IPNetwork(f'{row["ip"]}/{row["prefixlen"]}')
            )

    for (country, isp), cidrs in networks_by_owner.items():
        writer.insert_network(IPSet(cidrs), {"country": country, "isp": isp})
    writer.to_db_file("fake_ip_library.mmdb")
25 |
26 |
def test_read():
    """Look up a sample address in ``fake_ip_library.mmdb`` and print its record."""
    import maxminddb

    # Use the reader as a context manager so it is closed once the lookup
    # is done instead of being left open until interpreter exit.
    with maxminddb.open_database("fake_ip_library.mmdb") as reader:
        print(reader.get("3.1.1.1"))
33 |
34 |
# Build the example database only when this file is run as a script;
# importing the module has no side effects.
if __name__ == "__main__":
    main()
37 |
--------------------------------------------------------------------------------
/examples/fake_ip_info.csv:
--------------------------------------------------------------------------------
1 | ip,prefixlen,country,isp
2 | 1.0.0.0,8,country6,isp6
3 | 2.0.0.0,8,country6,isp5
4 | 3.0.0.0,8,country5,isp3
5 | 4.0.0.0,8,country3,isp3
6 | 5.0.0.0,8,country9,isp3
7 | 6.0.0.0,8,country8,isp0
8 | 7.0.0.0,8,country9,isp2
9 | 8.0.0.0,8,country7,isp2
10 | 9.0.0.0,8,country3,isp8
11 | 10.0.0.0,8,country3,isp6
12 | 11.0.0.0,8,country6,isp5
13 | 12.0.0.0,8,country9,isp5
14 | 13.0.0.0,8,country4,isp1
15 | 14.0.0.0,8,country9,isp3
16 | 15.0.0.0,8,country6,isp8
17 | 16.0.0.0,8,country6,isp1
18 | 17.0.0.0,8,country9,isp2
19 | 18.0.0.0,8,country2,isp8
20 | 19.0.0.0,8,country9,isp4
21 | 20.0.0.0,8,country1,isp3
22 | 21.0.0.0,8,country8,isp3
23 | 22.0.0.0,8,country0,isp2
24 | 23.0.0.0,8,country7,isp9
25 | 24.0.0.0,8,country6,isp0
26 | 25.0.0.0,8,country2,isp0
27 | 26.0.0.0,8,country3,isp8
28 | 27.0.0.0,8,country1,isp2
29 | 28.0.0.0,8,country9,isp2
30 | 29.0.0.0,8,country0,isp1
31 | 30.0.0.0,8,country1,isp9
32 | 31.0.0.0,8,country3,isp5
33 | 32.0.0.0,8,country1,isp4
34 | 33.0.0.0,8,country3,isp0
35 | 34.0.0.0,8,country8,isp4
36 | 35.0.0.0,8,country7,isp3
37 | 36.0.0.0,8,country5,isp0
38 | 37.0.0.0,8,country1,isp8
39 | 38.0.0.0,8,country9,isp6
40 | 39.0.0.0,8,country1,isp4
41 | 40.0.0.0,8,country7,isp2
42 | 41.0.0.0,8,country5,isp7
43 | 42.0.0.0,8,country0,isp1
44 | 43.0.0.0,8,country0,isp8
45 | 44.0.0.0,8,country1,isp5
46 | 45.0.0.0,8,country9,isp2
47 | 46.0.0.0,8,country5,isp0
48 | 47.0.0.0,8,country1,isp2
49 | 48.0.0.0,8,country6,isp2
50 | 49.0.0.0,8,country4,isp9
51 | 50.0.0.0,8,country7,isp1
52 | 51.0.0.0,8,country7,isp3
53 | 52.0.0.0,8,country1,isp5
54 | 53.0.0.0,8,country7,isp1
55 | 54.0.0.0,8,country9,isp4
56 | 55.0.0.0,8,country2,isp9
57 | 56.0.0.0,8,country4,isp0
58 | 57.0.0.0,8,country7,isp1
59 | 58.0.0.0,8,country3,isp3
60 | 59.0.0.0,8,country3,isp0
61 | 60.0.0.0,8,country0,isp6
62 | 61.0.0.0,8,country7,isp8
63 | 62.0.0.0,8,country3,isp5
64 | 63.0.0.0,8,country4,isp2
65 | 64.0.0.0,8,country5,isp4
66 | 65.0.0.0,8,country5,isp4
67 | 66.0.0.0,8,country6,isp9
68 | 67.0.0.0,8,country3,isp0
69 | 68.0.0.0,8,country3,isp5
70 | 69.0.0.0,8,country1,isp3
71 | 70.0.0.0,8,country5,isp1
72 | 71.0.0.0,8,country5,isp9
73 | 72.0.0.0,8,country1,isp3
74 | 73.0.0.0,8,country5,isp6
75 | 74.0.0.0,8,country4,isp8
76 | 75.0.0.0,8,country6,isp6
77 | 76.0.0.0,8,country2,isp9
78 | 77.0.0.0,8,country1,isp5
79 | 78.0.0.0,8,country3,isp3
80 | 79.0.0.0,8,country5,isp6
81 | 80.0.0.0,8,country3,isp8
82 | 81.0.0.0,8,country0,isp4
83 | 82.0.0.0,8,country0,isp9
84 | 83.0.0.0,8,country4,isp3
85 | 84.0.0.0,8,country9,isp6
86 | 85.0.0.0,8,country5,isp7
87 | 86.0.0.0,8,country6,isp7
88 | 87.0.0.0,8,country0,isp6
89 | 88.0.0.0,8,country8,isp3
90 | 89.0.0.0,8,country1,isp1
91 | 90.0.0.0,8,country3,isp4
92 | 91.0.0.0,8,country3,isp9
93 | 92.0.0.0,8,country1,isp7
94 | 93.0.0.0,8,country3,isp9
95 | 94.0.0.0,8,country5,isp1
96 | 95.0.0.0,8,country5,isp8
97 | 96.0.0.0,8,country5,isp7
98 | 97.0.0.0,8,country7,isp1
99 | 98.0.0.0,8,country9,isp0
100 | 99.0.0.0,8,country3,isp8
101 | 100.0.0.0,8,country1,isp1
102 | 101.0.0.0,8,country9,isp9
103 | 102.0.0.0,8,country4,isp3
104 | 103.0.0.0,8,country7,isp6
105 | 104.0.0.0,8,country2,isp1
106 | 105.0.0.0,8,country6,isp6
107 | 106.0.0.0,8,country4,isp3
108 | 107.0.0.0,8,country4,isp2
109 | 108.0.0.0,8,country5,isp4
110 | 109.0.0.0,8,country4,isp1
111 | 110.0.0.0,8,country3,isp1
112 | 111.0.0.0,8,country7,isp5
113 | 112.0.0.0,8,country9,isp5
114 | 113.0.0.0,8,country4,isp8
115 | 114.0.0.0,8,country2,isp3
116 | 115.0.0.0,8,country7,isp5
117 | 116.0.0.0,8,country6,isp0
118 | 117.0.0.0,8,country3,isp1
119 | 118.0.0.0,8,country0,isp1
120 | 119.0.0.0,8,country7,isp2
121 | 120.0.0.0,8,country6,isp8
122 | 121.0.0.0,8,country4,isp3
123 | 122.0.0.0,8,country9,isp1
124 | 123.0.0.0,8,country2,isp4
125 | 124.0.0.0,8,country0,isp3
126 | 125.0.0.0,8,country2,isp4
127 | 126.0.0.0,8,country3,isp2
128 | 127.0.0.0,8,country2,isp8
129 | 128.0.0.0,8,country8,isp8
130 | 129.0.0.0,8,country8,isp6
131 | 130.0.0.0,8,country1,isp7
132 | 131.0.0.0,8,country1,isp8
133 | 132.0.0.0,8,country1,isp0
134 | 133.0.0.0,8,country7,isp1
135 | 134.0.0.0,8,country5,isp1
136 | 135.0.0.0,8,country7,isp4
137 | 136.0.0.0,8,country5,isp3
138 | 137.0.0.0,8,country3,isp5
139 | 138.0.0.0,8,country7,isp6
140 | 139.0.0.0,8,country1,isp8
141 | 140.0.0.0,8,country0,isp5
142 | 141.0.0.0,8,country4,isp9
143 | 142.0.0.0,8,country1,isp8
144 | 143.0.0.0,8,country4,isp8
145 | 144.0.0.0,8,country5,isp2
146 | 145.0.0.0,8,country5,isp4
147 | 146.0.0.0,8,country0,isp5
148 | 147.0.0.0,8,country1,isp6
149 | 148.0.0.0,8,country1,isp0
150 | 149.0.0.0,8,country1,isp1
151 | 150.0.0.0,8,country2,isp2
152 | 151.0.0.0,8,country6,isp4
153 | 152.0.0.0,8,country3,isp2
154 | 153.0.0.0,8,country4,isp4
155 | 154.0.0.0,8,country5,isp2
156 | 155.0.0.0,8,country5,isp6
157 | 156.0.0.0,8,country9,isp4
158 | 157.0.0.0,8,country7,isp5
159 | 158.0.0.0,8,country7,isp9
160 | 159.0.0.0,8,country5,isp9
161 | 160.0.0.0,8,country1,isp9
162 | 161.0.0.0,8,country3,isp9
163 | 162.0.0.0,8,country7,isp4
164 | 163.0.0.0,8,country4,isp6
165 | 164.0.0.0,8,country1,isp7
166 | 165.0.0.0,8,country7,isp1
167 | 166.0.0.0,8,country8,isp7
168 | 167.0.0.0,8,country9,isp3
169 | 168.0.0.0,8,country7,isp3
170 | 169.0.0.0,8,country9,isp3
171 | 170.0.0.0,8,country0,isp0
172 | 171.0.0.0,8,country3,isp1
173 | 172.0.0.0,8,country5,isp4
174 | 173.0.0.0,8,country6,isp6
175 | 174.0.0.0,8,country0,isp4
176 | 175.0.0.0,8,country7,isp4
177 | 176.0.0.0,8,country3,isp7
178 | 177.0.0.0,8,country4,isp1
179 | 178.0.0.0,8,country5,isp6
180 | 179.0.0.0,8,country0,isp8
181 | 180.0.0.0,8,country0,isp5
182 | 181.0.0.0,8,country1,isp3
183 | 182.0.0.0,8,country1,isp1
184 | 183.0.0.0,8,country5,isp4
185 | 184.0.0.0,8,country6,isp6
186 | 185.0.0.0,8,country3,isp8
187 | 186.0.0.0,8,country1,isp4
188 | 187.0.0.0,8,country8,isp1
189 | 188.0.0.0,8,country5,isp0
190 | 189.0.0.0,8,country6,isp4
191 | 190.0.0.0,8,country5,isp6
192 | 191.0.0.0,8,country3,isp7
193 | 192.0.0.0,8,country8,isp2
194 | 193.0.0.0,8,country6,isp2
195 | 194.0.0.0,8,country0,isp5
196 | 195.0.0.0,8,country0,isp6
197 | 196.0.0.0,8,country6,isp9
198 | 197.0.0.0,8,country7,isp8
199 | 198.0.0.0,8,country4,isp2
200 | 199.0.0.0,8,country3,isp8
201 | 200.0.0.0,8,country3,isp3
202 | 201.0.0.0,8,country1,isp3
203 | 202.0.0.0,8,country7,isp0
204 | 203.0.0.0,8,country1,isp7
205 | 204.0.0.0,8,country6,isp9
206 | 205.0.0.0,8,country9,isp0
207 | 206.0.0.0,8,country4,isp6
208 | 207.0.0.0,8,country8,isp4
209 | 208.0.0.0,8,country0,isp5
210 | 209.0.0.0,8,country2,isp6
211 | 210.0.0.0,8,country4,isp0
212 | 211.0.0.0,8,country7,isp2
213 | 212.0.0.0,8,country6,isp8
214 | 213.0.0.0,8,country5,isp6
215 | 214.0.0.0,8,country3,isp4
216 | 215.0.0.0,8,country2,isp4
217 | 216.0.0.0,8,country6,isp8
218 | 217.0.0.0,8,country7,isp4
219 | 218.0.0.0,8,country2,isp0
220 | 219.0.0.0,8,country6,isp3
221 | 220.0.0.0,8,country1,isp0
222 | 221.0.0.0,8,country5,isp3
223 | 222.0.0.0,8,country9,isp7
224 | 223.0.0.0,8,country6,isp0
225 | 224.0.0.0,8,country6,isp9
226 | 225.0.0.0,8,country7,isp1
227 | 226.0.0.0,8,country5,isp9
228 | 227.0.0.0,8,country5,isp9
229 | 228.0.0.0,8,country7,isp6
230 | 229.0.0.0,8,country1,isp0
231 | 230.0.0.0,8,country5,isp8
232 | 231.0.0.0,8,country3,isp0
233 | 232.0.0.0,8,country8,isp4
234 | 233.0.0.0,8,country6,isp3
235 | 234.0.0.0,8,country5,isp0
236 | 235.0.0.0,8,country4,isp6
237 | 236.0.0.0,8,country8,isp8
238 | 237.0.0.0,8,country2,isp5
239 | 238.0.0.0,8,country6,isp3
240 | 239.0.0.0,8,country4,isp8
241 | 240.0.0.0,8,country4,isp0
242 | 241.0.0.0,8,country7,isp3
243 | 242.0.0.0,8,country0,isp4
244 | 243.0.0.0,8,country2,isp8
245 | 244.0.0.0,8,country3,isp4
246 | 245.0.0.0,8,country5,isp6
247 | 246.0.0.0,8,country0,isp6
248 | 247.0.0.0,8,country8,isp5
249 | 248.0.0.0,8,country8,isp4
250 | 249.0.0.0,8,country4,isp0
251 | 250.0.0.0,8,country3,isp9
252 | 251.0.0.0,8,country4,isp2
253 | 252.0.0.0,8,country3,isp3
254 | 253.0.0.0,8,country5,isp1
255 | 254.0.0.0,8,country1,isp4
256 | 255.0.0.0,8,country8,isp1
257 |
--------------------------------------------------------------------------------
/mmdb_writer.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.2.5"
2 |
3 | import logging
4 | import math
5 | import struct
6 | import time
7 | from decimal import Decimal
8 | from enum import IntEnum
9 | from typing import Dict, List, Literal, Union
10 |
11 | from netaddr import IPNetwork, IPSet
12 |
13 |
class MmdbBaseType:
    """Base class for wrappers that tag a Python value with an MMDB type.

    The wrapped object is kept unchanged in ``value``; subclasses only
    differ in their class, which is what the encoder dispatches on.
    """

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        # Show the concrete wrapper class so log output and assertion
        # failures read e.g. ``MmdbI32(128)`` rather than an object address.
        return f"{type(self).__name__}({self.value!r})"
17 |
18 |
19 | # type hint
class MmdbF32(MmdbBaseType):
    """Wrapper marking a float that the encoder stores as MMDB ``float``."""

    def __init__(self, value: float):
        super().__init__(value)
23 |
24 |
class MmdbF64(MmdbBaseType):
    """Wrapper marking a float or Decimal that the encoder stores as MMDB ``double``."""

    def __init__(self, value: Union[float, Decimal]):
        super().__init__(value)
28 |
29 |
class MmdbI32(MmdbBaseType):
    """Wrapper marking an integer that the encoder stores as MMDB ``int32``."""

    def __init__(self, value: int):
        super().__init__(value)
33 |
34 |
class MmdbU16(MmdbBaseType):
    """Wrapper marking an integer that the encoder stores as MMDB ``uint16``."""

    def __init__(self, value: int):
        super().__init__(value)
38 |
39 |
class MmdbU32(MmdbBaseType):
    """Wrapper marking an integer that the encoder stores as MMDB ``uint32``."""

    def __init__(self, value: int):
        super().__init__(value)
43 |
44 |
class MmdbU64(MmdbBaseType):
    """Wrapper marking an integer that the encoder stores as MMDB ``uint64``."""

    def __init__(self, value: int):
        super().__init__(value)
48 |
49 |
class MmdbU128(MmdbBaseType):
    """Wrapper marking an integer that the encoder stores as MMDB ``uint128``."""

    def __init__(self, value: int):
        super().__init__(value)
53 |
54 |
# Python values the encoder accepts as record data. ``float`` and
# ``Decimal`` are listed because Encoder.python_type_id handles both
# (a bare Decimal is stored as a double).
MMDBType = Union[
    dict,
    list,
    str,
    bytes,
    int,
    float,
    bool,
    Decimal,
    MmdbF32,
    MmdbF64,
    MmdbI32,
    MmdbU16,
    MmdbU32,
    MmdbU64,
    MmdbU128,
]
70 |
71 | logger = logging.getLogger(__name__)
72 |
73 | METADATA_MAGIC = b"\xab\xcd\xefMaxMind.com"
74 |
75 |
class MMDBTypeID(IntEnum):
    """Numeric identifiers of the MMDB data-field types."""

    # Ids 1-7 are packed by Encoder._make_header into the top three bits of
    # a single control byte.
    POINTER = 1
    STRING = 2
    DOUBLE = 3
    BYTES = 4
    UINT16 = 5
    UINT32 = 6
    MAP = 7
    # Ids above 7 are written by Encoder._make_header as a second byte
    # holding ``id - 7``.
    INT32 = 8
    UINT64 = 9
    UINT128 = 10
    ARRAY = 11
    # DATA_CACHE and END_MARKER have no entry in Encoder.type_encoder, so
    # the encoder never emits them.
    DATA_CACHE = 12
    END_MARKER = 13
    BOOLEAN = 14
    FLOAT = 15
92 |
93 |
94 | UINT16_MAX = 0xFFFF
95 | UINT32_MAX = 0xFFFFFFFF
96 | UINT64_MAX = 0xFFFFFFFFFFFFFFFF
97 |
98 |
class SearchTreeNode:
    """Inner node of the binary search tree.

    Children are addressed by branch number: ``0`` is ``left`` and ``1`` is
    ``right``. Any other branch number is ignored (reads yield ``None``,
    writes do nothing).
    """

    def __init__(self, left=None, right=None):
        self.left, self.right = left, right

    def get_or_create(self, item):
        """Return the child on branch ``item``, filling a falsy slot with a new node."""
        if item == 0:
            if not self.left:
                self.left = SearchTreeNode()
            return self.left
        if item == 1:
            if not self.right:
                self.right = SearchTreeNode()
            return self.right
        return None

    def __getitem__(self, item):
        """Return the child stored on branch ``item`` (``None`` if unset)."""
        if item == 0:
            return self.left
        if item == 1:
            return self.right
        return None

    def __setitem__(self, key, value):
        """Store ``value`` as the child on branch ``key``."""
        if key == 0:
            self.left = value
            return
        if key == 1:
            self.right = value
123 |
124 |
class SearchTreeLeaf:
    """Terminal node of the search tree, holding the payload in ``value``."""

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        return "SearchTreeLeaf(value={})".format(self.value)

    # str() and repr() share one implementation.
    __str__ = __repr__
133 |
134 |
# Accepted values of the ``int_type`` option: "auto", or a short/long
# spelling of one fixed MMDB integer type.
# NOTE(review): Encoder.python_type_id compares the option against the Mmdb*
# classes themselves (e.g. ``MmdbU16``), so the class members below
# presumably stand for the class objects rather than instances - confirm.
IntType = Union[
    Literal[
        "auto",
        "u16",
        "u32",
        "u64",
        "u128",
        "i32",
        "uint16",
        "uint32",
        "uint64",
        "uint128",
        "int32",
    ],
    MmdbU16,
    MmdbU32,
    MmdbU64,
    MmdbU128,
    MmdbI32,
]
# Accepted values of the ``float_type`` option (same convention as IntType).
FloatType = Union[Literal["f32", "f64", "float32", "float64"], MmdbF32, MmdbF64]
156 |
157 |
class Encoder:
    """Serialize Python values into MMDB data-section bytes.

    With ``cache=True`` every encoded value is appended to ``data_list`` and
    ``encode`` returns a pointer (or the raw offset) to it; a value that was
    already encoded is not stored again. With ``cache=False`` ``encode``
    returns the encoded bytes inline.

    ``int_type`` / ``float_type`` select the MMDB type used for bare Python
    ``int`` / ``float`` values; wrapper instances (``MmdbI32`` etc.) always
    use their own type.
    """

    # Accepted spellings of ``int_type`` and the MMDB type each one forces.
    _INT_TYPE_ALIASES = (
        (("u16", "uint16", MmdbU16), MMDBTypeID.UINT16),
        (("u32", "uint32", MmdbU32), MMDBTypeID.UINT32),
        (("u64", "uint64", MmdbU64), MMDBTypeID.UINT64),
        (("u128", "uint128", MmdbU128), MMDBTypeID.UINT128),
        (("i32", "int32", MmdbI32), MMDBTypeID.INT32),
    )
    # Accepted spellings of ``float_type`` and the MMDB type each one forces.
    _FLOAT_TYPE_ALIASES = (
        (("f32", "float32", MmdbF32), MMDBTypeID.FLOAT),
        (("f64", "float64", MmdbF64), MMDBTypeID.DOUBLE),
    )

    # Lazily built dispatch table; see ``type_encoder``.
    _type_encoder = None

    def __init__(
        self, cache=True, int_type: IntType = "auto", float_type: FloatType = "f64"
    ):
        self.cache = cache
        self.int_type = int_type
        self.float_type = float_type

        # cache key -> offset of the already encoded value in the data section
        self.data_cache = {}
        # encoded chunks, in the order they were written
        self.data_list = []
        # total length of ``data_list`` so far, i.e. the next free offset
        self.data_pointer = 0
        # Types whose MMDB type is fixed. ``int``, ``float`` and ``Decimal``
        # are resolved in ``python_type_id``; ``float`` in particular must
        # not be listed here or the ``float_type`` option would never be
        # consulted.
        self._python_type_id = {
            bool: MMDBTypeID.BOOLEAN,
            list: MMDBTypeID.ARRAY,
            dict: MMDBTypeID.MAP,
            bytes: MMDBTypeID.BYTES,
            str: MMDBTypeID.STRING,
            MmdbF32: MMDBTypeID.FLOAT,
            MmdbF64: MMDBTypeID.DOUBLE,
            MmdbI32: MMDBTypeID.INT32,
            MmdbU16: MMDBTypeID.UINT16,
            MmdbU32: MMDBTypeID.UINT32,
            MmdbU64: MMDBTypeID.UINT64,
            MmdbU128: MMDBTypeID.UINT128,
        }

    def _encode_pointer(self, value):
        """Return the pointer field for data-section offset ``value``.

        The smallest of the four pointer sizes that can hold the offset is
        used; the three shorter forms store the offset minus the first value
        of their range.
        """
        pointer = value
        if pointer >= 134744064:
            # 32-bit form: offset stored verbatim after the control byte.
            res = struct.pack(">BI", 0x38, pointer)
        elif pointer >= 526336:
            # 27-bit form: three high bits live in the control byte.
            pointer -= 526336
            res = struct.pack(
                ">BBBB",
                0x30 + ((pointer >> 24) & 0x07),
                (pointer >> 16) & 0xFF,
                (pointer >> 8) & 0xFF,
                pointer & 0xFF,
            )
        elif pointer >= 2048:
            # 19-bit form.
            pointer -= 2048
            res = struct.pack(
                ">BBB",
                0x28 + ((pointer >> 16) & 0x07),
                (pointer >> 8) & 0xFF,
                pointer & 0xFF,
            )
        else:
            # 11-bit form.
            res = struct.pack(">BB", 0x20 + ((pointer >> 8) & 0x07), pointer & 0xFF)

        return res

    def _encode_utf8_string(self, value):
        """Encode ``value`` as a STRING field; the length counts UTF-8 bytes."""
        encoded_value = value.encode("utf-8")
        return self._make_header(MMDBTypeID.STRING, len(encoded_value)) + encoded_value

    def _encode_bytes(self, value):
        """Encode ``value`` as a BYTES field."""
        return self._make_header(MMDBTypeID.BYTES, len(value)) + value

    def _encode_uint(self, type_id, max_len):
        """Build an encoder for an unsigned integer type of ``max_len`` bytes.

        The returned callable raises ``ValueError`` for values outside
        ``range(0, 2 ** (max_len * 8))`` and otherwise emits the value
        big-endian without leading zero bytes (zero has an empty payload).
        """
        value_max = 2 ** (max_len * 8)

        def _encode_unsigned_value(value):
            if value < 0 or value >= value_max:
                raise ValueError(
                    f"encode uint{max_len * 8} fail: "
                    f"{value} not in range(0, {value_max})"
                )
            res = b""
            while value != 0 and len(res) < max_len:
                res = struct.pack(">B", value & 0xFF) + res
                value = value >> 8
            return self._make_header(type_id, len(res)) + res

        return _encode_unsigned_value

    def _encode_map(self, value):
        """Encode a dict as a MAP header followed by its key/value pairs."""
        parts = [self._make_header(MMDBTypeID.MAP, len(value))]
        for key, item in value.items():
            # Keys and values both go through ``encode``, so with caching
            # enabled each of them is emitted as a pointer.
            parts.append(self.encode(key))
            parts.append(self.encode(item))
        return b"".join(parts)

    def _encode_array(self, value):
        """Encode a list as an ARRAY header followed by its items."""
        parts = [self._make_header(MMDBTypeID.ARRAY, len(value))]
        for item in value:
            parts.append(self.encode(item))
        return b"".join(parts)

    def _encode_boolean(self, value):
        """Encode a boolean; the value travels in the header's length bits."""
        return self._make_header(MMDBTypeID.BOOLEAN, 1 if value else 0)

    def _encode_pack_type(self, type_id, fmt):
        """Build an encoder that packs its value with ``struct`` format ``fmt``."""

        def pack_type(value):
            res = struct.pack(fmt, value)
            return self._make_header(type_id, len(res)) + res

        return pack_type

    @property
    def type_encoder(self):
        """Mapping of MMDB type id to the callable that encodes that type."""
        if self._type_encoder is None:
            self._type_encoder = {
                MMDBTypeID.POINTER: self._encode_pointer,
                MMDBTypeID.STRING: self._encode_utf8_string,
                MMDBTypeID.DOUBLE: self._encode_pack_type(MMDBTypeID.DOUBLE, ">d"),
                MMDBTypeID.BYTES: self._encode_bytes,
                MMDBTypeID.UINT16: self._encode_uint(MMDBTypeID.UINT16, 2),
                MMDBTypeID.UINT32: self._encode_uint(MMDBTypeID.UINT32, 4),
                MMDBTypeID.MAP: self._encode_map,
                MMDBTypeID.INT32: self._encode_pack_type(MMDBTypeID.INT32, ">i"),
                MMDBTypeID.UINT64: self._encode_uint(MMDBTypeID.UINT64, 8),
                MMDBTypeID.UINT128: self._encode_uint(MMDBTypeID.UINT128, 16),
                MMDBTypeID.ARRAY: self._encode_array,
                MMDBTypeID.BOOLEAN: self._encode_boolean,
                MMDBTypeID.FLOAT: self._encode_pack_type(MMDBTypeID.FLOAT, ">f"),
            }
        return self._type_encoder

    def _make_header(self, type_id, length):
        """Return the control byte(s) for a field of ``type_id`` and ``length``.

        Lengths below 29 fit in the control byte. Larger lengths use the
        marker values 29, 30 and 31 followed by one, two or three extra
        bytes holding the length minus 29, 285 and 65821 respectively.

        Raises:
            Exception: if ``length`` is 16843036 or more.
        """
        if length >= 16843036:
            raise Exception("length >= 16843036")

        elif length >= 65821:
            five_bits = 31
            length -= 65821
            additional_length_bytes = struct.pack(
                ">BBB", (length >> 16) & 0xFF, (length >> 8) & 0xFF, length & 0xFF
            )

        elif length >= 285:
            five_bits = 30
            length -= 285
            additional_length_bytes = struct.pack(
                ">BB", (length >> 8) & 0xFF, length & 0xFF
            )

        elif length >= 29:
            five_bits = 29
            length -= 29
            additional_length_bytes = struct.pack(">B", length & 0xFF)

        else:
            five_bits = length
            additional_length_bytes = b""

        if type_id <= 7:
            res = struct.pack(">B", (type_id << 5) + five_bits)
        else:
            # Extended type: type bits are zero and the next byte holds
            # ``type_id - 7``.
            res = struct.pack(">BB", five_bits, type_id - 7)

        return res + additional_length_bytes

    def python_type_id(self, value):
        """Return the MMDB type id used to encode ``value``.

        Raises:
            ValueError: if ``int_type`` / ``float_type`` is not recognised.
            TypeError: if the type of ``value`` is not supported.
        """
        value_type = type(value)
        type_id = self._python_type_id.get(value_type)
        if type_id:
            return type_id

        if value_type is int:
            if self.int_type == "auto":
                # Negative values need the only signed type; otherwise pick
                # the smallest unsigned type that can hold the value.
                if value < 0:
                    return MMDBTypeID.INT32
                if value <= UINT16_MAX:
                    return MMDBTypeID.UINT16
                if value <= UINT32_MAX:
                    return MMDBTypeID.UINT32
                if value <= UINT64_MAX:
                    return MMDBTypeID.UINT64
                return MMDBTypeID.UINT128
            for aliases, forced_type in self._INT_TYPE_ALIASES:
                if self.int_type in aliases:
                    return forced_type
            raise ValueError(f"unknown int_type={self.int_type}")

        if value_type is float:
            for aliases, forced_type in self._FLOAT_TYPE_ALIASES:
                if self.float_type in aliases:
                    return forced_type
            raise ValueError(f"unknown float_type={self.float_type}")

        if value_type is Decimal:
            return MMDBTypeID.DOUBLE
        raise TypeError(f"unknown type {value_type}")

    def _freeze(self, value):
        """Return a hashable, type-tagged stand-in for ``value``.

        The tag keeps values that compare equal but encode differently
        (``1`` / ``True`` / ``1.0``, or a dict and a list of pairs) from
        sharing a cache entry. Wrapper instances are keyed by wrapper class
        and wrapped value so equal wrapped values are stored once.
        """
        if isinstance(value, dict):
            return (
                dict,
                tuple((self._freeze(k), self._freeze(v)) for k, v in value.items()),
            )
        if isinstance(value, list):
            return (list, tuple(self._freeze(v) for v in value))
        if isinstance(value, MmdbBaseType):
            return (type(value), value.value)
        return (type(value), value)

    def encode_meta(self, meta):
        """Encode the metadata map, forcing the integer type of known keys."""
        res = self._make_header(MMDBTypeID.MAP, len(meta))
        meta_type = {
            "node_count": MMDBTypeID.UINT32,
            "record_size": MMDBTypeID.UINT16,
            "ip_version": MMDBTypeID.UINT16,
            "binary_format_major_version": MMDBTypeID.UINT16,
            "binary_format_minor_version": MMDBTypeID.UINT16,
            "build_epoch": MMDBTypeID.UINT64,
        }
        for k, v in meta.items():
            res += self.encode(k)
            res += self.encode(v, meta_type.get(k))
        return res

    def encode(self, value, type_id=None, return_offset=False):
        """Encode ``value`` and return its bytes, or a reference to them.

        Args:
            value: the Python value to encode.
            type_id: MMDB type to force; derived from ``value`` when falsy.
            return_offset: with caching enabled, return the integer offset
                of the value instead of an encoded pointer. It has no effect
                when caching is disabled.

        Raises:
            ValueError: if ``type_id`` has no encoder.
        """
        if not type_id:
            type_id = self.python_type_id(value)

        cache_key = None
        if self.cache:
            # The resolved type is part of the key so that one value encoded
            # as two different MMDB types gets two separate entries.
            cache_key = (type_id, self._freeze(value))
            try:
                offset = self.data_cache[cache_key]
            except KeyError:
                pass
            else:
                return offset if return_offset else self._encode_pointer(offset)

        try:
            encoder = self.type_encoder[type_id]
        except KeyError as err:
            raise ValueError(f"unknown type_id={type_id}") from err

        if isinstance(value, MmdbBaseType):
            value = value.value
        res = encoder(value)

        if not self.cache:
            return res

        # Nested values were appended while ``encoder`` ran, so the offset
        # must be read only now.
        offset = self.data_pointer
        self.data_list.append(res)
        self.data_pointer += len(res)
        self.data_cache[cache_key] = offset
        return offset if return_offset else self._encode_pointer(offset)
409 |
410 |
class TreeWriter:
    """Serialize a search tree and its metadata into an MMDB file.

    ``write`` emits, in order: one record pair per tree node, a 16-byte
    zero separator, the encoded data section, ``METADATA_MAGIC`` and the
    encoded metadata map.
    """

    encoder_cls = Encoder

    # Size in bytes of the zero-filled separator between tree and data.
    _DATA_SEPARATOR_SIZE = 16
    # Record sizes (in bits) this writer can emit, smallest first.
    _RECORD_SIZES = (24, 28, 32)

    def __init__(
        self,
        tree: "SearchTreeNode",
        meta: dict,
        int_type: IntType = "auto",
        float_type: FloatType = "f64",
    ):
        """
        Args:
            tree: Root node of the search tree to serialize.
            meta: Metadata fields merged into the written metadata map.
            int_type: Integer type option handed to the data encoder.
            float_type: Float type option handed to the data encoder.
        """
        # Tree nodes and leaves are tracked by ``id()``: node id -> node
        # index, leaf id -> data offset (separator included).
        self._node_idx = {}
        self._leaf_offset = {}
        self._node_list = []
        self._node_counter = 0
        self._record_size = 0  # kept for backward compatibility; unused
        # Both are computed by ``_adjust_record_size`` during ``write``.
        self.record_size = 0
        self.data_offset = 0

        self.tree = tree
        self.meta = meta

        self.encoder = self.encoder_cls(
            cache=True, int_type=int_type, float_type=float_type
        )

    @property
    def _data_list(self):
        """Encoded data chunks collected by the caching encoder."""
        return self.encoder.data_list

    @property
    def _data_pointer(self):
        """Size of the encoded data so far, plus the 16-byte separator."""
        return self.encoder.data_pointer + self._DATA_SEPARATOR_SIZE

    def _build_meta(self):
        """Return the metadata map; user fields override the computed ones."""
        return {
            "node_count": self._node_counter,
            "record_size": self.record_size,
            **self.meta,
        }

    def _adjust_record_size(self):
        """Pick the smallest record size able to hold every record value.

        Sets ``self.record_size`` (bits per record) and ``self.data_offset``
        (byte length of the search tree section).

        Raises:
            ValueError: If more than 32 bits per record would be required.
        """
        # Tree records should be large enough to contain either tree node
        # index or data offset.
        max_id = self._node_counter + self._data_pointer + 1

        # Bits needed for values below ``max_id``.  ``bit_length`` is exact,
        # unlike ``math.log(x, 2)`` which can be off by one ulp near powers
        # of two.
        bit_count = (max_id - 1).bit_length()
        for size in self._RECORD_SIZES:
            if bit_count <= size:
                self.record_size = size
                break
        else:
            raise ValueError("record_size > 32")

        # Each node holds two records; keep the byte count an integer.
        self.data_offset = self.record_size * 2 // 8 * self._node_counter

    def _enumerate_nodes(self, node):
        """Number the tree nodes below ``node`` and encode the leaf values.

        Nodes are numbered in pre-order on first visit; each leaf value is
        encoded once and its offset (shifted by the separator size) stored.
        ``None`` children are ignored.
        """
        if type(node) is SearchTreeNode:
            node_id = id(node)
            if node_id not in self._node_idx:
                self._node_idx[node_id] = self._node_counter
                self._node_counter += 1
                self._node_list.append(node)

                self._enumerate_nodes(node.left)
                self._enumerate_nodes(node.right)

        elif type(node) is SearchTreeLeaf:
            node_id = id(node)
            if node_id not in self._leaf_offset:
                offset = self.encoder.encode(node.value, return_offset=True)
                self._leaf_offset[node_id] = offset + self._DATA_SEPARATOR_SIZE
        # Anything else (i.e. ``None``) marks an empty branch: nothing to do.

    def _calc_record_idx(self, node):
        """Return the record value that points at ``node``.

        Returns:
            The node count for an empty branch, the node index for a tree
            node, or the node count plus the stored data offset for a leaf.

        Raises:
            TypeError: If ``node`` is neither ``None``, a tree node nor a leaf.
        """
        if node is None:
            return self._node_counter
        elif type(node) is SearchTreeNode:
            return self._node_idx[id(node)]
        elif type(node) is SearchTreeLeaf:
            return self._leaf_offset[id(node)] + self._node_counter
        else:
            raise TypeError("unexpected type")

    def _cal_node_bytes(self, node) -> bytes:
        """Pack the left and right records of ``node`` as big-endian bytes.

        Raises:
            ValueError: If ``self.record_size`` is not 24, 28 or 32.
        """
        left_idx = self._calc_record_idx(node.left)
        right_idx = self._calc_record_idx(node.right)

        if self.record_size == 24:
            # Two 3-byte records back to back.
            b1 = (left_idx >> 16) & 0xFF
            b2 = (left_idx >> 8) & 0xFF
            b3 = left_idx & 0xFF
            b4 = (right_idx >> 16) & 0xFF
            b5 = (right_idx >> 8) & 0xFF
            b6 = right_idx & 0xFF
            return struct.pack(">BBBBBB", b1, b2, b3, b4, b5, b6)

        elif self.record_size == 28:
            # The middle byte carries the top 4 bits of both records:
            # left in the high nibble, right in the low nibble.
            b1 = (left_idx >> 16) & 0xFF
            b2 = (left_idx >> 8) & 0xFF
            b3 = left_idx & 0xFF
            b4 = ((left_idx >> 24) & 0xF) * 16 + ((right_idx >> 24) & 0xF)
            b5 = (right_idx >> 16) & 0xFF
            b6 = (right_idx >> 8) & 0xFF
            b7 = right_idx & 0xFF
            return struct.pack(">BBBBBBB", b1, b2, b3, b4, b5, b6, b7)

        elif self.record_size == 32:
            return struct.pack(">II", left_idx, right_idx)

        else:
            raise ValueError(f"unsupported record_size={self.record_size}")

    def write(self, fname):
        """Write the complete database to ``fname``.

        Args:
            fname: Path of the file to create or overwrite.
        """
        self._enumerate_nodes(self.tree)
        self._adjust_record_size()

        with open(fname, "wb") as f:
            for node in self._node_list:
                f.write(self._cal_node_bytes(node))

            f.write(b"\x00" * self._DATA_SEPARATOR_SIZE)

            for element in self._data_list:
                f.write(element)

            f.write(METADATA_MAGIC)
            # A fresh non-caching encoder returns the metadata bytes inline
            # instead of pointers into the data section.
            f.write(self.encoder_cls(cache=False).encode_meta(self._build_meta()))
540 |
541 |
def bits_rstrip(n, length=None, keep=0):
    """Yield the leading ``keep`` bits of ``n`` as integers (0 or 1).

    ``n`` is rendered in binary and left-padded with zeros to ``length``
    digits (it is never truncated to ``length``); only the first ``keep``
    digits are returned, i.e. the trailing bits are stripped.

    Args:
        n: Non-negative integer to take the bits from.
        length: Minimum width, in bits, to pad to.  ``None`` means no
            padding (previously this default raised ``TypeError``).
        keep: Number of leading bits to return.

    Returns:
        A lazy iterator over the kept bits, most significant first.
    """
    digits = bin(n)[2:]
    if length is not None:
        digits = digits.rjust(length, "0")
    return map(int, digits[:keep])
544 |
545 |
class MMDBWriter:
    """Collect networks and their content, then write them as an MMDB file."""

    def __init__(
        self,
        ip_version=4,
        database_type="GeoIP",
        languages: List[str] = None,
        description: Union[Dict[str, str], str] = "GeoIP db",
        ipv4_compatible=False,
        int_type: IntType = "auto",
        float_type: FloatType = "f64",
    ):
        """
        Args:
            ip_version: The IP version of the database. Defaults to 4.
            database_type: The type of the database. Defaults to "GeoIP".
            languages: A list of languages. Defaults to [].
            description: A description of the database for every language.
                A plain string is used for every entry of ``languages``.
            ipv4_compatible: Whether the database is compatible with IPv4.
            int_type: The type of integer to use. Defaults to "auto".
            float_type: The type of float to use. Defaults to "f64".

        Raises:
            ValueError: If ``ip_version`` is not 4 or 6, if
                ``ipv4_compatible`` is set for an IPv4 database, or if a
                language has no description.

        Note:
            If you want to store an IPv4 address in an IPv6 database, you should set
            ipv4_compatible=True.

            If you want to use a specific integer type, you can set int_type to
            "u16", "u32", "u64", "u128", or "i32".
        """
        self.tree = SearchTreeNode()
        self.ipv4_compatible = ipv4_compatible

        if languages is None:
            languages = []
        self.description = description
        self.database_type = database_type
        self.ip_version = ip_version
        self.languages = languages
        self.binary_format_major_version = 2
        self.binary_format_minor_version = 0

        # Number of address bits: 128 for IPv6, 32 for IPv4.
        self._bit_length = 128 if ip_version == 6 else 32

        if ip_version not in [4, 6]:
            raise ValueError(f"ip_version should be 4 or 6, {ip_version} is incorrect")
        if ip_version == 4 and ipv4_compatible:
            raise ValueError("ipv4_compatible=True can set when ip_version=6")
        if not self.binary_format_major_version:
            raise ValueError(
                f"major_version can't be empty or 0: {self.binary_format_major_version}"
            )
        if isinstance(description, str):
            self.description = {i: description for i in languages}
        for i in languages:
            if i not in self.description:
                # The language name used to be missing from this message
                # (a literal "{}" was emitted).
                raise ValueError(f"language {i} must have description!")

        self.int_type = int_type
        self.float_type = float_type

    def _normalize_cidr(self, cidr):
        """Check ``cidr`` against the database IP version; convert if needed.

        Returns:
            ``cidr`` itself, or its ``cidr.ipv6(True)`` form when an IPv4
            network goes into an IPv6 database with ``ipv4_compatible``.

        Raises:
            ValueError: If the network's IP version cannot be stored.
        """
        if self.ip_version == 4 and cidr.version == 6:
            raise ValueError(
                f"You inserted a IPv6 address {cidr} to an IPv4-only database."
            )
        if self.ip_version == 6 and cidr.version == 4:
            if not self.ipv4_compatible:
                raise ValueError(
                    f"You inserted a IPv4 address {cidr} to an IPv6 database. "
                    "Please use ipv4_compatible=True option store "
                    "IPv4 address in IPv6 database as ::/96 format"
                )
            return cidr.ipv6(True)
        return cidr

    def insert_network(self, network: IPSet, content: MMDBType):
        """
        Inserts a network into the MaxMind database.

        Args:
            network: The network to be inserted. It should be an instance of
                netaddr.IPSet.
            content: The content associated with the network. It can be a
                dictionary, list, string, bytes, integer, or boolean.


        Raises:
            ValueError: If the network is not an instance of netaddr.IPSet.
            ValueError: If an IPv6 address is inserted into an IPv4-only database.
            ValueError: If an IPv4 address is inserted into an IPv6 database without
                setting ipv4_compatible=True.

        Note:
            This method modifies the internal tree structure of the MMDBWriter instance.
            Inserting a network replaces whatever was stored below its prefix;
            inserting into an existing larger network keeps the larger
            network's content for the addresses the new network does not cover.
        """
        leaf = SearchTreeLeaf(content)
        if not isinstance(network, IPSet):
            raise ValueError("network type should be netaddr.IPSet.")
        for cidr in network.iter_cidrs():
            cidr = self._normalize_cidr(cidr)
            bits = list(bits_rstrip(cidr.value, self._bit_length, cidr.prefixlen))
            if not bits:
                # A /0 network has no prefix bits: it covers both halves of
                # the address space, so both root branches get the leaf
                # (indexing ``bits[-1]`` below would raise IndexError).
                self.tree[0] = leaf
                self.tree[1] = leaf
                continue

            current_node = self.tree
            supernet_leaf = None  # Tracks whether we are inserting into a subnet
            for index, ip_bit in enumerate(bits[:-1]):
                previous_node = current_node
                current_node = previous_node.get_or_create(ip_bit)

                if isinstance(current_node, SearchTreeLeaf):
                    # The path runs through an existing, larger network:
                    # rebuild its prefix for the log message and replace the
                    # leaf with an inner node so the walk can continue.
                    current_cidr = IPNetwork(
                        (
                            int(
                                "".join(map(str, bits[: index + 1])).ljust(
                                    self._bit_length, "0"
                                ),
                                2,
                            ),
                            index + 1,
                        )
                    )
                    logger.info(
                        "Inserting %s (%s) into subnet of %s (%s)",
                        cidr,
                        content,
                        current_cidr,
                        current_node.value,
                    )
                    supernet_leaf = current_node
                    current_node = SearchTreeNode()
                    previous_node[ip_bit] = current_node

                if supernet_leaf is not None:
                    next_bit = bits[index + 1]
                    # Insert supernet information on each inverse bit of
                    # the current subnet
                    current_node[1 - next_bit] = supernet_leaf
            current_node[bits[-1]] = leaf

    def to_db_file(self, filename: str):
        """Write the collected networks to ``filename`` as an MMDB file."""
        return TreeWriter(
            self.tree, self._build_meta(), self.int_type, self.float_type
        ).write(filename)

    def _build_meta(self):
        """Return the metadata fields; ``build_epoch`` is the current time."""
        return {
            "ip_version": self.ip_version,
            "database_type": self.database_type,
            "languages": self.languages,
            "binary_format_major_version": self.binary_format_major_version,
            "binary_format_minor_version": self.binary_format_minor_version,
            "build_epoch": int(time.time()),
            "description": self.description,
        }
692 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["flit_core >=3.2,<4"]
3 | build-backend = "flit_core.buildapi"
4 |
5 | [project]
6 | name = "mmdb_writer"
7 | description = "Make `mmdb` format ip library file which can be read by maxmind official language reader"
8 | readme = "README.md"
9 | license = {file = "LICENSE"}
10 | requires-python = ">=3.8"
11 | keywords = ["mmdb", "maxmind"]
12 | authors = [{ name = "VimT", email = "me@vimt.me" } ]
13 | classifiers = [
14 | "Development Status :: 5 - Production/Stable",
15 | "Intended Audience :: Developers",
16 | "License :: OSI Approved :: MIT License",
17 | "Natural Language :: English",
18 | "Operating System :: OS Independent",
19 | "Programming Language :: Python",
20 | "Programming Language :: Python :: 3",
21 | "Programming Language :: Python :: 3 :: Only",
22 | "Programming Language :: Python :: 3.8",
23 | "Programming Language :: Python :: 3.9",
24 | "Programming Language :: Python :: 3.10",
25 | "Programming Language :: Python :: 3.11",
26 | "Programming Language :: Python :: 3.12",
27 | "Programming Language :: Python :: Implementation :: CPython",
28 | "Programming Language :: Python :: Implementation :: PyPy",
29 | "Topic :: Software Development :: Build Tools",
30 | ]
31 | dependencies = [
32 | "netaddr>=0.7"
33 | ]
34 | dynamic = ["version"]
35 |
36 | [project.optional-dependencies]
37 | test = [
38 | "pytest >=2.7.3",
39 | "pytest-cov",
40 | "numpy",
41 | "maxminddb>=1.5",
42 | ]
43 | dev = [
44 | "ruff"
45 | ]
46 |
47 | [project.urls]
48 | Home = "https://github.com/vimt/MaxMind-DB-Writer-python"
49 | Source = "https://github.com/vimt/MaxMind-DB-Writer-python"
50 | Tracker = "https://github.com/vimt/MaxMind-DB-Writer-python/issues"
51 |
52 | [tool.flit.sdist]
53 | include = ["mmdb_writer.py"]
54 |
55 | [tool.pytest.ini_options]
56 | testpaths = ["tests"]
57 | filterwarnings = [
58 | "error",
59 | ]
60 |
61 | [tool.ruff]
62 | fix = true
63 | show-fixes = true
64 | output-format = "full"
65 |
66 | [tool.ruff.lint]
67 | select = [
68 | "B", # flake8-bugbear
69 | "E", # pycodestyle error
70 | "F", # pyflakes
71 | "I", # isort
72 | "UP", # pyupgrade
73 | "W", # pycodestyle warning
74 | ]
75 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vimt/MaxMind-DB-Writer-python/70f8f2ad6b91db91e9d7b977268a914a888f089a/tests/__init__.py
--------------------------------------------------------------------------------
/tests/clients/go/go.mod:
--------------------------------------------------------------------------------
1 | module mmdb-test
2 |
3 | go 1.22
4 |
5 | require github.com/oschwald/maxminddb-golang v1.12.0
6 |
7 | require golang.org/x/sys v0.10.0 // indirect
8 |
--------------------------------------------------------------------------------
/tests/clients/go/go.sum:
--------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/oschwald/maxminddb-golang v1.12.0 h1:9FnTOD0YOhP7DGxGsq4glzpGy5+w7pq50AS6wALUMYs=
4 | github.com/oschwald/maxminddb-golang v1.12.0/go.mod h1:q0Nob5lTCqyQ8WT6FYgS1L7PXKVVbgiymefNwIjPzgY=
5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
7 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
8 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
9 | golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA=
10 | golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
13 |
--------------------------------------------------------------------------------
/tests/clients/go/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "flag"
6 | "fmt"
7 | "github.com/oschwald/maxminddb-golang"
8 | "log"
9 | "math/big"
10 | "net"
11 | "os"
12 | )
13 |
14 | var (
15 | db = flag.String("db", "", "Path to the MaxMind DB file")
16 | ip = flag.String("ip", "", "IP address to look up")
17 | )
18 |
// Record is the structure a lookup result is decoded into and that is then
// re-encoded as JSON. On every field the `maxminddb` tag names the key read
// from the database record and the `json` tag names the key in the output.
// The fields are marshalled in declaration order, so keep the order stable.
type Record struct {
	I32    int            `json:"i32" maxminddb:"i32"`
	F32    float32        `json:"f32" maxminddb:"f32"`
	F64    float64        `json:"f64" maxminddb:"f64"`
	U16    uint16         `json:"u16" maxminddb:"u16"`
	U32    uint32         `json:"u32" maxminddb:"u32"`
	U64    uint64         `json:"u64" maxminddb:"u64"`
	U128   *big.Int       `json:"u128" maxminddb:"u128"`
	Array  []any          `json:"array" maxminddb:"array"`
	Map    map[string]any `json:"map" maxminddb:"map"`
	Bytes  []byte         `json:"bytes" maxminddb:"bytes"`
	String string         `json:"string" maxminddb:"string"`
	Bool   bool           `json:"bool" maxminddb:"bool"`
}
33 |
34 | func main() {
35 | flag.Parse()
36 | if *db == "" || *ip == "" {
37 | flag.PrintDefaults()
38 | os.Exit(1)
39 | }
40 | db, err := maxminddb.Open(*db)
41 | if err != nil {
42 | log.Fatal(err)
43 | }
44 | defer db.Close()
45 |
46 | ip := net.ParseIP(*ip)
47 |
48 | var record Record
49 |
50 | err = db.Lookup(ip, &record)
51 | if err != nil {
52 | log.Panic(err)
53 | }
54 | data, err := json.Marshal(record)
55 | if err != nil {
56 | log.Panic(err)
57 | }
58 | fmt.Println(string(data))
59 | }
60 |
--------------------------------------------------------------------------------
/tests/clients/java/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | !.mvn/wrapper/maven-wrapper.jar
3 | !**/src/main/**/target/
4 | !**/src/test/**/target/
5 |
6 | ### IntelliJ IDEA ###
7 | .idea/modules.xml
8 | .idea/jarRepositories.xml
9 | .idea/compiler.xml
10 | .idea/libraries/
11 | *.iws
12 | *.iml
13 | *.ipr
14 |
15 | ### Eclipse ###
16 | .apt_generated
17 | .classpath
18 | .factorypath
19 | .project
20 | .settings
21 | .springBeans
22 | .sts4-cache
23 |
24 | ### NetBeans ###
25 | /nbproject/private/
26 | /nbbuild/
27 | /dist/
28 | /nbdist/
29 | /.nb-gradle/
30 | build/
31 | !**/src/main/**/build/
32 | !**/src/test/**/build/
33 |
34 | ### VS Code ###
35 | .vscode/
36 |
37 | ### Mac OS ###
38 | .DS_Store
--------------------------------------------------------------------------------
/tests/clients/java/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | me.vime
8 | mmdb-test
9 | 1.0-SNAPSHOT
10 |
11 |
12 | 22
13 | 22
14 | UTF-8
15 |
16 |
17 |
18 | com.maxmind.db
19 | maxmind-db
20 | 3.1.0
21 |
22 |
23 | args4j
24 | args4j
25 | 2.33
26 |
27 |
28 | com.google.code.gson
29 | gson
30 | 2.10.1
31 |
32 |
33 |
34 | ${project.artifactId}
35 |
36 |
37 | org.apache.maven.plugins
38 | maven-assembly-plugin
39 |
40 |
41 | package
42 |
43 | single
44 |
45 |
46 |
47 |
48 |
49 | Main
50 |
51 |
52 |
53 |
54 | jar-with-dependencies
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
--------------------------------------------------------------------------------
/tests/clients/java/src/main/java/Main.java:
--------------------------------------------------------------------------------
1 | import com.google.gson.Gson;
2 | import com.maxmind.db.MaxMindDbConstructor;
3 | import com.maxmind.db.MaxMindDbParameter;
4 | import com.maxmind.db.Reader;
5 | import org.kohsuke.args4j.CmdLineParser;
6 | import org.kohsuke.args4j.Option;
7 |
8 | import java.io.File;
9 | import java.io.IOException;
10 | import java.math.BigInteger;
11 | import java.net.InetAddress;
12 | import java.util.List;
13 | import java.util.Map;
14 |
15 | public class Main {
16 | @Option(name = "-db", usage = "Path to the MMDB file", required = true)
17 | private String databasePath;
18 |
19 | @Option(name = "-ip", usage = "IP address to lookup", required = true)
20 | private String ipAddress;
21 |
22 | public static void main(String[] args) throws Exception {
23 | Main lookup = new Main();
24 | CmdLineParser parser = new CmdLineParser(lookup);
25 | parser.parseArgument(args);
26 |
27 | lookup.run();
28 | }
29 |
30 | public void run() throws IOException {
31 | File database = new File(databasePath);
32 | Gson gson = new Gson();
33 |
34 | try (Reader reader = new Reader(database)) {
35 | InetAddress address = InetAddress.getByName(ipAddress);
36 |
37 | Record result = reader.get(address, Record.class);
38 | String jsonResult = gson.toJson(result);
39 | System.out.println(jsonResult);
40 | }
41 | }
42 |
43 |
44 | public static class Record {
45 | private Integer i32;
46 | private Float f32;
47 | private Double f64;
48 | private Integer u16;
49 | private Long u32;
50 | private BigInteger u64;
51 | private BigInteger u128;
52 | private List