├── .github
    ├── pull_request_template.md
    └── workflows
    │   └── ci.yml
├── .gitignore
├── LICENSE
├── README.md
├── examples
    ├── __init__.py
    ├── csv_to_mmdb.py
    └── fake_ip_info.csv
├── mmdb_writer.py
├── pyproject.toml
├── tests
    ├── __init__.py
    ├── clients
    │   ├── go
    │   │   ├── go.mod
    │   │   ├── go.sum
    │   │   └── main.go
    │   └── java
    │   │   ├── .gitignore
    │   │   ├── pom.xml
    │   │   └── src
    │   │       └── main
    │   │           └── java
    │   │               └── Main.java
    ├── record.py
    ├── test_api.py
    └── test_clients.py
└── tox.ini


/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | ## Which issue does this PR close?
 2 | 
 3 | <!--
 4 | We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123.
 5 | -->
 6 | 
 7 | Closes #.
 8 | 
 9 | ## Rationale for this change
10 | 
11 | <!--
12 |  Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed.
13 |  Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes.  
14 | -->
15 | 
16 | ## What changes are included in this PR?
17 | 
18 | <!--
19 | There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR.
20 | -->
21 | 
22 | ## Are these changes tested?
23 | 
24 | <!--
25 | We typically require tests for all PRs in order to:
26 | 1. Prevent the code from being accidentally broken by subsequent changes
27 | 2. Serve as another way to document the expected behavior of the code
28 | 
29 | If tests are not included in your PR, please explain why (for example, are they covered by existing tests)?
30 | -->
31 | 
32 | ## Are there any user-facing changes?
33 | 
34 | <!--
35 | If there are user-facing changes then we may require documentation to be updated before approving the PR.
36 | -->
37 | 
38 | <!--
39 | If there are any breaking changes to public APIs, please add the `api change` label.
40 | -->
41 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "master", "ci/*" ]
 6 |   pull_request:
 7 |     branches: [ "master" ]
 8 | 
 9 | permissions:
10 |   contents: read
11 | 
12 | jobs:
13 |   test:
14 |     name: ci ${{ matrix.python-version }}
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       matrix:
18 |         python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
19 |     steps:
20 |       - uses: actions/checkout@v4
21 |       - name: Setup Go environment
22 |         uses: actions/setup-go@v5
23 |         with:
24 |           go-version: '1.22'
25 |           cache-dependency-path: tests/clients/go/go.sum
26 |       - name: Setup Java JDK
27 |         uses: actions/setup-java@v4
28 |         with:
29 |           distribution: temurin
30 |           java-version: 22
31 |           cache: maven
32 |           cache-dependency-path: tests/clients/java/pom.xml
33 |       - name: Set up Python
34 |         uses: actions/setup-python@v5
35 |         with:
36 |           python-version: ${{ matrix.python-version }}
37 |           allow-prereleases: true
38 |           check-latest: true
39 |       - name: Install dependencies
40 |         run: |
41 |           sudo apt install libmaxminddb0 libmaxminddb-dev
42 |           python -m pip install --upgrade pip
43 |           pip install tox
44 |       - name: Test
45 |         run: tox -e py
46 |       - name: Lint
47 |         if: matrix.python-version == '3.12'
48 |         run: tox -e lint
49 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # pycharm
132 | .idea/
133 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 VimT
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | - [MaxMind-DB-Writer-python](#maxmind-db-writer-python)
  2 |     * [Install](#install)
  3 |     * [Usage](#usage)
  4 |     * [Examples](#examples)
  5 |     * [Using the Java Client](#using-the-java-client)
  6 |         + [TLDR](#tldr)
  7 |         + [Underlying Principles](#underlying-principles)
  8 |     * [Type Enforcement](#type-enforcement)
  9 |     * [Reference:](#reference)
 10 | 
 11 | # MaxMind-DB-Writer-python
 12 | 
 13 | Create `mmdb`-format IP database files that can be read by the
 14 | [official `maxmind` readers](https://dev.maxmind.com/geoip/geoip2/downloadable/).
 15 | 
 16 | ~~[The official perl writer](https://github.com/maxmind/MaxMind-DB-Writer-perl) was written in perl,
 17 | which was difficult to customize.
 18 | So I implemented the `MaxmindDB format` ip library in python language.~~
 19 | 
 20 | MaxMind has now released an official Go version of the MMDB writer.
 21 | If you prefer using Go, you can check out the official Go
 22 | implementation [mmdbwriter](https://github.com/maxmind/mmdbwriter).
 23 | This project still provides a Python alternative for those who need it.
 24 | 
 25 | ## Install
 26 | 
 27 | ```shell script
 28 | pip install -U mmdb_writer
 29 | ```
 30 | 
 31 | ## Usage
 32 | 
 33 | ```python
 34 | from netaddr import IPSet
 35 | 
 36 | from mmdb_writer import MMDBWriter
 37 | 
 38 | writer = MMDBWriter()
 39 | 
 40 | writer.insert_network(IPSet(['1.1.0.0/24', '1.1.1.0/24']), {'country': 'COUNTRY', 'isp': 'ISP'})
 41 | writer.to_db_file('test.mmdb')
 42 | 
 43 | import maxminddb
 44 | 
 45 | m = maxminddb.open_database('test.mmdb')
 46 | r = m.get('1.1.1.1')
 47 | assert r == {'country': 'COUNTRY', 'isp': 'ISP'}
 48 | ```
 49 | 
 50 | ## Examples
 51 | 
 52 | See [csv_to_mmdb.py](./examples/csv_to_mmdb.py) for a complete example.
 53 | 
 54 | 
 55 | ## Using the Java Client
 56 | ### TLDR
 57 | If you are using the Java client, you need to be careful to set the `int_type` parameter so that Java correctly
 58 | recognizes the integer type in the MMDB file.
 59 | 
 60 | Example:
 61 | 
 62 | ```python
 63 | from mmdb_writer import MMDBWriter
 64 | 
 65 | writer = MMDBWriter(int_type='i32')
 66 | ```
 67 | 
 68 | Alternatively, you can set the type of individual values explicitly, as described in the [Type Enforcement](#type-enforcement) section.
 69 | 
 70 | ### Underlying Principles
 71 | 
 72 | In Java, when deserializing to a structure, the numeric types will use the original MMDB numeric types. The specific
 73 | conversion relationships are as follows:
 74 | 
 75 | | mmdb type | java type  |
 76 | |-----------|------------|
 77 | | float     | Float      |
 78 | | double    | Double     |
 79 | | int32     | Integer    |
 80 | | uint16    | Integer    |
 81 | | uint32    | Long       |
 82 | | uint64    | BigInteger |
 83 | | uint128   | BigInteger |
 84 | 
 85 | When using the Python writer to generate an MMDB file, by default, it converts integers to the corresponding MMDB type
 86 | based on the size of the `int`. For instance, `int(1)` would convert to `uint16`, and `int(2**16+1)` would convert
 87 | to `uint32`. This may cause deserialization failures in Java clients. Therefore, it is necessary to specify
 88 | the `int_type` parameter when generating MMDB files to define the numeric type accurately.
 89 | 
 90 | ## Type Enforcement
 91 | 
 92 | MMDB supports a variety of numeric types such as `int32`, `uint16`, `uint32`, `uint64`, `uint128` for integers,
 93 | and `f32`, `f64` for floating points, while Python only has one integer type and one float type (actually `f64`).
 94 | 
 95 | Therefore, when generating an MMDB file, you need to specify the `int_type` parameter to define the numeric type of the
 96 | MMDB file. The behaviors for different `int_type` settings are:
 97 | 
 98 | | int_type       | Behavior                                                                                                                                                                                                                                                      |
 99 | |----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
100 | | auto (default) | Automatically selects the MMDB numeric type based on the value size. <br/>Rules: <br/>`int32` for value < 0 <br/>`uint16` for 0 <= value < 2^16<br/>`uint32` for 2^16 <= value < 2^32<br/>`uint64` for 2^32 <= value < 2^64<br/> `uint128` for value >= 2^64. |
101 | | i32            | Stores all integer types as `int32`.                                                                                                                                                                                                                          |
102 | | u16            | Stores all integer types as `uint16`.                                                                                                                                                                                                                         |
103 | | u32            | Stores all integer types as `uint32`.                                                                                                                                                                                                                         |
104 | | u64            | Stores all integer types as `uint64`.                                                                                                                                                                                                                         |
105 | | u128           | Stores all integer types as `uint128`.                                                                                                                                                                                                                        |
106 | 
107 | If you want to use different int types for different scenarios, you can use type wrapping:
108 | 
109 | ```python
110 | from netaddr import IPSet
111 | from mmdb_writer import MMDBWriter, MmdbI32, MmdbF32
112 | writer = MMDBWriter()
113 | # the value of field "i32" will be stored as int32 type
114 | writer.insert_network(IPSet(["1.0.0.0/24"]), {"i32": MmdbI32(128), "f32": MmdbF32(1.22)})
115 | ```
116 | 
117 | ## Reference:
118 | 
119 | - [MaxmindDB format](http://maxmind.github.io/MaxMind-DB/)
120 | - [geoip-mmdb](https://github.com/i-rinat/geoip-mmdb)
121 | 


--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vimt/MaxMind-DB-Writer-python/70f8f2ad6b91db91e9d7b977268a914a888f089a/examples/__init__.py


--------------------------------------------------------------------------------
/examples/csv_to_mmdb.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | from collections import defaultdict
 3 | 
 4 | from netaddr import IPNetwork, IPSet
 5 | 
 6 | from mmdb_writer import MMDBWriter
 7 | 
 8 | 
 9 | def main():
10 |     writer = MMDBWriter(
11 |         4, "Test.GeoIP", languages=["EN"], description="Test IP library"
12 |     )
13 |     data = defaultdict(list)
14 | 
15 |     # merge cidr
16 |     with open("fake_ip_info.csv") as f:
17 |         reader = csv.DictReader(f)
18 |         for line in reader:
19 |             data[(line["country"], line["isp"])].append(
20 |                 IPNetwork(f'{line["ip"]}/{line["prefixlen"]}')
21 |             )
22 |     for index, cidrs in data.items():
23 |         writer.insert_network(IPSet(cidrs), {"country": index[0], "isp": index[1]})
24 |     writer.to_db_file("fake_ip_library.mmdb")
25 | 
26 | 
def test_read():
    """Open ``fake_ip_library.mmdb`` and print the record stored for 3.1.1.1."""
    import maxminddb

    # Use the reader as a context manager so the database is closed again
    # instead of staying open until interpreter exit.
    with maxminddb.open_database("fake_ip_library.mmdb") as m:
        r = m.get("3.1.1.1")
    print(r)
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     main()
37 | 


--------------------------------------------------------------------------------
/examples/fake_ip_info.csv:
--------------------------------------------------------------------------------
  1 | ip,prefixlen,country,isp
  2 | 1.0.0.0,8,country6,isp6
  3 | 2.0.0.0,8,country6,isp5
  4 | 3.0.0.0,8,country5,isp3
  5 | 4.0.0.0,8,country3,isp3
  6 | 5.0.0.0,8,country9,isp3
  7 | 6.0.0.0,8,country8,isp0
  8 | 7.0.0.0,8,country9,isp2
  9 | 8.0.0.0,8,country7,isp2
 10 | 9.0.0.0,8,country3,isp8
 11 | 10.0.0.0,8,country3,isp6
 12 | 11.0.0.0,8,country6,isp5
 13 | 12.0.0.0,8,country9,isp5
 14 | 13.0.0.0,8,country4,isp1
 15 | 14.0.0.0,8,country9,isp3
 16 | 15.0.0.0,8,country6,isp8
 17 | 16.0.0.0,8,country6,isp1
 18 | 17.0.0.0,8,country9,isp2
 19 | 18.0.0.0,8,country2,isp8
 20 | 19.0.0.0,8,country9,isp4
 21 | 20.0.0.0,8,country1,isp3
 22 | 21.0.0.0,8,country8,isp3
 23 | 22.0.0.0,8,country0,isp2
 24 | 23.0.0.0,8,country7,isp9
 25 | 24.0.0.0,8,country6,isp0
 26 | 25.0.0.0,8,country2,isp0
 27 | 26.0.0.0,8,country3,isp8
 28 | 27.0.0.0,8,country1,isp2
 29 | 28.0.0.0,8,country9,isp2
 30 | 29.0.0.0,8,country0,isp1
 31 | 30.0.0.0,8,country1,isp9
 32 | 31.0.0.0,8,country3,isp5
 33 | 32.0.0.0,8,country1,isp4
 34 | 33.0.0.0,8,country3,isp0
 35 | 34.0.0.0,8,country8,isp4
 36 | 35.0.0.0,8,country7,isp3
 37 | 36.0.0.0,8,country5,isp0
 38 | 37.0.0.0,8,country1,isp8
 39 | 38.0.0.0,8,country9,isp6
 40 | 39.0.0.0,8,country1,isp4
 41 | 40.0.0.0,8,country7,isp2
 42 | 41.0.0.0,8,country5,isp7
 43 | 42.0.0.0,8,country0,isp1
 44 | 43.0.0.0,8,country0,isp8
 45 | 44.0.0.0,8,country1,isp5
 46 | 45.0.0.0,8,country9,isp2
 47 | 46.0.0.0,8,country5,isp0
 48 | 47.0.0.0,8,country1,isp2
 49 | 48.0.0.0,8,country6,isp2
 50 | 49.0.0.0,8,country4,isp9
 51 | 50.0.0.0,8,country7,isp1
 52 | 51.0.0.0,8,country7,isp3
 53 | 52.0.0.0,8,country1,isp5
 54 | 53.0.0.0,8,country7,isp1
 55 | 54.0.0.0,8,country9,isp4
 56 | 55.0.0.0,8,country2,isp9
 57 | 56.0.0.0,8,country4,isp0
 58 | 57.0.0.0,8,country7,isp1
 59 | 58.0.0.0,8,country3,isp3
 60 | 59.0.0.0,8,country3,isp0
 61 | 60.0.0.0,8,country0,isp6
 62 | 61.0.0.0,8,country7,isp8
 63 | 62.0.0.0,8,country3,isp5
 64 | 63.0.0.0,8,country4,isp2
 65 | 64.0.0.0,8,country5,isp4
 66 | 65.0.0.0,8,country5,isp4
 67 | 66.0.0.0,8,country6,isp9
 68 | 67.0.0.0,8,country3,isp0
 69 | 68.0.0.0,8,country3,isp5
 70 | 69.0.0.0,8,country1,isp3
 71 | 70.0.0.0,8,country5,isp1
 72 | 71.0.0.0,8,country5,isp9
 73 | 72.0.0.0,8,country1,isp3
 74 | 73.0.0.0,8,country5,isp6
 75 | 74.0.0.0,8,country4,isp8
 76 | 75.0.0.0,8,country6,isp6
 77 | 76.0.0.0,8,country2,isp9
 78 | 77.0.0.0,8,country1,isp5
 79 | 78.0.0.0,8,country3,isp3
 80 | 79.0.0.0,8,country5,isp6
 81 | 80.0.0.0,8,country3,isp8
 82 | 81.0.0.0,8,country0,isp4
 83 | 82.0.0.0,8,country0,isp9
 84 | 83.0.0.0,8,country4,isp3
 85 | 84.0.0.0,8,country9,isp6
 86 | 85.0.0.0,8,country5,isp7
 87 | 86.0.0.0,8,country6,isp7
 88 | 87.0.0.0,8,country0,isp6
 89 | 88.0.0.0,8,country8,isp3
 90 | 89.0.0.0,8,country1,isp1
 91 | 90.0.0.0,8,country3,isp4
 92 | 91.0.0.0,8,country3,isp9
 93 | 92.0.0.0,8,country1,isp7
 94 | 93.0.0.0,8,country3,isp9
 95 | 94.0.0.0,8,country5,isp1
 96 | 95.0.0.0,8,country5,isp8
 97 | 96.0.0.0,8,country5,isp7
 98 | 97.0.0.0,8,country7,isp1
 99 | 98.0.0.0,8,country9,isp0
100 | 99.0.0.0,8,country3,isp8
101 | 100.0.0.0,8,country1,isp1
102 | 101.0.0.0,8,country9,isp9
103 | 102.0.0.0,8,country4,isp3
104 | 103.0.0.0,8,country7,isp6
105 | 104.0.0.0,8,country2,isp1
106 | 105.0.0.0,8,country6,isp6
107 | 106.0.0.0,8,country4,isp3
108 | 107.0.0.0,8,country4,isp2
109 | 108.0.0.0,8,country5,isp4
110 | 109.0.0.0,8,country4,isp1
111 | 110.0.0.0,8,country3,isp1
112 | 111.0.0.0,8,country7,isp5
113 | 112.0.0.0,8,country9,isp5
114 | 113.0.0.0,8,country4,isp8
115 | 114.0.0.0,8,country2,isp3
116 | 115.0.0.0,8,country7,isp5
117 | 116.0.0.0,8,country6,isp0
118 | 117.0.0.0,8,country3,isp1
119 | 118.0.0.0,8,country0,isp1
120 | 119.0.0.0,8,country7,isp2
121 | 120.0.0.0,8,country6,isp8
122 | 121.0.0.0,8,country4,isp3
123 | 122.0.0.0,8,country9,isp1
124 | 123.0.0.0,8,country2,isp4
125 | 124.0.0.0,8,country0,isp3
126 | 125.0.0.0,8,country2,isp4
127 | 126.0.0.0,8,country3,isp2
128 | 127.0.0.0,8,country2,isp8
129 | 128.0.0.0,8,country8,isp8
130 | 129.0.0.0,8,country8,isp6
131 | 130.0.0.0,8,country1,isp7
132 | 131.0.0.0,8,country1,isp8
133 | 132.0.0.0,8,country1,isp0
134 | 133.0.0.0,8,country7,isp1
135 | 134.0.0.0,8,country5,isp1
136 | 135.0.0.0,8,country7,isp4
137 | 136.0.0.0,8,country5,isp3
138 | 137.0.0.0,8,country3,isp5
139 | 138.0.0.0,8,country7,isp6
140 | 139.0.0.0,8,country1,isp8
141 | 140.0.0.0,8,country0,isp5
142 | 141.0.0.0,8,country4,isp9
143 | 142.0.0.0,8,country1,isp8
144 | 143.0.0.0,8,country4,isp8
145 | 144.0.0.0,8,country5,isp2
146 | 145.0.0.0,8,country5,isp4
147 | 146.0.0.0,8,country0,isp5
148 | 147.0.0.0,8,country1,isp6
149 | 148.0.0.0,8,country1,isp0
150 | 149.0.0.0,8,country1,isp1
151 | 150.0.0.0,8,country2,isp2
152 | 151.0.0.0,8,country6,isp4
153 | 152.0.0.0,8,country3,isp2
154 | 153.0.0.0,8,country4,isp4
155 | 154.0.0.0,8,country5,isp2
156 | 155.0.0.0,8,country5,isp6
157 | 156.0.0.0,8,country9,isp4
158 | 157.0.0.0,8,country7,isp5
159 | 158.0.0.0,8,country7,isp9
160 | 159.0.0.0,8,country5,isp9
161 | 160.0.0.0,8,country1,isp9
162 | 161.0.0.0,8,country3,isp9
163 | 162.0.0.0,8,country7,isp4
164 | 163.0.0.0,8,country4,isp6
165 | 164.0.0.0,8,country1,isp7
166 | 165.0.0.0,8,country7,isp1
167 | 166.0.0.0,8,country8,isp7
168 | 167.0.0.0,8,country9,isp3
169 | 168.0.0.0,8,country7,isp3
170 | 169.0.0.0,8,country9,isp3
171 | 170.0.0.0,8,country0,isp0
172 | 171.0.0.0,8,country3,isp1
173 | 172.0.0.0,8,country5,isp4
174 | 173.0.0.0,8,country6,isp6
175 | 174.0.0.0,8,country0,isp4
176 | 175.0.0.0,8,country7,isp4
177 | 176.0.0.0,8,country3,isp7
178 | 177.0.0.0,8,country4,isp1
179 | 178.0.0.0,8,country5,isp6
180 | 179.0.0.0,8,country0,isp8
181 | 180.0.0.0,8,country0,isp5
182 | 181.0.0.0,8,country1,isp3
183 | 182.0.0.0,8,country1,isp1
184 | 183.0.0.0,8,country5,isp4
185 | 184.0.0.0,8,country6,isp6
186 | 185.0.0.0,8,country3,isp8
187 | 186.0.0.0,8,country1,isp4
188 | 187.0.0.0,8,country8,isp1
189 | 188.0.0.0,8,country5,isp0
190 | 189.0.0.0,8,country6,isp4
191 | 190.0.0.0,8,country5,isp6
192 | 191.0.0.0,8,country3,isp7
193 | 192.0.0.0,8,country8,isp2
194 | 193.0.0.0,8,country6,isp2
195 | 194.0.0.0,8,country0,isp5
196 | 195.0.0.0,8,country0,isp6
197 | 196.0.0.0,8,country6,isp9
198 | 197.0.0.0,8,country7,isp8
199 | 198.0.0.0,8,country4,isp2
200 | 199.0.0.0,8,country3,isp8
201 | 200.0.0.0,8,country3,isp3
202 | 201.0.0.0,8,country1,isp3
203 | 202.0.0.0,8,country7,isp0
204 | 203.0.0.0,8,country1,isp7
205 | 204.0.0.0,8,country6,isp9
206 | 205.0.0.0,8,country9,isp0
207 | 206.0.0.0,8,country4,isp6
208 | 207.0.0.0,8,country8,isp4
209 | 208.0.0.0,8,country0,isp5
210 | 209.0.0.0,8,country2,isp6
211 | 210.0.0.0,8,country4,isp0
212 | 211.0.0.0,8,country7,isp2
213 | 212.0.0.0,8,country6,isp8
214 | 213.0.0.0,8,country5,isp6
215 | 214.0.0.0,8,country3,isp4
216 | 215.0.0.0,8,country2,isp4
217 | 216.0.0.0,8,country6,isp8
218 | 217.0.0.0,8,country7,isp4
219 | 218.0.0.0,8,country2,isp0
220 | 219.0.0.0,8,country6,isp3
221 | 220.0.0.0,8,country1,isp0
222 | 221.0.0.0,8,country5,isp3
223 | 222.0.0.0,8,country9,isp7
224 | 223.0.0.0,8,country6,isp0
225 | 224.0.0.0,8,country6,isp9
226 | 225.0.0.0,8,country7,isp1
227 | 226.0.0.0,8,country5,isp9
228 | 227.0.0.0,8,country5,isp9
229 | 228.0.0.0,8,country7,isp6
230 | 229.0.0.0,8,country1,isp0
231 | 230.0.0.0,8,country5,isp8
232 | 231.0.0.0,8,country3,isp0
233 | 232.0.0.0,8,country8,isp4
234 | 233.0.0.0,8,country6,isp3
235 | 234.0.0.0,8,country5,isp0
236 | 235.0.0.0,8,country4,isp6
237 | 236.0.0.0,8,country8,isp8
238 | 237.0.0.0,8,country2,isp5
239 | 238.0.0.0,8,country6,isp3
240 | 239.0.0.0,8,country4,isp8
241 | 240.0.0.0,8,country4,isp0
242 | 241.0.0.0,8,country7,isp3
243 | 242.0.0.0,8,country0,isp4
244 | 243.0.0.0,8,country2,isp8
245 | 244.0.0.0,8,country3,isp4
246 | 245.0.0.0,8,country5,isp6
247 | 246.0.0.0,8,country0,isp6
248 | 247.0.0.0,8,country8,isp5
249 | 248.0.0.0,8,country8,isp4
250 | 249.0.0.0,8,country4,isp0
251 | 250.0.0.0,8,country3,isp9
252 | 251.0.0.0,8,country4,isp2
253 | 252.0.0.0,8,country3,isp3
254 | 253.0.0.0,8,country5,isp1
255 | 254.0.0.0,8,country1,isp4
256 | 255.0.0.0,8,country8,isp1
257 | 


--------------------------------------------------------------------------------
/mmdb_writer.py:
--------------------------------------------------------------------------------
  1 | __version__ = "0.2.5"
  2 | 
  3 | import logging
  4 | import math
  5 | import struct
  6 | import time
  7 | from decimal import Decimal
  8 | from enum import IntEnum
  9 | from typing import Dict, List, Literal, Union
 10 | 
 11 | from netaddr import IPNetwork, IPSet
 12 | 
 13 | 
 14 | class MmdbBaseType:
 15 |     def __init__(self, value):
 16 |         self.value = value
 17 | 
 18 | 
 19 | # type hint
 20 | class MmdbF32(MmdbBaseType):
 21 |     def __init__(self, value: float):
 22 |         super().__init__(value)
 23 | 
 24 | 
 25 | class MmdbF64(MmdbBaseType):
 26 |     def __init__(self, value: Union[float, Decimal]):
 27 |         super().__init__(value)
 28 | 
 29 | 
 30 | class MmdbI32(MmdbBaseType):
 31 |     def __init__(self, value: int):
 32 |         super().__init__(value)
 33 | 
 34 | 
 35 | class MmdbU16(MmdbBaseType):
 36 |     def __init__(self, value: int):
 37 |         super().__init__(value)
 38 | 
 39 | 
 40 | class MmdbU32(MmdbBaseType):
 41 |     def __init__(self, value: int):
 42 |         super().__init__(value)
 43 | 
 44 | 
 45 | class MmdbU64(MmdbBaseType):
 46 |     def __init__(self, value: int):
 47 |         super().__init__(value)
 48 | 
 49 | 
 50 | class MmdbU128(MmdbBaseType):
 51 |     def __init__(self, value: int):
 52 |         super().__init__(value)
 53 | 
 54 | 
 55 | MMDBType = Union[
 56 |     dict,
 57 |     list,
 58 |     str,
 59 |     bytes,
 60 |     int,
 61 |     bool,
 62 |     MmdbF32,
 63 |     MmdbF64,
 64 |     MmdbI32,
 65 |     MmdbU16,
 66 |     MmdbU32,
 67 |     MmdbU64,
 68 |     MmdbU128,
 69 | ]
 70 | 
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Byte marker associated with the metadata section of an MMDB file.
# NOTE(review): the code that writes it is outside this view; the MaxMind DB
# format spec describes this sequence as the separator preceding the metadata.
METADATA_MAGIC = b"\xab\xcd\xefMaxMind.com"
 74 | 
 75 | 
class MMDBTypeID(IntEnum):
    """Integer type IDs handed to the encoder when it builds a field header.

    NOTE(review): the numbers appear to follow the data-type table of the
    MaxMind DB format specification; confirm against the spec before changing
    any value.
    """

    POINTER = 1
    STRING = 2
    DOUBLE = 3
    BYTES = 4
    UINT16 = 5
    UINT32 = 6
    MAP = 7
    INT32 = 8
    UINT64 = 9
    UINT128 = 10
    ARRAY = 11
    DATA_CACHE = 12
    END_MARKER = 13
    BOOLEAN = 14
    FLOAT = 15
 92 | 
 93 | 
 94 | UINT16_MAX = 0xFFFF
 95 | UINT32_MAX = 0xFFFFFFFF
 96 | UINT64_MAX = 0xFFFFFFFFFFFFFFFF
 97 | 
 98 | 
 99 | class SearchTreeNode:
100 |     def __init__(self, left=None, right=None):
101 |         self.left = left
102 |         self.right = right
103 | 
104 |     def get_or_create(self, item):
105 |         if item == 0:
106 |             self.left = self.left or SearchTreeNode()
107 |             return self.left
108 |         elif item == 1:
109 |             self.right = self.right or SearchTreeNode()
110 |             return self.right
111 | 
112 |     def __getitem__(self, item):
113 |         if item == 0:
114 |             return self.left
115 |         elif item == 1:
116 |             return self.right
117 | 
118 |     def __setitem__(self, key, value):
119 |         if key == 0:
120 |             self.left = value
121 |         elif key == 1:
122 |             self.right = value
123 | 
124 | 
class SearchTreeLeaf:
    """Tree entry that only carries a ``value``."""

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        # The value is rendered with str-style formatting, not repr.
        return "SearchTreeLeaf(value={})".format(self.value)

    __str__ = __repr__
133 | 
134 | 
# Annotation for the encoder's `int_type` option: "auto", a short ("u16") or
# long ("uint16") type name, or one of the integer wrapper types.
# NOTE(review): how each spelling is interpreted is decided outside this view.
IntType = Union[
    Literal[
        "auto",
        "u16",
        "u32",
        "u64",
        "u128",
        "i32",
        "uint16",
        "uint32",
        "uint64",
        "uint128",
        "int32",
    ],
    MmdbU16,
    MmdbU32,
    MmdbU64,
    MmdbU128,
    MmdbI32,
]
# Annotation for the encoder's `float_type` option: a short ("f32") or long
# ("float32") type name, or one of the float wrapper types.
FloatType = Union[Literal["f32", "f64", "float32", "float64"], MmdbF32, MmdbF64]
156 | 
157 | 
158 | class Encoder:
    def __init__(
        self, cache=True, int_type: IntType = "auto", float_type: FloatType = "f64"
    ):
        """Store the encoder options and set up empty encoding state.

        :param cache: kept as ``self.cache``; presumably toggles reuse of
            already-encoded values (the code reading it is outside this view).
        :param int_type: kept as ``self.int_type``; defaults to ``"auto"``.
        :param float_type: kept as ``self.float_type``; defaults to ``"f64"``.
        """
        self.cache = cache
        self.int_type = int_type
        self.float_type = float_type

        # Bookkeeping for encoded data; it is filled by code outside this view.
        self.data_cache = {}
        self.data_list = []
        self.data_pointer = 0
        # Python type / wrapper class -> MMDB type ID.
        # Plain ``int`` has no entry here.
        # NOTE(review): presumably ints are resolved through ``int_type`` elsewhere.
        self._python_type_id = {
            float: MMDBTypeID.DOUBLE,
            bool: MMDBTypeID.BOOLEAN,
            list: MMDBTypeID.ARRAY,
            dict: MMDBTypeID.MAP,
            bytes: MMDBTypeID.BYTES,
            str: MMDBTypeID.STRING,
            MmdbF32: MMDBTypeID.FLOAT,
            MmdbF64: MMDBTypeID.DOUBLE,
            MmdbI32: MMDBTypeID.INT32,
            MmdbU16: MMDBTypeID.UINT16,
            MmdbU32: MMDBTypeID.UINT32,
            MmdbU64: MMDBTypeID.UINT64,
            MmdbU128: MMDBTypeID.UINT128,
        }
184 | 
185 |     def _encode_pointer(self, value):
186 |         pointer = value
187 |         if pointer >= 134744064:
188 |             res = struct.pack(">BI", 0x38, pointer)
189 |         elif pointer >= 526336:
190 |             pointer -= 526336
191 |             res = struct.pack(
192 |                 ">BBBB",
193 |                 0x30 + ((pointer >> 24) & 0x07),
194 |                 (pointer >> 16) & 0xFF,
195 |                 (pointer >> 8) & 0xFF,
196 |                 pointer & 0xFF,
197 |             )
198 |         elif pointer >= 2048:
199 |             pointer -= 2048
200 |             res = struct.pack(
201 |                 ">BBB",
202 |                 0x28 + ((pointer >> 16) & 0x07),
203 |                 (pointer >> 8) & 0xFF,
204 |                 pointer & 0xFF,
205 |             )
206 |         else:
207 |             res = struct.pack(">BB", 0x20 + ((pointer >> 8) & 0x07), pointer & 0xFF)
208 | 
209 |         return res
210 | 
211 |     def _encode_utf8_string(self, value):
212 |         encoded_value = value.encode("utf-8")
213 |         res = self._make_header(MMDBTypeID.STRING, len(encoded_value))
214 |         res += encoded_value
215 |         return res
216 | 
217 |     def _encode_bytes(self, value):
218 |         return self._make_header(MMDBTypeID.BYTES, len(value)) + value
219 | 
220 |     def _encode_uint(self, type_id, max_len):
221 |         value_max = 2 ** (max_len * 8)
222 | 
223 |         def _encode_unsigned_value(value):
224 |             if value < 0 or value >= value_max:
225 |                 raise ValueError(
226 |                     f"encode uint{max_len * 8} fail: "
227 |                     f"{value} not in range(0, {value_max})"
228 |                 )
229 |             res = b""
230 |             while value != 0 and len(res) < max_len:
231 |                 res = struct.pack(">B", value & 0xFF) + res
232 |                 value = value >> 8
233 |             return self._make_header(type_id, len(res)) + res
234 | 
235 |         return _encode_unsigned_value
236 | 
237 |     def _encode_map(self, value):
238 |         res = self._make_header(MMDBTypeID.MAP, len(value))
239 |         for k, v in list(value.items()):
240 |             # Keys are always stored by value.
241 |             res += self.encode(k)
242 |             res += self.encode(v)
243 |         return res
244 | 
245 |     def _encode_array(self, value):
246 |         res = self._make_header(MMDBTypeID.ARRAY, len(value))
247 |         for k in value:
248 |             res += self.encode(k)
249 |         return res
250 | 
251 |     def _encode_boolean(self, value):
252 |         return self._make_header(MMDBTypeID.BOOLEAN, 1 if value else 0)
253 | 
254 |     def _encode_pack_type(self, type_id, fmt):
255 |         def pack_type(value):
256 |             res = struct.pack(fmt, value)
257 |             return self._make_header(type_id, len(res)) + res
258 | 
259 |         return pack_type
260 | 
261 |     _type_encoder = None
262 | 
263 |     @property
264 |     def type_encoder(self):
265 |         if self._type_encoder is None:
266 |             self._type_encoder = {
267 |                 MMDBTypeID.POINTER: self._encode_pointer,
268 |                 MMDBTypeID.STRING: self._encode_utf8_string,
269 |                 MMDBTypeID.DOUBLE: self._encode_pack_type(MMDBTypeID.DOUBLE, ">d"),
270 |                 MMDBTypeID.BYTES: self._encode_bytes,
271 |                 MMDBTypeID.UINT16: self._encode_uint(MMDBTypeID.UINT16, 2),
272 |                 MMDBTypeID.UINT32: self._encode_uint(MMDBTypeID.UINT32, 4),
273 |                 MMDBTypeID.MAP: self._encode_map,
274 |                 MMDBTypeID.INT32: self._encode_pack_type(MMDBTypeID.INT32, ">i"),
275 |                 MMDBTypeID.UINT64: self._encode_uint(MMDBTypeID.UINT64, 8),
276 |                 MMDBTypeID.UINT128: self._encode_uint(MMDBTypeID.UINT128, 16),
277 |                 MMDBTypeID.ARRAY: self._encode_array,
278 |                 MMDBTypeID.BOOLEAN: self._encode_boolean,
279 |                 MMDBTypeID.FLOAT: self._encode_pack_type(MMDBTypeID.FLOAT, ">f"),
280 |             }
281 |         return self._type_encoder
282 | 
283 |     def _make_header(self, type_id, length):
284 |         if length >= 16843036:
285 |             raise Exception("length >= 16843036")
286 | 
287 |         elif length >= 65821:
288 |             five_bits = 31
289 |             length -= 65821
290 |             b3 = length & 0xFF
291 |             b2 = (length >> 8) & 0xFF
292 |             b1 = (length >> 16) & 0xFF
293 |             additional_length_bytes = struct.pack(">BBB", b1, b2, b3)
294 | 
295 |         elif length >= 285:
296 |             five_bits = 30
297 |             length -= 285
298 |             b2 = length & 0xFF
299 |             b1 = (length >> 8) & 0xFF
300 |             additional_length_bytes = struct.pack(">BB", b1, b2)
301 | 
302 |         elif length >= 29:
303 |             five_bits = 29
304 |             length -= 29
305 |             additional_length_bytes = struct.pack(">B", length & 0xFF)
306 | 
307 |         else:
308 |             five_bits = length
309 |             additional_length_bytes = b""
310 | 
311 |         if type_id <= 7:
312 |             res = struct.pack(">B", (type_id << 5) + five_bits)
313 |         else:
314 |             res = struct.pack(">BB", five_bits, type_id - 7)
315 | 
316 |         return res + additional_length_bytes
317 | 
318 |     def python_type_id(self, value):
319 |         value_type = type(value)
320 |         type_id = self._python_type_id.get(value_type)
321 |         if type_id:
322 |             return type_id
323 |         if value_type is int:
324 |             if self.int_type == "auto":
325 |                 if value > UINT64_MAX:
326 |                     return MMDBTypeID.UINT128
327 |                 elif value > UINT32_MAX:
328 |                     return MMDBTypeID.UINT64
329 |                 elif value > UINT16_MAX:
330 |                     return MMDBTypeID.UINT32
331 |                 elif value < 0:
332 |                     return MMDBTypeID.INT32
333 |                 else:
334 |                     return MMDBTypeID.UINT16
335 |             elif self.int_type in ("u16", "uint16", MmdbU16):
336 |                 return MMDBTypeID.UINT16
337 |             elif self.int_type in ("u32", "uint32", MmdbU32):
338 |                 return MMDBTypeID.UINT32
339 |             elif self.int_type in ("u64", "uint64", MmdbU64):
340 |                 return MMDBTypeID.UINT64
341 |             elif self.int_type in ("u128", "uint128", MmdbU128):
342 |                 return MMDBTypeID.UINT128
343 |             elif self.int_type in ("i32", "int32", MmdbI32):
344 |                 return MMDBTypeID.INT32
345 |             else:
346 |                 raise ValueError(f"unknown int_type={self.int_type}")
347 |         elif value_type is float:
348 |             if self.float_type in ("f32", "float32", MmdbF32):
349 |                 return MMDBTypeID.FLOAT
350 |             elif self.float_type in ("f64", "float64", MmdbF64):
351 |                 return MMDBTypeID.DOUBLE
352 |             else:
353 |                 raise ValueError(f"unknown float_type={self.float_type}")
354 |         elif value_type is Decimal:
355 |             return MMDBTypeID.DOUBLE
356 |         raise TypeError(f"unknown type {value_type}")
357 | 
358 |     def _freeze(self, value):
359 |         if isinstance(value, dict):
360 |             return tuple((k, self._freeze(v)) for k, v in value.items())
361 |         elif isinstance(value, list):
362 |             return tuple(self._freeze(v) for v in value)
363 |         return value
364 | 
365 |     def encode_meta(self, meta):
366 |         res = self._make_header(MMDBTypeID.MAP, len(meta))
367 |         meta_type = {
368 |             "node_count": 6,
369 |             "record_size": 5,
370 |             "ip_version": 5,
371 |             "binary_format_major_version": 5,
372 |             "binary_format_minor_version": 5,
373 |             "build_epoch": 9,
374 |         }
375 |         for k, v in list(meta.items()):
376 |             # Keys are always stored by value.
377 |             res += self.encode(k)
378 |             res += self.encode(v, meta_type.get(k))
379 |         return res
380 | 
381 |     def encode(self, value, type_id=None, return_offset=False):
382 |         if self.cache:
383 |             cache_key = self._freeze(value)
384 |             try:
385 |                 offset = self.data_cache[cache_key]
386 |                 return offset if return_offset else self._encode_pointer(offset)
387 |             except KeyError:
388 |                 pass
389 | 
390 |         if not type_id:
391 |             type_id = self.python_type_id(value)
392 | 
393 |         try:
394 |             encoder = self.type_encoder[type_id]
395 |         except KeyError as err:
396 |             raise ValueError(f"unknown type_id={type_id}") from err
397 | 
398 |         if isinstance(value, MmdbBaseType):
399 |             value = value.value
400 |         res = encoder(value)
401 | 
402 |         if self.cache:
403 |             self.data_list.append(res)
404 |             offset = self.data_pointer
405 |             self.data_pointer += len(res)
406 |             self.data_cache[cache_key] = offset
407 |             return offset if return_offset else self._encode_pointer(offset)
408 |         return res
409 | 
410 | 
class TreeWriter:
    """Serialise a search tree plus its leaf data into an MMDB file.

    The file layout produced by ``write`` is: search-tree records, a 16-byte
    zero separator, the data section, the metadata marker and the metadata map.
    """

    encoder_cls = Encoder

    def __init__(
        self,
        tree: "SearchTreeNode",
        meta: dict,
        int_type: IntType = "auto",
        float_type: FloatType = "f64",
    ):
        """
        Args:
            tree: Root node of the search tree to serialise.
            meta: Extra metadata merged into the generated metadata map.
            int_type: Integer encoding policy forwarded to the data encoder.
            float_type: Float encoding policy forwarded to the data encoder.
        """
        self._node_idx = {}  # id(node) -> index of the node in the tree section
        self._leaf_offset = {}  # id(leaf) -> data offset including the separator
        self._node_list = []  # nodes in the order they are written
        self._node_counter = 0
        # Both are filled in by _adjust_record_size(); defined here so the
        # attributes exist before write() is called.
        self.record_size = 0
        self.data_offset = 0

        self.tree = tree
        self.meta = meta

        self.encoder = self.encoder_cls(
            cache=True, int_type=int_type, float_type=float_type
        )

    @property
    def _data_list(self):
        """Encoded data-section chunks, in write order."""
        return self.encoder.data_list

    @property
    def _data_pointer(self):
        """Size of the data section plus the 16-byte separator before it."""
        return self.encoder.data_pointer + 16

    def _build_meta(self):
        """Return the metadata map: tree statistics plus the user-supplied meta."""
        return {
            "node_count": self._node_counter,
            "record_size": self.record_size,
            **self.meta,
        }

    def _adjust_record_size(self):
        """Pick the smallest supported record size (24, 28 or 32 bits).

        Raises:
            Exception: If more than 32 bits would be needed.
        """
        # Tree records should be large enough to contain either tree node index
        # or data offset.
        max_id = self._node_counter + self._data_pointer + 1

        # Bits needed for every value below max_id.  Integer arithmetic avoids
        # the rounding error of math.log() at exact powers of two.
        bit_count = (max_id - 1).bit_length()
        if bit_count <= 24:
            self.record_size = 24
        elif bit_count <= 28:
            self.record_size = 28
        elif bit_count <= 32:
            self.record_size = 32
        else:
            raise Exception("record_size > 32")

        # Each node holds two records; keep the byte count an integer.
        self.data_offset = self.record_size * 2 // 8 * self._node_counter

    def _enumerate_nodes(self, node):
        """Walk the tree depth-first, numbering nodes and encoding leaf values."""
        if type(node) is SearchTreeNode:
            node_id = id(node)
            if node_id not in self._node_idx:
                self._node_idx[node_id] = self._node_counter
                self._node_counter += 1
                self._node_list.append(node)

            self._enumerate_nodes(node.left)
            self._enumerate_nodes(node.right)

        elif type(node) is SearchTreeLeaf:
            node_id = id(node)
            if node_id not in self._leaf_offset:
                offset = self.encoder.encode(node.value, return_offset=True)
                # Offsets stored in records are shifted by the 16-byte separator.
                self._leaf_offset[node_id] = offset + 16
        else:  # == None
            return

    def _calc_record_idx(self, node):
        """Return the record value that points at ``node``.

        ``None`` maps to the node count itself (the MMDB "no data" value), a
        tree node maps to its index, and a leaf maps to its shifted data offset
        plus the node count.
        """
        if node is None:
            return self._node_counter
        elif type(node) is SearchTreeNode:
            return self._node_idx[id(node)]
        elif type(node) is SearchTreeLeaf:
            return self._leaf_offset[id(node)] + self._node_counter
        else:
            raise Exception("unexpected type")

    def _cal_node_bytes(self, node) -> bytes:
        """Pack the left and right records of ``node`` for the current record size."""
        left_idx = self._calc_record_idx(node.left)
        right_idx = self._calc_record_idx(node.right)

        if self.record_size == 24:
            b1 = (left_idx >> 16) & 0xFF
            b2 = (left_idx >> 8) & 0xFF
            b3 = left_idx & 0xFF
            b4 = (right_idx >> 16) & 0xFF
            b5 = (right_idx >> 8) & 0xFF
            b6 = right_idx & 0xFF
            return struct.pack(">BBBBBB", b1, b2, b3, b4, b5, b6)

        elif self.record_size == 28:
            # The middle byte carries the top four bits of both records:
            # left in the high nibble, right in the low nibble.
            b1 = (left_idx >> 16) & 0xFF
            b2 = (left_idx >> 8) & 0xFF
            b3 = left_idx & 0xFF
            b4 = ((left_idx >> 24) & 0xF) * 16 + ((right_idx >> 24) & 0xF)
            b5 = (right_idx >> 16) & 0xFF
            b6 = (right_idx >> 8) & 0xFF
            b7 = right_idx & 0xFF
            return struct.pack(">BBBBBBB", b1, b2, b3, b4, b5, b6, b7)

        elif self.record_size == 32:
            return struct.pack(">II", left_idx, right_idx)

        else:
            raise Exception("self.record_size > 32")

    def write(self, fname):
        """Write the complete database to the file ``fname``.

        Nodes are enumerated (which also encodes all leaf data) and the record
        size is chosen before anything is written.
        """
        self._enumerate_nodes(self.tree)
        self._adjust_record_size()

        with open(fname, "wb") as f:
            for node in self._node_list:
                f.write(self._cal_node_bytes(node))

            # Separator between the search tree and the data section.
            f.write(b"\x00" * 16)

            for element in self._data_list:
                f.write(element)

            f.write(METADATA_MAGIC)
            # Metadata uses a fresh, non-caching encoder so it contains no
            # pointers into the data section.
            f.write(self.encoder_cls(cache=False).encode_meta(self._build_meta()))
540 | 
541 | 
def bits_rstrip(n, length=None, keep=0):
    """Return the leading ``keep`` bits of ``n`` as an iterator of ints.

    Args:
        n: Non-negative integer whose binary digits are inspected.
        length: Total bit width; the binary form is left-padded with zeros to
            this width.  ``None`` means no padding (previously this default
            raised ``TypeError``).
        keep: Number of most-significant bits to keep; the rest are dropped.

    Returns:
        An iterator yielding ``0``/``1`` integers, most-significant bit first.
    """
    digits = bin(n)[2:]
    if length is not None:
        digits = digits.rjust(length, "0")
    return map(int, digits[:keep])
544 | 
545 | 
class MMDBWriter:
    """Collect networks with attached content and write them as an MMDB file."""

    def __init__(
        self,
        ip_version=4,
        database_type="GeoIP",
        languages: List[str] = None,
        description: Union[Dict[str, str], str] = "GeoIP db",
        ipv4_compatible=False,
        int_type: IntType = "auto",
        float_type: FloatType = "f64",
    ):
        """
        Args:
            ip_version: The IP version of the database. Defaults to 4.
            database_type: The type of the database. Defaults to "GeoIP".
            languages: A list of languages. Defaults to [].
            description: A description of the database for every language.
            ipv4_compatible: Whether the database is compatible with IPv4.
            int_type: The type of integer to use. Defaults to "auto".
            float_type: The type of float to use. Defaults to "f64".

        Raises:
            ValueError: If ip_version is not 4 or 6, if ipv4_compatible is set
                for an IPv4 database, or if a language has no description.

        Note:
            If you want to store an IPv4 address in an IPv6 database, you should set
            ipv4_compatible=True.

            If you want to use a specific integer type, you can set int_type to
            "u16", "u32", "u64", "u128", or "i32".
        """
        self.tree = SearchTreeNode()
        self.ipv4_compatible = ipv4_compatible

        if languages is None:
            languages = []
        self.description = description
        self.database_type = database_type
        self.ip_version = ip_version
        self.languages = languages
        self.binary_format_major_version = 2
        self.binary_format_minor_version = 0

        self._bit_length = 128 if ip_version == 6 else 32

        if ip_version not in [4, 6]:
            raise ValueError(f"ip_version should be 4 or 6, {ip_version} is incorrect")
        if ip_version == 4 and ipv4_compatible:
            raise ValueError("ipv4_compatible=True can set when ip_version=6")
        if isinstance(description, str):
            # A single string is used as the description for every language.
            self.description = {i: description for i in languages}
        for i in languages:
            if i not in self.description:
                raise ValueError(f"language {i} must have description!")

        self.int_type = int_type
        self.float_type = float_type

    def insert_network(self, network: IPSet, content: MMDBType):
        """
        Inserts a network into the MaxMind database.

        Args:
           network: The network to be inserted. It should be an instance of
                    netaddr.IPSet.
           content: The content associated with the network. It can be a
                    dictionary, list, string, bytes, integer, or boolean.


        Raises:
           ValueError: If the network is not an instance of netaddr.IPSet.
           ValueError: If an IPv6 address is inserted into an IPv4-only database.
           ValueError: If an IPv4 address is inserted into an IPv6 database without
                       setting ipv4_compatible=True.

        Note:
           This method modifies the internal tree structure of the MMDBWriter instance.
           All CIDRs of the set share one leaf object, so the content is
           stored once.
        """
        leaf = SearchTreeLeaf(content)
        if not isinstance(network, IPSet):
            raise ValueError("network type should be netaddr.IPSet.")
        network = network.iter_cidrs()
        for cidr in network:
            if self.ip_version == 4 and cidr.version == 6:
                raise ValueError(
                    f"You inserted a IPv6 address {cidr} " "to an IPv4-only database."
                )
            if self.ip_version == 6 and cidr.version == 4:
                if not self.ipv4_compatible:
                    raise ValueError(
                        f"You inserted a IPv4 address {cidr} to an IPv6 database. "
                        "Please use ipv4_compatible=True option store "
                        "IPv4 address in IPv6 database as ::/96 format"
                    )
                cidr = cidr.ipv6(True)
            bits = list(bits_rstrip(cidr.value, self._bit_length, cidr.prefixlen))
            if not bits:
                # A /0 network has no prefix bits: it covers the whole address
                # space, so both branches of the root point at the leaf.
                self.tree[0] = leaf
                self.tree[1] = leaf
                continue

            current_node = self.tree
            supernet_leaf = None  # Tracks whether we are inserting into a subnet
            for index, ip_bit in enumerate(bits[:-1]):
                previous_node = current_node
                current_node = previous_node.get_or_create(ip_bit)

                if isinstance(current_node, SearchTreeLeaf):
                    # The path runs into an existing, shorter network: rebuild
                    # its CIDR (for the log message only) and split it.
                    current_cidr = IPNetwork(
                        (
                            int(
                                "".join(map(str, bits[: index + 1])).ljust(
                                    self._bit_length, "0"
                                ),
                                2,
                            ),
                            index + 1,
                        )
                    )
                    logger.info(
                        "Inserting %s (%s) into subnet of %s (%s)",
                        cidr,
                        content,
                        current_cidr,
                        current_node.value,
                    )
                    supernet_leaf = current_node
                    current_node = SearchTreeNode()
                    previous_node[ip_bit] = current_node

                if supernet_leaf is not None:
                    next_bit = bits[index + 1]
                    # Insert supernet information on each inverse bit of
                    # the current subnet
                    current_node[1 - next_bit] = supernet_leaf
            current_node[bits[-1]] = leaf

    def to_db_file(self, filename: str):
        """Serialise the current tree and metadata to ``filename``."""
        return TreeWriter(
            self.tree, self._build_meta(), self.int_type, self.float_type
        ).write(filename)

    def _build_meta(self):
        """Return the database-level metadata; ``build_epoch`` is the current time."""
        return {
            "ip_version": self.ip_version,
            "database_type": self.database_type,
            "languages": self.languages,
            "binary_format_major_version": self.binary_format_major_version,
            "binary_format_minor_version": self.binary_format_minor_version,
            "build_epoch": int(time.time()),
            "description": self.description,
        }
692 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["flit_core >=3.2,<4"]
 3 | build-backend = "flit_core.buildapi"
 4 | 
 5 | [project]
 6 | name = "mmdb_writer"
 7 | description = "Make `mmdb` format ip library file which can be read by maxmind official language reader"
 8 | readme = "README.md"
 9 | license = {file = "LICENSE"}
10 | requires-python = ">=3.8"
11 | keywords = ["mmdb", "maxmind"]
12 | authors = [{ name = "VimT", email = "me@vimt.me" } ]
13 | classifiers = [
14 |     "Development Status :: 5 - Production/Stable",
15 |     "Intended Audience :: Developers",
16 |     "License :: OSI Approved :: MIT License",
17 |     "Natural Language :: English",
18 |     "Operating System :: OS Independent",
19 |     "Programming Language :: Python",
20 |     "Programming Language :: Python :: 3",
21 |     "Programming Language :: Python :: 3 :: Only",
22 |     "Programming Language :: Python :: 3.8",
23 |     "Programming Language :: Python :: 3.9",
24 |     "Programming Language :: Python :: 3.10",
25 |     "Programming Language :: Python :: 3.11",
26 |     "Programming Language :: Python :: 3.12",
27 |     "Programming Language :: Python :: Implementation :: CPython",
28 |     "Programming Language :: Python :: Implementation :: PyPy",
29 |     "Topic :: Software Development :: Build Tools",
30 | ]
31 | dependencies = [
32 |     "netaddr>=0.7"
33 | ]
34 | dynamic = ["version"]
35 | 
36 | [project.optional-dependencies]
37 | test = [
38 |     "pytest >=2.7.3",
39 |     "pytest-cov",
40 |     "numpy",
41 |     "maxminddb>=1.5",
42 | ]
43 | dev = [
44 |     "ruff"
45 | ]
46 | 
47 | [project.urls]
48 | Home = "https://github.com/vimt/MaxMind-DB-Writer-python"
49 | Source = "https://github.com/vimt/MaxMind-DB-Writer-python"
50 | Tracker = "https://github.com/vimt/MaxMind-DB-Writer-python/issues"
51 | 
52 | [tool.flit.sdist]
53 | include = ["mmdb_writer.py"]
54 | 
55 | [tool.pytest.ini_options]
56 | testpaths = ["tests"]
57 | filterwarnings = [
58 |     "error",
59 | ]
60 | 
61 | [tool.ruff]
62 | fix = true
63 | show-fixes = true
64 | output-format = "full"
65 | 
66 | [tool.ruff.lint]
67 | select = [
68 |     "B",  # flake8-bugbear
69 |     "E",  # pycodestyle error
70 |     "F",  # pyflakes
71 |     "I",  # isort
72 |     "UP",  # pyupgrade
73 |     "W",  # pycodestyle warning
74 | ]
75 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vimt/MaxMind-DB-Writer-python/70f8f2ad6b91db91e9d7b977268a914a888f089a/tests/__init__.py


--------------------------------------------------------------------------------
/tests/clients/go/go.mod:
--------------------------------------------------------------------------------
1 | module mmdb-test
2 | 
3 | go 1.22
4 | 
5 |         require github.com/oschwald/maxminddb-golang v1.12.0
6 | 
7 |         require golang.org/x/sys v0.10.0 // indirect
8 | 


--------------------------------------------------------------------------------
/tests/clients/go/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 3 | github.com/oschwald/maxminddb-golang v1.12.0 h1:9FnTOD0YOhP7DGxGsq4glzpGy5+w7pq50AS6wALUMYs=
 4 | github.com/oschwald/maxminddb-golang v1.12.0/go.mod h1:q0Nob5lTCqyQ8WT6FYgS1L7PXKVVbgiymefNwIjPzgY=
 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 7 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
 8 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 9 | golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA=
10 | golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
13 | 


--------------------------------------------------------------------------------
/tests/clients/go/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"encoding/json"
 5 | 	"flag"
 6 | 	"fmt"
 7 | 	"github.com/oschwald/maxminddb-golang"
 8 | 	"log"
 9 | 	"math/big"
10 | 	"net"
11 | 	"os"
12 | )
13 | 
// Command-line flags. Both are required: main prints the flag defaults and
// exits with status 1 when either one is empty.
var (
	// db is the path of the MaxMind DB file to open.
	db = flag.String("db", "", "Path to the MaxMind DB file")
	// ip is the textual IP address to look up in that database.
	ip = flag.String("ip", "", "IP address to look up")
)
18 | 
// Record is the shape of one database entry as decoded by this test client.
// The `maxminddb` tag names the key read from the database record and the
// `json` tag names the key used when the record is printed by main.
type Record struct {
	I32    int            `json:"i32" maxminddb:"i32"`
	F32    float32        `json:"f32" maxminddb:"f32"`
	F64    float64        `json:"f64" maxminddb:"f64"`
	U16    uint16         `json:"u16" maxminddb:"u16"`
	U32    uint32         `json:"u32" maxminddb:"u32"`
	U64    uint64         `json:"u64" maxminddb:"u64"`
	U128   *big.Int       `json:"u128" maxminddb:"u128"`
	Array  []any          `json:"array" maxminddb:"array"`
	Map    map[string]any `json:"map" maxminddb:"map"`
	Bytes  []byte         `json:"bytes" maxminddb:"bytes"`
	String string         `json:"string" maxminddb:"string"`
	Bool   bool           `json:"bool" maxminddb:"bool"`
}
33 | 
34 | func main() {
35 | 	flag.Parse()
36 | 	if *db == "" || *ip == "" {
37 | 		flag.PrintDefaults()
38 | 		os.Exit(1)
39 | 	}
40 | 	db, err := maxminddb.Open(*db)
41 | 	if err != nil {
42 | 		log.Fatal(err)
43 | 	}
44 | 	defer db.Close()
45 | 
46 | 	ip := net.ParseIP(*ip)
47 | 
48 | 	var record Record
49 | 
50 | 	err = db.Lookup(ip, &record)
51 | 	if err != nil {
52 | 		log.Panic(err)
53 | 	}
54 | 	data, err := json.Marshal(record)
55 | 	if err != nil {
56 | 		log.Panic(err)
57 | 	}
58 | 	fmt.Println(string(data))
59 | }
60 | 


--------------------------------------------------------------------------------
/tests/clients/java/.gitignore:
--------------------------------------------------------------------------------
 1 | target/
 2 | !.mvn/wrapper/maven-wrapper.jar
 3 | !**/src/main/**/target/
 4 | !**/src/test/**/target/
 5 | 
 6 | ### IntelliJ IDEA ###
 7 | .idea/modules.xml
 8 | .idea/jarRepositories.xml
 9 | .idea/compiler.xml
10 | .idea/libraries/
11 | *.iws
12 | *.iml
13 | *.ipr
14 | 
15 | ### Eclipse ###
16 | .apt_generated
17 | .classpath
18 | .factorypath
19 | .project
20 | .settings
21 | .springBeans
22 | .sts4-cache
23 | 
24 | ### NetBeans ###
25 | /nbproject/private/
26 | /nbbuild/
27 | /dist/
28 | /nbdist/
29 | /.nb-gradle/
30 | build/
31 | !**/src/main/**/build/
32 | !**/src/test/**/build/
33 | 
34 | ### VS Code ###
35 | .vscode/
36 | 
37 | ### Mac OS ###
38 | .DS_Store


--------------------------------------------------------------------------------
/tests/clients/java/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 5 |     <modelVersion>4.0.0</modelVersion>
 6 | 
 7 |     <groupId>me.vime</groupId>
 8 |     <artifactId>mmdb-test</artifactId>
 9 |     <version>1.0-SNAPSHOT</version>
10 | 
11 |     <properties>
12 |         <maven.compiler.source>22</maven.compiler.source>
13 |         <maven.compiler.target>22</maven.compiler.target>
14 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
15 |     </properties>
16 |     <dependencies>
17 |         <dependency>
18 |             <groupId>com.maxmind.db</groupId>
19 |             <artifactId>maxmind-db</artifactId>
20 |             <version>3.1.0</version>
21 |         </dependency>
22 |         <dependency>
23 |             <groupId>args4j</groupId>
24 |             <artifactId>args4j</artifactId>
25 |             <version>2.33</version>
26 |         </dependency>
27 |         <dependency>
28 |             <groupId>com.google.code.gson</groupId>
29 |             <artifactId>gson</artifactId>
30 |             <version>2.10.1</version>
31 |         </dependency>
32 |     </dependencies>
33 |     <build>
34 |         <finalName>${project.artifactId}</finalName>
35 |         <plugins>
36 |             <plugin>
37 |                 <groupId>org.apache.maven.plugins</groupId>
38 |                 <artifactId>maven-assembly-plugin</artifactId>
39 |                 <executions>
40 |                     <execution>
41 |                         <phase>package</phase>
42 |                         <goals>
43 |                             <goal>single</goal>
44 |                         </goals>
45 |                         <configuration>
46 |                             <archive>
47 |                                 <manifest>
48 |                                     <mainClass>
49 |                                         Main
50 |                                     </mainClass>
51 |                                 </manifest>
52 |                             </archive>
53 |                             <descriptorRefs>
54 |                                 <descriptorRef>jar-with-dependencies</descriptorRef>
55 |                             </descriptorRefs>
56 |                         </configuration>
57 |                     </execution>
58 |                 </executions>
59 |             </plugin>
60 |         </plugins>
61 |     </build>
62 | </project>


--------------------------------------------------------------------------------
/tests/clients/java/src/main/java/Main.java:
--------------------------------------------------------------------------------
 1 | import com.google.gson.Gson;
 2 | import com.maxmind.db.MaxMindDbConstructor;
 3 | import com.maxmind.db.MaxMindDbParameter;
 4 | import com.maxmind.db.Reader;
 5 | import org.kohsuke.args4j.CmdLineParser;
 6 | import org.kohsuke.args4j.Option;
 7 | 
 8 | import java.io.File;
 9 | import java.io.IOException;
10 | import java.math.BigInteger;
11 | import java.net.InetAddress;
12 | import java.util.List;
13 | import java.util.Map;
14 | 
15 | public class Main {
16 |     @Option(name = "-db", usage = "Path to the MMDB file", required = true)
17 |     private String databasePath;
18 | 
19 |     @Option(name = "-ip", usage = "IP address to lookup", required = true)
20 |     private String ipAddress;
21 | 
22 |     public static void main(String[] args) throws Exception {
23 |         Main lookup = new Main();
24 |         CmdLineParser parser = new CmdLineParser(lookup);
25 |         parser.parseArgument(args);
26 | 
27 |         lookup.run();
28 |     }
29 | 
30 |     public void run() throws IOException {
31 |         File database = new File(databasePath);
32 |         Gson gson = new Gson();
33 | 
34 |         try (Reader reader = new Reader(database)) {
35 |             InetAddress address = InetAddress.getByName(ipAddress);
36 | 
37 |             Record result = reader.get(address, Record.class);
38 |             String jsonResult = gson.toJson(result);
39 |             System.out.println(jsonResult);
40 |         }
41 |     }
42 | 
43 | 
44 |     public static class Record {
45 |         private Integer i32;
46 |         private Float f32;
47 |         private Double f64;
48 |         private Integer u16;
49 |         private Long u32;
50 |         private BigInteger u64;
51 |         private BigInteger u128;
52 |         private List<Object> array;
53 |         private Map<String, Object> map;
54 |         private byte[] bytes;
55 |         private String string;
56 |         private Boolean bool;
57 | 
58 |         @MaxMindDbConstructor
59 |         public Record(
60 |                 @MaxMindDbParameter(name = "i32") Integer i32,
61 |                 @MaxMindDbParameter(name = "f32") Float f32,
62 |                 @MaxMindDbParameter(name = "f64") Double f64,
63 |                 @MaxMindDbParameter(name = "u16") Integer u16,
64 |                 @MaxMindDbParameter(name = "u32") Long u32,
65 |                 @MaxMindDbParameter(name = "u64") BigInteger u64,
66 |                 @MaxMindDbParameter(name = "u128") BigInteger u128,
67 |                 @MaxMindDbParameter(name = "array") List<Object> array,
68 |                 @MaxMindDbParameter(name = "map") Map<String, Object> map,
69 |                 @MaxMindDbParameter(name = "bytes") byte[] bytes,
70 |                 @MaxMindDbParameter(name = "string") String string,
71 |                 @MaxMindDbParameter(name = "bool") Boolean bool
72 |         ) {
73 |             this.i32 = i32;
74 |             this.f32 = f32;
75 |             this.f64 = f64;
76 |             this.u16 = u16;
77 |             this.u32 = u32;
78 |             this.u64 = u64;
79 |             this.u128 = u128;
80 |             this.array = array;
81 |             this.map = map;
82 |             this.bytes = bytes;
83 |             this.string = string;
84 |             this.bool = bool;
85 |         }
86 |     }
87 | }
88 | 


--------------------------------------------------------------------------------
/tests/record.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | from dataclasses import dataclass
  3 | 
  4 | import numpy as np
  5 | 
  6 | from mmdb_writer import MmdbF32, MmdbF64, MmdbI32, MmdbU16, MmdbU32, MmdbU64, MmdbU128
  7 | 
  8 | 
  9 | def random_str(length=10):
 10 |     return "".join(random.choices("abc中文", k=length))
 11 | 
 12 | 
 13 | def random_bytes(length=10):
 14 |     return bytes(random.choices(range(256), k=length))
 15 | 
 16 | 
 17 | def random_i32():
 18 |     return MmdbI32(random.randint(-(2**31), 0))
 19 | 
 20 | 
 21 | def random_f32():
 22 |     return MmdbF32(np.float32(random.random()))
 23 | 
 24 | 
 25 | def random_f64():
 26 |     return MmdbF64(random.random() * 1e128)
 27 | 
 28 | 
 29 | def random_u16():
 30 |     return MmdbU16(random.randint(0, 2**16 - 1))
 31 | 
 32 | 
 33 | def random_u32():
 34 |     return MmdbU32(random.randint(2**16, 2**32 - 1))
 35 | 
 36 | 
 37 | def random_u64():
 38 |     return MmdbU64(random.randint(2**32, 2**64 - 1))
 39 | 
 40 | 
 41 | def random_u128():
 42 |     return MmdbU128(random.randint(2**64, 2**128 - 1))
 43 | 
 44 | 
 45 | def random_array(length=10, nested_type=False):
 46 |     return [random_any(nested_type) for _ in range(length)]
 47 | 
 48 | 
 49 | def random_map(length=10, nested_type=False):
 50 |     return {random_str(): random_any(nested_type) for _ in range(length)}
 51 | 
 52 | 
 53 | def random_bool():
 54 |     return random.choice([True, False])
 55 | 
 56 | 
 57 | def random_any(nested_type=False):
 58 |     return random.choice(
 59 |         [
 60 |             random_i32,
 61 |             random_f32,
 62 |             random_f64,
 63 |             random_u16,
 64 |             random_u32,
 65 |             random_u64,
 66 |             random_u128,
 67 |             random_bytes,
 68 |             random_str,
 69 |             random_bool,
 70 |             *([random_array, random_map] if nested_type else []),
 71 |         ]
 72 |     )()
 73 | 
 74 | 
 75 | @dataclass
 76 | class Record:
 77 |     i32: MmdbI32
 78 |     f32: MmdbF32
 79 |     f64: MmdbF64
 80 |     u16: MmdbU16
 81 |     u32: MmdbU32
 82 |     u64: MmdbU64
 83 |     u128: MmdbU128
 84 |     array: list
 85 |     map: dict
 86 |     bytes: bytes
 87 |     string: str
 88 |     bool: bool
 89 | 
 90 |     @staticmethod
 91 |     def random():
 92 |         return Record(
 93 |             i32=random_i32(),
 94 |             f32=random_f32(),
 95 |             f64=random_f64(),
 96 |             u16=random_u16(),
 97 |             u32=random_u32(),
 98 |             u64=random_u64(),
 99 |             u128=random_u128(),
100 |             array=random_array(5, True),
101 |             map=random_map(5, True),
102 |             bytes=random_bytes(),
103 |             string=random_str(),
104 |             bool=random_bool(),
105 |         )
106 | 
107 |     def dict(self):
108 |         return self.__dict__
109 | 


--------------------------------------------------------------------------------
/tests/test_api.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os.path
 3 | import random
 4 | import struct
 5 | import unittest
 6 | 
 7 | import maxminddb
 8 | from netaddr import IPSet
 9 | 
10 | from mmdb_writer import MmdbI32, MmdbU16, MmdbU32, MmdbU64, MmdbU128, MMDBWriter
11 | 
# Timestamped, INFO-level logging for this module.
logging.basicConfig(
    level=logging.INFO, format="[%(asctime)s: %(levelname)s] %(message)s"
)
# Two distinguishable payloads used as lookup fixtures by the tests below.
record1 = dict(country="c1", isp="ISP1")
record2 = dict(country="c2", isp="ISP2")
17 | 
18 | 
class TestBuild(unittest.TestCase):
    """Write databases with ``MMDBWriter`` and verify them through ``maxminddb``."""

    def setUp(self) -> None:
        # Relative path: the database is created in the current working directory.
        self.filename = "_test.mmdb"

    def tearDown(self) -> None:
        if os.path.exists(self.filename):
            os.remove(self.filename)

    def test_metadata(self):
        """Metadata given to the writer is reported unchanged in every reader mode."""
        ip_version = 6
        database_type = "test_database_type"
        languages = ["en", "ch"]
        description = {"en": "en test", "ch": "ch test"}
        writer = MMDBWriter(
            ip_version=ip_version,
            database_type=database_type,
            languages=languages,
            description=description,
            ipv4_compatible=False,
        )
        writer.to_db_file(self.filename)
        for mode in (maxminddb.MODE_MMAP_EXT, maxminddb.MODE_MMAP, maxminddb.MODE_FILE):
            # The context manager closes the reader even when an assertion
            # fails, so tearDown is never left with an open handle on the file.
            with maxminddb.open_database(self.filename, mode=mode) as m:
                metadata = m.metadata()
                self.assertEqual(ip_version, metadata.ip_version, mode)
                self.assertEqual(database_type, metadata.database_type, mode)
                self.assertEqual(languages, metadata.languages, mode)
                self.assertEqual(description, metadata.description, mode)

    def test_4in6(self):
        """An IPv6 database built with ``ipv4_compatible=True`` answers IPv4 lookups."""
        writer = MMDBWriter(ip_version=6, ipv4_compatible=True)
        writer.insert_network(IPSet(["1.1.0.0/24"]), record1)
        writer.insert_network(IPSet(["fe80::/16"]), record2)
        writer.to_db_file(self.filename)
        for mode in (maxminddb.MODE_MMAP_EXT, maxminddb.MODE_MMAP, maxminddb.MODE_FILE):
            with maxminddb.open_database(self.filename, mode=mode) as m:
                self.assertEqual(record1, m.get("1.1.0.1"), mode)
                self.assertEqual(record2, m.get("fe80::1"), mode)

    def test_insert_subnet(self):
        """A /24 inserted after its enclosing /8 overrides only its own range."""
        writer = MMDBWriter()
        writer.insert_network(IPSet(["1.0.0.0/8"]), record1)
        writer.insert_network(IPSet(["1.10.10.0/24"]), record2)
        writer.to_db_file(self.filename)
        for mode in (maxminddb.MODE_MMAP_EXT, maxminddb.MODE_MMAP, maxminddb.MODE_FILE):
            with maxminddb.open_database(self.filename, mode=mode) as m:
                self.assertEqual(record1, m.get("1.1.0.1"), mode)
                self.assertEqual(record1, m.get("1.10.0.1"), mode)
                self.assertEqual(record2, m.get("1.10.10.1"), mode)

    def test_int_type(self):
        """In-range integers are written; out-of-range ones fail for every ``int_type``."""
        # Each integer kind is selectable by two string aliases or by its class.
        value_range_map = {}
        value_range_map.update(
            {k: (-(2**31), 2**31 - 1) for k in ("i32", "int32", MmdbI32)}
        )
        value_range_map.update({k: (0, 2**16 - 1) for k in ("u16", "uint16", MmdbU16)})
        value_range_map.update({k: (0, 2**32 - 1) for k in ("u32", "uint32", MmdbU32)})
        value_range_map.update({k: (0, 2**64 - 1) for k in ("u64", "uint64", MmdbU64)})
        value_range_map.update(
            {k: (0, 2**128 - 1) for k in ("u128", "uint128", MmdbU128)}
        )

        for int_type, (start, end) in value_range_map.items():
            # subTest labels a failure with the int_type that triggered it.
            with self.subTest(int_type=int_type):
                writer = MMDBWriter(int_type=int_type)

                ok_value = random.randint(start, end)
                # One value just above the range and one just below it.
                bad_value1 = random.randint(end + 1, end + 2**16)
                bad_value2 = random.randint(start - 2**16, start - 1)
                writer.insert_network(IPSet(["1.0.0.0/8"]), {"value": ok_value})
                writer.to_db_file(self.filename)
                for bad_value in (bad_value1, bad_value2):
                    writer.insert_network(IPSet(["1.0.0.0/8"]), {"value": bad_value})
                    with self.assertRaises((ValueError, struct.error)):
                        writer.to_db_file(self.filename)
96 | 


--------------------------------------------------------------------------------
/tests/test_clients.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import json
  3 | import logging
  4 | import os
  5 | import subprocess
  6 | import unittest
  7 | from pathlib import Path
  8 | 
  9 | import maxminddb
 10 | from netaddr.ip.sets import IPSet
 11 | 
 12 | from mmdb_writer import MmdbBaseType, MmdbF32, MMDBWriter
 13 | from tests.record import Record
 14 | 
 15 | logging.basicConfig(
 16 |     format="[%(asctime)s: %(levelname)s] %(message)s", level=logging.INFO
 17 | )
 18 | logger = logging.getLogger(__name__)
 19 | 
 20 | BASE_DIR = Path(__file__).parent.absolute()
 21 | 
 22 | 
 23 | def run(command: list):
 24 |     print(f"Running command: {command}")
 25 |     result = subprocess.run(command, check=True, stdout=subprocess.PIPE)
 26 |     return result.stdout
 27 | 
 28 | 
 29 | class TestClients(unittest.TestCase):
 30 |     def setUp(self) -> None:
 31 |         self.filepath = Path("_test.mmdb").absolute()
 32 |         self.filepath.unlink(True)
 33 |         self.ip = "1.1.1.1"
 34 |         self.origin_data = Record.random()
 35 |         self.generate_mmdb()
 36 |         self.maxDiff = None
 37 | 
 38 |     def tearDown(self) -> None:
 39 |         self.filepath.unlink(True)
 40 | 
 41 |     def generate_mmdb(self):
 42 |         ip_version = 4
 43 |         database_type = "test_client"
 44 |         languages = ["en"]
 45 |         description = {"en": "for testing purposes only"}
 46 |         writer = MMDBWriter(
 47 |             ip_version=ip_version,
 48 |             database_type=database_type,
 49 |             languages=languages,
 50 |             description=description,
 51 |             ipv4_compatible=False,
 52 |         )
 53 | 
 54 |         writer.insert_network(IPSet(["1.0.0.0/8"]), self.origin_data.dict())
 55 | 
 56 |         # insert other useless record
 57 |         for i in range(2, 250):
 58 |             info = Record.random()
 59 |             writer.insert_network(IPSet([f"{i}.0.0.0/8"]), info.dict())
 60 | 
 61 |         writer.to_db_file(str(self.filepath))
 62 | 
 63 |     @staticmethod
 64 |     def convert_bytes(d, bytes_convert, f32_convert=lambda x: float(str(x))):
 65 |         def inner(d):
 66 |             if isinstance(d, bytes):
 67 |                 return bytes_convert(d)
 68 |             elif isinstance(d, dict):
 69 |                 return {k: inner(v) for k, v in d.items()}
 70 |             elif isinstance(d, list):
 71 |                 return [inner(i) for i in d]
 72 |             elif isinstance(d, MmdbF32):
 73 |                 return f32_convert(d.value)
 74 |             elif isinstance(d, MmdbBaseType):
 75 |                 return d.value
 76 |             else:
 77 |                 return d
 78 | 
 79 |         return inner(d)
 80 | 
 81 |     def test_python(self):
 82 |         for mode in (maxminddb.MODE_MMAP_EXT, maxminddb.MODE_MMAP, maxminddb.MODE_FILE):
 83 |             m = maxminddb.open_database(self.filepath, mode=mode)
 84 |             python_data = m.get(self.ip)
 85 |             should_data = self.origin_data.dict()
 86 |             should_data = self.convert_bytes(
 87 |                 should_data, lambda x: bytearray(x), lambda x: float(x)
 88 |             )
 89 |             self.assertDictEqual(should_data, python_data)
 90 |             m.close()
 91 | 
 92 |     def test_java(self):
 93 |         java_dir = BASE_DIR / "clients" / "java"
 94 |         self.assertTrue(java_dir.exists())
 95 |         os.chdir(java_dir)
 96 |         run(["mvn", "clean", "package"])
 97 |         java_data_str = run(
 98 |             [
 99 |                 "java",
100 |                 "-jar",
101 |                 "target/mmdb-test-jar-with-dependencies.jar",
102 |                 "-db",
103 |                 str(self.filepath),
104 |                 "-ip",
105 |                 self.ip,
106 |             ]
107 |         )
108 |         java_data = json.loads(java_data_str)
109 |         should_data = self.origin_data.dict()
110 | 
111 |         # java bytes marshal as i8 list
112 |         should_data = self.convert_bytes(
113 |             should_data, lambda x: [i if i <= 127 else i - 256 for i in x]
114 |         )
115 |         self.assertDictEqual(should_data, java_data)
116 | 
117 |     def test_go(self):
118 |         go_dir = BASE_DIR / "clients" / "go"
119 |         self.assertTrue(go_dir.exists())
120 |         os.chdir(go_dir)
121 |         go_data_str = run(
122 |             ["go", "run", "main.go", "-db", str(self.filepath), "-ip", self.ip]
123 |         )
124 |         go_data = json.loads(go_data_str)
125 | 
126 |         should_data = self.origin_data.dict()
127 |         # go bytes marshal as base64 str
128 |         should_data = self.convert_bytes(
129 |             should_data, lambda x: base64.b64encode(x).decode()
130 |         )
131 |         self.assertDictEqual(should_data, go_data)
132 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist =
 3 |     py3{13,12,11,10,9,8}
 4 |     lint
 5 | skip_missing_interpreters = true
 6 | 
 7 | [testenv]
 8 | description = run unit tests
 9 | extras = test
10 | commands = pytest
11 | 
12 | [testenv:lint]
13 | extras = dev
14 | commands =
15 |     ruff check --no-fix
16 |     ruff format --check
17 | 


--------------------------------------------------------------------------------