├── .github
│   └── workflows
│       └── actions.yml
├── LICENSE
├── README.md
├── data
│   └── geoip2-ipv4.csv
├── datapackage.json
└── scripts
    ├── process.py
    └── requirements.txt
/.github/workflows/actions.yml:
--------------------------------------------------------------------------------
name: Automated geoip2-ipv4 update

on:
  schedule:
    # Run weekly (Sundays at 00:00)
    - cron: '0 0 * * 0'
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

# The job pushes the refreshed dataset back to the repository, so the
# workflow token needs write access to repository contents.
permissions:
  contents: write

jobs:
  build:
    runs-on: ubuntu-latest

    # Only run for the main branch itself; runs whose ref is anything else
    # (for example a pull request merge ref) are skipped.
    if: github.ref == 'refs/heads/main'

    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          pip install -r scripts/requirements.txt

      - name: Run geoip2-ipv4 Update Script
        env:
          # MaxMind account ID and license key, stored as repository secrets
          MAXMIND_USERNAME: ${{ secrets.MAXMIND_USERNAME }}
          MAXMIND_PASSWORD: ${{ secrets.MAXMIND_PASSWORD }}
        run: |
          python scripts/process.py

      - name: Configure Git
        env:
          CI_COMMIT_NAME: 'Automated commit'
          CI_COMMIT_EMAIL: 'actions@users.noreply.github.com'
        run: |
          git config --global user.email "$CI_COMMIT_EMAIL"
          git config --global user.name "$CI_COMMIT_NAME"

      - name: Commit and Push changes
        env:
          CI_COMMIT_MESSAGE: 'Automated commit'
        run: |
          # Stage first and compare the index against HEAD, so that a newly
          # created (untracked) data file is detected as a change too.
          git add data/
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "$CI_COMMIT_MESSAGE"
            git push origin main
          fi
58 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | CC0 1.0 Universal
2 |
3 | Statement of Purpose
4 |
5 | The laws of most jurisdictions throughout the world automatically confer
6 | exclusive Copyright and Related Rights (defined below) upon the creator and
7 | subsequent owner(s) (each and all, an "owner") of an original work of
8 | authorship and/or a database (each, a "Work").
9 |
10 | Certain owners wish to permanently relinquish those rights to a Work for the
11 | purpose of contributing to a commons of creative, cultural and scientific
12 | works ("Commons") that the public can reliably and without fear of later
13 | claims of infringement build upon, modify, incorporate in other works, reuse
14 | and redistribute as freely as possible in any form whatsoever and for any
15 | purposes, including without limitation commercial purposes. These owners may
16 | contribute to the Commons to promote the ideal of a free culture and the
17 | further production of creative, cultural and scientific works, or to gain
18 | reputation or greater distribution for their Work in part through the use and
19 | efforts of others.
20 |
21 | For these and/or other purposes and motivations, and without any expectation
22 | of additional consideration or compensation, the person associating CC0 with a
23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
25 | and publicly distribute the Work under its terms, with knowledge of his or her
26 | Copyright and Related Rights in the Work and the meaning and intended legal
27 | effect of CC0 on those rights.
28 |
29 | 1. Copyright and Related Rights. A Work made available under CC0 may be
30 | protected by copyright and related or neighboring rights ("Copyright and
31 | Related Rights"). Copyright and Related Rights include, but are not limited
32 | to, the following:
33 |
34 | i. the right to reproduce, adapt, distribute, perform, display, communicate,
35 | and translate a Work;
36 |
37 | ii. moral rights retained by the original author(s) and/or performer(s);
38 |
39 | iii. publicity and privacy rights pertaining to a person's image or likeness
40 | depicted in a Work;
41 |
42 | iv. rights protecting against unfair competition in regards to a Work,
43 | subject to the limitations in paragraph 4(a), below;
44 |
45 | v. rights protecting the extraction, dissemination, use and reuse of data in
46 | a Work;
47 |
48 | vi. database rights (such as those arising under Directive 96/9/EC of the
49 | European Parliament and of the Council of 11 March 1996 on the legal
50 | protection of databases, and under any national implementation thereof,
51 | including any amended or successor version of such directive); and
52 |
53 | vii. other similar, equivalent or corresponding rights throughout the world
54 | based on applicable law or treaty, and any national implementations thereof.
55 |
56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of,
57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
59 | and Related Rights and associated claims and causes of action, whether now
60 | known or unknown (including existing as well as future claims and causes of
61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum
62 | duration provided by applicable law or treaty (including future time
63 | extensions), (iii) in any current or future medium and for any number of
64 | copies, and (iv) for any purpose whatsoever, including without limitation
65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
66 | the Waiver for the benefit of each member of the public at large and to the
67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver
68 | shall not be subject to revocation, rescission, cancellation, termination, or
69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work
70 | by the public as contemplated by Affirmer's express Statement of Purpose.
71 |
72 | 3. Public License Fallback. Should any part of the Waiver for any reason be
73 | judged legally invalid or ineffective under applicable law, then the Waiver
74 | shall be preserved to the maximum extent permitted taking into account
75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
76 | is so judged Affirmer hereby grants to each affected person a royalty-free,
77 | non transferable, non sublicensable, non exclusive, irrevocable and
78 | unconditional license to exercise Affirmer's Copyright and Related Rights in
79 | the Work (i) in all territories worldwide, (ii) for the maximum duration
80 | provided by applicable law or treaty (including future time extensions), (iii)
81 | in any current or future medium and for any number of copies, and (iv) for any
82 | purpose whatsoever, including without limitation commercial, advertising or
83 | promotional purposes (the "License"). The License shall be deemed effective as
84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the
85 | License for any reason be judged legally invalid or ineffective under
86 | applicable law, such partial invalidity or ineffectiveness shall not
87 | invalidate the remainder of the License, and in such case Affirmer hereby
88 | affirms that he or she will not (i) exercise any of his or her remaining
89 | Copyright and Related Rights in the Work or (ii) assert any associated claims
90 | and causes of action with respect to the Work, in either case contrary to
91 | Affirmer's express Statement of Purpose.
92 |
93 | 4. Limitations and Disclaimers.
94 |
95 | a. No trademark or patent rights held by Affirmer are waived, abandoned,
96 | surrendered, licensed or otherwise affected by this document.
97 |
98 | b. Affirmer offers the Work as-is and makes no representations or warranties
99 | of any kind concerning the Work, express, implied, statutory or otherwise,
100 | including without limitation warranties of title, merchantability, fitness
101 | for a particular purpose, non infringement, or the absence of latent or
102 | other defects, accuracy, or the present or absence of errors, whether or not
103 | discoverable, all to the greatest extent permissible under applicable law.
104 |
105 | c. Affirmer disclaims responsibility for clearing rights of other persons
106 | that may apply to the Work or any use thereof, including without limitation
107 | any person's Copyright and Related Rights in the Work. Further, Affirmer
108 | disclaims responsibility for obtaining any necessary consents, permissions
109 | or other rights required for any use of the Work.
110 |
111 | d. Affirmer understands and acknowledges that Creative Commons is not a
112 | party to this document and has no duty or obligation with respect to this
113 | CC0 or use of the Work.
114 |
115 | For more information, please see
116 | <http://creativecommons.org/publicdomain/zero/1.0/>
117 |
118 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | Database of IPv4 address networks with their respective geographical location.
4 |
5 | ## Data
6 |
7 | Based on GeoLite2 Country Free Downloadable Databases as of Apr 21, 2015 http://dev.maxmind.com/geoip/geoip2/geolite2/
8 |
9 | Two files were used to generate this dataset:
10 |
11 | ```
12 | GeoLite2-Country-Blocks-IPv4.csv
13 | GeoLite2-Country-Locations-en.csv
14 | ```
15 |
16 | with the following considerations:
17 |
18 | - Where geoname_id was not available, registered_country_geoname_id was used.
19 | - Where geoname_id and registered_country_geoname_id were both empty, geoname_id, continent_code, continent_name, country_iso_code and country_name are left empty.
20 |
21 | ## Preparation
22 |
23 | The process is recorded and automated in a Python script:
24 |
25 | ```bash
26 | # Install the requirements
27 | pip install -r scripts/requirements.txt
28 |
29 | # Set your own MaxMind account ID and license key as environment variables
30 | export MAXMIND_USERNAME='<your_account_id>'
31 | export MAXMIND_PASSWORD='<your_license_key>'
32 |
33 | # Run the code
34 | python scripts/process.py
35 | ```
36 |
37 | ## Automation
38 | An up-to-date geoip2-ipv4 dataset (auto-updated every week) can be found on datahub.io: https://datahub.io/core/geoip2-ipv4
39 |
40 | ## License
41 |
42 | Datapackage: Creative Commons Zero
43 |
44 | Original CSV: This dataset includes GeoLite2 data created by MaxMind, available from www.maxmind.com
45 |
--------------------------------------------------------------------------------
/datapackage.json:
--------------------------------------------------------------------------------
1 | {
2 | "title": "IPv4 geolocation",
3 | "name": "geoip2-ipv4",
4 | "licenses": [
5 | {
6 | "name": "odc-pddl",
7 | "path": "http://opendatacommons.org/licenses/pddl/",
8 | "title": "Public Domain Dedication and License"
9 | }
10 | ],
11 | "sources": [
12 | {
13 | "title": "Maxmind GeoLite2",
14 | "path": "http://dev.maxmind.com/geoip/geoip2/geolite2/"
15 | }
16 | ],
17 | "contributors": [
18 | {
19 | "name": "Eduardo Bejar",
20 | "email": "edobejar@gmail.com",
21 | "path": "http://www.fundapi.org",
22 | "role": "maintainer"
23 | }
24 | ],
25 | "resources": [
26 | {
27 | "name": "geoip2-ipv4",
28 | "path": "data/geoip2-ipv4.csv",
29 | "format": "csv",
30 | "mediatype": "text/csv",
31 | "schema": {
32 | "fields": [
33 | {
34 | "name": "network",
35 | "description": "This is the IPv4 network in CIDR format such as 2.21.92.0/29.",
36 | "type": "string"
37 | },
38 | {
39 | "name": "geoname_id",
40 | "description": "A unique identifier for the network's location as specified by GeoNames.",
41 | "type": "integer"
42 | },
43 | {
44 | "name": "continent_code",
45 | "description": "The continent code for this IP. Possible codes are: AF - Africa, AS - Asia, EU - Europe, NA - North America, OC - Oceania, SA - South America",
46 | "type": "string"
47 | },
48 | {
49 | "name": "continent_name",
50 | "description": "The continent name for this location",
51 | "type": "string"
52 | },
53 | {
54 | "name": "country_iso_code",
55 | "description": "A two-character ISO 3166-1 country code for the country associated with the location.",
56 | "type": "string"
57 | },
58 | {
59 | "name": "country_name",
60 | "description": "The country name for this location.",
61 | "type": "string"
62 | },
63 | {
64 | "name": "is_anonymous_proxy",
65 | "description": "A 1 if the network is an anonymous proxy, otherwise 0.",
66 | "type": "boolean"
67 | },
68 | {
69 | "name": "is_satellite_provider",
70 | "description": "A 1 if the network is for a satellite provider that provides service to multiple countries, otherwise 0.",
71 | "type": "boolean"
72 | }
73 | ]
74 | }
75 | }
76 | ],
77 | "collection": "reference-data"
78 | }
--------------------------------------------------------------------------------
/scripts/process.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import zipfile
4 | import requests
5 | import pandas as pd
6 |
# Download URL of the GeoLite2 Country CSV archive (zip).
perm_URL = 'https://download.maxmind.com/geoip/databases/GeoLite2-Country-CSV/download?suffix=zip'

# MaxMind credentials, taken from the environment. Register at the MaxMind
# website to obtain them: the account ID goes into MAXMIND_USERNAME and the
# license key into MAXMIND_PASSWORD. Either value is None when unset.
username = os.environ.get('MAXMIND_USERNAME')
password = os.environ.get('MAXMIND_PASSWORD')

# Local path of the downloaded archive, and the name fragments used to find
# the two CSV members inside it.
zip_file_path = 'temp/GeoLite2-Country-CSV.zip'
ipv4 = 'GeoLite2-Country-Blocks-IPv4.csv'
location = 'GeoLite2-Country-Locations-en.csv'
17 |
# Column order of the published CSV and, in parallel, the type each column is
# written as ('str', 'int' or 'bool').
col_list = ['network',
            'geoname_id',
            'continent_code',
            'continent_name',
            'country_iso_code',
            'country_name',
            'is_anonymous_proxy',
            'is_satellite_provider']
col_types = [
    'str',
    'int',
    'str',
    'str',
    'str',
    'str',
    'bool',
    'bool'
]


def merge_data(ipv4_df, location_df):
    """Join the IPv4 network blocks with their location and write the dataset.

    Args:
        ipv4_df: DataFrame holding at least the columns ``network``,
            ``geoname_id``, ``registered_country_geoname_id``,
            ``is_anonymous_proxy`` and ``is_satellite_provider``.
        location_df: DataFrame holding at least the columns ``geoname_id``,
            ``continent_code``, ``continent_name``, ``country_iso_code`` and
            ``country_name``.

    Returns:
        None. The merged table is written to ``data/geoip2-ipv4.csv`` with the
        columns of ``col_list``; the ``data`` directory must already exist.
    """
    # .copy() gives an independent frame, so the column assignment below does
    # not write into a slice of the caller's DataFrame.
    ipv4_df = ipv4_df[['network',
                       'geoname_id',
                       'registered_country_geoname_id',
                       'is_anonymous_proxy',
                       'is_satellite_provider',
                       ]].copy()
    location_df = location_df[[
        'geoname_id',
        'continent_code',
        'continent_name',
        'country_iso_code',
        'country_name',
    ]]

    # Fill missing geoname_id with registered_country_geoname_id
    ipv4_df['geoname_id'] = ipv4_df['geoname_id'].fillna(ipv4_df['registered_country_geoname_id'])
    ipv4_df = ipv4_df.drop(columns=['registered_country_geoname_id'])

    # Left join: networks without any geoname_id are kept, with no location.
    merged = pd.merge(ipv4_df, location_df, on='geoname_id', how='left')
    merged = merged[col_list].copy()

    for col, col_type in zip(col_list, col_types):
        if col_type == 'str':
            # Missing text is written as an empty field, not the string "nan".
            merged[col] = merged[col].fillna('').astype(str)
        elif col_type == 'int':
            # Nullable integer: a missing id stays missing (empty in the CSV)
            # instead of making the integer cast fail.
            merged[col] = merged[col].astype('Int64')
        elif col_type == 'bool':
            # Flags are written as 0/1.
            merged[col] = merged[col].astype(int)

    merged = merged.drop_duplicates()
    merged.to_csv('data/geoip2-ipv4.csv', index=False)
73 |
def process_zip():
    """Load the two source CSV files from the downloaded archive.

    Opens the archive at ``zip_file_path`` and reads the first member whose
    name contains ``ipv4`` and the first whose name contains ``location``.

    Returns:
        tuple: ``(ipv4_df, location_df)`` as pandas DataFrames.

    Raises:
        FileNotFoundError: if the archive has no member matching one of the
            expected file names.
    """
    def _find_member(names, wanted):
        # First archive member whose path contains the wanted file name.
        for member in names:
            if wanted in member:
                return member
        raise FileNotFoundError(f"{wanted} not found in {zip_file_path}")

    # Treat only empty fields as missing. With the pandas defaults the text
    # "NA" is parsed as a missing value, which would wipe out the continent
    # code NA (North America).
    read_options = {'keep_default_na': False, 'na_values': ['']}

    with zipfile.ZipFile(zip_file_path, 'r') as z:
        # List all files in the ZIP archive
        file_list = z.namelist()

        ipv4_csv = _find_member(file_list, ipv4)
        location_csv = _find_member(file_list, location)

        with z.open(ipv4_csv) as csv_file:
            df1 = pd.read_csv(csv_file, **read_options)

        with z.open(location_csv) as csv_file:
            df2 = pd.read_csv(csv_file, **read_options)

    print("DataFrame loaded successfully.")
    return df1, df2
90 |
def download_zip():
    """Download the GeoLite2 Country CSV archive to ``zip_file_path``.

    Authenticates against ``perm_URL`` with HTTP basic auth, using the
    module-level ``username`` and ``password``.

    Raises:
        RuntimeError: if the credentials are not set, or if the server answers
            with a status code other than 200.
    """
    if not username or not password:
        raise RuntimeError(
            "MAXMIND_USERNAME and MAXMIND_PASSWORD must be set in the environment."
        )

    # A timeout keeps an unresponsive server from hanging the run forever.
    response = requests.get(perm_URL, auth=(username, password), timeout=60)

    if response.status_code != 200:
        print(f"Failed to download the file. Status code: {response.status_code}, Response: {response.text}")
        # Stop here: without the archive the following steps cannot succeed.
        raise RuntimeError(
            f"Download failed with status code {response.status_code}"
        )

    # Create the target directory if needed (no error when it already exists).
    target_dir = os.path.dirname(zip_file_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    with open(zip_file_path, 'wb') as file:
        file.write(response.content)
    print("Zip file downloaded successfully.")
103 |
def process():
    """Run the whole pipeline: download, load, merge and write the dataset.

    The temporary download directory ``temp`` is removed afterwards, whether
    or not the pipeline succeeded.
    """
    try:
        download_zip()
        ipv4_df, location_df = process_zip()
        merge_data(ipv4_df, location_df)
    finally:
        # ignore_errors: the directory may not exist if the download failed,
        # and a cleanup problem must not hide the original exception.
        shutil.rmtree('temp', ignore_errors=True)


if __name__ == '__main__':
    process()
--------------------------------------------------------------------------------
/scripts/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas==2.2.3
2 | requests==2.32.3
--------------------------------------------------------------------------------