├── .gitignore ├── .idea ├── clickhouse-maxmind-geoip.iml ├── misc.xml ├── modules.xml ├── php.xml └── vcs.xml ├── Dockerfile ├── LICENSE ├── README.md ├── clickhouse └── dictionaries │ ├── geoip_asn_blocks_ipv4_dictionary.xml │ ├── geoip_asn_blocks_ipv6_dictionary.xml │ ├── geoip_city_blocks_ipv4_dictionary.xml │ ├── geoip_city_blocks_ipv6_dictionary.xml │ ├── geoip_city_locations_en_dictionary.xml │ ├── geoip_country_blocks_ipv4_dictionary.xml │ ├── geoip_country_blocks_ipv6_dictionary.xml │ └── geoip_country_locations_en_dictionary.xml ├── docker-compose.yml ├── functions.sql ├── kubernetes-example.yaml └── schema.sql /.gitignore: -------------------------------------------------------------------------------- 1 | .env -------------------------------------------------------------------------------- /.idea/clickhouse-maxmind-geoip.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Blade files 24 | 25 | 26 | CSS 27 | 28 | 29 | Code StylePHP 30 | 31 | 32 | Ini Files 33 | 34 | 35 | Invalid elementsCSS 36 | 37 | 38 | PHP 39 | 40 | 41 | Type compatibilityPHP 42 | 43 | 44 | UnusedPHP 45 | 46 | 47 | 48 | 49 | AngularJS 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/php.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM clickhouse/clickhouse-server:22.8 2 | 3 | RUN apt-get update \ 4 | && apt-get install --yes --no-install-recommends \ 5 | unzip \ 6 | && rm -rf \ 7 | /var/lib/apt/lists/* \ 8 | /var/cache/debconf \ 9 | /tmp/* \ 10 | && apt-get clean 11 | 12 | # download MaxMind GeoLite2 databases 13 | ARG GEOIP_LICENSE_KEY 14 | 15 | RUN wget "https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-City-CSV&license_key=${GEOIP_LICENSE_KEY}&suffix=zip" -O /tmp/GeoLite2-City-CSV.zip 16 | RUN wget "https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-ASN-CSV&license_key=${GEOIP_LICENSE_KEY}&suffix=zip" -O /tmp/GeoLite2-ASN-CSV.zip 17 | RUN wget "https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-Country-CSV&license_key=${GEOIP_LICENSE_KEY}&suffix=zip" -O /tmp/GeoLite2-Country-CSV.zip 18 | 19 | RUN unzip /tmp/GeoLite2-City-CSV.zip -d /tmp 20 | RUN unzip /tmp/GeoLite2-Country-CSV.zip -d /tmp 21 | RUN unzip /tmp/GeoLite2-ASN-CSV.zip -d /tmp 22 | 23 | # to avoid merging with `conf.d` during loading *_dictionary.xml 24 | RUN mkdir -p /etc/clickhouse-server/dictionaries/ && mv -v /tmp/*/*.csv /etc/clickhouse-server/dictionaries/ 25 | ADD clickhouse /etc/clickhouse-server/ 26 | RUN echo 'dictionaries/*.xml' > /etc/clickhouse-server/config.d/dictionaries.xml 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Example of ClickHouse integration with MaxMind GeoLite2 database for geolocation. 2 | ================================================================================= 3 | 4 | This project contains: 5 | - Dictionary definitions for integrating [GeoLite2](https://dev.maxmind.com/geoip/geoip2/geolite2/) or [GeoIp2](https://dev.maxmind.com/geoip/geoip2/downloadable/) dictionaries into [ClickHouse](https://clickhouse.yandex/) database. 6 | - [Table definitions](schema.sql) based on these dictionaries. 7 | - Query examples of how you can use them with example results. 8 | - [Dockerfile](Dockerfile) / [docker-compose.yml](docker-compose.yml) files for starting ClickHouse with the GeoLite2 dictionaries inside for fast experimenting. 9 | - A workaround to load GeoLite2-City-Locations-en.csv which ClickHouse [considers corrupted](https://github.com/yandex/ClickHouse/issues/2829) because of apostrophe symbols. 10 | 11 | More on GeoLite2/GeoIp2 dictionaries structure and content can be found here: 12 | * https://dev.maxmind.com/geoip/geoip2/geoip2-city-country-csv-databases/ 13 | * https://dev.maxmind.com/geoip/geoip2/geolite2-asn-csv-database/ 14 | 15 | For successfull build docker image create personal account on https://maxmind.com and use the following command 16 | ```bash 17 | GEOIP_LICENSE_KEY=your_maxmind_key docker-compose build clickhouse 18 | ``` 19 | 20 | After loading dictionaries they have such statistics: 21 | 22 | ```sql 23 | SELECT * 24 | FROM system.dictionaries 25 | ``` 26 | ``` 27 | ┌─name───────────────────────┬─origin───────────────────────────────────────────────────────────┬─type───┬─key──────┬─attribute.names─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─attribute.types───────────────────────────────────────────────────────────────────────────────────────────────────────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬─────────load_factor─┬───────creation_time─┬─source─────────────────────────────────────────────────────────────────────────┬─last_exception─┐ 28 | │ geoip_country_locations_en │ /etc/clickhouse-server/geoip_country_locations_en_dictionary.xml │ Hashed │ UInt64 │ ['locale_code','continent_code','continent_name','country_iso_code','country_name','is_in_european_union'] │ ['String','String','String','String','String','UInt8'] │ 160808 │ 0 │ 1 │ 252 │ 0.24609375 │ 2019-04-15 12:50:04 │ File: /etc/clickhouse-server/GeoLite2-Country-Locations-en.csv CSVWithNames │ │ 29 | │ geoip_country_blocks_ipv6 │ /etc/clickhouse-server/geoip_country_blocks_ipv6_dictionary.xml │ Trie │ (String) │ ['geoname_id','registered_country_geoname_id','represented_country_geoname_id','is_anonymous_proxy','is_satellite_provider'] │ ['UInt32','UInt32','UInt32','UInt8','UInt8'] │ 13738664 │ 0 │ 1 │ 92570 │ 1 │ 2019-04-15 12:50:04 │ File: /etc/clickhouse-server/GeoLite2-Country-Blocks-IPv6.csv CSVWithNames │ │ 30 | │ geoip_asn_blocks_ipv4 │ /etc/clickhouse-server/geoip_asn_blocks_ipv4_dictionary.xml │ Trie │ (String) │ ['autonomous_system_number','autonomous_system_organization'] │ ['UInt32','String'] │ 57925936 │ 0 │ 1 │ 428088 │ 1 │ 2019-04-15 12:49:51 │ File: /etc/clickhouse-server/GeoLite2-ASN-Blocks-IPv4.csv CSVWithNames │ │ 31 | │ geoip_city_blocks_ipv6 │ /etc/clickhouse-server/geoip_city_blocks_ipv6_dictionary.xml │ Trie │ (String) │ ['geoname_id','registered_country_geoname_id','represented_country_geoname_id','is_anonymous_proxy','is_satellite_provider','postal_code','latitude','longitude','accuracy_radius'] │ ['UInt32','UInt32','UInt32','UInt8','UInt8','String','Float32','Float32','UInt32'] │ 57222376 │ 0 │ 1 │ 440302 │ 1 │ 2019-04-15 12:50:03 │ File: /etc/clickhouse-server/GeoLite2-City-Blocks-IPv6.csv CSVWithNames │ │ 32 | │ geoip_asn_blocks_ipv6 │ /etc/clickhouse-server/geoip_asn_blocks_ipv6_dictionary.xml │ Trie │ (String) │ ['autonomous_system_number','autonomous_system_organization'] │ ['UInt32','String'] │ 11903280 │ 0 │ 1 │ 55741 │ 1 │ 2019-04-15 12:49:51 │ File: /etc/clickhouse-server/GeoLite2-ASN-Blocks-IPv6.csv CSVWithNames │ │ 33 | │ geoip_city_blocks_ipv4 │ /etc/clickhouse-server/geoip_city_blocks_ipv4_dictionary.xml │ Trie │ (String) │ ['geoname_id','registered_country_geoname_id','represented_country_geoname_id','is_anonymous_proxy','is_satellite_provider','postal_code','latitude','longitude','accuracy_radius'] │ ['UInt32','UInt32','UInt32','UInt8','UInt8','String','Float32','Float32','UInt32'] │ 399348968 │ 0 │ 1 │ 3223012 │ 1 │ 2019-04-15 12:50:01 │ File: /etc/clickhouse-server/GeoLite2-City-Blocks-IPv4.csv CSVWithNames │ │ 34 | │ geoip_city_locations_en │ /etc/clickhouse-server/geoip_city_locations_en_dictionary.xml │ Hashed │ UInt64 │ ['locale_code','continent_code','continent_name','country_iso_code','country_name','subdivision_1_iso_code','subdivision_1_name','subdivision_2_iso_code','subdivision_2_name','city_name','metro_code','time_zone','is_in_european_union'] │ ['String','String','String','String','String','String','String','String','String','String','UInt32','String','UInt8'] │ 87644424 │ 0 │ 1 │ 111302 │ 0.42458343505859375 │ 2019-04-15 12:50:03 │ File: /etc/clickhouse-server/GeoLite2-City-Locations-en-fixed.csv CSVWithNames │ │ 35 | │ geoip_country_blocks_ipv4 │ /etc/clickhouse-server/geoip_country_blocks_ipv4_dictionary.xml │ Trie │ (String) │ ['geoname_id','registered_country_geoname_id','represented_country_geoname_id','is_anonymous_proxy','is_satellite_provider'] │ ['UInt32','UInt32','UInt32','UInt8','UInt8'] │ 28603048 │ 0 │ 1 │ 330017 │ 1 │ 2019-04-15 12:50:03 │ File: /etc/clickhouse-server/GeoLite2-Country-Blocks-IPv4.csv CSVWithNames │ │ 36 | └────────────────────────────┴──────────────────────────────────────────────────────────────────┴────────┴──────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴─────────────────────┴─────────────────────┴────────────────────────────────────────────────────────────────────────────────┴────────────────┘ 37 | 38 | ``` 39 | 40 | GeoLite2-City-CSV queries 41 | ========================= 42 | 43 | ```sql 44 | SELECT 45 | ip, 46 | -- geoip_city_blocks_ipv4 dictionary 47 | dictGetUInt32('geoip_city_blocks_ipv4', 'geoname_id', tuple(IPv4StringToNum(ip))) AS geoname_id, 48 | dictGetString('geoip_city_blocks_ipv4', 'postal_code', tuple(IPv4StringToNum(ip))) AS postcode, 49 | dictGetFloat32('geoip_city_blocks_ipv4', 'latitude', tuple(IPv4StringToNum(ip))) AS latitude, 50 | dictGetFloat32('geoip_city_blocks_ipv4', 'longitude', tuple(IPv4StringToNum(ip))) AS longitude, 51 | dictGetUInt32('geoip_city_blocks_ipv4', 'accuracy_radius', tuple(IPv4StringToNum(ip))) AS accuracy_radius, 52 | -- geoip_city_locations_en dictionary 53 | dictGetString('geoip_city_locations_en', 'locale_code', toUInt64(geoname_id)) AS locale_code, 54 | dictGetString('geoip_city_locations_en', 'continent_code', toUInt64(geoname_id)) AS continent_code, 55 | dictGetString('geoip_city_locations_en', 'continent_name', toUInt64(geoname_id)) AS continent_name, 56 | dictGetString('geoip_city_locations_en', 'country_iso_code', toUInt64(geoname_id)) AS country_iso_code, 57 | dictGetString('geoip_city_locations_en', 'country_name', toUInt64(geoname_id)) AS country_name, 58 | dictGetString('geoip_city_locations_en', 'subdivision_1_iso_code', toUInt64(geoname_id)) AS subdivision_1_iso_code, 59 | dictGetString('geoip_city_locations_en', 'subdivision_1_name', toUInt64(geoname_id)) AS subdivision_1_name, 60 | dictGetString('geoip_city_locations_en', 'subdivision_2_iso_code', toUInt64(geoname_id)) AS subdivision_2_iso_code, 61 | dictGetString('geoip_city_locations_en', 'subdivision_2_name', toUInt64(geoname_id)) AS subdivision_2_name, 62 | dictGetString('geoip_city_locations_en', 'city_name', toUInt64(geoname_id)) AS city_name, 63 | dictGetUInt32('geoip_city_locations_en', 'metro_code', toUInt64(geoname_id)) AS metro_code, 64 | dictGetString('geoip_city_locations_en', 'time_zone', toUInt64(geoname_id)) AS time_zone, 65 | dictGetUInt8('geoip_city_locations_en', 'is_in_european_union', toUInt64(geoname_id)) AS is_in_european_union 66 | FROM 67 | ( 68 | SELECT arrayJoin(['129.45.17.12', '173.194.112.139', '77.88.55.66', '2.28.228.0', '95.47.254.1', '62.35.172.0']) AS ip 69 | ) 70 | ``` 71 | ``` 72 | ┌─ip──────────────┬─geoname_id─┬─postcode─┬─latitude─┬─longitude─┬─accuracy_radius─┬─locale_code─┬─continent_code─┬─continent_name─┬─country_iso_code─┬─country_name───┬─subdivision_1_iso_code─┬─subdivision_1_name─┬─subdivision_2_iso_code─┬─subdivision_2_name─┬─city_name─────────────┬─metro_code─┬─time_zone──────┬─is_in_european_union─┐ 73 | │ 129.45.17.12 │ 2507480 │ 16100 │ 36.7405 │ 3.0096 │ 10 │ en │ AF │ Africa │ DZ │ Algeria │ 16 │ Algiers │ │ │ Algiers │ │ Africa/Algiers │ 0 │ 74 | │ 173.194.112.139 │ 6252001 │ │ 37.751 │ -97.822 │ 1000 │ en │ NA │ North America │ US │ United States │ │ │ │ │ │ │ │ 0 │ 75 | │ 77.88.55.66 │ 2017370 │ │ 55.7386 │ 37.6068 │ 1000 │ en │ EU │ Europe │ RU │ Russia │ │ │ │ │ │ │ │ 0 │ 76 | │ 2.28.228.0 │ 2640910 │ EH35 │ 55.913 │ -2.9398 │ 5 │ en │ EU │ Europe │ GB │ United Kingdom │ SCT │ Scotland │ ELN │ East Lothian │ Ormiston │ │ Europe/London │ 1 │ 77 | │ 95.47.254.1 │ 3077311 │ │ 50.0848 │ 14.4112 │ 100 │ en │ EU │ Europe │ CZ │ Czechia │ │ │ │ │ │ │ Europe/Prague │ 1 │ 78 | │ 62.35.172.0 │ 2983987 │ 53110 │ 48.4833 │ -0.4833 │ 100 │ en │ EU │ Europe │ FR │ France │ PDL │ Pays de la Loire │ 53 │ Mayenne │ Rennes-en-Grenouilles │ │ Europe/Paris │ 1 │ 79 | └─────────────────┴────────────┴──────────┴──────────┴───────────┴─────────────────┴─────────────┴────────────────┴────────────────┴──────────────────┴────────────────┴────────────────────────┴────────────────────┴────────────────────────┴────────────────────┴───────────────────────┴────────────┴────────────────┴──────────────────────┘ 80 | ``` 81 | 82 | GeoLite2-Country-CSV queries 83 | ============================ 84 | 85 | ```sql 86 | SELECT 87 | ip, 88 | -- geoip_country_blocks_ipv4 dictionary 89 | dictGetUInt32('geoip_country_blocks_ipv4', 'geoname_id', tuple(IPv4StringToNum(ip))) AS geoname_id, 90 | -- geoip_country_locations_en dictionary 91 | dictGetString('geoip_country_locations_en', 'locale_code', toUInt64(geoname_id)) AS locale_code, 92 | dictGetString('geoip_country_locations_en', 'continent_code', toUInt64(geoname_id)) AS continent_code, 93 | dictGetString('geoip_country_locations_en', 'continent_name', toUInt64(geoname_id)) AS continent_name, 94 | dictGetString('geoip_country_locations_en', 'country_iso_code', toUInt64(geoname_id)) AS country_iso_code, 95 | dictGetString('geoip_country_locations_en', 'country_name', toUInt64(geoname_id)) AS country_name, 96 | dictGetUInt8('geoip_country_locations_en', 'is_in_european_union', toUInt64(geoname_id)) AS is_in_european_union 97 | FROM 98 | ( 99 | SELECT arrayJoin(['129.45.17.12', '173.194.112.139', '77.88.55.66', '2.28.228.0', '95.47.254.1', '62.35.172.0']) AS ip 100 | ) 101 | ``` 102 | ``` 103 | ┌─ip──────────────┬─geoname_id─┬─locale_code─┬─continent_code─┬─continent_name─┬─country_iso_code─┬─country_name───┬─is_in_european_union─┐ 104 | │ 129.45.17.12 │ 2589581 │ en │ AF │ Africa │ DZ │ Algeria │ 0 │ 105 | │ 173.194.112.139 │ 6252001 │ en │ NA │ North America │ US │ United States │ 0 │ 106 | │ 77.88.55.66 │ 2017370 │ en │ EU │ Europe │ RU │ Russia │ 0 │ 107 | │ 2.28.228.0 │ 2635167 │ en │ EU │ Europe │ GB │ United Kingdom │ 1 │ 108 | │ 95.47.254.1 │ 3077311 │ en │ EU │ Europe │ CZ │ Czechia │ 1 │ 109 | │ 62.35.172.0 │ 3017382 │ en │ EU │ Europe │ FR │ France │ 1 │ 110 | └─────────────────┴────────────┴─────────────┴────────────────┴────────────────┴──────────────────┴────────────────┴──────────────────────┘ 111 | ``` 112 | 113 | GeoLite2-ASN-CSV queries 114 | ======================== 115 | 116 | ```sql 117 | SELECT 118 | ip, 119 | -- geoip_asn_blocks_ipv4 dictionary 120 | dictGetUInt32('geoip_asn_blocks_ipv4', 'autonomous_system_number', tuple(IPv4StringToNum(ip))) AS autonomous_system_number, 121 | dictGetString('geoip_asn_blocks_ipv4', 'autonomous_system_organization', tuple(IPv4StringToNum(ip))) AS autonomous_system_organization 122 | FROM 123 | ( 124 | SELECT arrayJoin(['129.45.17.12', '173.194.112.139', '77.88.55.66', '2.28.228.0', '95.47.254.1', '62.35.172.0']) AS ip 125 | ) 126 | ``` 127 | ``` 128 | ┌─ip──────────────┬─autonomous_system_number─┬─autonomous_system_organization─┐ 129 | │ 129.45.17.12 │ 327931 │ Optimum-Telecom-Algeria │ 130 | │ 173.194.112.139 │ 15169 │ Google LLC │ 131 | │ 77.88.55.66 │ 13238 │ YANDEX LLC │ 132 | │ 2.28.228.0 │ 12576 │ EE Limited │ 133 | │ 95.47.254.1 │ 47552 │ Vezet-Kirov Ltd. │ 134 | │ 62.35.172.0 │ 5410 │ Bouygues Telecom SA │ 135 | └─────────────────┴──────────────────────────┴────────────────────────────────┘ 136 | 137 | ``` 138 | 139 | Note on IPv6 140 | ============ 141 | 142 | * Use dictionaries postfixed with `..._ipv6` instead of `..._ipv4` 143 | * Use `IPv6StringToNum()` instead of `IPv4StringToNum()` 144 | 145 | An example: 146 | ```sql 147 | SELECT 148 | ip, 149 | dictGetString('geoip_asn_blocks_ipv6', 'autonomous_system_organization', tuple(toFixedString(ifNull(IPv6StringToNum(ip), ''), 16))) AS autonomous_system_organization, 150 | dictGetFloat32('geoip_city_blocks_ipv6', 'latitude', tuple(toFixedString(ifNull(IPv6StringToNum(ip), ''), 16))) AS latitude, 151 | dictGetFloat32('geoip_city_blocks_ipv6', 'longitude', tuple(toFixedString(ifNull(IPv6StringToNum(ip), ''), 16))) AS longitude 152 | FROM 153 | ( 154 | SELECT arrayJoin(['2001:4860:4860::8888', '2a02:6b8::feed:bad']) AS ip 155 | ) 156 | ``` 157 | ``` 158 | ┌─ip───────────────────┬─autonomous_system_organization─┬─latitude─┬─longitude─┐ 159 | │ 2001:4860:4860::8888 │ Google LLC │ 37.751 │ -97.822 │ 160 | │ 2a02:6b8::feed:bad │ YANDEX LLC │ 55.7527 │ 37.6172 │ 161 | └──────────────────────┴────────────────────────────────┴──────────┴───────────┘ 162 | ``` 163 | 164 | User Defined Functions 165 | ====================== 166 | 167 | The file [`functions.sql`](functions.sql) has example functions you can use as an alternate way to query the dictionaries. You can pass either IPv4 or IPv6 to these functions. 168 | 169 | ```sql 170 | SELECT 171 | ip, 172 | maxmind_asn(ip) AS asn, 173 | maxmind_org(ip) AS org, 174 | maxmind_country(ip) AS country, 175 | maxmind_subdivision1(ip) AS subdivision1, 176 | maxmind_subdivision2(ip) AS subdivision2, 177 | maxmind_city(ip) AS city 178 | FROM 179 | ( 180 | SELECT arrayJoin(['129.45.17.12', '173.194.112.139', '77.88.55.66', '2.28.228.0', '95.47.254.1', '62.35.172.0', '2001:4860:4860::8888', '2607:f8b0:4001:c24::65', '2606:4700:4700::1111', '2600:9000:254a:6000:7:49a5:5fd2:2221']) AS ip 181 | ) 182 | ``` 183 | 184 | ``` 185 | ┌─ip───────────────────────────────────┬────asn─┬─org─────────────────────┬─country────────┬─subdivision1───────┬─subdivision2──────┬─city───────────┐ 186 | │ 129.45.17.12 │ 327931 │ Optimum-Telecom-Algeria │ Algeria │ Aïn Defla │ │ Ain Defla │ 187 | │ 173.194.112.139 │ 15169 │ GOOGLE │ United States │ │ │ │ 188 | │ 77.88.55.66 │ 13238 │ YANDEX LLC │ Russia │ │ │ │ 189 | │ 2.28.228.0 │ 12576 │ EE Limited │ United Kingdom │ Scotland │ East Lothian │ Tranent │ 190 | │ 95.47.254.1 │ 44546 │ ALFA TELECOM s.r.o. │ Ukraine │ Volyn │ │ Kovel │ 191 | │ 62.35.172.0 │ 5410 │ Bouygues Telecom SA │ France │ Nouvelle-Aquitaine │ Charente-Maritime │ Rochefort │ 192 | │ 2001:4860:4860::8888 │ 15169 │ GOOGLE │ United States │ │ │ │ 193 | │ 2607:f8b0:4001:c24::65 │ 15169 │ GOOGLE │ United States │ Iowa │ │ Council Bluffs │ 194 | │ 2606:4700:4700::1111 │ 13335 │ CLOUDFLARENET │ United States │ │ │ │ 195 | │ 2600:9000:254a:6000:7:49a5:5fd2:2221 │ 16509 │ AMAZON-02 │ United States │ │ │ │ 196 | └──────────────────────────────────────┴────────┴─────────────────────────┴────────────────┴────────────────────┴───────────────────┴────────────────┘ 197 | ``` 198 | 199 | There is also a `maxmind` function which takes the type of attribute as the first parameter. 200 | 201 | ```sql 202 | SELECT 203 | ip, 204 | maxmind('asn', ip) AS asn, 205 | maxmind('org', ip) AS org, 206 | maxmind('country', ip) AS country, 207 | maxmind('subdivision1', ip) AS subdivision1, 208 | maxmind('state', ip) AS state, -- alias for subdivision1 209 | maxmind('subdivision2', ip) AS subdivision2, 210 | maxmind('city', ip) AS city 211 | FROM 212 | ( 213 | SELECT arrayJoin(['129.45.17.12', '173.194.112.139', '77.88.55.66', '2.28.228.0', '95.47.254.1', '62.35.172.0', '2001:4860:4860::8888', '2607:f8b0:4001:c24::65', '2606:4700:4700::1111', '2600:9000:254a:6000:7:49a5:5fd2:2221']) AS ip 214 | ) 215 | ``` 216 | -------------------------------------------------------------------------------- /clickhouse/dictionaries/geoip_asn_blocks_ipv4_dictionary.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | geoip_asn_blocks_ipv4 5 | 6 | 7 | /etc/clickhouse-server/GeoLite2-ASN-Blocks-IPv4.csv 8 | CSVWithNames 9 | 10 | 11 | 300 12 | 13 | 14 | 15 | 16 | true 17 | 18 | 19 | 20 | 21 | 22 | network 23 | String 24 | 25 | 26 | 27 | autonomous_system_number 28 | UInt32 29 | 0 30 | 31 | 32 | autonomous_system_organization 33 | String 34 | ? 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /clickhouse/dictionaries/geoip_asn_blocks_ipv6_dictionary.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | geoip_asn_blocks_ipv6 5 | 6 | 7 | /etc/clickhouse-server/GeoLite2-ASN-Blocks-IPv6.csv 8 | CSVWithNames 9 | 10 | 11 | 300 12 | 13 | 14 | 15 | 16 | true 17 | 18 | 19 | 20 | 21 | 22 | network 23 | String 24 | 25 | 26 | 27 | autonomous_system_number 28 | UInt32 29 | 0 30 | 31 | 32 | autonomous_system_organization 33 | String 34 | ? 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /clickhouse/dictionaries/geoip_city_blocks_ipv4_dictionary.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | geoip_city_blocks_ipv4 5 | 6 | 7 | /etc/clickhouse-server/GeoLite2-City-Blocks-IPv4.csv 8 | CSVWithNames 9 | 10 | 11 | 300 12 | 13 | 14 | 15 | 16 | true 17 | 18 | 19 | 20 | 21 | 22 | network 23 | String 24 | 25 | 26 | 27 | geoname_id 28 | UInt32 29 | 0 30 | 31 | 32 | registered_country_geoname_id 33 | UInt32 34 | 0 35 | 36 | 37 | represented_country_geoname_id 38 | UInt32 39 | 0 40 | 41 | 42 | is_anonymous_proxy 43 | UInt8 44 | 0 45 | 46 | 47 | is_satellite_provider 48 | UInt8 49 | 0 50 | 51 | 52 | postal_code 53 | String 54 | ? 55 | 56 | 57 | latitude 58 | Float32 59 | 0 60 | 61 | 62 | longitude 63 | Float32 64 | 0 65 | 66 | 67 | accuracy_radius 68 | UInt32 69 | 0 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /clickhouse/dictionaries/geoip_city_blocks_ipv6_dictionary.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | geoip_city_blocks_ipv6 5 | 6 | 7 | /etc/clickhouse-server/GeoLite2-City-Blocks-IPv6.csv 8 | CSVWithNames 9 | 10 | 11 | 300 12 | 13 | 14 | 15 | 16 | true 17 | 18 | 19 | 20 | 21 | 22 | network 23 | String 24 | 25 | 26 | 27 | geoname_id 28 | UInt32 29 | 0 30 | 31 | 32 | registered_country_geoname_id 33 | UInt32 34 | 0 35 | 36 | 37 | represented_country_geoname_id 38 | UInt32 39 | 0 40 | 41 | 42 | is_anonymous_proxy 43 | UInt8 44 | 0 45 | 46 | 47 | is_satellite_provider 48 | UInt8 49 | 0 50 | 51 | 52 | postal_code 53 | String 54 | ? 55 | 56 | 57 | latitude 58 | Float32 59 | 0 60 | 61 | 62 | longitude 63 | Float32 64 | 0 65 | 66 | 67 | accuracy_radius 68 | UInt32 69 | 0 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /clickhouse/dictionaries/geoip_city_locations_en_dictionary.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | geoip_city_locations_en 5 | 6 | 7 | /etc/clickhouse-server/GeoLite2-City-Locations-en.csv 8 | CSVWithNames 9 | 10 | 11 | 0 12 | 13 | 14 | 300 15 | 16 | 17 | 18 | 19 | 20 | geoname_id 21 | 22 | 23 | locale_code 24 | String 25 | 26 | 27 | 28 | continent_code 29 | String 30 | 31 | 32 | 33 | continent_name 34 | String 35 | 36 | 37 | 38 | country_iso_code 39 | String 40 | 41 | 42 | 43 | country_name 44 | String 45 | 46 | 47 | 48 | subdivision_1_iso_code 49 | String 50 | 51 | 52 | 53 | subdivision_1_name 54 | String 55 | 56 | 57 | 58 | subdivision_2_iso_code 59 | String 60 | 61 | 62 | 63 | subdivision_2_name 64 | String 65 | 66 | 67 | 68 | city_name 69 | String 70 | 71 | 72 | 73 | metro_code 74 | UInt32 75 | 0 76 | 77 | 78 | time_zone 79 | String 80 | 81 | 82 | 83 | is_in_european_union 84 | UInt8 85 | 0 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /clickhouse/dictionaries/geoip_country_blocks_ipv4_dictionary.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | geoip_country_blocks_ipv4 5 | 6 | 7 | /etc/clickhouse-server/GeoLite2-Country-Blocks-IPv4.csv 8 | CSVWithNames 9 | 10 | 11 | 300 12 | 13 | 14 | 15 | 16 | true 17 | 18 | 19 | 20 | 21 | 22 | network 23 | String 24 | 25 | 26 | 27 | geoname_id 28 | UInt32 29 | 0 30 | 31 | 32 | registered_country_geoname_id 33 | UInt32 34 | 0 35 | 36 | 37 | represented_country_geoname_id 38 | UInt32 39 | 0 40 | 41 | 42 | is_anonymous_proxy 43 | UInt8 44 | 0 45 | 46 | 47 | is_satellite_provider 48 | UInt8 49 | 0 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /clickhouse/dictionaries/geoip_country_blocks_ipv6_dictionary.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | geoip_country_blocks_ipv6 5 | 6 | 7 | /etc/clickhouse-server/GeoLite2-Country-Blocks-IPv6.csv 8 | CSVWithNames 9 | 10 | 11 | 300 12 | 13 | 14 | 15 | 16 | true 17 | 18 | 19 | 20 | 21 | 22 | network 23 | String 24 | 25 | 26 | 27 | geoname_id 28 | UInt32 29 | 0 30 | 31 | 32 | registered_country_geoname_id 33 | UInt32 34 | 0 35 | 36 | 37 | represented_country_geoname_id 38 | UInt32 39 | 0 40 | 41 | 42 | is_anonymous_proxy 43 | UInt8 44 | 0 45 | 46 | 47 | is_satellite_provider 48 | UInt8 49 | 0 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /clickhouse/dictionaries/geoip_country_locations_en_dictionary.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | geoip_country_locations_en 5 | 6 | 7 | /etc/clickhouse-server/GeoLite2-Country-Locations-en.csv 8 | CSVWithNames 9 | 10 | 11 | 300 12 | 13 | 14 | 15 | 16 | 17 | geoname_id 18 | 19 | 20 | locale_code 21 | String 22 | 23 | 24 | 25 | continent_code 26 | String 27 | 28 | 29 | 30 | continent_name 31 | String 32 | 33 | 34 | 35 | country_iso_code 36 | String 37 | 38 | 39 | 40 | country_name 41 | String 42 | 43 | 44 | 45 | is_in_european_union 46 | UInt8 47 | 0 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | clickhouse: 4 | image: clickhousepro/clickhouse-server-maxmind:22.8 5 | build: 6 | context: . 7 | args: 8 | - GEOIP_LICENSE_KEY=${GEOIP_LICENSE_KEY:-create_account_on_maxmind.com} 9 | ports: 10 | - "8123:8123" 11 | - "9000:9000" 12 | -------------------------------------------------------------------------------- /functions.sql: -------------------------------------------------------------------------------- 1 | DROP FUNCTION IF EXISTS maxmind_asn; 2 | CREATE FUNCTION maxmind_asn AS (ip) -> 3 | multiIf( 4 | isIPv4String(ip), dictGetUInt32('geoip_asn_blocks_ipv4', 5 | 'autonomous_system_number', 6 | tuple(IPv4StringToNumOrDefault(toString(ip)))), 7 | isIPv6String(ip), dictGetUInt32('geoip_asn_blocks_ipv6', 8 | 'autonomous_system_number', 9 | tuple(IPv6StringToNumOrDefault(toString(ip)))), 10 | NULL 11 | ) 12 | ; 13 | DROP FUNCTION IF EXISTS maxmind_org; 14 | CREATE FUNCTION maxmind_org AS (ip) -> 15 | multiIf( 16 | isIPv4String(ip), dictGetString('geoip_asn_blocks_ipv4', 17 | 'autonomous_system_organization', 18 | tuple(IPv4StringToNumOrDefault(toString(ip)))), 19 | isIPv6String(ip), dictGetString('geoip_asn_blocks_ipv6', 20 | 'autonomous_system_organization', 21 | tuple(IPv6StringToNumOrDefault(toString(ip)))), 22 | NULL 23 | ) 24 | ; 25 | DROP FUNCTION IF EXISTS maxmind_geoname_id; 26 | CREATE FUNCTION maxmind_geoname_id AS (ip) -> 27 | toUInt64(multiIf( 28 | isIPv4String(ip), dictGetUInt32('geoip_city_blocks_ipv4', 29 | 'geoname_id', 30 | tuple(IPv4StringToNumOrDefault(toString(ip)))), 31 | isIPv6String(ip), dictGetUInt32('geoip_city_blocks_ipv6', 32 | 'geoname_id', 33 | tuple(IPv6StringToNumOrDefault(toString(ip)))), 34 | 0 35 | )) 36 | ; 37 | DROP FUNCTION IF EXISTS maxmind_country; 38 | CREATE FUNCTION maxmind_country AS (ip) -> 39 | dictGetString('geoip_city_locations_en', 40 | 'country_name', 41 | maxmind_geoname_id(ip) 42 | ) 43 | ; 44 | DROP FUNCTION IF EXISTS maxmind_subdivision1; 45 | CREATE FUNCTION maxmind_subdivision1 AS (ip) -> 46 | dictGetString('geoip_city_locations_en', 47 | 'subdivision_1_name', 48 | maxmind_geoname_id(ip) 49 | ) 50 | ; 51 | DROP FUNCTION IF EXISTS maxmind_subdivision2; 52 | CREATE FUNCTION maxmind_subdivision2 AS (ip) -> 53 | dictGetString('geoip_city_locations_en', 54 | 'subdivision_2_name', 55 | maxmind_geoname_id(ip) 56 | ) 57 | ; 58 | DROP FUNCTION IF EXISTS maxmind_city; 59 | CREATE FUNCTION maxmind_city AS (ip) -> 60 | dictGetString('geoip_city_locations_en', 61 | 'city_name', 62 | maxmind_geoname_id(ip) 63 | ) 64 | ; 65 | DROP FUNCTION IF EXISTS maxmind; 66 | CREATE FUNCTION maxmind AS (type, ip) -> 67 | multiIf( 68 | type = 'asn', toString(maxmind_asn(ip)), 69 | type = 'org', maxmind_org(ip), 70 | type = 'country', maxmind_country(ip), 71 | type = 'subdivision1', maxmind_subdivision1(ip), 72 | type = 'state', maxmind_subdivision1(ip), 73 | type = 'subdivision2', maxmind_subdivision2(ip), 74 | type = 'city', maxmind_city(ip), 75 | NULL 76 | ) 77 | ; -------------------------------------------------------------------------------- /kubernetes-example.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "clickhouse.altinity.com/v1" 2 | kind: "ClickHouseInstallation" 3 | metadata: 4 | name: "maxmind-example" 5 | spec: 6 | templates: 7 | podTemplates: 8 | - name: maxmind 9 | spec: 10 | containers: 11 | - name: clickhouse 12 | # image: clickhouse/clickhouse-server:22.3 13 | image: clickhousepro/clickhouse-server-maxmind:22.3 14 | imagePullPolicy: Always 15 | defaults: 16 | templates: 17 | podTemplate: maxmind 18 | configuration: 19 | clusters: 20 | - name: "maxmind" 21 | files: 22 | config.d/dictionaries.xml: | 23 | dictionaries/*.xml -------------------------------------------------------------------------------- /schema.sql: -------------------------------------------------------------------------------- 1 | create table geoip_asn_blocks_ipv4 2 | ( 3 | prefix String, 4 | autonomous_system_number UInt32, 5 | autonomous_system_organization String 6 | ) ENGINE = Dictionary('geoip_asn_blocks_ipv4'); 7 | 8 | create table geoip_asn_blocks_ipv6 9 | ( 10 | prefix String, 11 | autonomous_system_number UInt32, 12 | autonomous_system_organization String 13 | ) ENGINE = Dictionary('geoip_asn_blocks_ipv6'); 14 | 15 | create table geoip_city_locations_en 16 | ( 17 | geoname_id UInt64, 18 | locale_code String, 19 | continent_code String, 20 | continent_name String, 21 | country_iso_code String, 22 | country_name String, 23 | subdivision_1_iso_code String, 24 | subdivision_1_name String, 25 | subdivision_2_iso_code String, 26 | subdivision_2_name String, 27 | city_name String, 28 | metro_code UInt32, 29 | time_zone String, 30 | is_in_european_union UInt8 31 | ) ENGINE = Dictionary('geoip_city_locations_en'); 32 | 33 | create table geoip_city_blocks_ipv4 34 | ( 35 | prefix String, 36 | geoname_id UInt32, 37 | registered_country_geoname_id UInt32, 38 | represented_country_geoname_id UInt32, 39 | postal_code String, 40 | latitude Float32, 41 | longitude Float32, 42 | accuracy_radius UInt32 43 | ) ENGINE = Dictionary('geoip_city_blocks_ipv4'); 44 | 45 | create table geoip_city_blocks_ipv6 46 | ( 47 | prefix String, 48 | geoname_id UInt32, 49 | registered_country_geoname_id UInt32, 50 | represented_country_geoname_id UInt32, 51 | postal_code String, 52 | latitude Float32, 53 | longitude Float32, 54 | accuracy_radius UInt32 55 | ) ENGINE = Dictionary('geoip_city_blocks_ipv6'); 56 | 57 | create table geoip_country_locations_en 58 | ( 59 | geoname_id UInt64, 60 | locale_code String, 61 | continent_code String, 62 | continent_name String, 63 | country_iso_code String, 64 | country_name String, 65 | is_in_european_union UInt8 66 | ) ENGINE = Dictionary('geoip_country_locations_en'); 67 | 68 | create table geoip_country_blocks_ipv4 69 | ( 70 | prefix String, 71 | geoname_id UInt32, 72 | registered_country_geoname_id UInt32, 73 | represented_country_geoname_id UInt32 74 | ) ENGINE = Dictionary('geoip_country_blocks_ipv4'); 75 | 76 | create table geoip_country_blocks_ipv6 77 | ( 78 | prefix String, 79 | geoname_id UInt32, 80 | registered_country_geoname_id UInt32, 81 | represented_country_geoname_id UInt32 82 | ) ENGINE = Dictionary('geoip_country_blocks_ipv6'); 83 | --------------------------------------------------------------------------------