├── geodis ├── data │ ├── __init__.py │ └── geonames2mysql.py ├── provider │ ├── __init__.py │ ├── importer.py │ ├── ip2location.py │ ├── geonames.py │ ├── factual.py │ └── zipcodes.py ├── __init__.py ├── zipcode.py ├── business.py ├── iprange.py ├── us_states.py ├── geodis ├── city.py ├── location.py ├── index.py └── countries.py ├── MANIFEST.in ├── test ├── requirements.txt ├── test_countries.py ├── testGeodis.py ├── benchmark.py └── data │ └── ip2location.csv ├── requirements.txt ├── .travis.yml ├── setup.py ├── .gitignore ├── external ├── ip2location │ ├── update.sh │ └── download.pl └── geonames │ └── update_countries.py └── README.md /geodis/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include geodis/data/* 2 | -------------------------------------------------------------------------------- /test/requirements.txt: -------------------------------------------------------------------------------- 1 | nose 2 | coverage 3 | -------------------------------------------------------------------------------- /geodis/provider/__init__.py: -------------------------------------------------------------------------------- 1 | #__all__ = ['geonames', 'ip2location'] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | redis>=2.7.1 2 | geohasher==0.1dev 3 | upoints==0.12.2 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | before_install: 2 | - pip install -r test/requirements.txt 3 | 4 | language: python 5 | python: 6 | - 2.7 7 | services: 8 | - redis-server 9 | env: TEST_REDIS_PORT=6379 10 | 11 | install: 12 | - python setup.py install 13 | 14 | script: 15 | - nosetests --with-coverage --cover-package=geodis 16 | 17 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup, find_packages 3 | 4 | setup( 5 | name='geodis', 6 | description = 'A fast Redis based Geo resolving library', 7 | version='2.0.9', 8 | author='DoAT Media LTD.', 9 | author_email='opensource@everything.me', 10 | url='https://github.com/EverythingMe/geodis', 11 | scripts=['geodis/geodis'], 12 | packages=find_packages(), 13 | include_package_data = True, 14 | install_requires=['redis>=2.7.1', 'geohasher==0.1dev', 'upoints==0.12.2'] 15 | 16 | ) 17 | 18 | 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # geodis specific 2 | data/*.csv 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | *.pyc 59 | *.pyo 60 | .idea 61 | .project 62 | .pydevproject 63 | build 64 | -------------------------------------------------------------------------------- /geodis/__init__.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | -------------------------------------------------------------------------------- /external/ip2location/update.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -x 2 | MONTH=$(date +%m) 3 | REDIS_HOST="127.0.0.1" 4 | REDIS_DB=8 5 | REDIS_PORT=6379 6 | 7 | args=$(getopt -o l:p:g: -l login:,password:,package:,redis-port:,redis-host:,redis-db: -- "$@" ) 8 | eval set -- "$args" 9 | for i; do 10 | case $i in 11 | -l|--login) LOGIN=$2; shift 2 ;; 12 | -p|--password) PASSWORD=$2; shift 2 ;; 13 | -g|--package) PKG=$2; shift 2 ;; 14 | --redis-port) REDIS_PORT=$2; shift 2 ;; 15 | --redis-host) REDIS_HOST=$2; shift 2 ;; 16 | --redis-db) REDIS_DB=$2; shift 2 ;; 17 | esac 18 | done 19 | 20 | die() { 21 | echo $1 >&2 22 | exit 2 23 | } 24 | 25 | usage() { 26 | echo "Usage: $0 -g|--package PACKAGE -l|--login LOGIN -p|--password PASSWORD [--redis-port PORT] [--redis-host HOST] [--redis-db DB_NUMBER]" 27 | exit 1 28 | } 29 | 30 | depends_on() { 31 | which $1 &>/dev/null || die "$1 is missing!" 32 | } 33 | 34 | verify_zip() { 35 | [ -f "$1" ] || die "Can't find zip file $1" 36 | unzip -l "$1" >/dev/null || die "zip file $1 is corrupted." 37 | } 38 | 39 | [ -n "$LOGIN" ] || usage 40 | [ -n "$PASSWORD" ] || usage 41 | [ -n "$PKG" ] || usage 42 | 43 | [ -z "$TEMP" ] && TEMP=/tmp 44 | TMPDIR=$TEMP/ip2location/$MONTH 45 | 46 | depends_on unzip 47 | depends_on perl 48 | # redis and python are kind of obvious, so no point in checking. 49 | 50 | mkdir -p $TMPDIR 51 | cd $(dirname $0) 52 | ./download.pl -package $PKG -login $LOGIN -password $PASSWORD -output "$TMPDIR/$PKG-$MONTH.zip" || \ 53 | die "Failed to download, quitting" 54 | verify_zip "$TMPDIR/$PKG-$MONTH.zip" 55 | unzip -u -o -d $TMPDIR "$TMPDIR/$PKG-$MONTH.zip" 'IP*.CSV' || die "Failed to download, quitting" 56 | 57 | PKG_FILE="$TMPDIR/IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE.CSV" 58 | cd ../.. 59 | ./geodis/geodis -i -f $PKG_FILE -n "$REDIS_DB" -H "$REDIS_HOST" -p "$REDIS_PORT" || die "Update failed, your database is empty" 60 | -------------------------------------------------------------------------------- /geodis/provider/importer.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | import redis 28 | 29 | class Importer(object): 30 | """ 31 | Base class for all importer scripts, inits a redis connection and file name 32 | """ 33 | def __init__(self, fileName, redisHost, redisPort, redisDB, redisPWD=None): 34 | self.fileName = fileName 35 | self.redis = redis.Redis(host=redisHost, port=redisPort, db=redisDB, password=redisPWD) 36 | 37 | 38 | def reset(self, cls): 39 | self.redis.delete(cls.getGeohashIndexKey()) 40 | -------------------------------------------------------------------------------- /external/geonames/update_countries.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate geodis/countries.py from a TSV dump at geonames.org 3 | 4 | The generated code is part of the repository, if you ever wish to update it, run this again. 5 | """ 6 | 7 | import urllib 8 | import csv 9 | import itertools 10 | import collections 11 | 12 | url = 'http://download.geonames.org/export/dump/countryInfo.txt' 13 | fields = 'ISO', 'ISO3', 'ISOnumeric', 'fips', 'name', 'capital', 'area', 'population', 'continent', 'tld', 'currencyCode', 'currencyName', 'phone', 'postalCodeFormat', 'postalCodeRegex', 'languages', 'id', 'neighbours', 'equivalentFipsCode' 14 | split_to_set = lambda s: set(s.split(',')) 15 | types = { 16 | 'area': float, 17 | 'id': int, 18 | 'population': int, 19 | 'ISOnumeric': int, 20 | 'languages': split_to_set, 21 | 'neighbours': split_to_set 22 | } 23 | 24 | f = urllib.urlopen(url) 25 | Country = collections.namedtuple('Country', fields) 26 | source = itertools.dropwhile(lambda l: l.startswith('#'), f) 27 | reader = csv.DictReader(source, fields, delimiter='\t') 28 | 29 | print 'import collections' 30 | print 'Country = collections.namedtuple(\'Country\', {})'.format(fields) 31 | print 'countries = [' 32 | 33 | for line in reader: 34 | for field in fields: 35 | t = types.get(field, str) 36 | attr = line[field].strip() 37 | line[field] = t(attr) if attr else None 38 | print ' {},'.format(Country(**line)) 39 | 40 | print ']' 41 | 42 | # Generate getters (i.e: getIdByName, get2LetterCodeById) 43 | _attrs = ('Id', 'id'), ('Name', 'name'), ('2LetterCode', 'ISO'), ('3LetterCode', 'ISO3') 44 | for attr in _attrs: 45 | print 46 | print 47 | lookup = 'countriesBy{}'.format(attr[0]) 48 | print '{} = {{c.{}: c for c in countries}}'.format(lookup, attr[1]) 49 | 50 | others = set(_attrs) - {attr} 51 | for other in others: 52 | print 53 | print 54 | print '''def get{other[0]}By{attr[0]}({attr[1]}): 55 | """Get country {other[0]} by {attr[0]}""" 56 | return {lookup}[{attr[1]}].{other[1]}'''.format(other=other, attr=attr, lookup=lookup) 57 | -------------------------------------------------------------------------------- /test/test_countries.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from geodis.countries import * 3 | 4 | 5 | class CountriesTestCase(TestCase): 6 | def testIds(self): 7 | for country in countries: 8 | if country.ISO3 in ('SCG', 'ANT'): 9 | continue 10 | self.assertTrue(country.id, 'Country {} has invalid id'.format(country)) 11 | self.assertEqual(getNameById(country.id), country.name) 12 | self.assertEqual(get2LetterCodeById(country.id), country.ISO) 13 | self.assertEqual(get3LetterCodeById(country.id), country.ISO3) 14 | ids = [country.id for country in countries] 15 | assert len(set(ids)) == len(ids) - 1 # SCG, ANT have id = None 16 | 17 | def testNames(self): 18 | for country in countries: 19 | self.assertTrue(country.name, 'Country {} has invalid name'.format(country)) 20 | self.assertEqual(getIdByName(country.name), country.id) 21 | self.assertEqual(get2LetterCodeByName(country.name), country.ISO) 22 | self.assertEqual(get3LetterCodeByName(country.name), country.ISO3) 23 | names = [country.name for country in countries] 24 | assert len(set(names)) == len(names) 25 | 26 | def test2LetterCodes(self): 27 | for country in countries: 28 | self.assertTrue(country.ISO, 'Country {} has invalid ISO'.format(country)) 29 | self.assertEqual(getIdBy2LetterCode(country.ISO), country.id) 30 | self.assertEqual(getNameBy2LetterCode(country.ISO), country.name) 31 | self.assertEqual(get3LetterCodeBy2LetterCode(country.ISO), country.ISO3) 32 | ISOs = [country.ISO for country in countries] 33 | assert len(set(ISOs)) == len(ISOs) 34 | 35 | def test3LetterCodes(self): 36 | for country in countries: 37 | self.assertTrue(country.ISO3, 'Country {} has invalid ISO3'.format(country)) 38 | self.assertEqual(getIdBy3LetterCode(country.ISO3), country.id) 39 | self.assertEqual(getNameBy3LetterCode(country.ISO3), country.name) 40 | self.assertEqual(get2LetterCodeBy3LetterCode(country.ISO3), country.ISO) 41 | ISO3s = [country.ISO3 for country in countries] 42 | assert len(set(ISO3s)) == len(ISO3s) 43 | -------------------------------------------------------------------------------- /geodis/zipcode.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | from . import countries 28 | from .location import Location 29 | from .index import GeoboxIndex 30 | 31 | class ZIPCode(Location): 32 | 33 | __spec__ = Location.__spec__ + ['continent', 'country', 'state', 'city', 'continentId', 'countryId', 'stateId', 'cityId'] 34 | __keyspec__ = ['name'] 35 | _keys = { 36 | 'geobox': GeoboxIndex('Zip', [GeoboxIndex.RES_1KM, GeoboxIndex.RES_4KM, GeoboxIndex.RES_128KM]) } 37 | def __init__(self, **kwargs): 38 | 39 | super(ZIPCode, self).__init__(**kwargs) 40 | 41 | self.continent = kwargs.get('continent', '').strip() 42 | try: 43 | self.country = countries.getNameBy2LetterCode(kwargs.get('country', '')) 44 | except KeyError: 45 | self.country = kwargs.get('country', '').strip() 46 | self.state = kwargs.get('state', '').strip() 47 | self.city = kwargs.get('city', '').strip() 48 | 49 | self.continentId = kwargs.get('continentId', 0) 50 | self.countryId = kwargs.get('countryId', 0) 51 | self.stateId = kwargs.get('stateId', 0) 52 | self.cityId = kwargs.get('cityId', 0) 53 | 54 | 55 | -------------------------------------------------------------------------------- /geodis/provider/ip2location.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | #Importer for locations from ip2location.com databases 28 | 29 | 30 | import csv 31 | import logging 32 | import redis 33 | 34 | from .importer import Importer 35 | from ..iprange import IPRange 36 | 37 | class IP2LocationImporter(Importer): 38 | 39 | def runImport(self, reset = False): 40 | """ 41 | File Format: 42 | "67134976","67135231","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.045200","-118.284000","90001" 43 | 44 | """ 45 | 46 | try: 47 | fp = open(self.fileName) 48 | except Exception, e: 49 | logging.error("could not open file %s for reading: %s" ,self.fileName, e) 50 | return False 51 | 52 | if reset: 53 | print "Deleting old ip data..." 54 | self.redis.delete(IPRange._indexKey) 55 | 56 | print "Starting import..." 57 | 58 | reader = csv.reader(fp, delimiter=',', quotechar='"') 59 | pipe = self.redis.pipeline() 60 | 61 | i = 0 62 | for row in reader: 63 | 64 | try: 65 | #parse the row 66 | countryCode = row[3] 67 | rangeMin = int(row[0]) 68 | rangeMax = int(row[1]) 69 | lat = float(row[6]) 70 | lon = float(row[7]) 71 | 72 | #take the zipcode if possible 73 | try: 74 | zipcode = row[8] 75 | except: 76 | zipcode = '' 77 | 78 | 79 | #junk record 80 | if countryCode == '-' and (not lat and not lon): 81 | continue 82 | 83 | range = IPRange(rangeMin, rangeMax, lat, lon, zipcode) 84 | range.save(pipe) 85 | 86 | except Exception, e: 87 | logging.error("Could not save record: %s" , e) 88 | 89 | i += 1 90 | if i % 10000 == 0: 91 | logging.info("Dumping pipe. did %d ranges" , i) 92 | pipe.execute() 93 | 94 | pipe.execute() 95 | logging.info("Imported %d locations" , i) 96 | 97 | return i 98 | 99 | -------------------------------------------------------------------------------- /geodis/provider/geonames.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | #Importer for locations from geonames 28 | 29 | import csv, sys 30 | csv.field_size_limit(sys.maxsize) 31 | import logging 32 | import redis 33 | import re 34 | import json 35 | 36 | from ..city import City 37 | from .importer import Importer 38 | 39 | class GeonamesImporter(Importer): 40 | 41 | def runImport(self): 42 | """ 43 | File Format: 44 | continentId continentName countryId countryName stateId stateName cityId cityName lat lon 45 | """ 46 | 47 | 48 | try: 49 | fp = open(self.fileName) 50 | except Exception, e: 51 | logging.error("could not open file %s for reading: %s" ,self.fileName, e) 52 | return False 53 | 54 | self.reset(City) 55 | 56 | pipe = self.redis.pipeline() 57 | #reader = csv.reader(fp, delimiter='\t', quotechar = '"') 58 | 59 | i = 0 60 | fails = 0 61 | for line in fp: 62 | 63 | try: 64 | row = [x.encode('utf-8') for x in json.loads(line)] 65 | loc = City ( 66 | continentId = row[0], 67 | continent = row[1], 68 | countryId = row[2], 69 | country = row[3], 70 | stateId = row[4], 71 | state = row[5], 72 | cityId = row[6], 73 | name = row[7], 74 | lat = float(row[8]), 75 | lon = float(row[9]), 76 | aliases = row[10], 77 | population = int(row[11]) 78 | ) 79 | 80 | loc.save(pipe) 81 | 82 | except Exception, e: 83 | logging.exception("Could not import line %s: %s" ,line, e) 84 | fails+=1 85 | return 86 | 87 | 88 | i += 1 89 | if i % 1000 == 0: 90 | pipe.execute() 91 | pipe.execute() 92 | 93 | logging.info("Imported %d cities, failed %d times" , i, fails) 94 | print "Finished!" 95 | return True 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/EverythingMe/geodis.svg?branch=master)](https://travis-ci.org/EverythingMe/geodis) 2 | 3 | # Geodis - a Redis based geo resolving library 4 | 5 | 6 | * * * * * * 7 | 8 | Geodis is a simple and fast python module that allows you to convert IP addresses and latitude/longitude 9 | coordinates into geographical locations such as cities, zipcodes and countries. 10 | 11 | It currently supports cities worldwide, and zipcode areas in the US (of course each of these includes higher level data such as country). 12 | But it is written in an extensible way, so that adding a new layer of objects and indexing them is very simple. 13 | 14 | Geodis is fast, since it uses redis, which is a very fast in memory database, and geohashing to index coordinates. 15 | 16 | A single thread, single process python program can resolve about 2000 ips and 3000 lat/lon pairs per second on 17 | a regular desktop machine, when the database is fully loaded with IP ranges, zipcodes and all major cities in the world. 18 | 19 | # Geodis - Getting started 20 | 21 | Before you jump into python shell to test geodis , you must first ensure that redis server is running and on which port. 22 | If you do not have a clue how to get started with redis-server [here is a link](https://redis.io/topics/quickstart) to get started, download install. 23 | 24 | once redis-server is up and running you can test in python shell as shown below: 25 | 26 | >>> import redis 27 | >>> rs = redis.Redis("localhost") 28 | >>> rs 29 | Redis>> 30 | 31 | The above line showing host and port ensures that redis server is running and accepting incoming connections at port `6379`. 32 | 33 | 34 | USAGE 35 | ------------------------ 36 | >>> import redis 37 | >>> import geodis.city 38 | >>> conn = redis.Redis() 39 | 40 | #getting a city by lat,lon 41 | >>> print geodis.city.City.getByLatLon(31.78,35.21, conn) 42 | Location: {'name': 'West Jerusalem', 'country': 'Israel', 'lon': '35.21961', 'zipcode': '', 'state': 'Jerusalem District', 'lat': '31.78199'} 43 | 44 | #getting a location by ip 45 | >>> print geodis.iprange.IPRange.getCity('62.219.0.221', '62.219.0.221', 31.78, 35.21, conn) 46 | Location: {'name': 'West Jerusalem', 'country': 'Israel', 'lon': '35.21961', 'zipcode': '', 'state': 'Jerusalem District', 'lat': '31.78199'} 47 | 48 | 49 | Geodis can also be used as a command line utility 50 | ------------------------ 51 | $ geodis -P 188.127.241.156 -p 6379 52 | Location: {'name': 'Crosby', 'country': 'United Kingdom', 'lon': '-3.03333', 'zipcode': '', 'state': 'England', 'key': 'loc:crosby:united kingdom:england:', 'lat': '53.47778'} 53 | 54 | $ geodis -L 40.90732,-74.07514 -p 6379 55 | Location: {'name': 'Rochelle Park', 'country': 'United States', 'lon': '-74.07514', 'zipcode': '', 'state': 'New Jersey', 'key': 'loc:rochelle park:united states:new jersey:', 'lat': '40.90732'} 56 | 57 | IMPORTING DATA 58 | ------------------------ 59 | Geodis needs to import its data into redis. In the data folder you will find a list of all cities in the world, and a zipcode database. 60 | 61 | The data files should be where the geodis files are installed if you've installed from pip (e.g `/usr/local/lib/python2.7/site-packages/geodis/data/cities1000.json`), or in the source tree if you've cloned this repo. 62 | 63 | data is imported using a utility called geodis.py. run ./geodis.py --help for more details on importing it. 64 | 65 | Examples: 66 | 67 | * Cities are imported by running 68 | 69 | geodis -g -f /cities1000.json -p 6379 70 | 71 | * Zipcodes are imported by running 72 | 73 | geodis -z -f /zipcode.csv -p 6379 74 | 75 | 76 | ** *IMPORTANT*: IP to location data is not provided, you need to buy an ip resolving database that can resolve ip ranges to lat,lon pairs ** 77 | 78 | Refreshing countries mapping: 79 | ------------------------ 80 | The data is already generated but if you ever need to update, use: 81 | 82 | python external/geonames/update.py > geodis/countries.py 83 | 84 | 85 | INSTALLING: 86 | ------------------------ 87 | 88 | * `pip install geodis` 89 | 90 | 91 | RUNNING: 92 | ------------------------ 93 | 1. Install geodis 94 | 95 | 2. Install redis 96 | 97 | 3. Import data as described above. 98 | 99 | -------------------------------------------------------------------------------- /geodis/provider/factual.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | #Importer for locations from geonames 28 | import csv 29 | import logging 30 | import redis 31 | import re 32 | 33 | from ..business import Business 34 | from .importer import Importer 35 | 36 | class BusinessImporter(Importer): 37 | 38 | def runImport(self): 39 | """ 40 | File Format: 41 | 47f2642f-a337-42e4-9a88-edc2f0709d89 Dominos Pizza North Park 1925 El Cajon Blvd San Diego CA 92104 (619) 294-4570 Food & Beverage > Restaurants > Pizza 32.755249 42 | -117.144707 43 | 44 | """ 45 | 46 | 47 | try: 48 | fp = open(self.fileName) 49 | except Exception, e: 50 | logging.error("could not open file %s for reading: %s" ,self.fileName, e) 51 | return False 52 | 53 | self.reset(Business) 54 | 55 | pipe = self.redis.pipeline() 56 | reader = csv.reader(fp, delimiter='\t', quotechar = '"') 57 | 58 | 59 | 60 | i = 0 61 | fails = 0 62 | for row in reader: 63 | try: 64 | cat = row[8] 65 | if not re.match('^(%s)\s' % "|".join([re.escape(x) for x in ( 66 | 'Arts, Entertainment', 67 | 'Shopping', 68 | 'Food & Beverage' 69 | )]), 70 | cat): 71 | 72 | continue 73 | 74 | if row[10] and row[11]: 75 | loc = Business ( 76 | name = row[1], 77 | address = row[2], 78 | continent = 'North America', 79 | country = 'United States', 80 | city = row[3], 81 | state = row[4], 82 | zip = row[5], 83 | category = row[9], 84 | lat = row[10], 85 | lon = row[11], 86 | type = row[8] 87 | 88 | ) 89 | #print loc 90 | loc.save(pipe) 91 | 92 | except Exception, e: 93 | logging.exception("Could not import line %s: %s" ,row, e) 94 | fails+=1 95 | 96 | 97 | i += 1 98 | if i % 1000 == 0: 99 | print i 100 | pipe.execute() 101 | pipe.execute() 102 | 103 | logging.info("Imported %d businesses, failed %d times" , i, fails) 104 | print "Finished!" 105 | return True -------------------------------------------------------------------------------- /geodis/business.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | import re 28 | import math 29 | import itertools 30 | 31 | from geohasher import hasher 32 | 33 | from . import countries 34 | from .location import Location 35 | from .index import GeoBoxTextIndex, GeoboxIndex, TimeSampler 36 | from .us_states import State 37 | 38 | class Business(Location): 39 | """ 40 | Wrapper for a local business object 41 | """ 42 | 43 | #what we want to save for a city 44 | __spec__ = Location.__spec__ + ['continent', 'country', 'city', 'state', 'street', 'address', 'type'] 45 | __keyspec__ = Location.__spec__ + ['street', 'city', 'state' ] 46 | 47 | _keys = {'name_radius': GeoBoxTextIndex('Business', [GeoboxIndex.RES_1KM , GeoboxIndex.RES_4KM, GeoboxIndex.RES_16KM], 48 | ('name', 'street'), ' ')} 49 | def __init__(self, **kwargs): 50 | 51 | super(Business, self).__init__(**kwargs) 52 | 53 | self.continent = kwargs.get('continent', '').strip() 54 | try: 55 | self.country = countries.getNameBy2LetterCode(kwargs.get('country', None)) 56 | except KeyError: 57 | self.country = kwargs.get('country', '').strip() 58 | 59 | self.state = kwargs.get('state', '').strip() 60 | self.city = kwargs.get('city', '').strip() 61 | 62 | self.address = kwargs.get('address', '').strip() 63 | 64 | self.street = re.sub('\\b(St|Rd|Blvd|Street|Drive|Dr)$', '', re.sub('^[0-9]+ ', '', self.address)).strip() 65 | self.zip = kwargs.get('zip', '').strip() 66 | 67 | _type = kwargs.get('type', '').strip() 68 | if _type: 69 | 70 | self.type = _type.split('>')[-1].strip() 71 | else: 72 | self.type = '' 73 | 74 | def score(self, refLat, refLon, factor): 75 | 76 | ret = getattr(self, '_score', None) 77 | if not ret: 78 | 79 | if refLat and refLon: 80 | d = Location.getLatLonDistance((self.lat, self.lon), (refLat, refLon)) 81 | 82 | 83 | ret = factor/d 84 | 85 | self._score = ret 86 | return ret 87 | 88 | 89 | 90 | @classmethod 91 | def getByRadius(cls, lat, lon, radius, redisConn, text = None): 92 | 93 | 94 | return cls.loadByNamedKey('name_radius', redisConn, lat, lon, radius, text) 95 | 96 | 97 | 98 | if __name__ == '__main__': 99 | 100 | import redis, time 101 | 102 | 103 | r = redis.Redis(db = 8, host = 'localhost', port = 6375) 104 | 105 | 106 | lat = 32.702374 107 | lon = -117.137677 108 | 109 | #lat,lon = 32.0667,34.7667 110 | d = 2 111 | st = time.time() 112 | nodes = Business.getByRadius(lat, lon, 10, r, 'mcdonalds') 113 | #nodes.sort(lambda x,y: cmp(y.score(lat, lon), x.score(lat, lon))) 114 | et = time.time() 115 | print len(nodes) 116 | for n in nodes: 117 | print n.name, ',', n.address, Location.getLatLonDistance((lat, lon), (n.lat, n.lon)), "km" 118 | print 1000*(et-st) 119 | # for city in cities: 120 | # print city.name, city.country, Location.getLatLonDistance((lat, lon), (city.lat, city.lon)) 121 | #import redis 122 | 123 | #c.save(r) 124 | #c = City(lat = 40.1143, lon= -74.106, country = "United States", state= "New York", name = "New York") 125 | #c.save(r) -------------------------------------------------------------------------------- /geodis/iprange.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | 28 | import socket, struct, re 29 | from geohasher import hasher 30 | import struct 31 | 32 | from .city import City 33 | from .zipcode import ZIPCode 34 | 35 | 36 | class IPRange(object): 37 | 38 | _indexKey = 'iprange:locations' 39 | def __init__(self, rangeMin, rangeMax, lat, lon, zipcode = ''): 40 | 41 | self.rangeMin = rangeMin 42 | self.rangeMax = rangeMax 43 | self.lat = lat 44 | self.lon = lon 45 | self.zipcode = zipcode 46 | 47 | #encode a numeric geohash key 48 | self.geoKey = hasher.encode(lat, lon) 49 | 50 | self.key = '%s:%s:%s' % (self.rangeMin, self.rangeMax, self.zipcode) 51 | 52 | def save(self, redisConn): 53 | """ 54 | Save an IP range to redis 55 | @param redisConn a redis connectino or pipeline 56 | """ 57 | 58 | redisConn.zadd(self._indexKey, '%s@%s' % (self.geoKey, self.key) , self.rangeMax) 59 | 60 | 61 | def __str__(self): 62 | """ 63 | textual representation 64 | """ 65 | return "IPRange: %s" % self.__dict__ 66 | 67 | @staticmethod 68 | def get(ip, redisConn): 69 | """ 70 | Get a range and all its data by ip 71 | """ 72 | 73 | ipnum = IPRange.ip2long(ip) 74 | 75 | #get the location record from redis 76 | record = redisConn.zrangebyscore(IPRange._indexKey, ipnum ,'+inf', 0, 1, True) 77 | if not record: 78 | #not found? k! 79 | return None 80 | 81 | #extract location id 82 | try: 83 | geoKey,rng = record[0][0].split('@') 84 | 85 | lat,lon = hasher.decode(long(geoKey)) 86 | 87 | rngMin, rngMax, zipcode = rng.split(':') 88 | rngMin = int(rngMin) 89 | rngMax = int(rngMax) 90 | except IndexError: 91 | return None 92 | 93 | #address not in any range 94 | if not rngMin <= ipnum <= rngMax: 95 | return None 96 | 97 | return IPRange(rngMin, rngMax, lat, lon, zipcode) 98 | 99 | @staticmethod 100 | def getZIP(ip, redisConn): 101 | """ 102 | Get a zipcode location object based on an IP 103 | will return None if you are outside the US 104 | """ 105 | 106 | range = IPRange.get(ip, redisConn) 107 | 108 | 109 | if not range or not re.match('^[0-9]{5}$', range.zipcode): 110 | return None 111 | 112 | return ZIPCode.getByKey(redisConn, name = range.zipcode) 113 | 114 | 115 | 116 | 117 | 118 | 119 | @staticmethod 120 | def getCity(ip, redisConn): 121 | """ 122 | Get location object by resolving an IP address 123 | @param ip IPv4 address string (e.g. 127.0.0.1) 124 | @oaram redisConn redis connection to the database 125 | @return a Location object if we can resolve this ip, else None 126 | """ 127 | 128 | range = IPRange.get(ip, redisConn) 129 | if not range: 130 | return None 131 | 132 | 133 | 134 | #load a location by the 135 | return City.getByGeohash(hasher.encode(range.lat, range.lon), redisConn) 136 | 137 | 138 | @staticmethod 139 | def ip2long(ip): 140 | """ 141 | Convert an IP string to long 142 | """ 143 | ip_packed = socket.inet_aton(ip) 144 | return struct.unpack("!L", ip_packed)[0] 145 | -------------------------------------------------------------------------------- /geodis/provider/zipcodes.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | #Importer for zipcodes.csv file found in /data 28 | 29 | 30 | import csv 31 | import logging 32 | 33 | from .. import us_states 34 | from ..zipcode import ZIPCode 35 | from ..city import City 36 | 37 | from importer import Importer 38 | 39 | class ZIPImporter(Importer): 40 | 41 | def runImport(self): 42 | """ 43 | File Format: 44 | "00210","Portsmouth","NH","43.005895","-71.013202","-5","1" 45 | """ 46 | 47 | try: 48 | fp = open(self.fileName) 49 | except Exception, e: 50 | logging.error("could not open file %s for reading: %s" ,self.fileName, e) 51 | return False 52 | 53 | self.reset(ZIPCode) 54 | 55 | reader = csv.reader(fp, delimiter=',', quotechar = '"') 56 | 57 | features = {} 58 | countryId = None 59 | continentId = None 60 | for key in self.redis.sort(City.getGeohashIndexKey(), by='nosort'): 61 | try: 62 | city = self.redis.hgetall(key)#dict(zip(City.__spec__, key.split(':')[1:])) 63 | except Exception, e: 64 | logging.error(e) 65 | continue 66 | 67 | if city.get('country')=='United States': 68 | 69 | if not continentId: 70 | continentId = city['continentId'] 71 | 72 | if not countryId: 73 | countryId = city['countryId'] 74 | 75 | 76 | state = features.get(city['state'].lower(), None) 77 | if not state: 78 | state = {'id': city['stateId'], 'cities':{}} 79 | features[city['state'].lower()] = state 80 | state['cities'][city['name']] = city['cityId'] 81 | 82 | 83 | print features.keys() 84 | pipe = self.redis.pipeline() 85 | i = 0 86 | fails = 0 87 | for row in reader: 88 | if len(row)==0: 89 | continue 90 | try: 91 | name = row[0] 92 | city = row[1] 93 | stateCode = row[2] 94 | lat = float(row[3]) 95 | lon = float(row[4]) 96 | 97 | state = stateCode#code_to_state.get(stateCode, '').title() 98 | country = 'US' 99 | continent = 'North America' 100 | 101 | 102 | state = us_states.State.get(stateCode) 103 | stateName = '' 104 | 105 | if state: 106 | stateName = state.name 107 | 108 | stateId = features[stateName.lower()]['id'] 109 | cityId = features[stateName.lower()]['cities'].get(city) 110 | 111 | 112 | loc = ZIPCode(name = name, 113 | city = city, 114 | cityId = cityId, 115 | state = stateCode, 116 | stateId = stateId, 117 | country = country, 118 | countryId = countryId, 119 | continent = continent, 120 | continentId = continentId, 121 | lat = lat, 122 | lon = lon) 123 | 124 | loc.save(pipe) 125 | 126 | 127 | 128 | except Exception, e: 129 | logging.exception("Could not import line #%d: %s, %s: %s" % (i+1, city, state, e)) 130 | fails += 1 131 | # 132 | i += 1 133 | if i % 1000 == 0: 134 | pipe.execute() 135 | 136 | pipe.execute() 137 | 138 | logging.info("Imported %d locations, failed %d times" , i, fails) 139 | 140 | return True 141 | -------------------------------------------------------------------------------- /test/testGeodis.py: -------------------------------------------------------------------------------- 1 | import sys,os; sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)) + '/../src') 2 | import unittest 3 | import redis 4 | import os 5 | from geodis.provider.geonames import GeonamesImporter 6 | from geodis.provider.ip2location import IP2LocationImporter 7 | from geodis.provider.zipcodes import ZIPImporter 8 | 9 | 10 | from geodis.city import City 11 | from geodis.iprange import IPRange 12 | from geodis.zipcode import ZIPCode 13 | from geodis import countries 14 | 15 | 16 | atdir = lambda *f: os.path.join(os.path.abspath(os.path.dirname(__file__)), *f) 17 | 18 | 19 | class TestGeodis(unittest.TestCase): 20 | def setUp(self): 21 | self.redisHost = os.getenv('TEST_REDIS_HOST', 'localhost') 22 | self.redisPort = int(os.getenv('TEST_REDIS_PORT', '6375')) 23 | self.redisDB = int(os.getenv('TEST_REDIS_DBNUMBER', '8')) 24 | self.redis = redis.Redis(self.redisHost, self.redisPort, self.redisDB) 25 | 26 | def test1_ImportGeonames(self): 27 | 28 | importer = GeonamesImporter(atdir('data/cities.json'), self.redisHost, self.redisPort, self.redisDB) 29 | self.assertGreater(importer.runImport(), 0, 'Could not import cities json') 30 | 31 | def test2_ImportIP2Location(self): 32 | 33 | importer = IP2LocationImporter(atdir('data/ip2location.csv'), self.redisHost, self.redisPort, self.redisDB) 34 | self.assertGreater(importer.runImport(), 0, 'Could not import ip ranges csv') 35 | 36 | def test3_ImportZIP(self): 37 | importer = ZIPImporter(atdir('data/zipcodes.csv'), self.redisHost, self.redisPort, self.redisDB) 38 | self.assertGreater(importer.runImport(), 0, 'Could not import zipcodes csv') 39 | 40 | def test4_resolve_by_coords(self): 41 | loc = City.getByLatLon(34.05223, -118.24368, self.redis) 42 | 43 | self.assertIsNotNone(loc) 44 | 45 | self.assertEqual(loc.country, 'United States') 46 | self.assertIn(loc.state, ('CA', 'California')) 47 | 48 | def test5_resolve_by_textual_search(self): 49 | locs = City.getByName('san francisco', self.redis, 44.0462, -123.022, 'united states') 50 | 51 | self.assertGreater(len(locs), 0) 52 | self.assertEqual(locs[0].country, 'United States') 53 | self.assertEqual(locs[0].name, 'San Francisco') 54 | self.assertEqual(locs[0].state, 'California') 55 | 56 | locs = City.getByName('san francisco', self.redis) 57 | 58 | self.assertGreater(len(locs), 0) 59 | self.assertEqual(locs[0].country, 'United States') 60 | self.assertEqual(locs[0].name, 'San Francisco') 61 | self.assertEqual(locs[0].state, 'California') 62 | 63 | def test_city(self): 64 | 65 | self.assertTrue(City.exist(['san francisco'], self.redis)) 66 | self.assertFalse(City.exist(['ban hranbisco'], self.redis)) 67 | 68 | locs = City.getByName('san francisco', self.redis) 69 | self.assertEquals(1, len(locs)) 70 | city = locs[0] 71 | 72 | score1 = city.score(37.7833, -122.4167) 73 | self.assertGreater(score1, 0) 74 | 75 | # we need a second instance of the city to get a different score because scores are cached 76 | city2 = City.getByName('san francisco', self.redis)[0] 77 | score2 = city2.score(38.7833, -102.4167) 78 | self.assertGreater(score2, 0) 79 | self.assertGreater(score1, score2) 80 | 81 | 82 | #test converting to country only info 83 | city.toCountry() 84 | for k in City.__cityspec__: 85 | self.assertEquals(getattr(city,k), None) 86 | 87 | 88 | #test get by radius 89 | locs = City.getByRadius(37.7833, -122.4167, 4, self.redis, None, 5) 90 | self.assertEqual(5, len(locs)) 91 | self.assertEqual(locs[0].name, 'San Francisco') 92 | 93 | 94 | 95 | 96 | def test6_resolve_by_ip(self): 97 | loc = IPRange.getCity('4.3.68.1', self.redis) 98 | 99 | self.assertIsNotNone(loc) 100 | self.assertEqual(loc.country, 'United States') 101 | self.assertIn(loc.state, ('CA', 'California')) 102 | 103 | def test7_resolve_zip_by_lat_lon(self): 104 | loc = ZIPCode.getByLatLon(34.0452, -118.284, self.redis) 105 | 106 | self.assertIsNotNone(loc) 107 | self.assertEqual(loc.name, '90006') 108 | self.assertEqual(loc.country, 'United States') 109 | self.assertIn(loc.state, ('CA', 'California')) 110 | 111 | def test8_resolve_zip_by_ip(self): 112 | loc = IPRange.getZIP('4.3.68.1', self.redis) 113 | 114 | self.assertIsNotNone(loc) 115 | self.assertEqual(loc.name, '90001') 116 | self.assertEqual(loc.country, 'United States') 117 | self.assertIn(loc.state, ('CA', 'California')) 118 | 119 | def testCountryUtils(self): 120 | testData = [('Israel', 'IL', 294640), ( "Sweden", 'SE', '2661886'), ('United States', 'US', 6252001)] 121 | 122 | for name, code, cid in testData: 123 | 124 | self.assertEqual(countries.get2LetterCodeByName(name), code) 125 | self.assertEqual(countries.getNameBy2LetterCode(code), name) 126 | self.assertEqual(countries.getIdByName(name), int(cid)) 127 | self.assertEqual(countries.getIdBy2LetterCode(code), int(cid)) 128 | 129 | self.assertEqual(countries.getNameById(cid), name) 130 | self.assertEqual(countries.get2LetterCodeById(cid), code) 131 | 132 | def testNameCodeConversionReversable(self): 133 | countryNames = [c.name for c in countries.countries] 134 | countryCodes = [c.ISO for c in countries.countries] 135 | 136 | transformed = [countries.getNameBy2LetterCode(countries.get2LetterCodeByName(c)) for c in countryNames] 137 | self.assertEqual(countryNames, transformed) 138 | 139 | transformed = [countries.get2LetterCodeByName(countries.getNameBy2LetterCode(c)) for c in countryCodes] 140 | self.assertEqual(countryCodes, transformed) 141 | 142 | 143 | if __name__ == '__main__': 144 | import sys 145 | sys.argv = ['', 'TestGeodis'] 146 | 147 | unittest.main() 148 | -------------------------------------------------------------------------------- /geodis/us_states.py: -------------------------------------------------------------------------------- 1 | #US State codes taken from http://www.cmmichael.com/blog/2006/12/29/state-code-mappings-for-python 2 | 3 | from .location import Location 4 | 5 | import math 6 | 7 | state_to_code = {'VERMONT': 'VT', 'GEORGIA': 'GA', 'IOWA': 'IA', 'Armed Forces Pacific': 'AP', 'GUAM': 'GU', 'KANSAS': 'KS', 'FLORIDA': 'FL', 'AMERICAN SAMOA': 'AS', 'NORTH CAROLINA': 'NC', 'HAWAII': 'HI', 'NEW YORK': 'NY', 'CALIFORNIA': 'CA', 'ALABAMA': 'AL', 'IDAHO': 'ID', 'FEDERATED STATES OF MICRONESIA': 'FM', 'Armed Forces Americas': 'AA', 'DELAWARE': 'DE', 'ALASKA': 'AK', 'ILLINOIS': 'IL', 'Armed Forces Africa': 'AE', 'SOUTH DAKOTA': 'SD', 'CONNECTICUT': 'CT', 'MONTANA': 'MT', 'MASSACHUSETTS': 'MA', 'PUERTO RICO': 'PR', 'Armed Forces Canada': 'AE', 'NEW HAMPSHIRE': 'NH', 'MARYLAND': 'MD', 'NEW MEXICO': 'NM', 'MISSISSIPPI': 'MS', 'TENNESSEE': 'TN', 'PALAU': 'PW', 'COLORADO': 'CO', 'Armed Forces Middle East': 'AE', 'NEW JERSEY': 'NJ', 'UTAH': 'UT', 'MICHIGAN': 'MI', 'WEST VIRGINIA': 'WV', 'WASHINGTON': 'WA', 'MINNESOTA': 'MN', 'OREGON': 'OR', 'VIRGINIA': 'VA', 'VIRGIN ISLANDS': 'VI', 'MARSHALL ISLANDS': 'MH', 'WYOMING': 'WY', 'OHIO': 'OH', 'SOUTH CAROLINA': 'SC', 'INDIANA': 'IN', 'NEVADA': 'NV', 'LOUISIANA': 'LA', 'NORTHERN MARIANA ISLANDS': 'MP', 'NEBRASKA': 'NE', 'ARIZONA': 'AZ', 'WISCONSIN': 'WI', 'NORTH DAKOTA': 'ND', 'Armed Forces Europe': 'AE', 'PENNSYLVANIA': 'PA', 'OKLAHOMA': 'OK', 'KENTUCKY': 'KY', 'RHODE ISLAND': 'RI', 'WASHINGTON, DC': 'DC', 'ARKANSAS': 'AR', 'MISSOURI': 'MO', 'TEXAS': 'TX', 'MAINE': 'ME'} 8 | 9 | code_to_state = {'WA': 'WASHINGTON', 'VA': 'VIRGINIA', 'DE': 'DELAWARE', 'DC': 'WASHINGTON, D.C.', 'WI': 'WISCONSIN', 'WV': 'WEST VIRGINIA', 'HI': 'HAWAII', 'AE': 'Armed Forces Middle East', 'FL': 'FLORIDA', 'FM': 'FEDERATED STATES OF MICRONESIA', 'WY': 'WYOMING', 'NH': 'NEW HAMPSHIRE', 'NJ': 'NEW JERSEY', 'NM': 'NEW MEXICO', 'TX': 'TEXAS', 'LA': 'LOUISIANA', 'NC': 'NORTH CAROLINA', 'ND': 'NORTH DAKOTA', 'NE': 'NEBRASKA', 'TN': 'TENNESSEE', 'NY': 'NEW YORK', 'PA': 'PENNSYLVANIA', 'CA': 'CALIFORNIA', 'NV': 'NEVADA', 'AA': 'Armed Forces Americas', 'PW': 'PALAU', 'GU': 'GUAM', 'CO': 'COLORADO', 'VI': 'VIRGIN ISLANDS', 'AK': 'ALASKA', 'AL': 'ALABAMA', 'AP': 'Armed Forces Pacific', 'AS': 'AMERICAN SAMOA', 'AR': 'ARKANSAS', 'VT': 'VERMONT', 'IL': 'ILLINOIS', 'GA': 'GEORGIA', 'IN': 'INDIANA', 'IA': 'IOWA', 'OK': 'OKLAHOMA', 'AZ': 'ARIZONA', 'ID': 'IDAHO', 'CT': 'CONNECTICUT', 'ME': 'MAINE', 'MD': 'MARYLAND', 'MA': 'MASSACHUSETTS', 'OH': 'OHIO', 'UT': 'UTAH', 'MO': 'MISSOURI', 'MN': 'MINNESOTA', 'MI': 'MICHIGAN', 'MH': 'MARSHALL ISLANDS', 'RI': 'RHODE ISLAND', 'KS': 'KANSAS', 'MT': 'MONTANA', 'MP': 'NORTHERN MARIANA ISLANDS', 'MS': 'MISSISSIPPI', 'PR': 'PUERTO RICO', 'SC': 'SOUTH CAROLINA', 'KY': 'KENTUCKY', 'OR': 'OREGON', 'SD': 'SOUTH DAKOTA'} 10 | 11 | state_geocodes = { 'AK': (61.3850,-152.2683), 12 | 'AL': (32.7990,-86.8073), 13 | 'AR': (34.9513,-92.3809), 14 | 'AS': (14.2417,-170.7197), 15 | 'AZ': (33.7712,-111.3877), 16 | 'CA': (36.1700,-119.7462), 17 | 'CO': (39.0646,-105.3272), 18 | 'CT': (41.5834,-72.7622), 19 | 'DC': (38.8964,-77.0262), 20 | 'DE': (39.3498,-75.5148), 21 | 'FL': (27.8333,-81.7170), 22 | 'GA': (32.9866,-83.6487), 23 | 'HI': (21.1098,-157.5311), 24 | 'IA': (42.0046,-93.2140), 25 | 'ID': (44.2394,-114.5103), 26 | 'IL': (40.3363,-89.0022), 27 | 'IN': (39.8647,-86.2604), 28 | 'KS': (38.5111,-96.8005), 29 | 'KY': (37.6690,-84.6514), 30 | 'LA': (31.1801,-91.8749), 31 | 'MA': (42.2373,-71.5314), 32 | 'MD': (39.0724,-76.7902), 33 | 'ME': (44.6074,-69.3977), 34 | 'MI': (43.3504,-84.5603), 35 | 'MN': (45.7326,-93.9196), 36 | 'MO': (38.4623,-92.3020), 37 | 'MP': (14.8058,145.5505), 38 | 'MS': (32.7673,-89.6812), 39 | 'MT': (46.9048,-110.3261), 40 | 'NC': (35.6411,-79.8431), 41 | 'ND': (47.5362,-99.7930), 42 | 'NE': (41.1289,-98.2883), 43 | 'NH': (43.4108,-71.5653), 44 | 'NJ': (40.3140,-74.5089), 45 | 'NM': (34.8375,-106.2371), 46 | 'NV': (38.4199,-117.1219), 47 | 'NY': (42.1497,-74.9384), 48 | 'OH': (40.3736,-82.7755), 49 | 'OK': (35.5376,-96.9247), 50 | 'OR': (44.5672,-122.1269), 51 | 'PA': (40.5773,-77.2640), 52 | 'PR': (18.2766,-66.3350), 53 | 'RI': (41.6772,-71.5101), 54 | 'SC': (33.8191,-80.9066), 55 | 'SD': (44.2853,-99.4632), 56 | 'TN': (35.7449,-86.7489), 57 | 'TX': (31.1060,-97.6475), 58 | 'UT': (40.1135,-111.8535), 59 | 'VA': (37.7680,-78.2057), 60 | 'VI': (18.0001,-64.8199), 61 | 'VT': (44.0407,-72.7093), 62 | 'WA': (47.3917,-121.5708), 63 | 'WI': (44.2563,-89.6385), 64 | 'WV': (38.4680,-80.9696), 65 | 'WY': (42.7475,-107.2085) 66 | } 67 | 68 | 69 | 70 | 71 | class State(object): 72 | 73 | 74 | _index_ = {} 75 | def __init__(self, name, code, lat = None, lon = None): 76 | self.name = name 77 | self.code = code 78 | self.lat = lat 79 | self.lon = lon 80 | 81 | 82 | def __repr__(self): 83 | 84 | return "%s" % self.__dict__ 85 | 86 | 87 | @staticmethod 88 | def buildIndex(): 89 | """ 90 | Init the state index 91 | we index both by name and by code. 92 | This gets called automatically when this module is imprted for the first time 93 | """ 94 | 95 | if not State._index_: 96 | for code, geocode in state_geocodes.iteritems(): 97 | name = code_to_state.get(code, '').capitalize() 98 | State._index_[code] = State(name, code, geocode[0], geocode[1]) 99 | State._index_[name.upper()] = State._index_[code] 100 | 101 | @staticmethod 102 | def get(stateOrCode): 103 | 104 | """ 105 | Get state object by code or name, case insensitive 106 | @return the state if found, or None otherwise 107 | """ 108 | 109 | s = stateOrCode.upper().strip() 110 | 111 | return State._index_.get(s) 112 | 113 | def score(self, refLat, refLon): 114 | 115 | if refLat is None or refLon is None: 116 | return 0.2 117 | d = Location.getLatLonDistance((self.lat, self.lon), (refLat, refLon)) 118 | dScore = max(0.2, 1 - 1/(1+math.exp(-0.012*d+2*math.e) )) 119 | 120 | 121 | return dScore 122 | 123 | State.buildIndex() 124 | 125 | 126 | if __name__ == '__main__': 127 | 128 | print State.get('CA') 129 | 130 | 131 | -------------------------------------------------------------------------------- /geodis/geodis: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | #Copyright 2011 Do@. All rights reserved. 4 | # 5 | #Redistribution and use in source and binary forms, with or without modification, are 6 | #permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this list of 9 | # conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 12 | # of conditions and the following disclaimer in the documentation and/or other materials 13 | # provided with the distribution. 14 | # 15 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 16 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 18 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 22 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | #The views and conclusions contained in the software and documentation are those of the 26 | #authors and should not be interpreted as representing official policies, either expressed 27 | #or implied, of Do@. 28 | 29 | import redis 30 | import logging 31 | import sys 32 | from optparse import OptionParser 33 | 34 | from geodis.provider.geonames import GeonamesImporter 35 | from geodis.provider.ip2location import IP2LocationImporter 36 | from geodis.provider.zipcodes import ZIPImporter 37 | from geodis.provider.factual import BusinessImporter 38 | from geodis.iprange import IPRange 39 | from geodis.city import City 40 | from geodis.zipcode import ZIPCode 41 | 42 | from geodis import us_states 43 | 44 | 45 | __author__="dvirsky" 46 | __date__ ="$Mar 25, 2011 4:44:22 PM$" 47 | 48 | redis_host = 'localhost' 49 | redis_port = 6379 50 | redis_db = 8 51 | 52 | def importGeonames(fileName): 53 | 54 | global redis_host, redis_port, redis_db 55 | importer = GeonamesImporter(fileName, redis_host, redis_port, redis_db) 56 | if not importer.runImport(): 57 | print "Could not import geonames database..." 58 | sys.exit(1) 59 | 60 | 61 | def importBusinesses(fileName): 62 | 63 | global redis_host, redis_port, redis_db 64 | importer = BusinessImporter(fileName, redis_host, redis_port, redis_db) 65 | if not importer.runImport(): 66 | print "Could not import business database..." 67 | sys.exit(1) 68 | 69 | 70 | def importIP2Location(fileName): 71 | 72 | global redis_host, redis_port, redis_db 73 | importer = IP2LocationImporter(fileName, redis_host, redis_port, redis_db) 74 | if not importer.runImport(True): 75 | print "Could not import geonames database..." 76 | sys.exit(1) 77 | 78 | 79 | def importZIPCode(fileName): 80 | 81 | global redis_host, redis_port, redis_db 82 | importer = ZIPImporter(fileName, redis_host, redis_port, redis_db) 83 | if not importer.runImport(): 84 | print "Could not import geonames database..." 85 | sys.exit(1) 86 | 87 | 88 | def resolveIP(ip): 89 | global redis_host, redis_port, redis_db 90 | r = redis.Redis(host = redis_host, port = redis_port, db = redis_db) 91 | 92 | loc = IPRange.getCity(ip, r) 93 | print loc 94 | 95 | 96 | def resolveCoords(lat, lon): 97 | global redis_host, redis_port, redis_db 98 | r = redis.Redis(host = redis_host, port = redis_port, db = redis_db) 99 | loc = City.getByLatLon(lat, lon, r) 100 | print loc 101 | 102 | 103 | if __name__ == "__main__": 104 | 105 | 106 | 107 | logging.basicConfig( 108 | level = logging.INFO, 109 | format='%(asctime)s %(levelname)s in %(module)s.%(funcName)s (%(filename)s:%(lineno)s): %(message)s', 110 | ) 111 | #build options parser 112 | parser = OptionParser(usage="\n\n%prog [--import_geonames | --import_ip2location | --import_businesses] --file=FILE", version="%prog 0.1") 113 | 114 | parser.add_option("-g", "--import_geonames", dest="import_geonames", 115 | action='store_true', default=False, 116 | help='Import locations from Geonames data dump') 117 | 118 | parser.add_option("-i", "--import_ip2coutnry", dest="import_ip2location", 119 | action='store_true', default=False, 120 | help='Import ip ranges from ip2country.com dumps') 121 | 122 | parser.add_option("-z", "--import_zipcodes", dest="import_zipcodes", 123 | action='store_true', default=False, 124 | help='Import zipcodes') 125 | parser.add_option("-b", "--import_businesses", dest="import_businesses", 126 | action='store_true', default=False, 127 | help='Import businesses') 128 | parser.add_option("-f", "--file", dest="import_file", 129 | help="Location of the file we want to import", metavar="FILE") 130 | 131 | parser.add_option("-d", "--dir", dest="import_dir", 132 | help="Location of the files we want to import", metavar="DIR") 133 | 134 | parser.add_option("-P", "--resolve_ip", dest="resolve_ip", default = None, 135 | help="resolve an ip address to location", metavar="IP_ADDR") 136 | 137 | 138 | parser.add_option("-L", "--resolve_latlon", dest="resolve_latlon", default = None, 139 | help="resolve an lat,lon pair into location", metavar="LAT,LON") 140 | 141 | 142 | parser.add_option("-H", "--redis_host", dest="redis_host", default = 'localhost', 143 | help="redis host to use", metavar="HOST") 144 | 145 | parser.add_option("-p", "--redis_port", dest="redis_port", default = 6375, 146 | type="int", help="redis port to use", metavar="PORT") 147 | 148 | parser.add_option("-n", "--redis_database", dest="redis_db", default = 8, 149 | type="int", help="redis database to use (default 8)", metavar="DB_NUM") 150 | 151 | (options, args) = parser.parse_args() 152 | redis_host = options.redis_host 153 | redis_port = options.redis_port 154 | redis_db = options.redis_db 155 | 156 | if options.import_geonames: 157 | importGeonames(options.import_file) 158 | 159 | elif options.import_ip2location: 160 | importIP2Location(options.import_file) 161 | 162 | elif options.import_zipcodes: 163 | importZIPCode(options.import_file) 164 | 165 | elif options.import_businesses: 166 | importBusinesses(options.import_file) 167 | 168 | elif options.resolve_ip: 169 | resolveIP(options.resolve_ip) 170 | 171 | 172 | elif options.resolve_latlon: 173 | coords = [float(p) for p in options.resolve_latlon.split(',')] 174 | resolveCoords(*coords) 175 | else: 176 | parser.print_help() 177 | 178 | sys.exit(0) 179 | -------------------------------------------------------------------------------- /geodis/city.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | from .location import Location 28 | from .index import TextIndex, GeoboxIndex, GeoBoxTextIndex 29 | from .us_states import State 30 | 31 | import re 32 | import math 33 | import logging 34 | 35 | class City(Location): 36 | """ 37 | Wrapper for a city location object 38 | """ 39 | 40 | #what we want to save for a city 41 | __countryspec__ = ['continent', 'country', 'continentId', 'countryId'] 42 | __spec__ = Location.__spec__ + __countryspec__ + ['state', 'stateId', 'cityId', 'aliases', 'population'] 43 | __keyspec__ = Location.__spec__ + ['country', 'state' ] 44 | 45 | __cityspec__ = set(__spec__) - set(__countryspec__) 46 | 47 | _keys = {'name': TextIndex('City', ('name', 'aliases'), ','), 48 | 'geoname': GeoBoxTextIndex('City', [GeoboxIndex.RES_128KM], ('name', 'aliases'), ',') 49 | } 50 | 51 | def __init__(self, **kwargs): 52 | 53 | super(City, self).__init__(**kwargs) 54 | 55 | self.continent = kwargs.get('continent', '').strip() 56 | self.country = kwargs.get('country','').strip() 57 | self.state = kwargs.get('state', '').strip() 58 | 59 | self.continentId = kwargs.get('continentId', 0) 60 | self.countryId = kwargs.get('countryId', 0) 61 | self.stateId = kwargs.get('stateId', 0) 62 | self.cityId = kwargs.get('cityId', 0) 63 | 64 | aliases = set(filter(lambda x: re.match('^[a-zA-Z0-9,\\-\'": ]+$', x), kwargs.get('aliases', '').split(','))) 65 | state = State.get(self.state) 66 | if state: 67 | aliases.add("%s %s" % (self.name, state.code)) 68 | aliases.add("%s %s" % (self.name, state.name)) 69 | 70 | self.aliases = ",".join(aliases) 71 | 72 | #index as an alias 73 | 74 | self.population = kwargs.get('population', 0) 75 | 76 | 77 | def score(self, refLat, refLon): 78 | 79 | ret = getattr(self, '_score', None) 80 | if not ret: 81 | population = float(self.population) 82 | dScore = 0.5 83 | if not population: 84 | population = 10 85 | popScore = 1 - math.exp(-0.000004*population) 86 | 87 | if refLat and refLon: 88 | d = Location.getLatLonDistance((self.lat, self.lon), (refLat, refLon)) 89 | dScore = max(0.5, math.pow(1 - 1/(1+math.exp(-0.02*d+2*math.e) ), 0.3)) 90 | 91 | logging.info("SCORE FOR %s, %s: distance %skm, population %s, dscore: %s, popscore: %s score: %s", self.name, self.country, d, population, dScore, popScore, dScore * popScore) 92 | ret = popScore * dScore 93 | 94 | #print ret 95 | self._score = ret 96 | return ret 97 | 98 | def toCountry(self): 99 | """ 100 | Removes all non-country and non-continent values from the city 101 | """ 102 | for k in self.__cityspec__: 103 | self.__dict__[k]=None 104 | 105 | @classmethod 106 | def exist(cls, terms, redisConn): 107 | 108 | return cls._keys['name'].exist(terms, redisConn) 109 | 110 | @classmethod 111 | def getByName(cls, name, redisConn, referenceLat = None, referenceLon = None, countryLimit = None, limit = 5): 112 | """ 113 | Load a citiy or a list of city by name or alias to the city. for example, name can be New York or NYC 114 | @return a list of City objects that can be empty 115 | """ 116 | 117 | cities = cls.loadByNamedKey('name', redisConn, name) 118 | 119 | #if no need to sort - just return what we get 120 | if len(cities) > 1: 121 | 122 | #sort by distance to the user 123 | cities.sort(lambda x,y: cmp(x.score(referenceLat, referenceLon), 124 | y.score(referenceLat, referenceLon) 125 | ), reverse=True) 126 | 127 | 128 | 129 | if countryLimit: 130 | cities = filter(lambda x: x.country.lower() == countryLimit.lower(), cities) 131 | return cities[:limit] 132 | 133 | @classmethod 134 | def getByRadius(cls, lat, lon, radius, redisConn, text = None, limit = 5): 135 | 136 | 137 | nodes = cls.loadByNamedKey('geoname', redisConn, lat, lon, radius, text or '') 138 | nodes.sort(lambda x,y: cmp(y.score(lat, lon), x.score(lat,lon))) 139 | return nodes[:limit] 140 | 141 | if __name__ == '__main__': 142 | 143 | import redis, time 144 | 145 | 146 | r = redis.Redis(db = 8, host = 'localhost', port = 6375) 147 | logging.basicConfig(level = 0) 148 | #c = City(lat = 40.7143, lon= -74.006, country = "United States", state= "New York", name = "New York") 149 | #c.save(r) 150 | #lat = 51.5085 151 | #lon = -0.1 152 | 153 | lat,lon = 32.0667,34.7667 154 | d = 128 155 | st = time.time() 156 | #cities = City.getByRadius(lat, lon, d, r, "haifa") 157 | cities = City.getByName('tel aviv', r, lat, lon) 158 | et = time.time() 159 | print 1000*(et - st),"ms" 160 | print "Found %d cities!" % len(cities) 161 | print "\n".join(["%s %s, %s %.02fkm pop %s score %s" % (c.country, c.name, c.state, Location.getLatLonDistance((lat, lon), (c.lat, c.lon)), c.population, c.score(lat, lon)) for c in cities]) 162 | 163 | 164 | # for city in cities: 165 | # print city.name, city.country, Location.getLatLonDistance((lat, lon), (city.lat, city.lon)) 166 | #import redis 167 | 168 | #c.save(r) 169 | #c = City(lat = 40.1143, lon= -74.106, country = "United States", state= "New York", name = "New York") 170 | #c.save(r) -------------------------------------------------------------------------------- /test/benchmark.py: -------------------------------------------------------------------------------- 1 | #performance benchmarks 2 | 3 | import sys 4 | import os 5 | import redis 6 | from iprange import IPRange 7 | from city import City 8 | import time 9 | 10 | def benchResolveIPs(num): 11 | ips = ['209.85.238.11', 12 | '209.85.238.4', 13 | '216.239.33.96', 14 | '216.239.33.97', 15 | '216.239.33.98', 16 | '216.239.33.99', 17 | '216.239.37.98', 18 | '216.239.37.99', 19 | '216.239.39.98', 20 | '216.239.39.99', 21 | '216.239.41.96', 22 | '216.239.41.97', 23 | '216.239.41.98', 24 | '216.239.41.99', 25 | '216.239.45.4', 26 | '216.239.51.96', 27 | '216.239.51.97', 28 | '216.239.51.98', 29 | '216.239.51.99', 30 | '216.239.53.98', 31 | '216.239.53.99', 32 | '216.239.57.96', 33 | '216.239.57.97', 34 | '216.239.57.98', 35 | '216.239.57.99', 36 | '216.239.59.98', 37 | '216.239.59.99', 38 | '216.33.229.163', 39 | '64.233.173.193', 40 | '64.233.173.194', 41 | '64.233.173.195', 42 | '64.233.173.196', 43 | '64.233.173.197', 44 | '64.233.173.198', 45 | '64.233.173.199', 46 | '64.233.173.200', 47 | '64.233.173.201', 48 | '64.233.173.202', 49 | '64.233.173.203', 50 | '64.233.173.204', 51 | '64.233.173.205', 52 | '64.233.173.206', 53 | '64.233.173.207', 54 | '64.233.173.208', 55 | '64.233.173.209', 56 | '64.233.173.210', 57 | '64.233.173.211', 58 | '64.233.173.212', 59 | '64.233.173.213', 60 | '64.233.173.214', 61 | '64.233.173.215', 62 | '64.233.173.216', 63 | '64.233.173.217', 64 | '64.233.173.218', 65 | '64.233.173.219', 66 | '64.233.173.220', 67 | '64.233.173.221', 68 | '64.233.173.222', 69 | '64.233.173.223', 70 | '64.233.173.224', 71 | '64.233.173.225', 72 | '64.233.173.226', 73 | '64.233.173.227', 74 | '64.233.173.228', 75 | '64.233.173.229', 76 | '64.233.173.230', 77 | '64.233.173.231', 78 | '64.233.173.232', 79 | '64.233.173.233', 80 | '64.233.173.234', 81 | '64.233.173.235', 82 | '64.233.173.236', 83 | '64.233.173.237', 84 | '64.233.173.238', 85 | '64.233.173.239', 86 | '64.233.173.240', 87 | '64.233.173.241', 88 | '64.233.173.242', 89 | '64.233.173.243', 90 | '64.233.173.244', 91 | '64.233.173.245', 92 | '64.233.173.246', 93 | '64.233.173.247', 94 | '64.233.173.248', 95 | '64.233.173.249', 96 | '64.233.173.250', 97 | '64.233.173.251', 98 | '64.233.173.252', 99 | '64.233.173.253', 100 | '64.233.173.254', 101 | '64.233.173.255', 102 | '64.68.90.1', 103 | '64.68.90.10', 104 | '64.68.90.11', 105 | '64.68.90.12', 106 | '64.68.90.129', 107 | '64.68.90.13', 108 | '64.68.90.130', 109 | '64.68.90.131', 110 | '64.68.90.132', 111 | '64.68.90.133', 112 | '64.68.90.134', 113 | '64.68.90.135', 114 | '64.68.90.136', 115 | '64.68.90.137', 116 | '64.68.90.138', 117 | '64.68.90.139', 118 | '64.68.90.14', 119 | '64.68.90.140', 120 | '64.68.90.141', 121 | '64.68.90.142', 122 | '64.68.90.143', 123 | '64.68.90.144', 124 | '64.68.90.145', 125 | '64.68.90.146', 126 | '64.68.90.147', 127 | '64.68.90.148', 128 | '64.68.90.149', 129 | '64.68.90.15', 130 | '64.68.90.150', 131 | '64.68.90.151', 132 | '64.68.90.152', 133 | '64.68.90.153', 134 | '64.68.90.154', 135 | '64.68.90.155', 136 | '64.68.90.156', 137 | '64.68.90.157', 138 | '64.68.90.158', 139 | '64.68.90.159', 140 | '64.68.90.16', 141 | '64.68.90.160', 142 | '64.68.90.161', 143 | '64.68.90.162', 144 | '64.68.90.163', 145 | '64.68.90.164', 146 | '64.68.90.165', 147 | '64.68.90.166', 148 | '64.68.90.167', 149 | '64.68.90.168', 150 | '64.68.90.169', 151 | '64.68.90.17', 152 | '64.68.90.170', 153 | '64.68.90.171', 154 | '64.68.90.172', 155 | '64.68.90.173', 156 | '64.68.90.174', 157 | '64.68.90.175', 158 | '64.68.90.176', 159 | '64.68.90.177', 160 | '64.68.90.178', 161 | '64.68.90.179', 162 | '64.68.90.18', 163 | '64.68.90.180', 164 | '64.68.90.181', 165 | '64.68.90.182', 166 | '64.68.90.183', 167 | '64.68.90.184', 168 | '64.68.90.185', 169 | '64.68.90.186', 170 | '64.68.90.187', 171 | '64.68.90.188', 172 | '64.68.90.189', 173 | '64.68.90.19', 174 | '64.68.90.190', 175 | '64.68.90.191', 176 | '64.68.90.192', 177 | '64.68.90.193', 178 | '64.68.90.194', 179 | '64.68.90.195', 180 | '64.68.90.196', 181 | '64.68.90.197', 182 | '64.68.90.198', 183 | '64.68.90.199', 184 | '64.68.90.2', 185 | '64.68.90.20', 186 | '64.68.90.200', 187 | '64.68.90.201', 188 | '64.68.90.202', 189 | '64.68.90.203', 190 | '64.68.90.204', 191 | '64.68.90.205', 192 | '64.68.90.206', 193 | '64.68.90.207', 194 | '64.68.90.208', 195 | '64.68.90.21', 196 | '64.68.90.22', 197 | '64.68.90.23', 198 | '64.68.90.24', 199 | '64.68.90.25', 200 | '64.68.90.26', 201 | '64.68.90.27', 202 | '64.68.90.28', 203 | '64.68.90.29', 204 | '64.68.90.3', 205 | '64.68.90.30', 206 | '64.68.90.31', 207 | '64.68.90.32', 208 | '64.68.90.33', 209 | '64.68.90.34', 210 | '64.68.90.35', 211 | '64.68.90.36', 212 | '64.68.90.37', 213 | '64.68.90.38', 214 | '64.68.90.39', 215 | '64.68.90.4', 216 | '64.68.90.40', 217 | '64.68.90.41', 218 | '64.68.90.42', 219 | '64.68.90.43', 220 | '64.68.90.44', 221 | '64.68.90.45', 222 | '64.68.90.46', 223 | '64.68.90.47', 224 | '64.68.90.48', 225 | '64.68.90.49', 226 | '64.68.90.5', 227 | '64.68.90.50', 228 | '64.68.90.51', 229 | '64.68.90.52', 230 | '64.68.90.53', 231 | '64.68.90.54', 232 | '64.68.90.55', 233 | '64.68.90.56', 234 | '64.68.90.57', 235 | '64.68.90.58', 236 | '64.68.90.59', 237 | '64.68.90.6', 238 | '64.68.90.60', 239 | '64.68.90.61', 240 | '64.68.90.62', 241 | '64.68.90.63', 242 | '64.68.90.64', 243 | '64.68.90.65', 244 | '64.68.90.66', 245 | '64.68.90.67', 246 | '64.68.90.68', 247 | '64.68.90.69', 248 | '64.68.90.7', 249 | '64.68.90.70', 250 | '64.68.90.71', 251 | '64.68.90.72', 252 | '64.68.90.73', 253 | '64.68.90.74', 254 | '64.68.90.75', 255 | '64.68.90.76', 256 | '64.68.90.77', 257 | '64.68.90.78', 258 | '64.68.90.79', 259 | '64.68.90.8', 260 | '64.68.90.80', 261 | '64.68.90.9'] 262 | 263 | #ips = ['166.205.138.92', '62.0.18.221', '69.147.125.65', '188.127.241.156', '79.178.26.33'] 264 | r = redis.Redis() 265 | nips = len(ips) 266 | for i in xrange(num): 267 | ip = ips[i % nips] 268 | loc = IPRange.getCity(ip, r) 269 | 270 | return num 271 | 272 | def benchResolveCoords(num): 273 | 274 | coords = [(-3.03333,53.47778), (40.7226,-74.66544), (31.78199,35.2196), (0,0),(45,45)] 275 | r = redis.Redis() 276 | ncoords = len(coords) 277 | for i in xrange(num): 278 | lat,lon = coords[i % ncoords] 279 | loc = City.getByLatLon(lat,lon, r) 280 | 281 | 282 | return num 283 | 284 | def benchSingleProc(func, num): 285 | 286 | print "Running benchmark %s for %d times..." % (func.__name__, num) 287 | st = time.time() 288 | num = func(num) 289 | et = time.time() 290 | 291 | print "time: %.03fsec, rate: %.03fq/s" % (et - st, (float(num) / (et-st))) 292 | 293 | if __name__ == "__main__": 294 | 295 | 296 | benchSingleProc(benchResolveCoords, 10000) 297 | benchSingleProc(benchResolveIPs, 10000) -------------------------------------------------------------------------------- /geodis/location.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | from .countries import countries 28 | 29 | from geohasher import hasher 30 | import math 31 | import struct 32 | import base64 33 | import logging 34 | 35 | class Location(object): 36 | """ 37 | This is the base class for all location subclasses 38 | """ 39 | 40 | __spec__ = ['lat', 'lon', 'name'] 41 | __keyspec__ = None 42 | 43 | #keys should be named so we can query by the key names 44 | _keys = {} 45 | 46 | def __init__(self, **kwargs): 47 | 48 | self.lat = float(kwargs.get('lat', None)) 49 | self.lon = float(kwargs.get('lon', None)) 50 | self.name = kwargs.get('name', '').strip() 51 | 52 | 53 | @classmethod 54 | def _key(cls, _valdict): 55 | 56 | h = hash(':'.join((str(_valdict.get(x)) for x in cls.__keyspec__ or cls.__spec__))) 57 | return '%s:%s' % (cls.__name__, base64.b64encode(struct.pack('q', h).strip('='))) 58 | 59 | def getId(self): 60 | 61 | #h = hash(':'.join((str(getattr(self, x)) for x in self.__keyspec__ or self.__spec__))) 62 | #'%s:%s' % (self.__class__.__name__, base64.b64encode(struct.pack('q', h).strip('='))) 63 | return self._key(self.__dict__) 64 | 65 | def get(self, prop): 66 | 67 | return getattr(self, prop) 68 | 69 | 70 | @classmethod 71 | def getGeohashIndexKey(cls): 72 | 73 | return '%s:geohash' % cls.__name__ 74 | 75 | def save(self, redisConn): 76 | 77 | 78 | #save all properties 79 | redisConn.hmset(self.getId(), dict(((k, getattr(self, k)) for k in \ 80 | self.__spec__))) 81 | 82 | self._indexGeohash(redisConn) 83 | 84 | for k in self._keys.values(): 85 | k.save(self, redisConn) 86 | 87 | def _indexGeohash(self, redisConn): 88 | """ 89 | Save the key of the object into the goehash index for this object type 90 | """ 91 | 92 | redisConn.zadd(self.getGeohashIndexKey(), self.getId(), hasher.encode(self.lat, self.lon)) 93 | 94 | 95 | def __str__(self): 96 | return "%s: %s" % (self.__class__.__name__, self.__dict__) 97 | 98 | def __repr__(self): 99 | return "%s: %s" % (self.__class__.__name__, self.__dict__) 100 | 101 | @classmethod 102 | def load(cls, key, redisConn): 103 | """ 104 | a Factory function to load a location from a given location key 105 | """ 106 | 107 | d = redisConn.hgetall(str(key)) 108 | 109 | if not d: 110 | return None 111 | 112 | #build a new object based on the loaded dict 113 | return cls(**d) 114 | 115 | def score(self, refLat, refLon): 116 | """ 117 | To be implemented by child classes 118 | """ 119 | return 1 120 | 121 | 122 | 123 | @classmethod 124 | def multiLoad(cls, keys, redisConn): 125 | """ 126 | a Factory function to load a location from a given location key 127 | """ 128 | 129 | p = redisConn.pipeline() 130 | [p.hgetall(str(key)) for key in keys] 131 | rx = p.execute() 132 | 133 | 134 | #build a new object based on the loaded dict 135 | return [cls(**d) for d in rx if d is not None] 136 | 137 | @classmethod 138 | def getIdsByNamedKey(cls, keyName, redisConn, *args, **kwargs): 139 | k = cls._keys[keyName] 140 | 141 | return k.getIds(redisConn, *args, **kwargs) 142 | 143 | @classmethod 144 | def loadByNamedKey(cls, keyName, redisConn, *args, **kwargs): 145 | """ 146 | Load a class by a named key indexing some if its fields 147 | """ 148 | 149 | k = cls._keys[keyName] 150 | 151 | ids = k.getIds(redisConn, *args, **kwargs) 152 | 153 | logging.info("Found %d ids for %s",len(ids or []), keyName) 154 | p = redisConn.pipeline(False) 155 | [p.hgetall(id) for id in ids] 156 | rx = p.execute() 157 | 158 | ret = [cls(**d) for d in filter(None, rx)] 159 | 160 | # for id in ids: 161 | # ret.append(cls.load(id, redisConn)) 162 | 163 | return ret 164 | 165 | 166 | 167 | @classmethod 168 | def getByKey(cls, redisConn, **kwargs): 169 | """ 170 | Load an object by combining data from kwargs to create the unique key for this object 171 | useful for loading ZIP codes with only the known zip 172 | """ 173 | key = cls._key(kwargs) 174 | 175 | return cls.load(key, redisConn) 176 | 177 | 178 | @classmethod 179 | def getByLatLon(cls, lat, lon, redisConn): 180 | 181 | geoKey = hasher.encode(lat, lon) 182 | 183 | return cls.getByGeohash(geoKey, redisConn) 184 | 185 | @staticmethod 186 | def getDistance(geoHash1, geoHash2): 187 | """ 188 | Estimate the distance between 2 geohashes in uint64 format 189 | """ 190 | 191 | # return abs(geoHash1 - geoHash2) 192 | 193 | try: 194 | coords1 = hasher.decode(geoHash1) 195 | coords2 = hasher.decode(geoHash2) 196 | return Location.getLatLonDistance(coords1, coords2) 197 | #return math.sqrt(math.pow(coords1[0] - coords2[0], 2) + 198 | #math.pow(coords1[1] - coords2[1], 2)) 199 | except Exception, e: 200 | print e 201 | return None 202 | 203 | 204 | @staticmethod 205 | def getLatLonDistance(x, y): 206 | 207 | 208 | R = 6371 209 | dLat = math.radians(y[0]-x[0]) 210 | dLon = math.radians(y[1]-x[1]) 211 | a = math.sin(dLat/2) * math.sin(dLat/2) + \ 212 | math.cos( math.radians(x[0])) * math.cos( math.radians(y[0])) * \ 213 | math.sin(dLon/2) * math.sin(dLon/2); 214 | 215 | c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a)) 216 | return R * c 217 | 218 | 219 | 220 | @classmethod 221 | def getByGeohash(cls, geoKey, redisConn): 222 | """ 223 | Get a location (used directly on a subclass only!) according to a geohash key 224 | """ 225 | 226 | 227 | key = cls.getGeohashIndexKey() 228 | tx = redisConn.pipeline() 229 | tx.zrangebyscore(key, geoKey, 'inf', 0, 10, True) 230 | tx.zrevrangebyscore(key, geoKey, '-inf', 0, 10, True) 231 | ret = tx.execute() 232 | 233 | #find the two closest locations to the left and to the right of us 234 | candidates = filter(None, ret[0]) + filter(None, ret[1]) 235 | 236 | closestDist = None 237 | selected = None 238 | if not candidates : 239 | return None 240 | 241 | for i in xrange(len(candidates)): 242 | 243 | gk = long(candidates[i][1]) 244 | 245 | dist = Location.getDistance(geoKey, gk) 246 | if dist is None: 247 | continue 248 | 249 | if not closestDist or dist < closestDist: 250 | closestDist = dist 251 | selected = i 252 | 253 | 254 | if selected is None: 255 | return None 256 | 257 | 258 | return cls.load(str(candidates[selected][0]), redisConn) 259 | 260 | 261 | 262 | 263 | -------------------------------------------------------------------------------- /test/data/ip2location.csv: -------------------------------------------------------------------------------- 1 | 2 | "67297920","67297927","US","UNITED STATES","TENNESSEE","MEMPHIS","35.1292","-89.9892","38103" 3 | "67297928","67297935","US","UNITED STATES","TEXAS","ENNIS","32.3387","-96.6021","75119" 4 | "67297936","67297943","US","UNITED STATES","MISSOURI","BRANSON","36.6647","-93.2412","65616" 5 | "67297944","67297951","US","UNITED STATES","SOUTH CAROLINA","GEORGETOWN","33.4905","-79.2882","29440" 6 | "67297952","67298015","US","UNITED STATES","TEXAS","IRVING","32.8412","-96.9596","75038" 7 | "67298016","67298047","US","UNITED STATES","NEW YORK","NEW YORK","40.7488","-73.9846","10016" 8 | "67298048","67298111","US","UNITED STATES","SOUTH CAROLINA","CONWAY","33.8157","-79.0739","29526" 9 | "67298112","67298119","US","UNITED STATES","INDIANA","GOSHEN","41.5836","-85.8345","46526" 10 | "67298120","67298135","US","UNITED STATES","OHIO","CINCINNATI","39.0975","-84.472","45202" 11 | "67298136","67298175","US","UNITED STATES","-","-","38.9048","-77.0354","-" 12 | "67298176","67298183","US","UNITED STATES","TENNESSEE","MEMPHIS","35.1292","-89.9892","38118" 13 | "67298184","67298191","US","UNITED STATES","MARYLAND","GAITHERSBURG","39.1578","-77.1963","20878" 14 | "67298192","67298207","US","UNITED STATES","-","-","38.9048","-77.0354","-" 15 | "67298208","67298239","US","UNITED STATES","VIRGINIA","VIENNA","38.9097","-77.2717","22180" 16 | "67298240","67298271","US","UNITED STATES","SOUTH CAROLINA","MYRTLE BEACH","33.6903","-78.9245","29577" 17 | "67298272","67298279","US","UNITED STATES","MARYLAND","BALTIMORE","39.2894","-76.6384","21230" 18 | "67298280","67299327","US","UNITED STATES","-","-","38.9048","-77.0354","-" 19 | "67299328","67305471","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 20 | "67305472","67306495","US","UNITED STATES","NORTH CAROLINA","DURHAM","35.9971","-78.9038","27701" 21 | "67306496","67307007","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 22 | "67307008","67307519","US","UNITED STATES","INDIANA","INDIANAPOLIS","39.8643","-86.1477","46201" 23 | "67307520","67309567","US","UNITED STATES","CALIFORNIA","LONG BEACH","33.7876","-118.17","90745" 24 | "67309568","67309823","US","UNITED STATES","NORTH CAROLINA","DURHAM","35.9971","-78.9038","27701" 25 | "67309824","67310079","US","UNITED STATES","VIRGINIA","ARLINGTON","38.8783","-77.1078","22201" 26 | "67310080","67311359","US","UNITED STATES","NORTH CAROLINA","DURHAM","35.9971","-78.9038","27701" 27 | "67311360","67311615","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 28 | "67311616","67311871","US","UNITED STATES","NORTH CAROLINA","DURHAM","35.9971","-78.9038","27701" 29 | "67311872","67312127","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 30 | "67312128","67312383","US","UNITED STATES","WASHINGTON","EVERETT","47.9563","-122.216","98201" 31 | "67312384","67312639","US","UNITED STATES","CALIFORNIA","RANCHO CUCAMONGA","34.1237","-117.572","91701" 32 | "67312640","67312895","US","UNITED STATES","CALIFORNIA","MOORPARK","34.3023","-118.873","93020" 33 | "67312896","67313151","US","UNITED STATES","FLORIDA","TAMPA","27.9733","-82.4717","33601" 34 | "67313152","67313407","US","UNITED STATES","CALIFORNIA","TORRANCE","33.8413","-118.333","90501" 35 | "67313408","67313663","US","UNITED STATES","NORTH CAROLINA","DURHAM","35.9971","-78.9038","27701" 36 | "67313664","67314687","US","UNITED STATES","WASHINGTON","EVERETT","47.9563","-122.216","98201" 37 | "67314688","67314943","US","UNITED STATES","CALIFORNIA","WEST COVINA","34.0535","-117.918","91790" 38 | "67314944","67315711","US","UNITED STATES","WASHINGTON","EVERETT","47.9563","-122.216","98201" 39 | "67315712","67318015","US","UNITED STATES","CALIFORNIA","SYLMAR","34.3177","-118.419","91342" 40 | "67318016","67318527","US","UNITED STATES","MICHIGAN","LUDINGTON","43.9737","-86.4001","49431" 41 | "67318528","67319039","US","UNITED STATES","WASHINGTON","EVERETT","47.9563","-122.216","98201" 42 | "67319040","67319295","US","UNITED STATES","ILLINOIS","ROCKFORD","42.2722","-89.0688","61101" 43 | "67319296","67319551","US","UNITED STATES","WASHINGTON","EVERETT","47.9563","-122.216","98201" 44 | "67319552","67319807","US","UNITED STATES","INDIANA","LAFAYETTE","40.4091","-86.8592","47901" 45 | "67319808","67320063","US","UNITED STATES","MICHIGAN","LUDINGTON","43.9737","-86.4001","49431" 46 | "67320064","67320319","US","UNITED STATES","FLORIDA","SARASOTA","27.3595","-82.5012","34230" 47 | "67320320","67320575","US","UNITED STATES","VIRGINIA","RESTON","38.9623","-77.3442","20191" 48 | "67320576","67320831","US","UNITED STATES","TEXAS","CARROLLTON","32.9949","-96.8967","75006" 49 | "67320832","67321087","US","UNITED STATES","CALIFORNIA","LONG BEACH","33.7876","-118.17","90745" 50 | "67321088","67321343","US","UNITED STATES","CALIFORNIA","MOORPARK","34.3023","-118.873","93020" 51 | "67321344","67321599","US","UNITED STATES","VIRGINIA","RESTON","38.9623","-77.3442","20191" 52 | "67321600","67321855","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 53 | "67321856","67322879","US","UNITED STATES","CALIFORNIA","AZUSA","34.1312","-117.916","91702" 54 | "67322880","67323135","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 55 | "67323136","67323391","US","UNITED STATES","CALIFORNIA","RANCHO CUCAMONGA","34.1237","-117.572","91701" 56 | "67323392","67323647","US","UNITED STATES","VIRGINIA","RESTON","38.9623","-77.3442","20191" 57 | "67323648","67323903","US","UNITED STATES","TEXAS","GARLAND","32.9161","-96.6327","75040" 58 | "67323904","67324159","US","UNITED STATES","CALIFORNIA","RESEDA","34.2011","-118.538","91335" 59 | "67324160","67324671","US","UNITED STATES","CALIFORNIA","REDONDO BEACH","33.8521","-118.377","90277" 60 | "67324672","67325183","US","UNITED STATES","CALIFORNIA","RESEDA","34.2011","-118.538","91335" 61 | "67325184","67325439","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 62 | "67325440","67325695","US","UNITED STATES","CALIFORNIA","RESEDA","34.2011","-118.538","91335" 63 | "67325696","67325951","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 64 | "67325952","67326463","US","UNITED STATES","TEXAS","ARLINGTON","32.7173","-97.1143","76001" 65 | "67326464","67326719","US","UNITED STATES","TEXAS","DALLAS","32.7961","-96.8024","75201" 66 | "67326720","67326975","US","UNITED STATES","FLORIDA","BRADENTON","27.4649","-82.5465","34201" 67 | "67326976","67327231","US","UNITED STATES","CALIFORNIA","MONTCLAIR","34.0753","-117.697","91763" 68 | "67327232","67327999","US","UNITED STATES","CALIFORNIA","OXNARD","34.1928","-119.176","93030" 69 | "67328000","67329023","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 70 | "67329024","67330047","US","UNITED STATES","NEW YORK","NEW YORK","40.7488","-73.9846","10001" 71 | "67330048","67330559","US","UNITED STATES","TEXAS","DALLAS","32.7961","-96.8024","75201" 72 | "67330560","67330815","US","UNITED STATES","MISSOURI","COLUMBIA","38.9424","-92.3145","65201" 73 | "67330816","67331071","US","UNITED STATES","TEXAS","DALLAS","32.7961","-96.8024","75201" 74 | "67331072","67331327","US","UNITED STATES","FLORIDA","BRADENTON","27.4649","-82.5465","34201" 75 | "67331328","67331583","US","UNITED STATES","FLORIDA","-","38.9048","-77.0354","-" 76 | "67331584","67331839","US","UNITED STATES","TEXAS","PLANO","33.0404","-96.7238","75023" 77 | "67331840","67332095","US","UNITED STATES","CALIFORNIA","LONG BEACH","33.7876","-118.17","90745" 78 | "67332096","67334399","US","UNITED STATES","CALIFORNIA","LA PUENTE","34.0292","-117.955","91744" 79 | "67334400","67334655","US","UNITED STATES","TEXAS","DALLAS","32.7961","-96.8024","75201" 80 | "67334656","67334911","US","UNITED STATES","TEXAS","PLANO","33.0404","-96.7238","75023" 81 | "67334912","67335167","US","UNITED STATES","TEXAS","DALLAS","32.7961","-96.8024","75201" 82 | "67335168","67335679","US","UNITED STATES","TEXAS","PLANO","33.0404","-96.7238","75023" 83 | "67335680","67335935","US","UNITED STATES","TEXAS","PILOT POINT","33.3456","-96.9066","76258" 84 | "67335936","67336191","US","UNITED STATES","VIRGINIA","MANASSAS","38.7415","-77.4674","20108" 85 | "67336192","67336447","US","UNITED STATES","CALIFORNIA","LONG BEACH","33.7876","-118.17","90745" 86 | "67336448","67336703","US","UNITED STATES","OREGON","PORTLAND","45.4793","-122.689","97201" 87 | "67336704","67336959","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 88 | "67336960","67337215","US","UNITED STATES","INDIANA","WEST LAFAYETTE","40.4427","-86.9237","47906" 89 | "67337216","67338239","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 90 | "67338240","67339263","US","UNITED STATES","WASHINGTON","EVERETT","47.9563","-122.216","98201" 91 | "67339264","67340287","US","UNITED STATES","CALIFORNIA","LONG BEACH","33.7876","-118.17","90745" 92 | "67340288","67340543","US","UNITED STATES","TEXAS","PLANO","33.0404","-96.7238","75023" 93 | "67340544","67341311","US","UNITED STATES","TEXAS","DALLAS","32.7961","-96.8024","75201" 94 | "67341312","67341567","US","UNITED STATES","TEXAS","PLANO","33.0404","-96.7238","75023" 95 | "67341568","67342079","US","UNITED STATES","NEW YORK","BROOKLYN","40.6594","-73.9625","11201" 96 | "67342080","67342335","US","UNITED STATES","TEXAS","PLANO","33.0404","-96.7238","75023" 97 | "67342336","67342591","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 98 | "67342592","67342847","US","UNITED STATES","CALIFORNIA","RESEDA","34.2011","-118.538","91335" 99 | "67342848","67343871","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 100 | "67343872","67344127","US","UNITED STATES","CALIFORNIA","REDONDO BEACH","33.8521","-118.377","90277" 101 | "67344128","67344639","US","UNITED STATES","CALIFORNIA","LOS ANGELES","34.0452","-118.284","90001" 102 | 103 | -------------------------------------------------------------------------------- /geodis/index.py: -------------------------------------------------------------------------------- 1 | #Copyright 2011 Do@. All rights reserved. 2 | # 3 | #Redistribution and use in source and binary forms, with or without modification, are 4 | #permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, this list of 7 | # conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list 10 | # of conditions and the following disclaimer in the documentation and/or other materials 11 | # provided with the distribution. 12 | # 13 | #THIS SOFTWARE IS PROVIDED BY Do@ ``AS IS'' AND ANY EXPRESS OR IMPLIED 14 | #WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 15 | #FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL OR 16 | #CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | #CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | #SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | #ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 21 | #ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | # 23 | #The views and conclusions contained in the software and documentation are those of the 24 | #authors and should not be interpreted as representing official policies, either expressed 25 | #or implied, of Do@. 26 | 27 | 28 | 29 | import string 30 | import re 31 | import struct 32 | from upoints.point import Point 33 | import math 34 | import logging 35 | from contextlib import contextmanager 36 | 37 | from .location import Location 38 | 39 | class AbstractIndex(object): 40 | 41 | def __init__(self, className): 42 | 43 | self.className = className 44 | 45 | 46 | 47 | class TextIndex(AbstractIndex): 48 | ''' 49 | classdocs 50 | ''' 51 | 52 | trantab = string.maketrans("-_,", " ") 53 | stopchars = "\"'\\`'[]{}()./?:)(*&^%$#@!=" 54 | TMP_KEY_EXPIRATION = 600 55 | 56 | def __init__(self, className, fields, delimiter = ' '): 57 | ''' 58 | Constructor 59 | ''' 60 | AbstractIndex.__init__(self, className) 61 | self.fields = fields 62 | self.delimiter = delimiter 63 | 64 | def getKey(self, word): 65 | 66 | return 'ft:%s:%s' % (self.className, word) 67 | 68 | def exist(self, terms, redisConn): 69 | 70 | keys = [self.getKey(t) for t in terms] 71 | 72 | ret = [] 73 | p = redisConn.pipeline() 74 | [p.exists(k) for k in keys] 75 | 76 | rx = p.execute() 77 | for idx, k in enumerate(keys): 78 | if rx[idx]: 79 | ret.append(terms[idx]) 80 | 81 | return ret 82 | 83 | @staticmethod 84 | def normalizeString(str_): 85 | 86 | return str_.translate(TextIndex.trantab, TextIndex.stopchars).lower().strip().replace(' ', ' ') 87 | 88 | def save(self, obj, redisConn): 89 | 90 | indexKeys = set() 91 | for f in self.fields: 92 | [indexKeys.add(self.normalizeString(x.lower().strip())) for x in (getattr(obj, f, '').split(self.delimiter)) ] 93 | 94 | 95 | for x in indexKeys: 96 | redisConn.zadd(self.getKey(x), **{obj.getId(): 1/float(len(indexKeys)) }) 97 | 98 | 99 | def getIds(self, redisConn, value, store = False): 100 | 101 | values = re.split(self.delimiter, self.normalizeString(value.lower().strip())) 102 | 103 | if not values: 104 | return [] 105 | keys = [self.getKey(value) for value in values] 106 | 107 | if len(keys) == 1 and store: 108 | return keys[0] 109 | 110 | tmpKey = 'ft_tmp:%s:%s' % (self.className, " ".join(values)) 111 | p = redisConn.pipeline(False) 112 | p.zinterstore(tmpKey, keys, aggregate = 'SUM') 113 | p.expire(tmpKey, self.TMP_KEY_EXPIRATION) 114 | if not store: 115 | p.zrevrange(tmpKey, 0,-1, True) 116 | 117 | rx = p.execute() 118 | return [x[0] for x in rx[-1]] 119 | else: 120 | p.execute() 121 | #only return if we have any results 122 | return tmpKey if p[0] > 0 else None 123 | 124 | 125 | from geohasher import hasher 126 | import time 127 | import itertools 128 | TSTabs = 0 129 | @contextmanager 130 | def TimeSampler(func = None, actionDescription = ''): 131 | global TSTabs 132 | TSTabs += 1 133 | 134 | st = time.time() 135 | yield 136 | et = time.time() 137 | TSTabs -= 1 138 | msg =(TSTabs * '\t') + ('Action %s took %.03fms' % (actionDescription, 1000*(et - st))) 139 | 140 | if func: 141 | func(msg) 142 | else: 143 | logging.info(msg) 144 | 145 | 146 | class GeoboxIndex(AbstractIndex): 147 | 148 | 149 | RES_1KM = 1 150 | RES_4KM = 4 151 | RES_16KM = 16 152 | RES_64KM = 64 153 | RES_128KM = 128 154 | 155 | #Mapping bit resultions to *very* rough geo box size precision 156 | BIT_RESOLUTIONS = { 157 | 158 | RES_1KM: 35, 159 | RES_4KM: 39, 160 | RES_16KM: 43, 161 | RES_64KM: 47, 162 | RES_128KM: 49 163 | } 164 | 165 | def __init__(self, className, resolutionsInKM): 166 | 167 | AbstractIndex.__init__(self, className) 168 | self.resolutions = resolutionsInKM 169 | 170 | 171 | def getKey(self, resolution, cell): 172 | 173 | return 'box:%s:%s:%x' % (self.className, resolution, cell >> 32) 174 | 175 | 176 | def getGeocell(self, lat, lon, bitres): 177 | 178 | return (hasher.encode(lat, lon) & int('1'*(64 - bitres) + '0'*bitres, 2)) 179 | 180 | def save(self, obj, redisConn): 181 | 182 | 183 | p = redisConn.pipeline() 184 | for r in self.resolutions: 185 | 186 | cell = self.getGeocell(obj.lat, obj.lon, self.BIT_RESOLUTIONS[r]) 187 | 188 | k = self.getKey(r, cell) 189 | #print _hash 190 | p.zadd(k, **{obj.getId(): hasher.encode(obj.lat, obj.lon)}) 191 | 192 | p.execute() 193 | 194 | 195 | def getIds(self, redisConn, lat, lon, radius, store = False ): 196 | 197 | 198 | res = None 199 | for r in self.resolutions: 200 | if r >= radius: 201 | res = r 202 | break 203 | 204 | 205 | if not res: 206 | logging.warn("Radius too big for available resolutions") 207 | return [] 208 | closest = set() 209 | 210 | 211 | if radius > 0 and radius <= self.RES_128KM: 212 | bitres = self.BIT_RESOLUTIONS[res] 213 | cell = self.getGeocell(lat, lon, bitres) 214 | closest.add(self.getKey(res, cell)) 215 | p = Point(lat, lon) 216 | 217 | with TimeSampler(None, 'collecting cells'): 218 | for bearing in (0, 45, 90, 135, 180, 225, 270, 315): 219 | 220 | dest = p.destination(bearing, math.sqrt(2 * (radius**2)) if bearing % 90 else radius) 221 | 222 | cell = self.getGeocell(dest.latitude, dest.longitude, self.BIT_RESOLUTIONS[res]) 223 | 224 | closest.add(self.getKey(res, cell)) 225 | 226 | 227 | tmpKey = 'box:%s:%s,%s' % (self.className, lat,lon) 228 | if not store: 229 | redisConn.zunionstore(tmpKey, list(closest)) 230 | return redisConn.zrevrange(tmpKey, 0, -1, withscores=True) 231 | else: 232 | return list(closest) 233 | 234 | 235 | 236 | return [] if not store else None 237 | 238 | 239 | class GeoBoxTextIndex(AbstractIndex): 240 | """ 241 | Mashup of textual and geobox indices 242 | """ 243 | 244 | def __init__(self, className, resolutionsInKM, fields, delimiter = ' '): 245 | 246 | self.geoIndex = GeoboxIndex(className, resolutionsInKM) 247 | self.textIndex = TextIndex(className, fields, delimiter) 248 | 249 | def save(self, obj, redisConn): 250 | 251 | self.geoIndex.save(obj, redisConn) 252 | self.textIndex.save(obj, redisConn) 253 | 254 | 255 | def getIds(self, redisConn, lat, lon, radius, text = ''): 256 | 257 | if not text: 258 | 259 | ids = self.geoIndex.getIds(redisConn, lat, lon, radius, False) 260 | #nodes = filter(lambda c: c and Location.getLatLonDistance((lat, lon), c[1]) <= radius, ids) 261 | return [id[0] for id in ids] 262 | else: 263 | 264 | #store matching elements from text key 265 | nameKey = self.textIndex.getIds(redisConn, text, True) 266 | geoKeys = self.geoIndex.getIds(redisConn, lat, lon, radius, True) 267 | 268 | if nameKey and geoKeys: 269 | 270 | tmpKey = 'tk:%s::%%s' % (hash('%s' % [lat,lon,radius,text or ''])) 271 | with TimeSampler(None, 'Getting shit done'): 272 | p = redisConn.pipeline(False) 273 | for id, gk in enumerate(geoKeys): 274 | p.zinterstore(tmpKey % id, {gk: 1, nameKey: 0}, 'SUM') 275 | for id, gk in enumerate(geoKeys): 276 | p.zrevrange(tmpKey % id, 0, -1, True) 277 | 278 | rx = p.execute() 279 | with TimeSampler(None, 'Filtering shit out'): 280 | ids = filter(lambda x: Location.getLatLonDistance((lat, lon), x[1]) <= radius, ((x[0], hasher.decode(long(x[1]))) for x in itertools.chain(*(rx[len(geoKeys):])))) 281 | 282 | return [id[0] for id in ids] 283 | else: 284 | return [] 285 | 286 | 287 | class IndexableObject(object): 288 | 289 | _keys_ = {} 290 | 291 | def save(self, redisConn): 292 | 293 | for k in self._keys_.values(): 294 | k.save(self, redisConn) 295 | -------------------------------------------------------------------------------- /geodis/data/geonames2mysql.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | ''' 3 | Created on May 30, 2011 4 | 5 | @author: dor 6 | ''' 7 | 8 | import MySQLdb 9 | import logging 10 | import time 11 | from optparse import OptionParser 12 | import re 13 | import json 14 | 15 | class Cols: 16 | geonameid = 0 17 | name = 1 18 | asciiname = 2 19 | alternatenames = 3 20 | latitude = 4 21 | longitude = 5 22 | feature_class = 6 23 | feature_code = 7 24 | country_code = 8 25 | cc2 = 9 26 | admin1_code = 10 27 | admin2_code = 11 28 | admin3_code = 12 29 | admin4_code = 13 30 | population = 14 31 | elevation = 15 32 | gtopo3 = 16 33 | timezone = 17 34 | modification_date = 18 35 | 36 | class FeatureType: 37 | world = 'World' 38 | continent = 'Continent' 39 | country = 'Country' 40 | region = state = 'Region' 41 | city = 'City' 42 | 43 | parents = { 44 | city: region, 45 | region: country, 46 | country: continent, 47 | continent: world 48 | } 49 | 50 | featureCodes = { 51 | 'CONT': FeatureType.continent, 52 | 53 | 'PCL': FeatureType.country, 54 | 'PCLI': FeatureType.country, 55 | 'PCLD': FeatureType.country, 56 | 'PCLF': FeatureType.country, 57 | 'PCLIX': FeatureType.country, 58 | 'PCLS': FeatureType.country, 59 | 'TERR': FeatureType.country, 60 | 61 | 'ADM1': FeatureType.region, 62 | 63 | 'PPL': FeatureType.city, 64 | 'PPLA': FeatureType.city, 65 | 'PPLA2': FeatureType.city, 66 | 'PPLC': FeatureType.city, 67 | 'PPLS': FeatureType.city 68 | } 69 | 70 | ids = { 71 | FeatureType.country: [Cols.country_code], 72 | FeatureType.region: [Cols.country_code, Cols.admin1_code] 73 | } 74 | 75 | class GeonamesLoader: 76 | 77 | def __init__(self, **dbparams): 78 | 79 | self.names = {} 80 | self.links = {} 81 | 82 | self.db=MySQLdb.connect(**dbparams) 83 | 84 | def _loadAlternates(self, alternates): 85 | self.names = {} 86 | self.links = {} 87 | 88 | strengths = {} 89 | 90 | numeric = re.compile('^\d+$') 91 | 92 | for l in alternates.xreadlines(): 93 | r = l.strip().split('\t') 94 | 95 | if r[2]=='en' and not numeric.match(r[3]): 96 | 97 | id = r[1] 98 | strength = 0 99 | l = len(r) 100 | 101 | if l>5 and r[5]=='1': 102 | strength += 70 103 | 104 | if l>4 and r[4]=='1': 105 | strength += 30 106 | 107 | if strength==100 or strength>strengths.get(id): 108 | self.names[id] = r[3] 109 | strengths[id] = strength 110 | 111 | elif r[2]=='link' and 'http://en.wikipedia.org' in r[3]: 112 | self.links[r[1]] = r[3] 113 | 114 | def _loadHierarchy(self, hierarchy): 115 | # reading hierarchy 116 | 117 | self.hierarchy = {} 118 | for l in hierarchy.xreadlines(): 119 | r = l.strip().split('\t') 120 | if len(r)>2 and r[2]=='ADM': 121 | self.hierarchy[r[1]] = r[0] 122 | 123 | 124 | 125 | def load(self, allCountires, alternates, hierarchy): 126 | 127 | logging.info('Loading %s...' % alternates) 128 | self._loadAlternates(open(alternates)) 129 | 130 | logging.info('Loading %s...' % hierarchy) 131 | self._loadHierarchy(open(hierarchy)) 132 | 133 | ref = {} 134 | data = [] 135 | 136 | logging.info('Loading %s...' % allCountires) 137 | for line in open(allCountires).xreadlines(): 138 | r = line.split('\t') 139 | fc = r[Cols.feature_code] 140 | #print r[Cols.name], fc 141 | 142 | if fc in featureCodes and \ 143 | (r[Cols.feature_class]!='P' or int(r[Cols.population])>=1000) and \ 144 | (r[Cols.feature_code]!='TERR' or int(r[Cols.gtopo3])>=0): 145 | 146 | if fc == 'TERR' and r[Cols.country_code] == 'AU': 147 | print r 148 | continue 149 | 150 | ft = featureCodes[fc] 151 | if ft in ids: 152 | id = '_'.join([r[id] for id in ids[ft]]) 153 | ref[id] = r[Cols.geonameid] 154 | 155 | parentFT = FeatureType.parents.get(ft, None) 156 | tempParentId = '_'.join([r[id] for id in ids[parentFT]]) if parentFT in ids else None 157 | 158 | # skipping all entities inside Greater London except London itself 159 | if r[Cols.admin2_code]=='GLA' and r[Cols.geonameid]!='2643743': 160 | continue 161 | 162 | gid = r[Cols.geonameid] 163 | data.append([ 164 | tempParentId, 165 | gid, 166 | self.names.get(gid, r[Cols.name]), 167 | r[Cols.longitude], 168 | r[Cols.latitude], 169 | self.links.get(gid, None), 170 | ft, 171 | r[Cols.alternatenames], 172 | r[Cols.population] 173 | ]) 174 | 175 | 176 | logging.info('Assigning parents...') 177 | for rec in data: 178 | if rec[0]: 179 | parentId = ref.get(rec[0], None) 180 | if not parentId and '_' in rec[0]: 181 | parentId = ref.get(rec[0].split('_')[0], None) 182 | rec[0] = parentId 183 | 184 | if not rec[0]: 185 | rec[0] = self.hierarchy.get(rec[1], None) 186 | 187 | if not rec[0]: 188 | logging.warn("Can't place '%s' (%s), skipping" % (rec[2], rec[1])) 189 | continue 190 | 191 | logging.info('Commiting...') 192 | cur = self.db.cursor() 193 | cur.execute("SET FOREIGN_KEY_CHECKS=0") 194 | cur.execute('TRUNCATE locations') 195 | cur.execute("INSERT INTO locations (parentId, id, name, lon, lat, type) VALUES (%s, %s, %s, %s, %s, %s)", [None, '6295630', 'Worldwide', 0, 0, FeatureType.world]) 196 | cur.executemany("INSERT INTO locations (parentId, id, name, lon, lat, info, type, aliases, population) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)", data) 197 | cur.execute('COMMIT') 198 | cur.execute("SET FOREIGN_KEY_CHECKS=1") 199 | return True 200 | 201 | def dump(self): 202 | logging.info('Dumping CSV') 203 | cur = self.db.cursor() 204 | cur.execute(""" 205 | SELECT continent.id as continent_id, continent.name as continent_name, country.id as country_id, country.name as country_name, state.id as state_id,state.name as state_name, 206 | city.id as city_id, city.name as city_name, city.lat as lat, city.lon as lon, city.aliases as aliases, city.population as population, 207 | state.type as type2, country.type as type1, continent.type as type0 208 | FROM locations city 209 | JOIN locations state ON (city.parentId=state.id) 210 | JOIN locations country ON (state.parentId=country.id) 211 | JOIN locations continent ON (continent.id=country.parentId) 212 | WHERE city.type = 'City' 213 | """) 214 | 215 | for row in cur.fetchall(): 216 | t = row[-3] 217 | record = map(str,row)[:-3] 218 | if t=='Country': 219 | record = record[2:] 220 | record.insert(4, '0') 221 | record.insert(5, '') 222 | 223 | print json.dumps(record, ensure_ascii=False) 224 | 225 | return True 226 | 227 | if __name__ == '__main__': 228 | 229 | logging.basicConfig(level=logging.DEBUG) 230 | 231 | parser = OptionParser()#usage="\n\n%prog [--import_geonames | --import_ip2location] --file=FILE", version="%prog 0.1") 232 | 233 | parser.add_option("-i", "--import_geonames", dest="import_geonames", 234 | action='store_true', default=False, 235 | help='Import locations from Geonames data dump to mysql') 236 | 237 | parser.add_option("-x", "--export_csv", dest="export_csv", 238 | action='store_true', default=False, 239 | help='Export locations to csv') 240 | 241 | parser.add_option("-D", "--dir", dest="import_dir", 242 | help="Location of the files we want to import", metavar="DIR") 243 | 244 | parser.add_option("-H", "--host", dest="host", default = 'localhost', 245 | help="mysql host to use", metavar="HOST") 246 | 247 | parser.add_option("-P", "--port", dest="port", default = 3306, 248 | type="int", help="mysql port to use", metavar="PORT") 249 | 250 | parser.add_option("-u", "--user", dest="user", default = 'root', 251 | help="mysql user", metavar="USER") 252 | 253 | parser.add_option("-p", "--password", dest="passwd", default = '', 254 | help="mysql password", metavar="PASSWD") 255 | 256 | parser.add_option("-d", "--database", dest="database", default = 'doat', 257 | help="mysql database", metavar="DB") 258 | 259 | start = time.time() 260 | try: 261 | (options, args) = parser.parse_args() 262 | loader = GeonamesLoader(host=options.host, port=options.port, user=options.user, passwd=options.passwd, db=options.database) 263 | if options.import_geonames: 264 | dir = options.import_dir 265 | loader.load('%s/allCountries.txt'%dir, '%s/alternateNames.txt'%dir, '%s/hierarchy.txt'%dir) 266 | elif options.export_csv: 267 | loader.dump() 268 | 269 | logging.info('Done') 270 | except Exception: 271 | logging.exception("Error") 272 | logging.info('Operation took %s seconds' % (time.time() - start)) 273 | -------------------------------------------------------------------------------- /external/ip2location/download.pl: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # IP2Location Download Client 3 | ############################################################################### 4 | # Perl script to download IP2Location(tm) batabase from the server. 5 | # Note: User subscription login and password required. 6 | # 7 | # There is no warranty or guarantee conveyed by the author/copyright holder of 8 | # this script. By obtaining, installing, and using this program, you agree and 9 | # understand that the author and copyright holder are not responsible for any 10 | # damages caused under any conditions due to the malfunction of the script(s) 11 | # on your server or otherwise. 12 | # 13 | # REVISION HISTORY 14 | # ================ 15 | # 1.0.0 Initial Release 16 | # 1.1.0 Support IP2Location DB11 + DB12 + DB13 + DB14 17 | # 1.2.0 Change URL to IP2Location.com 18 | # 2.0.0 Support IP2Location DB15 + DB16 + DB17 + DB18 19 | # Support IP2Proxy PX1 20 | # Support Command Prompt in Windows as EXE 21 | # 2.1.0 Support Proxy Server 22 | # 2.2.0 Support CIDR + ACL 23 | # 3.0.0 Support IP2Location DB19 + DB20 24 | # 25 | # Copyright (C) 2005-2010 IP2Location.com. All rights reserved. 26 | # 27 | ############################################################################### 28 | #!/usr/bin/perl -w 29 | use Getopt::Long; 30 | use strict; 31 | $|++; 32 | #eval("use LWP 5.6.9;"); die "[ERROR] LWP 5.6.9 or greater required.\n" if $@; 33 | eval("use LWP;"); die "[ERROR] LWP library required.\n" if $@; 34 | 35 | my $VERSION = "3.0.0"; 36 | my $opt_package = ""; 37 | my $opt_login = ""; 38 | my $opt_password = ""; 39 | my $opt_output = ""; 40 | my $opt_proxy = ""; 41 | my $help = 0; 42 | 43 | my $result = GetOptions('package=s' => \$opt_package, 44 | 'login:s' => \$opt_login, 45 | 'password:s' => \$opt_password, 46 | 'output:s' => \$opt_output, 47 | 'proxy:s' => \$opt_proxy, 48 | 'help' => \$help); 49 | 50 | if ($help) { 51 | &print_help; 52 | exit(0); 53 | } 54 | 55 | my $final_data = ""; 56 | my $total_size = 0; 57 | my $expiry_date = ""; 58 | my $database_version = ""; 59 | 60 | my $urlversion = "http://www.ip2location.com/download/downloaderversion.txt"; 61 | my $urlinfo = "http://www.ip2location.com/downloadinfo.aspx"; 62 | my $url = "http://www.ip2location.com/download.aspx"; 63 | 64 | my $login = ''; 65 | my $password = ''; 66 | my $filename = ''; 67 | my $output = ''; 68 | my $package = ''; 69 | my $proxy = ''; 70 | 71 | if ($opt_package ne "") { 72 | $package = $opt_package; 73 | } else { 74 | &print_help; 75 | print "[Error] Missing -package command line switch or parameter.\n"; 76 | exit(0); 77 | } 78 | 79 | if ($opt_login ne "") { 80 | $login = $opt_login; 81 | } else { 82 | &print_help; 83 | print "[Error] Missing -login command line switch or parameter.\n"; 84 | exit(0); 85 | } 86 | 87 | if ($opt_password ne "") { 88 | $password = $opt_password; 89 | } else { 90 | &print_help; 91 | print "[Error] Missing -password command line switch or parameter.\n"; 92 | exit(0); 93 | } 94 | 95 | if ($opt_proxy ne "") { 96 | $proxy = lc($opt_proxy); 97 | } 98 | 99 | 100 | $package = uc($package); 101 | 102 | if ($package eq "DB1") { $filename = "IPCountry-FULL.zip"; $output = "IP-COUNTRY-FULL.ZIP"; } 103 | elsif ($package eq "DB2") { $filename = "IPISP-FULL.zip"; $output = "IP-COUNTRY-ISP-FULL.ZIP"; } 104 | elsif ($package eq "DB3") { $filename = "IP-COUNTRY-REGION-CITY-FULL.ZIP"; $output = $filename; } 105 | elsif ($package eq "DB4") { $filename = "IP-COUNTRY-REGION-CITY-ISP-FULL.ZIP"; $output = $filename; } 106 | elsif ($package eq "DB5") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-FULL.ZIP"; $output = $filename; } 107 | elsif ($package eq "DB6") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ISP-FULL.ZIP"; $output = $filename; } 108 | elsif ($package eq "DB7") { $filename = "IP-COUNTRY-REGION-CITY-ISP-DOMAIN-FULL.ZIP"; $output = $filename; } 109 | elsif ($package eq "DB8") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ISP-DOMAIN-FULL.ZIP"; $output = $filename; } 110 | elsif ($package eq "DB9") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-FULL.ZIP"; $output = $filename; } 111 | elsif ($package eq "DB10") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-ISP-DOMAIN-FULL.ZIP"; $output = $filename; } 112 | elsif ($package eq "DB11") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-FULL.ZIP"; $output = $filename; } 113 | elsif ($package eq "DB12") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN-FULL.ZIP"; $output = $filename; } 114 | elsif ($package eq "DB13") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-TIMEZONE-NETSPEED-FULL.ZIP"; $output = $filename; } 115 | elsif ($package eq "DB14") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN-NETSPEED-FULL.ZIP"; $output = $filename; } 116 | elsif ($package eq "DB15") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-AREACODE-FULL.ZIP"; $output = $filename; } 117 | elsif ($package eq "DB16") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN-NETSPEED-AREACODE-FULL.ZIP"; $output = $filename; } 118 | elsif ($package eq "DB17") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-TIMEZONE-NETSPEED-WEATHER-FULL.ZIP"; $output = $filename; } 119 | elsif ($package eq "DB18") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN-NETSPEED-AREACODE-WEATHER-FULL.ZIP"; $output = $filename; } 120 | elsif ($package eq "DB19") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ISP-DOMAIN-MOBILE-FULL.ZIP"; $output = $filename; } 121 | elsif ($package eq "DB20") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN-NETSPEED-AREACODE-WEATHER-MOBILE-FULL.ZIP"; $output = $filename; } 122 | elsif ($package eq "DB1BIN") { $filename = "IP-COUNTRY.BIN.ZIP"; $output = $filename; } 123 | elsif ($package eq "DB2BIN") { $filename = "IP-COUNTRY-ISP.BIN.ZIP"; $output = $filename; } 124 | elsif ($package eq "DB3BIN") { $filename = "IP-COUNTRY-REGION-CITY.BIN.ZIP"; $output = $filename; } 125 | elsif ($package eq "DB4BIN") { $filename = "IP-COUNTRY-REGION-CITY-ISP.BIN.ZIP"; $output = $filename; } 126 | elsif ($package eq "DB5BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE.BIN.ZIP"; $output = $filename; } 127 | elsif ($package eq "DB6BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ISP.BIN.ZIP"; $output = $filename; } 128 | elsif ($package eq "DB7BIN") { $filename = "IP-COUNTRY-REGION-CITY-ISP-DOMAIN.BIN.ZIP"; $output = $filename; } 129 | elsif ($package eq "DB8BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ISP-DOMAIN.BIN.ZIP"; $output = $filename; } 130 | elsif ($package eq "DB9BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE.BIN.ZIP"; $output = $filename; } 131 | elsif ($package eq "DB10BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-ISP-DOMAIN.BIN.ZIP"; $output = $filename; } 132 | elsif ($package eq "DB11BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE.BIN.ZIP"; $output = $filename; } 133 | elsif ($package eq "DB12BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN.BIN.ZIP"; $output = $filename; } 134 | elsif ($package eq "DB13BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-TIMEZONE-NETSPEED.BIN.ZIP"; $output = $filename; } 135 | elsif ($package eq "DB14BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN-NETSPEED.BIN.ZIP"; $output = $filename; } 136 | elsif ($package eq "DB15BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-AREACODE.BIN.ZIP"; $output = $filename; } 137 | elsif ($package eq "DB16BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN-NETSPEED-AREACODE.BIN.ZIP"; $output = $filename; } 138 | elsif ($package eq "DB17BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-TIMEZONE-NETSPEED-WEATHER.BIN.ZIP"; $output = $filename; } 139 | elsif ($package eq "DB18BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN-NETSPEED-AREACODE-WEATHER.BIN.ZIP"; $output = $filename; } 140 | elsif ($package eq "DB19BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ISP-DOMAIN-MOBILE.BIN.ZIP"; $output = $filename; } 141 | elsif ($package eq "DB20BIN") { $filename = "IP-COUNTRY-REGION-CITY-LATITUDE-LONGITUDE-ZIPCODE-TIMEZONE-ISP-DOMAIN-NETSPEED-AREACODE-WEATHER-MOBILE.BIN.ZIP"; $output = $filename; } 142 | elsif ($package eq "PX1") { $filename = "IP2PROXY-IP-COUNTRY.ZIP"; $output = $filename; } 143 | elsif ($package eq "DB1CIDR") { $filename = "IP2LOCATION-IP-COUNTRY-CIDR.ZIP"; $output = $filename; } 144 | elsif ($package eq "DB1ACL") { $filename = "IP2LOCATION-IP-COUNTRY-ACL.ZIP"; $output = $filename; } 145 | else { 146 | print "[Error] Unknown -package command line parameter."; 147 | exit(0); 148 | } 149 | 150 | if ($opt_output ne "") { 151 | $output = $opt_output; 152 | } 153 | 154 | &check_info(); 155 | &download(); 156 | &check_update(); 157 | 158 | sub check_info() { 159 | my $ua; 160 | my $response; 161 | my $message; 162 | my @data; 163 | my $localpackage = $package; 164 | 165 | if ($package eq "DB1CIDR") { 166 | $localpackage = "DB1"; 167 | } elsif ($package eq "DB1ACL") { 168 | $localpackage = "DB1"; 169 | } 170 | 171 | $ua = LWP::UserAgent->new( ); 172 | if ($proxy ne "") { 173 | $ua->proxy('http', $proxy); 174 | } 175 | 176 | $response = $ua->get($urlinfo . "?email=$login&password=$password&productcode=$localpackage"); 177 | $message = $response->content(); 178 | @data = split(/\;/, $message); 179 | if ($data[0] eq "OK") 180 | { 181 | $total_size = $data[3]; 182 | $expiry_date = $data[1]; 183 | $database_version = $data[2]; 184 | } elsif ($data[0] eq "EXPIRED") { 185 | print "[Error] This download account has been expired since $data[1]. Please visit http://www.ip2location.com to renew the subscription."; 186 | exit(0); 187 | } elsif ($data[0] eq "INVALID") { 188 | print "[Error] Invalid account name or password."; 189 | exit(0); 190 | } elsif ($data[0] eq "NOPERMISSION") { 191 | print "[Error] This download account could not download required database because of permission issue."; 192 | exit(0); 193 | } else { 194 | print "[Error] Unknown issue $message. Please contact support\@ip2location.com."; 195 | exit(0); 196 | } 197 | } 198 | 199 | sub download() { 200 | print_header(); 201 | print "Downloading ", $output, " ...\n"; 202 | 203 | my $ua; 204 | my $response; 205 | 206 | $ua = LWP::UserAgent->new( ); 207 | if ($proxy ne "") { 208 | $ua->proxy('http', $proxy); 209 | } 210 | push @{ $ua->requests_redirectable }, 'POST'; 211 | 212 | my %form; 213 | 214 | $form{'login'} = $login; 215 | $form{'password'} = $password; 216 | $form{'btnDownload'} = "btnDownload"; 217 | 218 | $response = $ua->post($url . "?productcode=$package", \%form, ':content_cb' => \&callback ); 219 | 220 | die "$url error: ", $response->status_line unless $response->is_success; 221 | 222 | open OUT1, ">$output" or die; 223 | binmode(OUT1); 224 | print OUT1 $final_data; 225 | close OUT1; 226 | } 227 | 228 | sub check_update() { 229 | my $ua; 230 | my $response; 231 | my $message; 232 | $ua = LWP::UserAgent->new(); 233 | if ($proxy ne "") { 234 | $ua->proxy('http', $proxy); 235 | } 236 | $response = $ua->get($urlversion); 237 | $message = $response->content(); 238 | $message =~ s/\.//g; 239 | my $thisversion = $VERSION; 240 | $thisversion =~ s/\.//g; 241 | 242 | if ($message > $thisversion) { 243 | print "[IMPORTANT] New script version detected. Please download the latest version from http://www.ip2location.com/download/download.pl.zip"; 244 | } 245 | } 246 | 247 | sub callback { 248 | my ($data, $response, $protocol) = @_; 249 | $final_data .= $data; 250 | print progress_bar( length($final_data), $total_size, 25, '=' ); 251 | } 252 | 253 | sub progress_bar { 254 | my ( $got, $total, $width, $char ) = @_; 255 | $width ||= 25; $char ||= '='; 256 | my $num_width = length($total); 257 | sprintf "|%-${width}s| Got %${num_width}s bytes of %s (%.2f%%)\r", 258 | $char x (($width-1)*$got/$total). '>', 259 | $got, $total, 100*$got/+$total; 260 | } 261 | 262 | sub print_help { 263 | print_header(); 264 | print < -login -password -output -proxy 269 | 270 | package - Database Package (DB1, DB2...DB20, PX1 or DB1BIN, DB2BIN...DB20BIN) 271 | login - Login 272 | password - Password 273 | proxy - Proxy Server with Port (Optional) 274 | output - Output Filename (Optional) 275 | 276 | Please contact support\@ip2location.com for technical support. 277 | 278 | HELP 279 | } 280 | 281 | sub print_header { 282 | print <