├── metadata ├── README.txt ├── run.sh ├── urls.txt └── get_metadata.py ├── extract_zipcode_latlon ├── README.txt ├── index │ ├── raleigh_zipcode_index.txt │ ├── boston_latlon_index.txt │ ├── seattle_zipcode_index.txt │ ├── kcmo_latlon_index.txt │ ├── austin_latlon_index.txt │ ├── austin_zipcode_index.txt │ ├── sf_latlon_index.txt │ ├── nyc_latlon_index.txt │ ├── boston_zipcode_index.txt │ ├── baltimore_zipcode_index.txt │ ├── chicago_latlon_index.txt │ ├── chicago_zipcode_index.txt │ ├── sf_zipcode_index.txt │ └── kcmo_zipcode_index.txt ├── ijson │ ├── compat.py │ ├── backends │ │ ├── __init__.py │ │ ├── yajl.py │ │ ├── yajl2.py │ │ └── python.py │ ├── __init__.py │ ├── utils.py │ └── common.py ├── run.sh ├── city_list.txt └── collect_data.py ├── tagcloud ├── data │ └── skipwords.txt ├── README.txt └── tagcloud.r ├── latlon_to_zipcode ├── Makefile ├── zipcode ├── convert.sh ├── convert_points.py ├── README.txt ├── convert_shapefile_to_bboxes.py ├── main.cpp ├── Neighborhoods.hpp └── KdTreeBB.hpp ├── linechart ├── timeline.png ├── timeline_year.png ├── date2count.csv ├── timeline.py └── timeline_year.py ├── heatmap ├── shapefile │ ├── nyc_zipcta.dbf │ ├── nyc_zipcta.sbn │ ├── nyc_zipcta.sbx │ ├── nyc_zipcta.shp │ ├── nyc_zipcta.shx │ ├── Neighboorhoods.dbf │ ├── Neighboorhoods.sbn │ ├── Neighboorhoods.sbx │ ├── Neighboorhoods.shp │ ├── Neighboorhoods.shx │ ├── Neighboorhoods.prj │ └── nyc_zipcta.prj ├── README.txt ├── dbfUtils.py ├── chicago.py └── nyc.py ├── download ├── ids │ ├── deleon_ids.txt │ ├── redmond_ids.txt │ ├── wellington_ids.txt │ ├── honolulu_ids.txt │ ├── nola_ids.txt │ ├── slc_ids.txt │ ├── madison_ids.txt │ ├── weatherford_ids.txt │ ├── oaklandnet_ids.txt │ ├── somervillema_ids.txt │ ├── boston_ids.txt │ ├── austin_ids.txt │ └── edmonton_ids.txt ├── download_json_sf.sh ├── download_json_kcmo.sh ├── download_json_nola.sh ├── download_json_slc.sh ├── download_json_nyc.sh ├── download_json_redmond.sh ├── download_json_seattle.sh ├── 
download_json_austintexas.sh ├── download_json_boston.sh ├── download_json_chicago.sh ├── download_json_deleon.sh ├── download_json_edmonton.sh ├── download_json_honolulu.sh ├── download_json_raleighnc.sh ├── download_json_madison.sh ├── download_json_oaklandnet.sh ├── download_json_wellingtonfl.sh ├── download_json_baltimorecity.sh ├── download_json_somervillema.sh ├── download_json_weatherfordtx.sh └── README.txt ├── type_detection ├── ids │ ├── deleon_ids.txt │ ├── redmond_ids.txt │ ├── wellington_ids.txt │ ├── honolulu_ids.txt │ ├── nola_ids.txt │ ├── slc_ids.txt │ ├── madison_ids.txt │ ├── weatherford_ids.txt │ ├── oaklandnet_ids.txt │ ├── somervillema_ids.txt │ ├── boston_ids.txt │ ├── austin_ids.txt │ └── edmonton_ids.txt ├── README.txt ├── ijson │ ├── compat.py │ ├── backends │ │ ├── __init__.py │ │ ├── yajl.py │ │ ├── yajl2.py │ │ └── python.py │ ├── __init__.py │ ├── utils.py │ └── common.py ├── city_list.txt ├── run.sh ├── detect.py ├── zipcode.txt └── sample.py ├── barchart ├── README.txt ├── time.csv ├── loc.csv ├── time_loc_number.csv ├── barchart_loc.py ├── barchart_time_loc_num.py └── barchart_time.py ├── schema_similarity ├── run.sh ├── city_list.txt └── schema_similarity.py ├── README.md └── matrix_heatmap ├── boston.html └── nyc_no311.html /metadata/README.txt: -------------------------------------------------------------------------------- 1 | HOWTORUN: 2 | $./run.sh 3 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/README.txt: -------------------------------------------------------------------------------- 1 | HOWTORUN: 2 | $./run.sh 3 | -------------------------------------------------------------------------------- /tagcloud/data/skipwords.txt: -------------------------------------------------------------------------------- 1 | tif 2 | kml 3 | kmz 4 | gis 5 | -------------------------------------------------------------------------------- /latlon_to_zipcode/Makefile: 
-------------------------------------------------------------------------------- 1 | all: 2 | g++ -o zipcode main.cpp 3 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/index/raleigh_zipcode_index.txt: -------------------------------------------------------------------------------- 1 | 4uh5-z7g3 addrzipcod 20 2 | -------------------------------------------------------------------------------- /linechart/timeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/linechart/timeline.png -------------------------------------------------------------------------------- /latlon_to_zipcode/zipcode: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/latlon_to_zipcode/zipcode -------------------------------------------------------------------------------- /linechart/timeline_year.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/linechart/timeline_year.png -------------------------------------------------------------------------------- /heatmap/shapefile/nyc_zipcta.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/nyc_zipcta.dbf -------------------------------------------------------------------------------- /heatmap/shapefile/nyc_zipcta.sbn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/nyc_zipcta.sbn -------------------------------------------------------------------------------- /heatmap/shapefile/nyc_zipcta.sbx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/nyc_zipcta.sbx -------------------------------------------------------------------------------- /heatmap/shapefile/nyc_zipcta.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/nyc_zipcta.shp -------------------------------------------------------------------------------- /heatmap/shapefile/nyc_zipcta.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/nyc_zipcta.shx -------------------------------------------------------------------------------- /heatmap/shapefile/Neighboorhoods.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/Neighboorhoods.dbf -------------------------------------------------------------------------------- /heatmap/shapefile/Neighboorhoods.sbn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/Neighboorhoods.sbn -------------------------------------------------------------------------------- /heatmap/shapefile/Neighboorhoods.sbx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/Neighboorhoods.sbx -------------------------------------------------------------------------------- /heatmap/shapefile/Neighboorhoods.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/Neighboorhoods.shp 
-------------------------------------------------------------------------------- /heatmap/shapefile/Neighboorhoods.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/Neighboorhoods.shx -------------------------------------------------------------------------------- /latlon_to_zipcode/convert.sh: -------------------------------------------------------------------------------- 1 | mkdir converted_shapefile 2 | python convert_shapefile_to_bboxes.py 3 | python convert_points.py 4 | -------------------------------------------------------------------------------- /heatmap/README.txt: -------------------------------------------------------------------------------- 1 | HOWTORUN: 2 | + $python nyc.py 3 | Generate the heatmap for NYC 4 | + $python chicago.py 5 | Generate the heatmap for Chicago 6 | -------------------------------------------------------------------------------- /download/ids/deleon_ids.txt: -------------------------------------------------------------------------------- 1 | rv3w-5qsf 2 | igce-t68e 3 | 2z9y-pnc7 4 | cts4-q984 5 | ehs9-mzvg 6 | xbf3-af3b 7 | k37x-psqp 8 | 2vcj-mxa2 9 | 299w-4uik 10 | -------------------------------------------------------------------------------- /type_detection/ids/deleon_ids.txt: -------------------------------------------------------------------------------- 1 | rv3w-5qsf 2 | igce-t68e 3 | 2z9y-pnc7 4 | cts4-q984 5 | ehs9-mzvg 6 | xbf3-af3b 7 | k37x-psqp 8 | 2vcj-mxa2 9 | 299w-4uik 10 | -------------------------------------------------------------------------------- /metadata/run.sh: -------------------------------------------------------------------------------- 1 | OUTPUT_PATH="data" 2 | mkdir $OUTPUT_PATH 3 | cat urls.txt | while read LINE 4 | do 5 | echo $LINE 6 | python get_metadata.py $LINE $OUTPUT_PATH 7 | done 8 | -------------------------------------------------------------------------------- 
/type_detection/README.txt: -------------------------------------------------------------------------------- 1 | HOWTORUN: 2 | + Add information to city_list.txt, which stores and paths to JSON files. The format of each line is cityname + "\t" + path 3 | + Run $./run.sh 4 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/index/boston_latlon_index.txt: -------------------------------------------------------------------------------- 1 | 9yb5-8pvg location_x 23 location_y 24 2 | 9tfg-3jic coordinates 4 3 | snj3-z8hh coordinates 5 4 | 23yb-cufe centroidx 2 centroidy 3 5 | 7idu-4tds coordinates 4 6 | ekiy-2qmz coordinates 5 7 | -------------------------------------------------------------------------------- /barchart/README.txt: -------------------------------------------------------------------------------- 1 | HOWTORUN: 2 | + run "$python barchart_time_loc_number.py" to generate the bar chart of generic types 3 | + run "$python barchart_loc.py" to generate the bar chart of location types 4 | + run "$python barchart_time.py" to generate the bar chart of time types 5 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/index/seattle_zipcode_index.txt: -------------------------------------------------------------------------------- 1 | khxu-spqg zip 9 2 | n3gw-htbc zipcode 12 3 | m6tf-bxss zip 10 4 | 4enm-t3vn zip 10 5 | qmtq-5rpt city_state_zip 9 6 | zn5m-qb7h zip 12 7 | r9tj-tvtt city_state_zip 9 8 | c3ri-wwcn city_state_zip 8 9 | 5iir-m2en zip 4 zipcode 5 10 | evxh-x3jp city_state_zip 8 11 | -------------------------------------------------------------------------------- /type_detection/ijson/compat.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Python2/Python3 compatibility utilities. 
3 | ''' 4 | 5 | import sys 6 | 7 | 8 | IS_PY2 = sys.version_info[0] < 3 9 | 10 | 11 | if IS_PY2: 12 | b2s = lambda s: s 13 | chr = unichr 14 | else: 15 | def b2s(b): 16 | return b.decode('utf-8') 17 | chr = chr 18 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/ijson/compat.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Python2/Python3 compatibility utilities. 3 | ''' 4 | 5 | import sys 6 | 7 | 8 | IS_PY2 = sys.version_info[0] < 3 9 | 10 | 11 | if IS_PY2: 12 | b2s = lambda s: s 13 | chr = unichr 14 | else: 15 | def b2s(b): 16 | return b.decode('utf-8') 17 | chr = chr 18 | -------------------------------------------------------------------------------- /download/download_json_sf.sh: -------------------------------------------------------------------------------- 1 | path="sf" 2 | url="https://data.sfgov.org" 3 | IDS="ids/sf_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_kcmo.sh: -------------------------------------------------------------------------------- 1 | path="kcmo" 2 | url="https://data.kcmo.org" 3 | IDS="ids/kcmo_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! 
-f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_nola.sh: -------------------------------------------------------------------------------- 1 | path="nola" 2 | url="http://data.nola.gov" 3 | IDS="ids/nola_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_slc.sh: -------------------------------------------------------------------------------- 1 | path="slc" 2 | url="http://data.slcgov.com" 3 | IDS="ids/slc_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_nyc.sh: -------------------------------------------------------------------------------- 1 | path="nyc" 2 | url="http://nycopendata.socrata.com" 3 | ids="ids/nyc_ids.txt" 4 | mkdir $path 5 | 6 | cat $ids | while read LINE 7 | do 8 | if [ ! 
-f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_redmond.sh: -------------------------------------------------------------------------------- 1 | path="redmond" 2 | url="http://data.redmond.gov" 3 | IDS="ids/redmond_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_seattle.sh: -------------------------------------------------------------------------------- 1 | path="seattle" 2 | url="http://data.seattle.gov" 3 | IDS="ids/seattle_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_austintexas.sh: -------------------------------------------------------------------------------- 1 | path="austin" 2 | url="http://data.austintexas.gov" 3 | ids="ids/austin_ids.txt" 4 | mkdir $path 5 | 6 | cat $ids | while read LINE 7 | do 8 | if [ ! 
-f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_boston.sh: -------------------------------------------------------------------------------- 1 | path="boston" 2 | url="http://data.cityofboston.gov" 3 | IDS="ids/boston_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_chicago.sh: -------------------------------------------------------------------------------- 1 | path="chicago" 2 | url="http://data.cityofchicago.org" 3 | IDS="ids/chicago_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_deleon.sh: -------------------------------------------------------------------------------- 1 | path="deleon" 2 | url="http://data.cityofdeleon.org" 3 | IDS="ids/deleon_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! 
-f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_edmonton.sh: -------------------------------------------------------------------------------- 1 | path="edmonton" 2 | url="http://data.edmonton.ca" 3 | IDS="ids/edmonton_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_honolulu.sh: -------------------------------------------------------------------------------- 1 | path="honolulu" 2 | url="https://data.honolulu.gov" 3 | IDS="ids/honolulu_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_raleighnc.sh: -------------------------------------------------------------------------------- 1 | path="raleigh" 2 | url="https://data.raleighnc.gov" 3 | IDS="ids/raleigh_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! 
-f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /schema_similarity/run.sh: -------------------------------------------------------------------------------- 1 | OUT="./similarity" #Directory that stores similarity scores 2 | mkdir $OUT 3 | 4 | cat city_list.txt | while read LINE # 5 | do 6 | arr=(${LINE//;/ }) 7 | JSON_PATH=${arr[1]} #Path to the directory that contains JSON files 8 | CITY=${arr[0]} #City name 9 | python schema_similarity.py $CITY $JSON_PATH $OUT 10 | break 11 | done 12 | -------------------------------------------------------------------------------- /download/download_json_madison.sh: -------------------------------------------------------------------------------- 1 | path="madison" 2 | url="https://data.cityofmadison.com" 3 | IDS="ids/madison_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_oaklandnet.sh: -------------------------------------------------------------------------------- 1 | path="oaklandnet" 2 | url="https://data.oaklandnet.com" 3 | IDS="ids/oaklandnet_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! 
-f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_wellingtonfl.sh: -------------------------------------------------------------------------------- 1 | path="wellington" 2 | url="http://data.wellingtonfl.gov" 3 | IDS="ids/wellington_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_baltimorecity.sh: -------------------------------------------------------------------------------- 1 | path="baltimore" 2 | url="https://data.baltimorecity.gov" 3 | IDS="ids/baltimore_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_somervillema.sh: -------------------------------------------------------------------------------- 1 | path="somervillema" 2 | url="http://data.somervillema.gov" 3 | IDS="ids/somervillema_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! 
-f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/download_json_weatherfordtx.sh: -------------------------------------------------------------------------------- 1 | path="weatherford" 2 | url="https://data.weatherfordtx.gov" 3 | IDS="ids/weatherford_ids.txt" 4 | mkdir $path 5 | 6 | cat $IDS | while read LINE 7 | do 8 | if [ ! -f $path/$LINE.json ] 9 | then 10 | wget -t 1 --output-document=$path/$LINE.json --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD" 11 | fi 12 | done 13 | 14 | -------------------------------------------------------------------------------- /download/ids/redmond_ids.txt: -------------------------------------------------------------------------------- 1 | 7v22-4z3a 2 | 4xwk-j2qj 3 | vqdz-eefx 4 | 7zus-64fj 5 | h54f-2ybz 6 | tugv-zk5z 7 | 9nf4-5b5t 8 | 7wz2-cdjk 9 | bs2q-ismz 10 | cp7w-w9h6 11 | wzgk-dadm 12 | xxwc-wtzp 13 | 3imt-pe6h 14 | 7vm8-w63z 15 | 3b7t-empc 16 | 5vmk-ujkk 17 | gw4v-ktut 18 | wv5k-d6vv 19 | 58qf-bc4p 20 | erks-xyhk 21 | gdzn-64j8 22 | jax4-9jsz 23 | nfhm-aphc 24 | yhjf-fsue 25 | yp53-gb6d 26 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/run.sh: -------------------------------------------------------------------------------- 1 | OUTPUT="data" #Directory to store the result 2 | mkdir $OUTPUT 3 | 4 | cat city_list.txt | while read LINE # 5 | do 6 | arr=(${LINE//;/ }) 7 | JSON_PATH=${arr[1]} #Path to the directory that contains JSON files 8 | CITY=${arr[0]} #City name 9 | echo $CITY $JSON_PATH $OUTPUT 10 | python collect_data.py $CITY $JSON_PATH $OUTPUT 11 | break 12 | done 13 | -------------------------------------------------------------------------------- /type_detection/ids/redmond_ids.txt: 
-------------------------------------------------------------------------------- 1 | 7v22-4z3a 2 | 4xwk-j2qj 3 | vqdz-eefx 4 | 7zus-64fj 5 | h54f-2ybz 6 | tugv-zk5z 7 | 9nf4-5b5t 8 | 7wz2-cdjk 9 | bs2q-ismz 10 | cp7w-w9h6 11 | wzgk-dadm 12 | xxwc-wtzp 13 | 3imt-pe6h 14 | 7vm8-w63z 15 | 3b7t-empc 16 | 5vmk-ujkk 17 | gw4v-ktut 18 | wv5k-d6vv 19 | 58qf-bc4p 20 | erks-xyhk 21 | gdzn-64j8 22 | jax4-9jsz 23 | nfhm-aphc 24 | yhjf-fsue 25 | yp53-gb6d 26 | -------------------------------------------------------------------------------- /tagcloud/README.txt: -------------------------------------------------------------------------------- 1 | HOWTORUN: 2 | 3 | $Rscript tagcloud.r data/allcities.txt 500 0.5 10 4 | $Rscript tagcloud.r data/chicago.txt 100 1 5 5 | $Rscript tagcloud.r data/kansas.txt 6 | $Rscript tagcloud.r data/nyc.txt 500 1 10 7 | $Rscript tagcloud.r data/seattle.txt 500 1 10 8 | $Rscript tagcloud.r data/top1000.txt 200 0.3 7 9 | $Rscript tagcloud.r data/top100.txt 200 0.3 7 10 | $Rscript tagcloud.r data/top500.txt 200 0.3 7 11 | -------------------------------------------------------------------------------- /download/ids/wellington_ids.txt: -------------------------------------------------------------------------------- 1 | duvw-hfu5 2 | 3xrt-ting 3 | ezn9-g8km 4 | 9yb5-p9fa 5 | sa92-xi27 6 | gria-8rsx 7 | 6aei-bxzm 8 | 6snd-rfqw 9 | wkzt-vhm6 10 | fykz-53hw 11 | g9js-e7hn 12 | fg2a-eh5h 13 | 3wx5-9qcg 14 | ku3p-pbxj 15 | pnde-vucq 16 | 2eiw-55u9 17 | neth-2qv6 18 | e5tv-z73h 19 | hcqt-5rjv 20 | appr-veui 21 | sb7m-xbq5 22 | njsp-pbsx 23 | vz2v-akh2 24 | nrbt-ch4g 25 | nhev-i6ea 26 | ckeq-6y99 27 | zg84-4xj4 28 | sxvy-f7ph 29 | yukt-pbhp 30 | -------------------------------------------------------------------------------- /type_detection/ids/wellington_ids.txt: -------------------------------------------------------------------------------- 1 | duvw-hfu5 2 | 3xrt-ting 3 | ezn9-g8km 4 | 9yb5-p9fa 5 | sa92-xi27 6 | gria-8rsx 7 | 6aei-bxzm 8 | 6snd-rfqw 9 | 
wkzt-vhm6 10 | fykz-53hw 11 | g9js-e7hn 12 | fg2a-eh5h 13 | 3wx5-9qcg 14 | ku3p-pbxj 15 | pnde-vucq 16 | 2eiw-55u9 17 | neth-2qv6 18 | e5tv-z73h 19 | hcqt-5rjv 20 | appr-veui 21 | sb7m-xbq5 22 | njsp-pbsx 23 | vz2v-akh2 24 | nrbt-ch4g 25 | nhev-i6ea 26 | ckeq-6y99 27 | zg84-4xj4 28 | sxvy-f7ph 29 | yukt-pbhp 30 | -------------------------------------------------------------------------------- /latlon_to_zipcode/convert_points.py: -------------------------------------------------------------------------------- 1 | output = open("converted_shapefile/point.txt", "w") 2 | output.write("33144\n") 3 | with open("converted_shapefile/points.csv") as lines: 4 | for line in lines: 5 | a = line.strip("\n").split("\t") 6 | zipcode = a[0] 7 | output.write(a[0] + "\n" + "1" + "\n" + str(len(a) - 1) + "\n") 8 | for latlon in a[1:]: 9 | x = latlon.split(",") 10 | lon = x[0] 11 | lat = x[1] 12 | output.write(lon + " " + lat + "\n") 13 | -------------------------------------------------------------------------------- /type_detection/city_list.txt: -------------------------------------------------------------------------------- 1 | austin;./austin 2 | nyc;./nyc 3 | baltimore;./baltimore 4 | boston;./boston 5 | chicago;./chicago 6 | deleon;./deleon 7 | madison;./madison 8 | edmonton;./edmonton 9 | honolulu;./honolulu 10 | kcmo;./kcmo 11 | nola;./nola 12 | oaklandnet;./oaklandnet 13 | raleigh;./raleigh 14 | redmond;./redmond 15 | seattle;./seattle 16 | sf;./sf 17 | slc;./slc 18 | somervillema;./somervillema 19 | weatherford;./weatherford 20 | wellington;./wellington 21 | -------------------------------------------------------------------------------- /schema_similarity/city_list.txt: -------------------------------------------------------------------------------- 1 | austin;./austin 2 | nyc;./nyc 3 | baltimore;./baltimore 4 | boston;./boston 5 | chicago;./chicago 6 | deleon;./deleon 7 | madison;./madison 8 | edmonton;./edmonton 9 | honolulu;./honolulu 10 | kcmo;./kcmo 11 | nola;./nola 
12 | oaklandnet;./oaklandnet 13 | raleigh;./raleigh 14 | redmond;./redmond 15 | seattle;./seattle 16 | sf;./sf 17 | slc;./slc 18 | somervillema;./somervillema 19 | weatherford;./weatherford 20 | wellington;./wellington 21 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/city_list.txt: -------------------------------------------------------------------------------- 1 | austin;./austin 2 | nyc;./nyc 3 | baltimore;./baltimore 4 | boston;./boston 5 | chicago;./chicago 6 | deleon;./deleon 7 | madison;./madison 8 | edmonton;./edmonton 9 | honolulu;./honolulu 10 | kcmo;./kcmo 11 | nola;./nola 12 | oaklandnet;./oaklandnet 13 | raleigh;./raleigh 14 | redmond;./redmond 15 | seattle;./seattle 16 | sf;./sf 17 | slc;./slc 18 | somervillema;./somervillema 19 | weatherford;./weatherford 20 | wellington;./wellington 21 | -------------------------------------------------------------------------------- /heatmap/shapefile/Neighboorhoods.prj: -------------------------------------------------------------------------------- 1 | PROJCS["NAD_1983_StatePlane_Illinois_East_FIPS_1201_Feet",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",984250.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-88.33333333333333],PARAMETER["Scale_Factor",0.999975],PARAMETER["Latitude_Of_Origin",36.66666666666666],UNIT["Foot_US",0.3048006096012192]] -------------------------------------------------------------------------------- /extract_zipcode_latlon/index/kcmo_latlon_index.txt: -------------------------------------------------------------------------------- 1 | 4bd6-gqwq building_latitude 15 building_longitude 16 2 | d6ps-dq2e intptlat10 16 intptlon10 17 3 | wbx5-smj6 intptlat 11 intptlon 12 4 | 8ejy-sj4q intptlat 7 intptlon 8 5 | isk8-6s6i intptlat10 16 
intptlon10 17 6 | byps-gsbw centroid_latitude 8 centroid_longitude 9 7 | rtst-p7t3 intptlat 16 intptlon 17 8 | 2mjh-qv84 building_latitude 15 building_longitude 16 9 | ex28-gm4e building_latitude 15 building_longitude 16 10 | miam-vibb centroid_latitude 8 centroid_longitude 9 11 | -------------------------------------------------------------------------------- /barchart/time.csv: -------------------------------------------------------------------------------- 1 | 0.36827458256,0.0162337662338,0.481910946197 2 | 0.28855721393,0.0348258706468,0.44776119403 3 | 0.58950617284,0.0,0.524691358025 4 | 0.308089500861,0.0430292598967,0.447504302926 5 | 0.586261980831,0.0,0.20607028754 6 | 0.570135746606,0.00452488687783,0.497737556561 7 | 0.114285714286,0.00879120879121,0.920879120879 8 | 0.173913043478,0.0461956521739,0.383152173913 9 | 0.533980582524,0.00970873786408,0.466019417476 10 | 0.302158273381,0.0503597122302,0.467625899281 11 | 0.403606311044,0.0177310293013,0.48369646882 12 | -------------------------------------------------------------------------------- /barchart/loc.csv: -------------------------------------------------------------------------------- 1 | 0.422077922078,0.433209647495,0.439239332096 2 | 0.445273631841,0.13184079602,0.440298507463 3 | 0.616255144033,0.512345679012,0.0236625514403 4 | 0.240963855422,0.294320137694,0.173838209983 5 | 0.329073482428,0.562300319489,0.258785942492 6 | 0.359728506787,0.285067873303,0.235294117647 7 | 0.134065934066,0.0131868131868,0.021978021978 8 | 0.440217391304,0.277173913043,0.0 9 | 0.718446601942,0.679611650485,0.708737864078 10 | 0.359712230216,0.237410071942,0.280575539568 11 | 0.401352366642,0.365890308039,0.254545454545 12 | -------------------------------------------------------------------------------- /download/ids/honolulu_ids.txt: -------------------------------------------------------------------------------- 1 | std8-yakc 2 | ix32-iw26 3 | a96q-gyhq 4 | fdx8-nih6 5 | yef5-h88r 6 | dcdf-43kn 7 | 
cdq8-ccz7 8 | 6qpe-gunp 9 | dcm2-4u9j 10 | 3dxw-z8rr 11 | 5fhm-vea5 12 | ab7c-s2jr 13 | iz58-35eb 14 | necy-6u7t 15 | ifzd-2k3p 16 | akkw-prc5 17 | pvti-pwka 18 | 3duq-5rzf 19 | k2yj-i4jp 20 | 2swm-eusf 21 | gp9s-unfc 22 | 6x78-edqg 23 | w4ir-s4fd 24 | sbdw-8u88 25 | a3ah-kpkr 26 | 7kck-y29a 27 | nrsx-ip5q 28 | rh9s-z3mn 29 | t6ff-mewd 30 | smuq-xtz4 31 | g5bc-jnuv 32 | uvv2-62xi 33 | 84fd-3fzf 34 | vf2g-cf6g 35 | ef93-z5du 36 | -------------------------------------------------------------------------------- /type_detection/ids/honolulu_ids.txt: -------------------------------------------------------------------------------- 1 | std8-yakc 2 | ix32-iw26 3 | a96q-gyhq 4 | fdx8-nih6 5 | yef5-h88r 6 | dcdf-43kn 7 | cdq8-ccz7 8 | 6qpe-gunp 9 | dcm2-4u9j 10 | 3dxw-z8rr 11 | 5fhm-vea5 12 | ab7c-s2jr 13 | iz58-35eb 14 | necy-6u7t 15 | ifzd-2k3p 16 | akkw-prc5 17 | pvti-pwka 18 | 3duq-5rzf 19 | k2yj-i4jp 20 | 2swm-eusf 21 | gp9s-unfc 22 | 6x78-edqg 23 | w4ir-s4fd 24 | sbdw-8u88 25 | a3ah-kpkr 26 | 7kck-y29a 27 | nrsx-ip5q 28 | rh9s-z3mn 29 | t6ff-mewd 30 | smuq-xtz4 31 | g5bc-jnuv 32 | uvv2-62xi 33 | 84fd-3fzf 34 | vf2g-cf6g 35 | ef93-z5du 36 | -------------------------------------------------------------------------------- /barchart/time_loc_number.csv: -------------------------------------------------------------------------------- 1 | 0.589981447124,0.513914656772,0.842764378479 2 | 0.57960199005,0.475124378109,0.902985074627 3 | 0.67695473251,0.609053497942,0.862139917695 4 | 0.339070567986,0.471600688468,0.879518072289 5 | 0.672523961661,0.591054313099,0.811501597444 6 | 0.441176470588,0.676470588235,0.889140271493 7 | 0.134065934066,0.940659340659,0.984615384615 8 | 0.470108695652,0.426630434783,0.959239130435 9 | 0.815533980583,0.533980582524,0.854368932039 10 | 0.467625899281,0.575539568345,0.841726618705 11 | 0.529376408715,0.56664162284,0.863260706236 12 | -------------------------------------------------------------------------------- 
/heatmap/shapefile/nyc_zipcta.prj: -------------------------------------------------------------------------------- 1 | PROJCS["NAD_1983_StatePlane_New_York_Long_Island_FIPS_3104_Feet",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",984250.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-74.0],PARAMETER["Standard_Parallel_1",40.66666666666666],PARAMETER["Standard_Parallel_2",41.03333333333333],PARAMETER["Latitude_Of_Origin",40.16666666666666],UNIT["Foot_US",0.3048006096012192]] -------------------------------------------------------------------------------- /latlon_to_zipcode/README.txt: -------------------------------------------------------------------------------- 1 | - Download US Shapefile: ftp://ftp2.census.gov/geo/tiger/TIGER2013/ZCTA5/tl_2013_us_zcta510.zip 2 | - Extract to ./shapefile/ 3 | - If point.txt and bbox.csv are not existed in converted_shapefile/ 4 | + Run $./convert.sh to convert original shapefile to point.txt and bbox.csv 5 | - Compile: run $make 6 | - Run a test: 7 | 8 | $./zipcode 40.667098 -73.982363 9 | 10 | - Run a full conversion: 11 | 12 | $./zipcode 13 | 14 | + Input: latlon.txt: each line in this file refers to a file that contain lat/lon. 
Each line of lat/lon file has the format: lat,lon 15 | -------------------------------------------------------------------------------- /type_detection/ijson/backends/__init__.py: -------------------------------------------------------------------------------- 1 | from ctypes import util, cdll 2 | 3 | class YAJLImportError(ImportError): 4 | pass 5 | 6 | def find_yajl(required): 7 | so_name = util.find_library('yajl') 8 | if so_name is None: 9 | raise YAJLImportError('YAJL shared object not found.') 10 | yajl = cdll.LoadLibrary(so_name) 11 | major, rest = divmod(yajl.yajl_version(), 10000) 12 | minor, micro = divmod(rest, 100) 13 | if major != required: 14 | raise YAJLImportError('YAJL version %s.x required, found %s.%s.%s' % (required, major, minor, micro)) 15 | return yajl 16 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/ijson/backends/__init__.py: -------------------------------------------------------------------------------- 1 | from ctypes import util, cdll 2 | 3 | class YAJLImportError(ImportError): 4 | pass 5 | 6 | def find_yajl(required): 7 | so_name = util.find_library('yajl') 8 | if so_name is None: 9 | raise YAJLImportError('YAJL shared object not found.') 10 | yajl = cdll.LoadLibrary(so_name) 11 | major, rest = divmod(yajl.yajl_version(), 10000) 12 | minor, micro = divmod(rest, 100) 13 | if major != required: 14 | raise YAJLImportError('YAJL version %s.x required, found %s.%s.%s' % (required, major, minor, micro)) 15 | return yajl 16 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/index/austin_latlon_index.txt: -------------------------------------------------------------------------------- 1 | b4y9-5x39 longitude 6 latitude 7 2 | 64cq-wf5u y 2 x 3 3 | nq9x-w8sx lat_dd_wgs84 19 lon_dd_wgs84 20 4 | szku-46rx y 2 x 3 5 | jbaf-xebm lat_dd_wgs84 19 lon_dd_wgs84 20 6 | r6sg-xka2 longitude 6 latitude 7 7 | gr59-ids7 longitude 6 latitude 7 8 
| r5kt-xq3y lat_dd_wgs84 5 lon_dd_wgs84 6 9 | 3gc4-g537 latitude 6 longitude 7 10 | ei2n-fehk longitude 6 latitude 7 11 | 4c6h-tv2y longitude 6 latitude 7 12 | ga9y-ypai lat_dd_wgs84 19 lon_dd_wgs84 20 13 | tx8s-62r6 lat_dd_wgs84 19 lon_dd_wgs84 20 14 | 5gjn-nmcf latitude 5 longitude 6 15 | b6cd-bhbk lat_dd_wgs84 5 lon_dd_wgs84 6 16 | -------------------------------------------------------------------------------- /download/ids/nola_ids.txt: -------------------------------------------------------------------------------- 1 | 2mq3-p3xc 2 | aexs-y2ma 3 | j4pt-mz93 4 | mesf-89bm 5 | 5fn8-vtui 6 | mbxb-ejdy 7 | r82n-4xx7 8 | rv3g-ypg7 9 | 3utr-tkrh 10 | 4uek-d54m 11 | 4ts9-u65y 12 | 28ec-c8d6 13 | e3wd-h7q2 14 | hpm5-48nj 15 | d9hd-x6nn 16 | 65t6-gi32 17 | rcm3-fn58 18 | kpc9-4t3j 19 | dasg-fxyv 20 | 5ktx-e9wc 21 | a6tx-de8c 22 | cba3-mptn 23 | 8tsm-38gz 24 | utqx-f83p 25 | pqgq-8it9 26 | sgfw-jy2v 27 | 8pqz-ftzc 28 | uh5a-f7uw 29 | 44ct-56tr 30 | u6yx-v2tw 31 | aned-jbk9 32 | q4nv-wks6 33 | mce3-wqh4 34 | ypza-44w8 35 | jsyu-nz5r 36 | raeu-276s 37 | kg5e-js8i 38 | 347f-j9w7 39 | vgrg-et3t 40 | 5hq5-im7i 41 | d2is-2r79 42 | 4d8g-jra3 43 | -------------------------------------------------------------------------------- /type_detection/ids/nola_ids.txt: -------------------------------------------------------------------------------- 1 | 2mq3-p3xc 2 | aexs-y2ma 3 | j4pt-mz93 4 | mesf-89bm 5 | 5fn8-vtui 6 | mbxb-ejdy 7 | r82n-4xx7 8 | rv3g-ypg7 9 | 3utr-tkrh 10 | 4uek-d54m 11 | 4ts9-u65y 12 | 28ec-c8d6 13 | e3wd-h7q2 14 | hpm5-48nj 15 | d9hd-x6nn 16 | 65t6-gi32 17 | rcm3-fn58 18 | kpc9-4t3j 19 | dasg-fxyv 20 | 5ktx-e9wc 21 | a6tx-de8c 22 | cba3-mptn 23 | 8tsm-38gz 24 | utqx-f83p 25 | pqgq-8it9 26 | sgfw-jy2v 27 | 8pqz-ftzc 28 | uh5a-f7uw 29 | 44ct-56tr 30 | u6yx-v2tw 31 | aned-jbk9 32 | q4nv-wks6 33 | mce3-wqh4 34 | ypza-44w8 35 | jsyu-nz5r 36 | raeu-276s 37 | kg5e-js8i 38 | 347f-j9w7 39 | vgrg-et3t 40 | 5hq5-im7i 41 | d2is-2r79 42 | 4d8g-jra3 43 | 
-------------------------------------------------------------------------------- /extract_zipcode_latlon/index/austin_zipcode_index.txt: -------------------------------------------------------------------------------- 1 | u3yy-shmz association_zip_code 3 primary_contact_zipcode 8 secondary_contact_zipcode 13 2 | hqa6-stx4 zip_code 1 3 | 9bpw-2ysw zip_code 13 4 | ur6a-fvpc zip 9 5 | ajpy-mwjj zip_code 1 6 | gzyt-t2by zip_code 1 7 | rfif-mmvg zip 9 8 | gt3n-akq9 zip4 297 zip5 298 9 | nmp9-45v2 giinstalledzip 8 10 | 3ebq-e9iz zip 18 11 | rb6p-jsp4 zip 9 12 | 3w87-zbw7 zip 9 13 | ecmv-9xxi zip_code 1 14 | nynz-w2da zip 9 15 | b73m-kiye zip_code 1 16 | ct7f-fbbn primary_contact_zipcode 9 secondary_contact_zipcode 14 17 | qzi7-nx8g zip_code 13 18 | g9bx-8meu zip_code 1 19 | iuw2-kwij zipcode 0 20 | 8zu2-guks zip 9 21 | -------------------------------------------------------------------------------- /download/README.txt: -------------------------------------------------------------------------------- 1 | HOWTORUN: 2 | 3 | $./download_json_austintexas.sh 4 | $./download_json_baltimorecity.sh 5 | $./download_json_boston.sh 6 | $./download_json_chicago.sh 7 | $./download_json_deleon.sh 8 | $./download_json_edmonton.sh 9 | $./download_json_honolulu.sh 10 | $./download_json_kcmo.sh 11 | $./download_json_madison.sh 12 | $./download_json_nola.sh 13 | $./download_json_oaklandnet.sh 14 | $./download_json_raleighnc.sh 15 | $./download_json_redmond.sh 16 | $./download_json_seattle.sh 17 | $./download_json_sf.sh 18 | $./download_json_slc.sh 19 | $./download_json_somervillema.sh 20 | $./download_json_weatherfordtx.sh 21 | $./download_json_wellingtonfl.sh 22 | -------------------------------------------------------------------------------- /download/ids/slc_ids.txt: -------------------------------------------------------------------------------- 1 | syic-a6rq 2 | g5ni-ehfe 3 | myq9-p4zu 4 | rtcx-we7f 5 | duwd-wq3e 6 | 7faz-pyum 7 | s62m-p2ci 8 | qcea-2qur 9 | p2dy-h2sr 10 | 9b2y-pidk 
11 | usi3-xfks 12 | q9gq-vb9z 13 | wng6-vv2r 14 | vhm2-rnvr 15 | fthp-f7h3 16 | 5h33-khmk 17 | kz9n-dpay 18 | dait-ivxs 19 | uh9c-a9zt 20 | 3kgt-vcwy 21 | tn2w-p83j 22 | 5f5e-rfen 23 | s79j-pjmr 24 | ugfz-sxyz 25 | 5ate-q28a 26 | jww7-nxe8 27 | un62-z97s 28 | vytj-hddx 29 | e82v-m3sg 30 | epu4-hi64 31 | m8iz-py6s 32 | smri-mj5y 33 | 3auw-s6ah 34 | 5gsj-w587 35 | 79jz-dibw 36 | 8m2r-p53k 37 | 92gv-x3hr 38 | jphp-kas7 39 | agjx-fggm 40 | wrtx-pisx 41 | sx9e-aefu 42 | 7vfv-qtsf 43 | k35s-9qmi 44 | i98d-m2z6 45 | -------------------------------------------------------------------------------- /type_detection/ids/slc_ids.txt: -------------------------------------------------------------------------------- 1 | syic-a6rq 2 | g5ni-ehfe 3 | myq9-p4zu 4 | rtcx-we7f 5 | duwd-wq3e 6 | 7faz-pyum 7 | s62m-p2ci 8 | qcea-2qur 9 | p2dy-h2sr 10 | 9b2y-pidk 11 | usi3-xfks 12 | q9gq-vb9z 13 | wng6-vv2r 14 | vhm2-rnvr 15 | fthp-f7h3 16 | 5h33-khmk 17 | kz9n-dpay 18 | dait-ivxs 19 | uh9c-a9zt 20 | 3kgt-vcwy 21 | tn2w-p83j 22 | 5f5e-rfen 23 | s79j-pjmr 24 | ugfz-sxyz 25 | 5ate-q28a 26 | jww7-nxe8 27 | un62-z97s 28 | vytj-hddx 29 | e82v-m3sg 30 | epu4-hi64 31 | m8iz-py6s 32 | smri-mj5y 33 | 3auw-s6ah 34 | 5gsj-w587 35 | 79jz-dibw 36 | 8m2r-p53k 37 | 92gv-x3hr 38 | jphp-kas7 39 | agjx-fggm 40 | wrtx-pisx 41 | sx9e-aefu 42 | 7vfv-qtsf 43 | k35s-9qmi 44 | i98d-m2z6 45 | -------------------------------------------------------------------------------- /metadata/urls.txt: -------------------------------------------------------------------------------- 1 | http://data.austintexas.gov austin 2 | https://data.baltimorecity.gov baltimore 3 | http://data.cityofchicago.org chicago 4 | http://data.cityofdeleon.org deleon 5 | http://data.edmonton.ca edmonton 6 | http://data.nola.gov nola 7 | https://data.sfgov.org sf 8 | http://data.seattle.gov seattle 9 | http://nycopendata.socrata.com nyc 10 | https://data.honolulu.gov honolulu 11 | http://data.somervillema.gov somervillema 12 | 
https://data.cityofboston.gov boston 13 | http://data.slcgov.com slc 14 | https://data.oaklandnet.com oaklandnet 15 | https://data.cityofmadison.com madison 16 | https://data.kcmo.org kcmo 17 | https://data.raleighnc.gov raleigh 18 | https://data.redmond.gov redmond 19 | https://data.weatherfordtx.gov weatherford 20 | http://data.wellingtonfl.gov wellington 21 | -------------------------------------------------------------------------------- /latlon_to_zipcode/convert_shapefile_to_bboxes.py: -------------------------------------------------------------------------------- 1 | import shapefile 2 | 3 | sf = shapefile.Reader("shapefile/tl_2013_us_zcta510.shp") 4 | bboxes = open("converted_shapefile/bboxes.csv", "w") 5 | points = open("converted_shapefile/points.csv", "w") 6 | shapes = sf.shapes() 7 | records = sf.records() 8 | # Read the bounding box from the 4th shape 9 | for i in range(len(shapes)): 10 | bbox = str(records[i][0]) + "\t" + \ 11 | str(shapes[i].bbox[0]) + "\t" + \ 12 | str(shapes[i].bbox[1]) + "\t" + \ 13 | str(shapes[i].bbox[2]) + "\t" + \ 14 | str(shapes[i].bbox[3]) + "\n" 15 | bboxes.write(bbox) 16 | 17 | point = str(records[i][0]) 18 | for p in shapes[i].points: 19 | point += "\t" + str(p[0]) + "," + str(p[1]) 20 | points.write(point + "\n") 21 | 22 | bboxes.close() 23 | points.close() 24 | 25 | -------------------------------------------------------------------------------- /linechart/date2count.csv: -------------------------------------------------------------------------------- 1 | 201009 17 2 | 201008 16 3 | 201110 341 4 | 201403 546 5 | 201402 498 6 | 201401 474 7 | 201407 276 8 | 201406 400 9 | 201405 514 10 | 201404 682 11 | 201312 477 12 | 201311 1003 13 | 201310 361 14 | 201205 145 15 | 201204 116 16 | 201207 294 17 | 201206 126 18 | 201201 168 19 | 201203 130 20 | 201202 226 21 | 201010 13 22 | 201011 15 23 | 201012 45 24 | 201111 121 25 | 201208 350 26 | 201112 189 27 | 201108 152 28 | 201109 491 29 | 201003 6 30 | 201002 3 31 | 201209 
238 32 | 201007 7 33 | 201006 4 34 | 201005 19 35 | 201004 95 36 | 201308 231 37 | 201309 318 38 | 201304 375 39 | 201305 278 40 | 201306 500 41 | 201307 227 42 | 201301 385 43 | 201302 503 44 | 201303 871 45 | 201212 229 46 | 201210 186 47 | 201211 176 48 | 201106 83 49 | 201107 115 50 | 201104 182 51 | 201105 169 52 | 201102 50 53 | 201103 13 54 | 201101 89 55 | -------------------------------------------------------------------------------- /type_detection/run.sh: -------------------------------------------------------------------------------- 1 | a1="./sample_result" #Directory to store sampling results (OUTPUT) 2 | a2="./detection_result" #Directory to store detection results (OUTPUT) 3 | a3="./ids" #Directory that stores dataset ids (INPUT) 4 | 5 | cat city_list.txt | while read LINE # 6 | do 7 | arr=(${LINE//;/ }) 8 | JSON_PATH=${arr[1]} #Path to the directory that contains JSON files 9 | CITY=${arr[0]} #City name 10 | mkdir $a1 11 | mkdir $a2 12 | python sample.py $JSON_PATH $a3"/"$CITY"_ids.txt" $a1 $CITY #Sampling data 13 | python detect.py $a1 $a3"/"$CITY"_ids.txt" $a2 $CITY #Detect type based on sampled data 14 | # break 15 | done 16 | 17 | #Collect information to generate the barchart 18 | a4="generic.csv" #(Name of output file) 19 | a5="loc.csv" #(Name of output file) 20 | a6="time.csv" #(Name of output file) 21 | JSON_PATH="./" #(Name of output file) 22 | echo $a3 $JSON_PATH $a4 $a5 $a6 23 | python collect.py $a4 $a5 $a6 $a3 $JSON_PATH $a2 24 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/index/sf_latlon_index.txt: -------------------------------------------------------------------------------- 1 | f3fp-qypd x 9 y 10 2 | akvp-jmwa x 9 y 10 3 | 5q3n-q6kw x 8 y 9 4 | snsg-xkfg x 9 y 10 5 | fa4n-5inm latitude 15 longitude 16 6 | tkzw-k3nq latitude 15 longitude 16 7 | 337t-q2b4 latitude 15 longitude 16 8 | 3fig-nit3 x 8 y 9 9 | rwxz-qq2e x 9 y 10 10 | hqjf-mpne x 9 y 10 11 | rqzj-sfat latitude 
14 longitude 15 12 | fi3h-6q7h latitude 14 longitude 15 13 | ytdu-3kte x 9 y 10 14 | 3nwz-3n68 x 9 y 10 15 | te8q-3pjv x 9 y 10 16 | u563-z39k x 9 y 10 17 | 3hay-yzem x 9 y 10 18 | 5wbp-dwzt latitude 10 longitude 11 19 | gxxq-x39z x 9 y 10 20 | di4e-7emh x 9 y 10 21 | 7ybj-xpju x 6 y 7 22 | px6q-wjh5 latitude 14 longitude 15 23 | sf93-6dmr latitude 22 longitude 23 24 | xu5w-5kgd latitude 7 longitude 8 25 | xtjp-rjug latitude 7 longitude 8 26 | 99js-dqmz x 9 y 10 27 | 3twj-ueew x 9 y 10 28 | kaw6-dfy2 x 9 y 10 29 | uh2u-53ta x 2 y 3 30 | yani-faij x 9 y 10 31 | 4ang-frd3 x 9 y 10 32 | tmnf-yvry x 9 y 10 33 | sh6e-276z x 9 y 10 34 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/index/nyc_latlon_index.txt: -------------------------------------------------------------------------------- 1 | txfh-8uny latitude 4 longitude 5 2 | ckr8-miyf latitude 49 longitude 50 3 | 8ne5-dgau latitude 45 longitude 46 4 | iy29-ps3i latitude 49 longitude 50 5 | aiep-cw6w latitude 49 longitude 50 6 | bfxz-fd5f latitude 49 longitude 50 7 | nbh5-finw latitude 45 longitude 46 8 | xgwb-peav latitude 49 longitude 50 9 | iru4-p66v latitude 2 longitude 3 10 | cwr9-upi8 latitude 45 longitude 46 11 | my38-3fq2 latitude 49 longitude 50 12 | w7w5-eh7d latitude 26 longitude 27 13 | sa3i-xbm2 latitude 8 longitude 9 14 | 6wrh-b4p8 latitude 49 longitude 50 15 | mfbr-gvpd latitude 49 longitude 50 16 | q5vx-2yhj latitude 10 longitude 11 17 | vwdc-epd2 latitude 21 longitude 22 18 | wwjt-8agi latitude 49 longitude 50 19 | qvr2-gw69 latitude 49 longitude 50 20 | jhqa-6dzr latitude 8 longitude 9 21 | ypm5-ig5p latitude 6 longitude 7 22 | gbih-sbdw latitude 49 longitude 50 23 | sxx4-xhzg latitude 4 longitude 5 24 | anfv-hhsi latitude 10 longitude 11 25 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/index/boston_zipcode_index.txt: 
-------------------------------------------------------------------------------- 1 | pvrp-csfj location_zipcode 22 2 | 7wt6-9hdh location_zipcode 22 3 | v6fi-4hdu location_zipcode 22 4 | qz58-xbtz location_zipcode 22 5 | rtbk-4hc4 location_zipcode 22 6 | uqjh-rsbj location_zipcode 22 7 | f4ev-s6tx location_zipcode 22 8 | j2a7-cdyk location_zipcode 22 9 | x8in-twjt zip_code 11 10 | enuq-8kmn location_zipcode 22 11 | ehda-cg39 location_zipcode 22 12 | k9pj-rna9 location_zipcode 22 13 | dtud-qyw9 location_zipcode 22 14 | gfvf-83vt location_zipcode 22 15 | csea-5edd zip 11 16 | c7cs-bcq5 location_zipcode 13 17 | yfam-b7bg location_zipcode 22 18 | c3yg-bknc location_zipcode 22 19 | dp5b-mgir location_zipcode 22 20 | w6u4-3pp8 location_zipcode 22 21 | mwxg-8ix6 location_zipcode 13 22 | mbdv-4g6k location_zipcode 22 23 | d5jd-s3az location_zipcode 5 24 | vivu-bt5s location_zipcode 22 25 | ynt4-n6g9 location_zipcode 22 26 | effb-uspk zip 11 27 | 4kc2-vxvv location_zipcode 22 28 | hkne-4xqd location_zipcode 22 29 | -------------------------------------------------------------------------------- /type_detection/ijson/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Iterative JSON parser. 3 | 4 | Main API: 5 | 6 | - ``ijson.parse``: iterator returning parsing events with the object tree context, 7 | see ``ijson.common.parse`` for docs. 8 | 9 | - ``ijson.items``: iterator returning Python objects found under a specified prefix, 10 | see ``ijson.common.items`` for docs. 11 | 12 | Top-level ``ijson`` module tries to automatically find and import a suitable 13 | parsing backend. You can also explicitly import a required backend from 14 | ``ijson.backends``. 
15 | ''' 16 | 17 | from ijson.common import JSONError, IncompleteJSONError, ObjectBuilder 18 | from ijson.backends import YAJLImportError 19 | 20 | try: 21 | import ijson.backends.yajl2 as backend 22 | except YAJLImportError: 23 | try: 24 | import ijson.backends.yajl as backend 25 | except YAJLImportError: 26 | import ijson.backends.python as backend 27 | 28 | 29 | basic_parse = backend.basic_parse 30 | parse = backend.parse 31 | items = backend.items 32 | -------------------------------------------------------------------------------- /download/ids/madison_ids.txt: -------------------------------------------------------------------------------- 1 | 4ng4-nf3c 2 | u7ns-6d4x 3 | 4gss-84dk 4 | 3kgn-2bpa 5 | 99g9-p6ki 6 | d686-rvcw 7 | wwtc-pw9p 8 | gxhk-44q9 9 | rtyh-6ucr 10 | spu7-hym6 11 | p4au-pwd2 12 | kfv2-f9ss 13 | svr6-6gvb 14 | f5sy-kcer 15 | iig4-49xp 16 | hb5z-buaz 17 | fvxz-66tr 18 | t5vc-2fm7 19 | 7dbz-yi8h 20 | 9u47-9h3u 21 | 6ym2-385s 22 | b7xj-5uyg 23 | f4km-tx65 24 | vf3w-yibt 25 | hwdm-jhzj 26 | t89i-9tka 27 | geuk-tayq 28 | 32m2-fqa2 29 | efhs-2ube 30 | 2tcz-87nc 31 | jbpd-4xxj 32 | tbc5-gynu 33 | 2a9g-qge2 34 | cq85-dipd 35 | 7shu-mkhv 36 | kbjz-kaud 37 | qcfn-n3we 38 | gqa8-dxgc 39 | 9wf4-wytn 40 | pveg-u4zq 41 | ipd7-scz8 42 | j4t2-fn7a 43 | jvs7-37vw 44 | 38c6-a8m4 45 | 3a6w-jfnq 46 | r98k-9799 47 | hi83-zfb3 48 | r7yp-j3t7 49 | mser-b9tq 50 | 3syz-mw6z 51 | s9b5-pi49 52 | 5eh3-n3ms 53 | bgrv-wya2 54 | mz4p-68jd 55 | wz7i-taa5 56 | vb36-v77y 57 | t6n5-id96 58 | q5sg-7k3x 59 | 4kgp-uj2z 60 | 84vr-dpbk 61 | miyr-ap4j 62 | qdb8-htgr 63 | b8bg-px3e 64 | kfv3-7qjn 65 | pi5r-e26i 66 | wf2u-ezp8 67 | m8qu-5gbp 68 | r6nk-wjfh 69 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/ijson/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Iterative JSON parser. 
3 | 4 | Main API: 5 | 6 | - ``ijson.parse``: iterator returning parsing events with the object tree context, 7 | see ``ijson.common.parse`` for docs. 8 | 9 | - ``ijson.items``: iterator returning Python objects found under a specified prefix, 10 | see ``ijson.common.items`` for docs. 11 | 12 | Top-level ``ijson`` module tries to automatically find and import a suitable 13 | parsing backend. You can also explicitly import a required backend from 14 | ``ijson.backends``. 15 | ''' 16 | 17 | from ijson.common import JSONError, IncompleteJSONError, ObjectBuilder 18 | from ijson.backends import YAJLImportError 19 | 20 | try: 21 | import ijson.backends.yajl2 as backend 22 | except YAJLImportError: 23 | try: 24 | import ijson.backends.yajl as backend 25 | except YAJLImportError: 26 | import ijson.backends.python as backend 27 | 28 | 29 | basic_parse = backend.basic_parse 30 | parse = backend.parse 31 | items = backend.items 32 | -------------------------------------------------------------------------------- /type_detection/ids/madison_ids.txt: -------------------------------------------------------------------------------- 1 | 4ng4-nf3c 2 | u7ns-6d4x 3 | 4gss-84dk 4 | 3kgn-2bpa 5 | 99g9-p6ki 6 | d686-rvcw 7 | wwtc-pw9p 8 | gxhk-44q9 9 | rtyh-6ucr 10 | spu7-hym6 11 | p4au-pwd2 12 | kfv2-f9ss 13 | svr6-6gvb 14 | f5sy-kcer 15 | iig4-49xp 16 | hb5z-buaz 17 | fvxz-66tr 18 | t5vc-2fm7 19 | 7dbz-yi8h 20 | 9u47-9h3u 21 | 6ym2-385s 22 | b7xj-5uyg 23 | f4km-tx65 24 | vf3w-yibt 25 | hwdm-jhzj 26 | t89i-9tka 27 | geuk-tayq 28 | 32m2-fqa2 29 | efhs-2ube 30 | 2tcz-87nc 31 | jbpd-4xxj 32 | tbc5-gynu 33 | 2a9g-qge2 34 | cq85-dipd 35 | 7shu-mkhv 36 | kbjz-kaud 37 | qcfn-n3we 38 | gqa8-dxgc 39 | 9wf4-wytn 40 | pveg-u4zq 41 | ipd7-scz8 42 | j4t2-fn7a 43 | jvs7-37vw 44 | 38c6-a8m4 45 | 3a6w-jfnq 46 | r98k-9799 47 | hi83-zfb3 48 | r7yp-j3t7 49 | mser-b9tq 50 | 3syz-mw6z 51 | s9b5-pi49 52 | 5eh3-n3ms 53 | bgrv-wya2 54 | mz4p-68jd 55 | wz7i-taa5 56 | vb36-v77y 57 | t6n5-id96 58 | q5sg-7k3x 59 | 
4kgp-uj2z 60 | 84vr-dpbk 61 | miyr-ap4j 62 | qdb8-htgr 63 | b8bg-px3e 64 | kfv3-7qjn 65 | pi5r-e26i 66 | wf2u-ezp8 67 | m8qu-5gbp 68 | r6nk-wjfh 69 | -------------------------------------------------------------------------------- /tagcloud/tagcloud.r: -------------------------------------------------------------------------------- 1 | require("tm"); 2 | require("wordcloud"); 3 | input <- commandArgs(trailingOnly = TRUE); 4 | min_scale = 1; 5 | max_scale = 10; 6 | max_word = 700; 7 | if (length(input) != 1) 8 | { 9 | max_word = as.numeric(input[2]); 10 | min_scale = as.numeric(input[3]); 11 | max_scale = as.numeric(input[4]); 12 | } 13 | print(min_scale); 14 | print(max_word); 15 | #tags <- Corpus (DirSource(input)); 16 | tags <- Corpus(VectorSource(readLines(input[1]))); 17 | tags <- tm_map(tags, stripWhitespace); 18 | tags <- tm_map(tags, tolower); 19 | tags <- tm_map(tags, removeWords, stopwords("english")); 20 | skipwords <- as.character(readLines("data/skipwords.txt")); 21 | tags <- tm_map(tags, removeWords, skipwords); 22 | #par(mfrow=c(3,1)) 23 | #wordcloud(tags, scale=c(10,0.3), max.words=5000, random.order=FALSE, rot.per=0.35, use.r.layout=TRUE, colors=brewer.pal(8, "Dark2")); 24 | wordcloud(tags, scale=c(max_scale,min_scale), max.words=max_word, random.order=FALSE, rot.per=0.35, use.r.layout=TRUE, colors=brewer.pal(8, "Dark2")); 25 | -------------------------------------------------------------------------------- /download/ids/weatherford_ids.txt: -------------------------------------------------------------------------------- 1 | hybg-vty2 2 | ve8y-5avw 3 | 2ek5-qq7s 4 | 8bm3-mh2f 5 | j2k9-jf7m 6 | pi2r-w2wn 7 | scg7-wbcw 8 | rmvj-bpp5 9 | d6ka-5zdp 10 | memn-fv5t 11 | 4kr8-nw7w 12 | kyrg-v24v 13 | x7ik-kbby 14 | e68q-zdjm 15 | bu65-w3ez 16 | 3n5h-hdsi 17 | bgjw-54en 18 | 3bed-i88z 19 | gpjj-upqz 20 | n4m9-h86u 21 | 6edd-iufq 22 | cfuq-zji7 23 | ny9a-t4pz 24 | fq45-73gh 25 | kwwr-agj9 26 | 5rrs-sgue 27 | 3aqf-4m7m 28 | qekk-5pfa 29 | 7tdp-p5kk 30 | 
s2dc-5w34 31 | 8bte-7cqp 32 | 3xxn-pcj5 33 | bg9q-v7x9 34 | gax8-krdx 35 | c4zr-3y7x 36 | dngh-t9qr 37 | cuz9-rcre 38 | vy7g-yivs 39 | xzd8-2b3e 40 | vhv4-pkrx 41 | hphz-3y4w 42 | t7t8-t82j 43 | a9tx-k4s7 44 | ysfs-8f2v 45 | u2u7-hf87 46 | t6i6-pnn2 47 | mvy8-6q2t 48 | uhdw-jeqx 49 | bpma-ut4v 50 | idj9-c9dm 51 | 88pd-2kqk 52 | rmsq-r7j2 53 | c68f-eup2 54 | 32ak-r84i 55 | q7xu-xtzf 56 | v6eu-rt9x 57 | 39gt-rxzc 58 | 5t7p-7njb 59 | cdyu-igpi 60 | 3usd-zinv 61 | ce3q-vytn 62 | v44e-g82x 63 | vrhc-6z87 64 | dy5q-p5dt 65 | k7b9-7zjb 66 | 648e-teft 67 | 8m3c-9aap 68 | catd-f4rf 69 | d5tp-wn69 70 | fgmy-jv95 71 | ivjg-v96d 72 | cb27-ccqz 73 | -------------------------------------------------------------------------------- /type_detection/ids/weatherford_ids.txt: -------------------------------------------------------------------------------- 1 | hybg-vty2 2 | ve8y-5avw 3 | 2ek5-qq7s 4 | 8bm3-mh2f 5 | j2k9-jf7m 6 | pi2r-w2wn 7 | scg7-wbcw 8 | rmvj-bpp5 9 | d6ka-5zdp 10 | memn-fv5t 11 | 4kr8-nw7w 12 | kyrg-v24v 13 | x7ik-kbby 14 | e68q-zdjm 15 | bu65-w3ez 16 | 3n5h-hdsi 17 | bgjw-54en 18 | 3bed-i88z 19 | gpjj-upqz 20 | n4m9-h86u 21 | 6edd-iufq 22 | cfuq-zji7 23 | ny9a-t4pz 24 | fq45-73gh 25 | kwwr-agj9 26 | 5rrs-sgue 27 | 3aqf-4m7m 28 | qekk-5pfa 29 | 7tdp-p5kk 30 | s2dc-5w34 31 | 8bte-7cqp 32 | 3xxn-pcj5 33 | bg9q-v7x9 34 | gax8-krdx 35 | c4zr-3y7x 36 | dngh-t9qr 37 | cuz9-rcre 38 | vy7g-yivs 39 | xzd8-2b3e 40 | vhv4-pkrx 41 | hphz-3y4w 42 | t7t8-t82j 43 | a9tx-k4s7 44 | ysfs-8f2v 45 | u2u7-hf87 46 | t6i6-pnn2 47 | mvy8-6q2t 48 | uhdw-jeqx 49 | bpma-ut4v 50 | idj9-c9dm 51 | 88pd-2kqk 52 | rmsq-r7j2 53 | c68f-eup2 54 | 32ak-r84i 55 | q7xu-xtzf 56 | v6eu-rt9x 57 | 39gt-rxzc 58 | 5t7p-7njb 59 | cdyu-igpi 60 | 3usd-zinv 61 | ce3q-vytn 62 | v44e-g82x 63 | vrhc-6z87 64 | dy5q-p5dt 65 | k7b9-7zjb 66 | 648e-teft 67 | 8m3c-9aap 68 | catd-f4rf 69 | d5tp-wn69 70 | fgmy-jv95 71 | ivjg-v96d 72 | cb27-ccqz 73 | 
-------------------------------------------------------------------------------- /download/ids/oaklandnet_ids.txt: -------------------------------------------------------------------------------- 1 | kzer-wcj5 2 | ym6k-rx7a 3 | qyh9-i9dw 4 | fw6y-ui8e 5 | hfn8-32wd 6 | 6nxw-pzj5 7 | e4gx-8458 8 | ajaj-fa72 9 | uq9e-ncfu 10 | uyih-vzuc 11 | kq8i-6bzk 12 | qezs-bkz9 13 | 7dcq-8atp 14 | dutj-j949 15 | sduu-bfki 16 | muvj-xztc 17 | va73-j3gz 18 | 3y2t-a5mc 19 | t35d-4vyj 20 | x678-6ymc 21 | kx4s-uqgi 22 | j4xf-2t25 23 | dxdg-872h 24 | creu-dzki 25 | 4jcx-enxf 26 | b8mb-8tti 27 | quth-gb8e 28 | wm75-yhqe 29 | 65yj-mc7w 30 | erq5-ht9e 31 | aahx-6i3p 32 | h2rc-b7xm 33 | dnd6-8ry2 34 | ncmw-m42x 35 | 58ik-33wk 36 | wakt-xmha 37 | sr5q-rm7d 38 | 56xf-w7yc 39 | yra4-ynr5 40 | un3r-mf7q 41 | j9qk-t2ht 42 | trbj-7f28 43 | pvzf-dbpc 44 | vrkv-jmjc 45 | b9mi-cs4z 46 | hqcd-z3hu 47 | spgt-auvy 48 | rbqz-eaj4 49 | 8jcq-6ucy 50 | 7quj-zssa 51 | tt6i-5mkh 52 | az7b-di6w 53 | fzzu-umm5 54 | 67wz-betr 55 | i2cv-32w5 56 | vpjp-6gdf 57 | j4eu-nx3y 58 | 4rrq-475h 59 | 3bum-78vz 60 | 4k8k-rw55 61 | 5afy-hx65 62 | 7u2h-e4rx 63 | c9h9-wdx3 64 | dcit-4sk8 65 | g4ft-bk9f 66 | guag-xf4x 67 | hxu6-rrid 68 | kezn-d3a8 69 | qfcb-d6ux 70 | tcde-a2rg 71 | wau4-95ys 72 | y9sn-rk9p 73 | vyhb-nqtw 74 | fvtg-s7gp 75 | qsv2-89sf 76 | udg4-vz9p 77 | 68fg-z9fi 78 | 4yez-5h4p 79 | vi6t-i2f3 80 | 6qwi-azmw 81 | geib-kan6 82 | ejsa-p6i4 83 | kzkk-c7a4 84 | -------------------------------------------------------------------------------- /type_detection/ids/oaklandnet_ids.txt: -------------------------------------------------------------------------------- 1 | kzer-wcj5 2 | ym6k-rx7a 3 | qyh9-i9dw 4 | fw6y-ui8e 5 | hfn8-32wd 6 | 6nxw-pzj5 7 | e4gx-8458 8 | ajaj-fa72 9 | uq9e-ncfu 10 | uyih-vzuc 11 | kq8i-6bzk 12 | qezs-bkz9 13 | 7dcq-8atp 14 | dutj-j949 15 | sduu-bfki 16 | muvj-xztc 17 | va73-j3gz 18 | 3y2t-a5mc 19 | t35d-4vyj 20 | x678-6ymc 21 | kx4s-uqgi 22 | j4xf-2t25 23 | dxdg-872h 24 | creu-dzki 25 | 
4jcx-enxf 26 | b8mb-8tti 27 | quth-gb8e 28 | wm75-yhqe 29 | 65yj-mc7w 30 | erq5-ht9e 31 | aahx-6i3p 32 | h2rc-b7xm 33 | dnd6-8ry2 34 | ncmw-m42x 35 | 58ik-33wk 36 | wakt-xmha 37 | sr5q-rm7d 38 | 56xf-w7yc 39 | yra4-ynr5 40 | un3r-mf7q 41 | j9qk-t2ht 42 | trbj-7f28 43 | pvzf-dbpc 44 | vrkv-jmjc 45 | b9mi-cs4z 46 | hqcd-z3hu 47 | spgt-auvy 48 | rbqz-eaj4 49 | 8jcq-6ucy 50 | 7quj-zssa 51 | tt6i-5mkh 52 | az7b-di6w 53 | fzzu-umm5 54 | 67wz-betr 55 | i2cv-32w5 56 | vpjp-6gdf 57 | j4eu-nx3y 58 | 4rrq-475h 59 | 3bum-78vz 60 | 4k8k-rw55 61 | 5afy-hx65 62 | 7u2h-e4rx 63 | c9h9-wdx3 64 | dcit-4sk8 65 | g4ft-bk9f 66 | guag-xf4x 67 | hxu6-rrid 68 | kezn-d3a8 69 | qfcb-d6ux 70 | tcde-a2rg 71 | wau4-95ys 72 | y9sn-rk9p 73 | vyhb-nqtw 74 | fvtg-s7gp 75 | qsv2-89sf 76 | udg4-vz9p 77 | 68fg-z9fi 78 | 4yez-5h4p 79 | vi6t-i2f3 80 | 6qwi-azmw 81 | geib-kan6 82 | ejsa-p6i4 83 | kzkk-c7a4 84 | -------------------------------------------------------------------------------- /download/ids/somervillema_ids.txt: -------------------------------------------------------------------------------- 1 | tp6j-gpfj 2 | 7u5v-yw4j 3 | kja3-3jiv 4 | pjhx-dusc 5 | 9cwr-3jjr 6 | 5peg-3mcc 7 | 8y4j-ucsg 8 | 6x93-dy4s 9 | sebz-uihb 10 | hwvc-m8fm 11 | dtkn-fv7f 12 | dqe2-eu72 13 | u6u9-gmux 14 | vfqx-2vkk 15 | vcmv-r7ky 16 | 9p7s-uyz7 17 | ssw2-4kcp 18 | 5qt4-dip4 19 | iye3-wp6v 20 | 8g7d-pg76 21 | q5g3-jif5 22 | mny4-tj6m 23 | vpdq-svp4 24 | 2y56-m77e 25 | 9gy9-2p5u 26 | j545-qb59 27 | w5r4-iy52 28 | 5pvr-cpn3 29 | 3qwf-fgnz 30 | 635v-aavc 31 | 8x35-9ng3 32 | 9uzy-4h8m 33 | 2g5h-2e8r 34 | kdby-j7rs 35 | tkit-6b73 36 | ty6m-bn6q 37 | vw3t-2xhg 38 | 4uyb-gfsm 39 | it9v-824j 40 | i427-734p 41 | p44d-dqzq 42 | w62m-jxtq 43 | qr7g-u54h 44 | dcp6-gcay 45 | m7ah-26yy 46 | j95z-kira 47 | tgqx-pv5x 48 | cpci-gw44 49 | caa8-adi3 50 | jyc2-yxnj 51 | 7w39-s85f 52 | quyn-7i4y 53 | szji-58dd 54 | 8na6-jytu 55 | htg6-e8ia 56 | 33fn-xnzu 57 | x332-bdd7 58 | pgsb-2rr6 59 | ckr3-jqgv 60 | c2xz-m2g7 61 | 6ssq-xzqu 62 | 
rb6v-e8zn 63 | wppa-gx6f 64 | hr39-b39y 65 | 69vm-7n6w 66 | 6pwe-s49m 67 | xjt9-vc89 68 | 7g35-yebv 69 | r9cu-f8pg 70 | ypxj-qtcw 71 | 4n2x-t8ew 72 | 3tkv-xx4f 73 | a6cq-eqmq 74 | qkid-icys 75 | tfzf-bzmb 76 | a2xm-guu9 77 | wqq6-wyhr 78 | ecmw-4hgh 79 | 97w8-xar9 80 | rzd3-6sat 81 | 4wyh-gtfb 82 | qa92-wva4 83 | 4xpt-vaa8 84 | 37ik-ii75 85 | j38e-t8aq 86 | -------------------------------------------------------------------------------- /type_detection/ids/somervillema_ids.txt: -------------------------------------------------------------------------------- 1 | tp6j-gpfj 2 | 7u5v-yw4j 3 | kja3-3jiv 4 | pjhx-dusc 5 | 9cwr-3jjr 6 | 5peg-3mcc 7 | 8y4j-ucsg 8 | 6x93-dy4s 9 | sebz-uihb 10 | hwvc-m8fm 11 | dtkn-fv7f 12 | dqe2-eu72 13 | u6u9-gmux 14 | vfqx-2vkk 15 | vcmv-r7ky 16 | 9p7s-uyz7 17 | ssw2-4kcp 18 | 5qt4-dip4 19 | iye3-wp6v 20 | 8g7d-pg76 21 | q5g3-jif5 22 | mny4-tj6m 23 | vpdq-svp4 24 | 2y56-m77e 25 | 9gy9-2p5u 26 | j545-qb59 27 | w5r4-iy52 28 | 5pvr-cpn3 29 | 3qwf-fgnz 30 | 635v-aavc 31 | 8x35-9ng3 32 | 9uzy-4h8m 33 | 2g5h-2e8r 34 | kdby-j7rs 35 | tkit-6b73 36 | ty6m-bn6q 37 | vw3t-2xhg 38 | 4uyb-gfsm 39 | it9v-824j 40 | i427-734p 41 | p44d-dqzq 42 | w62m-jxtq 43 | qr7g-u54h 44 | dcp6-gcay 45 | m7ah-26yy 46 | j95z-kira 47 | tgqx-pv5x 48 | cpci-gw44 49 | caa8-adi3 50 | jyc2-yxnj 51 | 7w39-s85f 52 | quyn-7i4y 53 | szji-58dd 54 | 8na6-jytu 55 | htg6-e8ia 56 | 33fn-xnzu 57 | x332-bdd7 58 | pgsb-2rr6 59 | ckr3-jqgv 60 | c2xz-m2g7 61 | 6ssq-xzqu 62 | rb6v-e8zn 63 | wppa-gx6f 64 | hr39-b39y 65 | 69vm-7n6w 66 | 6pwe-s49m 67 | xjt9-vc89 68 | 7g35-yebv 69 | r9cu-f8pg 70 | ypxj-qtcw 71 | 4n2x-t8ew 72 | 3tkv-xx4f 73 | a6cq-eqmq 74 | qkid-icys 75 | tfzf-bzmb 76 | a2xm-guu9 77 | wqq6-wyhr 78 | ecmw-4hgh 79 | 97w8-xar9 80 | rzd3-6sat 81 | 4wyh-gtfb 82 | qa92-wva4 83 | 4xpt-vaa8 84 | 37ik-ii75 85 | j38e-t8aq 86 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/index/baltimore_zipcode_index.txt: 
-------------------------------------------------------------------------------- 1 | 53u2-uece zipcode 1 2 | rep9-vf9x zip 6 3 | cpd3-yi9b zipcode 1 4 | auqk-g78m addrzip 17 5 | 5vwi-a39d addrzip 17 6 | fexy-c3hs zip 6 7 | 98gi-te7t zipcode 2 8 | h774-6wsz zip 6 9 | szmc-i2rj zip 6 10 | npj6-ttes zip 6 11 | kjyz-iyjf zip 6 12 | wasd-qc7e zipcode 1 13 | g9ck-7zns zipcode 2 14 | r4ur-u5nm zipcode 3 15 | u7bw-gha5 zipcode 3 16 | 4d7j-z8em zip 6 17 | btz4-brkj zip 6 18 | 8hgq-9pi6 zipcode 1 19 | 9agw-sxsr zip 6 20 | tpag-zk4d zipcode 8 21 | cpxf-kxp3 zipcode 1 22 | nf24-syy3 addrzip 17 23 | zdgj-m9f8 addrzip 11 24 | uds6-qsb6 zip 6 25 | v77m-e78p zip 5 26 | vvxf-wiyc zipcode 3 27 | xv8d-bwgi addrzip 17 28 | tgtv-wr5u zipcode 1 29 | ik5a-kimj addrzip 17 30 | q974-nn4i zip 6 31 | 6kkw-bck6 zipcode 2 32 | x3dq-8uhg zip 6 33 | uuwk-975y zipcode 2 34 | jhbg-n8w2 zipcode 3 35 | 3ah4-gcgf zipcode 2 36 | g244-i383 zipcode 1 37 | dmje-2r3h zip 6 38 | k4km-9d4r zipcode 2 39 | kbdc-bpw3 zipcode 2 40 | 253h-2qmt zip 6 41 | q2vm-e9dp zipcode 1 42 | wdpa-2rxb addrzip 17 43 | h77s-araf zipcode 2 44 | us2p-bijb zipcode 3 45 | rzct-w9hm zip 3 46 | 35wi-jfre addrzip 17 47 | bcxw-m234 addrzip 11 48 | 2kb9-5zeh zip 6 49 | 2js8-vxjk zip 6 50 | bin3-c64n zip 6 51 | 53js-3bkd zipcode 1 52 | eehw-fgh8 zip 6 53 | ejc5-uinh addrzip 17 54 | k5ry-ef3g zipcode 1 55 | yc75-xbrv zipcode 2 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Open Data Analysis 2 | ================= 3 | Please read README.txt in each directory to find the instruction to run the source code. 4 | Below is the brief description of each directory: 5 | ## download 6 | * Download datasets in JSON format. (Shell script) 7 | 8 | ## schema_similarity 9 | * Compute the similarity between schemata. 
* (Figure 8) Generate matrix heatmap using schema similarity scores. (JavaScript) 13 | 14 | ## metadata 15 | * Retrieve metadata including tags, schema, description using Socrata APIs. (Python) 16 | 17 | ## tagcloud
24 | rtqb-8pht 25 | 3j3e-cr8p 26 | vjvb-2kg6 27 | 6yws-tqu3 28 | rzdm-34k2 29 | 23yb-cufe 30 | byxy-288e 31 | cr3i-jj7v 32 | mwxg-8ix6 33 | c7cs-bcq5 34 | ehda-cg39 35 | ekiy-2qmz 36 | 7xqx-zy2t 37 | 4vcu-nshu 38 | thm8-kfjj 39 | cich-iivi 40 | f4ev-s6tx 41 | 8igg-7sbf 42 | c7vc-ep7b 43 | qz58-xbtz 44 | 46f7-2snz 45 | snj3-z8hh 46 | 2tib-uhic 47 | 9tfg-3jic 48 | qqwn-zzmv 49 | 984s-h86s 50 | pmdu-upcu 51 | wv26-euyd 52 | efzp-pcmd 53 | 7ygz-72yc 54 | 742w-2qkx 55 | p9yd-36dn 56 | rww2-sqpe 57 | rvw3-dget 58 | sb5j-x59u 59 | xqmb-ucdr 60 | sgf2-btru 61 | ipwb-93aq 62 | cxb7-aa9j 63 | d5jd-s3az 64 | dp5b-mgir 65 | ciur-a7cc 66 | udwx-qxp4 67 | dvjg-bv4z 68 | axcy-y39t 69 | vivu-bt5s 70 | 7idu-4tds 71 | eymz-pqcb 72 | h64y-whx9 73 | idz9-gdbc 74 | i59n-zrgu 75 | esg7-pz3n 76 | euq9-fuzw 77 | q4vk-zgiq 78 | r5wd-vzpa 79 | su77-pn2k 80 | 4kc2-vxvv 81 | 9yb5-8pvg 82 | dtud-qyw9 83 | f6t4-vve7 84 | gfvf-83vt 85 | hkne-4xqd 86 | k9pj-rna9 87 | pvrp-csfj 88 | v6fi-4hdu 89 | yfam-b7bg 90 | ynt4-n6g9 91 | 52xb-ggdw 92 | j7zj-cq7e 93 | tvzm-wwrp 94 | vwgc-k7be 95 | t85d-b449 96 | 89gv-qm3p 97 | uwfh-jrgi 98 | x8in-twjt 99 | 6uv6-kxqp 100 | viyp-qdei 101 | b6jk-5x5h 102 | tma6-pdxu 103 | bcnb-bux2 104 | tvvb-g9ni 105 | 354i-aiec 106 | xkfj-zz8i 107 | 7wih-gq3k 108 | fhku-uixf 109 | 8sq6-p7et 110 | 755x-x44q 111 | evkj-7j3w 112 | 9j5j-ped2 113 | av6t-57nx 114 | wivc-syw7 115 | api6-u3fp 116 | fb8c-dnd3 117 | wqbg-exmn 118 | 9rag-2mng 119 | sb57-rjn9 120 | cd7h-u9nu 121 | krii-vyri 122 | -------------------------------------------------------------------------------- /type_detection/ids/boston_ids.txt: -------------------------------------------------------------------------------- 1 | awu8-dc52 2 | rtbk-4hc4 3 | 7cdf-6fgx 4 | c3yg-bknc 5 | qndu-wx8w 6 | effb-uspk 7 | csea-5edd 8 | uqjh-rsbj 9 | j2a7-cdyk 10 | gb6y-34cq 11 | 5b2m-jtw4 12 | dtsk-jcvs 13 | r3qt-vrtj 14 | vrhg-954w 15 | w6u4-3pp8 16 | e29s-ympv 17 | 7wt6-9hdh 18 | mbdv-4g6k 19 | 3a6m-dwve 20 | enuq-8kmn 21 | 
c7cz-29ak 22 | msk6-43c6 23 | gqai-h7bg 24 | rtqb-8pht 25 | 3j3e-cr8p 26 | vjvb-2kg6 27 | 6yws-tqu3 28 | rzdm-34k2 29 | 23yb-cufe 30 | byxy-288e 31 | cr3i-jj7v 32 | mwxg-8ix6 33 | c7cs-bcq5 34 | ehda-cg39 35 | ekiy-2qmz 36 | 7xqx-zy2t 37 | 4vcu-nshu 38 | thm8-kfjj 39 | cich-iivi 40 | f4ev-s6tx 41 | 8igg-7sbf 42 | c7vc-ep7b 43 | qz58-xbtz 44 | 46f7-2snz 45 | snj3-z8hh 46 | 2tib-uhic 47 | 9tfg-3jic 48 | qqwn-zzmv 49 | 984s-h86s 50 | pmdu-upcu 51 | wv26-euyd 52 | efzp-pcmd 53 | 7ygz-72yc 54 | 742w-2qkx 55 | p9yd-36dn 56 | rww2-sqpe 57 | rvw3-dget 58 | sb5j-x59u 59 | xqmb-ucdr 60 | sgf2-btru 61 | ipwb-93aq 62 | cxb7-aa9j 63 | d5jd-s3az 64 | dp5b-mgir 65 | ciur-a7cc 66 | udwx-qxp4 67 | dvjg-bv4z 68 | axcy-y39t 69 | vivu-bt5s 70 | 7idu-4tds 71 | eymz-pqcb 72 | h64y-whx9 73 | idz9-gdbc 74 | i59n-zrgu 75 | esg7-pz3n 76 | euq9-fuzw 77 | q4vk-zgiq 78 | r5wd-vzpa 79 | su77-pn2k 80 | 4kc2-vxvv 81 | 9yb5-8pvg 82 | dtud-qyw9 83 | f6t4-vve7 84 | gfvf-83vt 85 | hkne-4xqd 86 | k9pj-rna9 87 | pvrp-csfj 88 | v6fi-4hdu 89 | yfam-b7bg 90 | ynt4-n6g9 91 | 52xb-ggdw 92 | j7zj-cq7e 93 | tvzm-wwrp 94 | vwgc-k7be 95 | t85d-b449 96 | 89gv-qm3p 97 | uwfh-jrgi 98 | x8in-twjt 99 | 6uv6-kxqp 100 | viyp-qdei 101 | b6jk-5x5h 102 | tma6-pdxu 103 | bcnb-bux2 104 | tvvb-g9ni 105 | 354i-aiec 106 | xkfj-zz8i 107 | 7wih-gq3k 108 | fhku-uixf 109 | 8sq6-p7et 110 | 755x-x44q 111 | evkj-7j3w 112 | 9j5j-ped2 113 | av6t-57nx 114 | wivc-syw7 115 | api6-u3fp 116 | fb8c-dnd3 117 | wqbg-exmn 118 | 9rag-2mng 119 | sb57-rjn9 120 | cd7h-u9nu 121 | krii-vyri 122 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/index/chicago_latlon_index.txt: -------------------------------------------------------------------------------- 1 | ys2m-44iv latitude 14 longitude 15 2 | tdab-kixi latitude 6 longitude 7 3 | hwmb-iu8j latitude 12 longitude 13 location 14 4 | qnrb-dui6 latitude 16 longitude 17 5 | mh59-yhwm latitude 15 longitude 16 6 | n4j6-wkkf start_lon 8 _lif_lat 9 
# -*- coding:utf-8 -*-
from functools import wraps


def coroutine(func):
    '''
    Decorator for generators used as push-style coroutines.

    Advances the freshly created generator to its first yield by calling
    next() once, so the returned object is immediately ready to have
    values .send() into it, as the generator protocol requires.
    '''
    @wraps(func)
    def primed(*args, **kwargs):
        gen = func(*args, **kwargs)
        next(gen)
        return gen
    return primed


@coroutine
def foreach(coroutine_func):
    '''
    Feeds every item of a JSON array into its own handler coroutine.

    `coroutine_func` is invoked once per array item to create a fresh
    handler; each handler receives (prefix, event, value) tuples as
    produced by the rich JSON parser.

    The first event sent in must be a "start_array" event.
    '''
    START_EVENTS = frozenset(
        ('start_map', 'start_array', 'null', 'boolean', 'number', 'string'))
    handler = None
    base, event, value = yield
    if event != 'start_array':
        raise Exception('foreach requires "start_array" as the first event, got %s' % repr((base, event, value)))
    itemprefix = 'item' if not base else base + '.item'
    while True:
        prefix, event, value = yield
        # A new item begins: build a fresh handler coroutine for it.
        if event in START_EVENTS and prefix == itemprefix:
            handler = coroutine_func()
        # Forward everything except the array's own closing event.
        if (prefix, event) != (base, 'end_array'):
            handler.send((prefix, event, value))


@coroutine
def dispatcher(targets):
    '''
    Routes JSON parser events to handlers by prefix.

    `targets` is a list of (base_prefix, coroutine) pairs; each incoming
    (prefix, event, value) tuple is delivered to the first handler whose
    base_prefix is a string prefix of the event's prefix.
    '''
    while True:
        prefix, event, value = yield
        for base_prefix, handler in targets:
            if prefix.startswith(base_prefix):
                handler.send((prefix, event, value))
                break
import matplotlib.pyplot as plt
import sys
from os import walk
import re
import datetime
import os.path

def add_date(filename, date2count):
    """Accumulate per-month dataset counts from one id_date file.

    Each line of `filename` is "id<TAB>publication_ts<TAB>creation_ts";
    only the publication timestamp is used.  Counts are keyed by the
    publication month formatted as YYYYMM.  Returns the updated dict.
    """
    with open(filename) as lines:
        for line in lines:
            ds_id, pdate, cdate = line.strip("\n").split("\t")
            # only use publication date
            date = datetime.datetime.fromtimestamp(int(pdate))
            year_month = "%d%02d" % (date.year, date.month)  # zero-pad the month
            date2count[year_month] = date2count.get(year_month, 0) + 1
    return date2count

def prepare_data(path):
    """Scan the top level of `path` for id_date files and write date2count.csv."""
    date2count = {}  # mapping between YYYYMM and number of datasets
    for (dirpath, dirnames, filenames) in walk(path):
        for filename in filenames:
            if re.search("id_date", filename):
                # os.path.join works whether or not `path` ends with a slash
                fullname = os.path.join(path, filename)
                print(fullname)
                date2count = add_date(fullname, date2count)
        break  # only look at the top-level directory
    print(date2count)
    with open("date2count.csv", "w") as out:  # `with` guarantees the file is closed
        for date in sorted(date2count):  # sorted for deterministic output
            out.write(date + "\t" + str(date2count[date]) + "\n")

def get_data(path):
    """Return [[YYYYMM, count-as-string], ...] sorted by month.

    Builds date2count.csv from the metadata under `path` first if it
    does not exist yet.
    """
    if not os.path.isfile("date2count.csv"):
        prepare_data(path)
    date_count = []
    with open("date2count.csv") as lines:
        for line in lines:
            ym, count = line.strip("\n").split("\t")
            date_count.append([ym, count])
    date_count.sort(key=lambda x: x[0])
    return date_count

def main(argv):
    """Plot the cumulative number of tables over time, one tick per 4 months."""
    date_count = get_data("../metadata/data/")
    idx = 0
    dates = []
    area = []
    radius = []
    s = 0
    for (date, count) in date_count:
        idx += 1
        s += int(count)  # cumulative number of tables so far
        if idx % 4 == 0:  # label every 4th month to keep the axis readable
            radius.append(idx)
            dates.append(date[:4] + "/" + date[4:])  # YYYYMM -> YYYY/MM
            area.append(s)
    plt.xticks(radius, dates)
    plt.xticks(rotation=50)
    plt.plot(radius, area)
    plt.xlabel('Timeline')
    plt.ylabel('Number of tables')
    plt.grid()
    plt.show()

if __name__=="__main__":
    main(sys.argv[1:])
zip 7 37 | 9xs2-f89t zip_code 6 38 | tfmt-mmy2 zip_code 3 39 | 8bap-6xg8 zip_code 7 40 | mab8-y9h3 zip_code 9 41 | 5gdk-uk7w zip 7 42 | 2eaw-bdhe zip 3 43 | 495s-83kj zip_code 6 44 | y93d-d9e3 zip_code 4 45 | p97q-qace zip 3 46 | vekt-28b5 zip_code_or_aggregate 0 47 | 9ksk-na4q zip_code 9 48 | s6ha-ppgi zip_code 5 49 | t28b-ys7j zip_code 6 50 | zuxi-7xem zip_code 6 51 | 4jy7-7m68 zip_code 7 52 | r23p-6uic zip 6 53 | spxm-tnai zip_code 6 54 | uupf-x98q zip_code 7 55 | 4u6w-irs9 zip_code 6 56 | wryv-d7zf zip_code 4 57 | 7fu8-t497 zip 3 58 | 6uah-qehh zip_code 3 59 | hp65-bcxv zip 4 60 | 8ayb-6mjs zip 6 61 | tpf5-fgtw lobbyist_zip 7 62 | 8k9i-ia3x zip 7 63 | 28km-gtjn zip 4 64 | z8bn-74gv zip 4 65 | vuf2-qfik zip_code 3 66 | x8fc-8rcq zip 7 67 | 3r8a-9kby zip_code 7 68 | f7f2-ggz5 zip 7 69 | hxh5-e8eh zip_code 12 70 | d9re-tmpw zip_code 14 71 | 4ijn-s7e5 zip 9 72 | nen3-vcxj zip 5 73 | 3c9v-pnva zip_code 12 74 | 7as2-ds3y zip 9 75 | 3aav-uy2v zip_code 6 76 | egku-46f2 zip 3 77 | vf9u-9xcm lobbyist_zip 7 78 | 7pb7-6889 zip 3 79 | qrxi-q28n zip 3 80 | ti44-vee7 zip 3 81 | hec5-y4x5 zip_code 8 82 | dgeh-7h9y zip 3 83 | 8yti-tif4 zipcode 5 city_hall_zipcode 13 84 | x74m-smqb zip 3 85 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/index/sf_zipcode_index.txt: -------------------------------------------------------------------------------- 1 | v22h-ujnv zip_code 1 2 | mupm-43n6 tran_zip4 22 3 | 6iqh-u3hk payee_zip4 22 4 | k78b-bnzt business_zip 5 mailing_city_state_zip_code 14 5 | funx-qxxn business_zip 7 mailing_city_state_zip_code 16 6 | dzre-gd7q business_zip 4 mailing_city_state_zip_code 13 7 | qwgb-tu3g tran_zip4 22 8 | ujme-i5np zip_code 0 9 | e2x8-npup business_zip 4 mailing_city_state_zip_code 13 10 | 6ssu-ewuc tran_zip4 22 11 | hmda-uywf business_zip 7 mailing_city_state_zip_code 16 12 | z76i-7s65 zipcode 6 13 | phrj-5yd5 business_zip 3 mailing_city_state_zip_code 12 14 | fp2p-prbg tran_zip4 22 15 | 
hbjm-s5ay business_zip 3 mailing_city_state_zip_code 12 16 | n65z-9iyj business_zip 4 mailing_city_state_zip_code 13 17 | whm4-mn72 business_zip 7 mailing_city_state_zip_code 16 18 | 5ayi-he6v zip 7 19 | pyxa-3r7p payee_zip4 22 20 | v456-mgti tran_zip4 22 21 | 4phr-3hrm loan_zip4 22 22 | rx2w-v8zb mailing_address_city_state_and_zip_code 6 business_address_city_state_and_zip_code 8 23 | bgq6-5mf8 business_zip 4 mailing_city_state_zip_code 13 24 | nvpi-vhb7 business_zip 7 mailing_city_state_zip_code 16 25 | b6tj-gt35 permit_zipcode 11 26 | w3ep-wixv tran_zip4 22 27 | tr8k-7cit payee_zip4 22 28 | rzvw-zvmg tran_zip4 22 29 | efrz-5mfq business_zip 7 mailing_city_state_zip_code 16 30 | anpk-hx6u zip 9 31 | mz4g-xxwd business_zip 7 mailing_city_state_zip_code 16 32 | 6jj7-u7ax zip_code 11 33 | ntkt-myzv tran_zip4 22 34 | q66q-d2tr tran_zip4 22 35 | 8ud4-mc82 business_zip 4 mailing_city_state_zip_code 13 36 | rynu-4e44 tran_zip4 22 37 | p5kp-5mtp client_zip 8 38 | vsy2-vybn tran_zip4 22 39 | aayf-qzg2 business_zip 3 mailing_city_state_zip_code 12 40 | gk2f-isrp payee_zip4 22 41 | iuv4-tqzq zip_code 11 42 | hc26-j9if payee_zip4 22 43 | k4ji-djiq business_zip 7 mailing_city_state_zip_code 16 44 | 86nq-bynj payee_zip4 22 bus_zip4 69 45 | 4zbw-xuig payee_zip4 16 46 | jyag-jj92 tran_zip4 22 intr_zip4 50 47 | by7b-r76m tran_zip4 22 48 | dg5s-2n6f business_zip 7 mailing_city_state_zip_code 16 49 | 62ex-d3qk loan_zip4 22 50 | crqn-k9bw zip 9 51 | wy9w-f6fu zipcode 6 52 | wrjq-a6r8 business_zip 4 mailing_city_state_zip_code 13 53 | 2tgf-pc6f business_zip 7 mailing_city_state_zip_code 16 54 | s57h-9wm9 mailing_address_city_state_and_zip_code 6 business_address_city_state_and_zip_code 8 55 | 4q92-gm9f vendor_zip 8 56 | gz8r-ag83 payee_zip4 22 57 | pn39-4xw4 tran_zip4 22 58 | y8r8-8ptg tran_zip4 22 59 | p4sp-es3b tran_zip4 22 60 | k76b-4yme tran_zip4 22 61 | dvrf-izet tran_zip4 22 62 | p3r9-xbpg tran_zip4 22 63 | ehdn-tx7u tran_zip4 22 64 | u4y3-k4vs client_zip 8 65 | capd-mzck 
import matplotlib.pyplot as plt
import sys
from os import walk
import re
import datetime
import os.path
from matplotlib.ticker import AutoMinorLocator

def add_date(filename, date2count):
    """Accumulate per-month dataset counts from one id_date file.

    Each line of `filename` is "id<TAB>publication_ts<TAB>creation_ts";
    only the publication timestamp is used.  Counts are keyed by the
    publication month formatted as YYYYMM.  Returns the updated dict.
    """
    with open(filename) as lines:
        for line in lines:
            ds_id, pdate, cdate = line.strip("\n").split("\t")
            # only use publication date
            date = datetime.datetime.fromtimestamp(int(pdate))
            year_month = "%d%02d" % (date.year, date.month)  # zero-pad the month
            date2count[year_month] = date2count.get(year_month, 0) + 1
    return date2count

def prepare_data(path):
    """Scan the top level of `path` for id_date files and write date2count.csv."""
    date2count = {}  # mapping between YYYYMM and number of datasets
    for (dirpath, dirnames, filenames) in walk(path):
        for filename in filenames:
            if re.search("id_date", filename):
                # os.path.join works whether or not `path` ends with a slash
                fullname = os.path.join(path, filename)
                print(fullname)
                date2count = add_date(fullname, date2count)
        break  # only look at the top-level directory
    print(date2count)
    with open("date2count.csv", "w") as out:  # `with` guarantees the file is closed
        for date in sorted(date2count):  # sorted for deterministic output
            out.write(date + "\t" + str(date2count[date]) + "\n")

def get_data(path):
    """Return [[YYYYMM, count-as-string], ...] sorted by month.

    Builds date2count.csv from the metadata under `path` first if it
    does not exist yet.
    """
    if not os.path.isfile("date2count.csv"):
        prepare_data(path)
    date_count = []
    with open("date2count.csv") as lines:
        for line in lines:
            ym, count = line.strip("\n").split("\t")
            date_count.append([ym, count])
    date_count.sort(key=lambda x: x[0])
    return date_count

def main(argv):
    """Plot cumulative table counts with a tick every 6 months; label years only."""
    date_count = get_data("../metadata/data/")
    idx = 0
    dates = []
    area = []
    radius = []
    s = 0
    for (date, count) in date_count:
        idx += 1
        s += int(count)  # cumulative number of tables so far
        # Only full years get a text label; intermediate ticks stay blank.
        label = date[:4] if idx % 12 == 0 else ""
        if idx % 6 == 0:
            radius.append(idx)
            dates.append(label)
            area.append(s)

    plt.xticks(radius, dates)
    plt.plot(radius, area)
    plt.xlabel('Timeline')
    plt.ylabel('Number of tables')
    plt.grid()
    plt.show()

if __name__=="__main__":
    main(sys.argv[1:])
9 40 | mgvq-ati4 zipcode 3 41 | cgbu-k38b zip 6 42 | fbte-5u6g zipcode 11 43 | 83tc-w4pj zip_code 9 44 | ykyy-vsei zipcode 10 45 | r3ef-ysd4 zipcode 10 46 | dipa-3cie zip 7 47 | 4vt8-kfnu zip_code 1 48 | ks2s-yguy zipcode 3 49 | 4vq7-4p6a zip_code 2 50 | sy5m-x8pe zip_code 9 51 | aket-wpiw zip_code 9 52 | uza4-2fj4 zipcode 6 53 | 7i6x-m8sc zipcode 9 54 | 5sfv-g7jd zip_code 9 55 | 5349-7a9t zip_code 9 56 | ay5t-zph8 zip_code 1 57 | ntq9-ups2 zipcode 6 58 | 8aqp-4djd zipcode 6 59 | ctk7-ig49 zip_code 9 60 | qp8i-q9d5 zipcode 7 61 | nytw-fmz3 zip_code 14 62 | p333-ufv5 zipcode 5 63 | aez3-wg7q zipcode 6 64 | grzb-gufz zip_code 9 65 | 8sve-3g9n facility_zip 7 66 | 8h39-q9hm zipcode 9 67 | jkbq-7h2i zip 5 68 | ehh5-32d3 zip_code 9 69 | dqet-j5bc zipcode 8 70 | dfuy-hga5 zip_code 9 71 | txik-ccii zip_code 9 72 | jx2x-bscc facility_zip 7 73 | ir3u-dv6q zipcode 9 74 | hcyh-6t9m zipcode 7 75 | adrn-rsvg zipcode 6 76 | mmy6-sscw zip_code 9 77 | h29t-a3ti zip_code 9 78 | ei27-98n4 zip_code 9 79 | tbcn-7xmb zipcode 9 80 | tgwe-hk6n zipcode 9 81 | cgpa-pjva zipcode 9 82 | heym-2frk zipcode 5 83 | gx5v-53hg zipcode 6 84 | uavk-68tt zip_code 9 85 | g5up-449w zip_code 9 86 | hywd-y4vv groupzip 8 87 | x5sp-av9v zipcode 9 88 | 2ezj-prfz zip_code 9 89 | j3vd-wims zip_code 1 90 | x5mb-q9se zip_code 21 91 | 8kfr-ui65 zip_code 1 92 | nypf-bz5b zipcode 5 93 | yi2v-3ssg zip_code 9 94 | 7kv2-vpry zipcode 6 95 | gj9g-ucun zip_code 9 96 | xw5s-y9pu zipcode 9 97 | p2vx-ev69 zipcode 9 98 | sm5y-c9pj zip 6 99 | 2cun-c2xr zip_code 9 100 | 6445-ctvs zip_code 9 101 | 839a-34uw zip_code 9 102 | fvhn-5vsv zip 5 103 | ibbi-sirw zip_code 9 104 | 9d9w-zre9 zip_code 1 105 | dymb-xy5c zip_code 14 106 | wu22-kvdm zipcode 8 107 | im7g-fucq zip_code 1 108 | xnn5-tna4 zipcode 10 109 | wzhv-ftxn zipcode 7 110 | js3d-4ga8 zip_code 9 111 | mmn5-wy78 zip_code 1 112 | 6pi7-rfgq zipcode 9 113 | djv7-4q5r zip_code 9 114 | 9zpd-u4mp zipcode 11 115 | 5n8b-dbbb zipcode 10 116 | pzip-wwk6 zip 5 117 | 
-------------------------------------------------------------------------------- /download/ids/austin_ids.txt: -------------------------------------------------------------------------------- 1 | ri75-pahg 2 | 8jyt-x94k 3 | ecmv-9xxi 4 | 5tye-7ray 5 | h3i4-5e5v 6 | hqa6-stx4 7 | szku-46rx 8 | x442-h34c 9 | trxj-f8br 10 | 8c6z-qnmj 11 | q37s-pqpu 12 | wrwk-skv6 13 | 4c6h-tv2y 14 | d6z4-s3ex 15 | 88dg-7xxd 16 | siyu-szxn 17 | bqav-9x6a 18 | 64cq-wf5u 19 | b4y9-5x39 20 | c6ja-7mhw 21 | gzyt-t2by 22 | 4i8t-nckg 23 | q7wj-9ws7 24 | gr59-ids7 25 | ga9y-ypai 26 | jbaf-xebm 27 | dv3q-tn2r 28 | hek3-kuva 29 | amh5-bifm 30 | 8aah-diw2 31 | hut9-4n8t 32 | fhca-e5je 33 | s4tf-m9g2 34 | 5tx2-pk4n 35 | tx8s-62r6 36 | gqmc-bxs4 37 | b6cd-bhbk 38 | cr7p-ssq7 39 | 84ih-p28j 40 | nq9x-w8sx 41 | 5gjn-nmcf 42 | r5kt-xq3y 43 | rfif-mmvg 44 | scqk-petw 45 | mwqa-epx5 46 | 5fnu-ngjq 47 | ykw4-j3aj 48 | gx7t-wzxw 49 | 4sf2-s9as 50 | g9bx-8meu 51 | 959k-a8yh 52 | r6sg-xka2 53 | a6pm-qynf 54 | de95-4khj 55 | uszv-p75d 56 | nttt-2a35 57 | iuw2-kwij 58 | nmp9-45v2 59 | xj3h-ppw2 60 | kidc-knry 61 | yqxj-7evp 62 | zzix-yxi4 63 | wr7f-jdtu 64 | fsgj-5xyt 65 | 2ds5-jyca 66 | awqv-vbfj 67 | 3gc4-g537 68 | gt3n-akq9 69 | wd9d-2jf3 70 | 4u75-seeq 71 | 3qu3-nwxj 72 | ei2n-fehk 73 | ab9p-kxqp 74 | ec78-i9z5 75 | b73m-kiye 76 | ajpy-mwjj 77 | cusd-m48y 78 | f7fd-4st5 79 | a9hv-5z8i 80 | i6dj-uuqe 81 | 9jwp-y89b 82 | ahj3-w5hk 83 | sswp-u5uh 84 | u3yy-shmz 85 | hdpu-g3yy 86 | jmp6-p8e2 87 | chv7-cszp 88 | gwrj-cykm 89 | vwcu-h3qu 90 | uqcn-typ6 91 | rxia-etc2 92 | 6yeq-zz6u 93 | kxm4-pr4y 94 | n63c-e24q 95 | uber-bhwe 96 | vgkf-yny7 97 | 7j64-2qf8 98 | s6n4-3bq2 99 | xcd2-xf2f 100 | nynz-w2da 101 | gxwy-g5wa 102 | 8zu2-guks 103 | w6f6-d2ag 104 | yj9d-ajag 105 | utnt-hag5 106 | m9jn-qzir 107 | ergh-7g8p 108 | dtkn-v97q 109 | 3e38-4hji 110 | hh3n-3s7c 111 | h4as-bnn4 112 | fksj-fw68 113 | uvma-gv9c 114 | ur6a-fvpc 115 | 3ebq-e9iz 116 | 8uvp-rwpt 117 | sasb-f978 118 | paa2-kvza 119 | sute-ma6h 120 | 
k8rc-mjrt 121 | 3w87-zbw7 122 | rb6p-jsp4 123 | 5brd-nqzg 124 | 54hu-zyfw 125 | cpdm-pgcz 126 | e5iz-h53i 127 | eqg5-tgc4 128 | jbk5-567r 129 | ysk2-5se4 130 | 3rv7-26gf 131 | 7d7e-riap 132 | c6vs-wub4 133 | eqas-3yai 134 | h8vx-fici 135 | k58w-wc5b 136 | r5a8-wp8c 137 | s78c-gi5b 138 | gj8a-7w2i 139 | ctpt-q8h6 140 | h2ns-nnc3 141 | 92xm-uf99 142 | 4wv7-h5ag 143 | 54j7-ewrt 144 | cffj-ydng 145 | 7dis-buys 146 | 558y-rgv6 147 | d7k5-jyb8 148 | kp2i-ttw8 149 | xa6d-gfkg 150 | ct7f-fbbn 151 | 9bpw-2ysw 152 | qzi7-nx8g 153 | 8u76-ei8i 154 | e8fp-i3ts 155 | eqcz-7qvc 156 | m38i-k8s7 157 | i26j-ai4z 158 | wstj-t8me 159 | xwdj-i9he 160 | c5ah-7mah 161 | 7s8g-vgat 162 | 9brw-ikmh 163 | cutd-edeq 164 | w2wa-sfs6 165 | aimq-hsia 166 | d5qe-8uyf 167 | e3jj-bj6e 168 | p6kk-bbf5 169 | 42ix-g4e3 170 | yann-xf22 171 | n9gm-fzdc 172 | c69b-fkfx 173 | u8uw-t2sm 174 | md9p-6y8z 175 | 4gv8-96x2 176 | uqe6-trgb 177 | yh8u-4rgy 178 | bqki-3pkf 179 | rsp7-azrf 180 | wu45-d3h5 181 | 7it9-7pjx 182 | efix-ampv 183 | gw3x-dtde 184 | whim-t39w 185 | m44u-tdna 186 | uwbz-byyt 187 | pe4x-g4qi 188 | kfeh-ue8m 189 | tz7h-nvqd 190 | efz6-47ik 191 | p47h-h7ra 192 | dtqa-6pjt 193 | 8ruh-ty5d 194 | xbbp-8bw7 195 | 8pvq-5pcm 196 | 2yh2-pequ 197 | yur9-jspm 198 | v7cg-67vv 199 | 36xs-z29u 200 | iwgt-862p 201 | epxw-n458 202 | yrpa-wmth 203 | 3ghn-wv5a 204 | vzty-yezt 205 | 4sv9-5zm8 206 | ba4t-mrbm 207 | 74y5-wjkf 208 | 567b-4d24 209 | ps5c-8d86 210 | eg8t-399m 211 | ykzu-pxxq 212 | qmwp-kjjs 213 | ydem-x7j5 214 | nqv2-nbrj 215 | -------------------------------------------------------------------------------- /type_detection/ids/austin_ids.txt: -------------------------------------------------------------------------------- 1 | ri75-pahg 2 | 8jyt-x94k 3 | ecmv-9xxi 4 | 5tye-7ray 5 | h3i4-5e5v 6 | hqa6-stx4 7 | szku-46rx 8 | x442-h34c 9 | trxj-f8br 10 | 8c6z-qnmj 11 | q37s-pqpu 12 | wrwk-skv6 13 | 4c6h-tv2y 14 | d6z4-s3ex 15 | 88dg-7xxd 16 | siyu-szxn 17 | bqav-9x6a 18 | 64cq-wf5u 19 | b4y9-5x39 20 | 
c6ja-7mhw 21 | gzyt-t2by 22 | 4i8t-nckg 23 | q7wj-9ws7 24 | gr59-ids7 25 | ga9y-ypai 26 | jbaf-xebm 27 | dv3q-tn2r 28 | hek3-kuva 29 | amh5-bifm 30 | 8aah-diw2 31 | hut9-4n8t 32 | fhca-e5je 33 | s4tf-m9g2 34 | 5tx2-pk4n 35 | tx8s-62r6 36 | gqmc-bxs4 37 | b6cd-bhbk 38 | cr7p-ssq7 39 | 84ih-p28j 40 | nq9x-w8sx 41 | 5gjn-nmcf 42 | r5kt-xq3y 43 | rfif-mmvg 44 | scqk-petw 45 | mwqa-epx5 46 | 5fnu-ngjq 47 | ykw4-j3aj 48 | gx7t-wzxw 49 | 4sf2-s9as 50 | g9bx-8meu 51 | 959k-a8yh 52 | r6sg-xka2 53 | a6pm-qynf 54 | de95-4khj 55 | uszv-p75d 56 | nttt-2a35 57 | iuw2-kwij 58 | nmp9-45v2 59 | xj3h-ppw2 60 | kidc-knry 61 | yqxj-7evp 62 | zzix-yxi4 63 | wr7f-jdtu 64 | fsgj-5xyt 65 | 2ds5-jyca 66 | awqv-vbfj 67 | 3gc4-g537 68 | gt3n-akq9 69 | wd9d-2jf3 70 | 4u75-seeq 71 | 3qu3-nwxj 72 | ei2n-fehk 73 | ab9p-kxqp 74 | ec78-i9z5 75 | b73m-kiye 76 | ajpy-mwjj 77 | cusd-m48y 78 | f7fd-4st5 79 | a9hv-5z8i 80 | i6dj-uuqe 81 | 9jwp-y89b 82 | ahj3-w5hk 83 | sswp-u5uh 84 | u3yy-shmz 85 | hdpu-g3yy 86 | jmp6-p8e2 87 | chv7-cszp 88 | gwrj-cykm 89 | vwcu-h3qu 90 | uqcn-typ6 91 | rxia-etc2 92 | 6yeq-zz6u 93 | kxm4-pr4y 94 | n63c-e24q 95 | uber-bhwe 96 | vgkf-yny7 97 | 7j64-2qf8 98 | s6n4-3bq2 99 | xcd2-xf2f 100 | nynz-w2da 101 | gxwy-g5wa 102 | 8zu2-guks 103 | w6f6-d2ag 104 | yj9d-ajag 105 | utnt-hag5 106 | m9jn-qzir 107 | ergh-7g8p 108 | dtkn-v97q 109 | 3e38-4hji 110 | hh3n-3s7c 111 | h4as-bnn4 112 | fksj-fw68 113 | uvma-gv9c 114 | ur6a-fvpc 115 | 3ebq-e9iz 116 | 8uvp-rwpt 117 | sasb-f978 118 | paa2-kvza 119 | sute-ma6h 120 | k8rc-mjrt 121 | 3w87-zbw7 122 | rb6p-jsp4 123 | 5brd-nqzg 124 | 54hu-zyfw 125 | cpdm-pgcz 126 | e5iz-h53i 127 | eqg5-tgc4 128 | jbk5-567r 129 | ysk2-5se4 130 | 3rv7-26gf 131 | 7d7e-riap 132 | c6vs-wub4 133 | eqas-3yai 134 | h8vx-fici 135 | k58w-wc5b 136 | r5a8-wp8c 137 | s78c-gi5b 138 | gj8a-7w2i 139 | ctpt-q8h6 140 | h2ns-nnc3 141 | 92xm-uf99 142 | 4wv7-h5ag 143 | 54j7-ewrt 144 | cffj-ydng 145 | 7dis-buys 146 | 558y-rgv6 147 | d7k5-jyb8 148 | kp2i-ttw8 149 | xa6d-gfkg 150 
| ct7f-fbbn 151 | 9bpw-2ysw 152 | qzi7-nx8g 153 | 8u76-ei8i 154 | e8fp-i3ts 155 | eqcz-7qvc 156 | m38i-k8s7 157 | i26j-ai4z 158 | wstj-t8me 159 | xwdj-i9he 160 | c5ah-7mah 161 | 7s8g-vgat 162 | 9brw-ikmh 163 | cutd-edeq 164 | w2wa-sfs6 165 | aimq-hsia 166 | d5qe-8uyf 167 | e3jj-bj6e 168 | p6kk-bbf5 169 | 42ix-g4e3 170 | yann-xf22 171 | n9gm-fzdc 172 | c69b-fkfx 173 | u8uw-t2sm 174 | md9p-6y8z 175 | 4gv8-96x2 176 | uqe6-trgb 177 | yh8u-4rgy 178 | bqki-3pkf 179 | rsp7-azrf 180 | wu45-d3h5 181 | 7it9-7pjx 182 | efix-ampv 183 | gw3x-dtde 184 | whim-t39w 185 | m44u-tdna 186 | uwbz-byyt 187 | pe4x-g4qi 188 | kfeh-ue8m 189 | tz7h-nvqd 190 | efz6-47ik 191 | p47h-h7ra 192 | dtqa-6pjt 193 | 8ruh-ty5d 194 | xbbp-8bw7 195 | 8pvq-5pcm 196 | 2yh2-pequ 197 | yur9-jspm 198 | v7cg-67vv 199 | 36xs-z29u 200 | iwgt-862p 201 | epxw-n458 202 | yrpa-wmth 203 | 3ghn-wv5a 204 | vzty-yezt 205 | 4sv9-5zm8 206 | ba4t-mrbm 207 | 74y5-wjkf 208 | 567b-4d24 209 | ps5c-8d86 210 | eg8t-399m 211 | ykzu-pxxq 212 | qmwp-kjjs 213 | ydem-x7j5 214 | nqv2-nbrj 215 | -------------------------------------------------------------------------------- /type_detection/detect.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | ## 3 | ## Copyright (C) 2014, New York University. 4 | ## All rights reserved. 5 | ## Contact: kien.pham@nyu.edu 6 | ## 7 | ## "Redistribution and use in source and binary forms, with or without 8 | ## modification, are permitted provided that the following conditions are met: 9 | ## 10 | ## - Redistributions of source code must retain the above copyright notice, 11 | ## this list of conditions and the following disclaimer. 12 | ## - Redistributions in binary form must reproduce the above copyright 13 | ## notice, this list of conditions and the following disclaimer in the 14 | ## documentation and/or other materials provided with the distribution. 
15 | ## - Neither the name of New York University nor the names of its 16 | ## contributors may be used to endorse or promote products derived from 17 | ## this software without specific prior written permission. 18 | ## 19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 | ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." 
##
###############################################################################

import sys
import regex
import os.path
import re


def detect_type(sample_path, output_path, city, id):
    """Detect semantic types for the columns of one sampled data set.

    Reads <sample_path>/<city>_<id>.txt, where each tab-separated line is a
    column name followed by its sampled values, and writes one
    "column<TAB>type" line per detected type to <output_path>/<city>_<id>.txt.
    Does nothing when the sample file is missing, and skips work when the
    output file already exists (results are cached on disk).
    """
    filename = sample_path + "/" + city + "_" + id + ".txt"
    if not os.path.isfile(filename):
        return

    output_detail_file = output_path + "/" + city + "_" + id + ".txt"

    if os.path.isfile(output_detail_file):
        print("File " + output_detail_file + " already exists")
        return

    # 'with' guarantees both handles are closed even if regex.detect raises.
    with open(output_detail_file, "w") as output:
        with open(filename) as lines:
            for line in lines:
                a = line.strip("\n").split("\t")
                if len(a) < 2:
                    # A line without sampled values cannot be typed.
                    # (The original tested len(a) < 1, which split() can
                    # never produce, so value-less lines slipped through.)
                    continue
                column = a[0]
                values = a[1:]
                # regex.detect returns the (possibly empty) list of type
                # names matching this column's sampled values.
                for t in regex.detect(column, values):
                    output.write(column + "\t" + t + "\n")


def main(argv):
    '''
    First Argument: path to directory containing sampling values of all data sets
    Second Argument: path to the file containing all data set ids.
    Third Argument: path to directory containing detection results.
    Fourth Argument: city name, which is used as a prefix for output files.
    '''
    if len(argv) != 4:
        print("The program takes 4 arguments, " + str(len(argv)) + " is given.")
        return
    sample_path = argv[0]
    ids_file = argv[1]
    output_path = argv[2]
    city = argv[3]

    # One dataset id per line in ids_file; each id is typed independently.
    with open(ids_file) as lines:
        for line in lines:
            id = line.strip("\n")
            detect_type(sample_path, output_path, city, id)


if __name__ == "__main__":
    main(sys.argv[1:])
##
###############################################################################

import sys
import numpy as np

# Row order of loc.csv; also the drawing order of the bar series.
CITY_ORDER = ["NYC", "Kansas", "Seattle", "Chicago", "Baltimore", "SF",
              "Raleigh", "Edmonton", "Boston", "Austin", "All Cities"]


def getData():
    """Read loc.csv (one comma-separated row of floats per city).

    Returns [s, m] where s is the city display order and m maps each city
    name to its row of values.  Raises IndexError when the file holds fewer
    rows than CITY_ORDER (the original behaved the same way).
    """
    with open("loc.csv") as lines:
        rows = [[float(x) for x in line.strip("\n").split(",")]
                for line in lines]
    m = {city: rows[i] for i, city in enumerate(CITY_ORDER)}
    return [list(CITY_ORDER), m]


def main(argv):
    """Draw a grouped bar chart of location-attribute coverage per city."""
    # Imported lazily so getData() stays usable without a display backend.
    import matplotlib.pyplot as plt

    s, m = getData()
    label = ["Lat/Lon", "Address", "Zipcode"]  # Label for x axis
    _color = {
        "Baltimore": "#a6cee3",
        "Chicago": "#1f78b4",
        "Edmonton": "#b2df8a",
        "Kansas": "#33a02c",
        "Seattle": "#fb9a99",
        "SF": "#e31a1c",
        "NYC": "#fdbf6f",
        "Boston": "#ff7f00",
        "Austin": "#cab2d6",
        "Raleigh": "#6a3d9a",
        "All Cities": "#ffff99"}

    N = 3         # three attribute groups along the x axis
    width = 0.07  # width of one bar within a group
    ind = np.arange(N)
    fig, ax = plt.subplots()
    # Offset each city's bars by its position so groups do not overlap.
    for count, key in enumerate(s, start=1):
        ax.bar(ind + count * width, m[key], width, color=_color[key])

    plt.ylim(0, 1.0)
    plt.ylabel('Percentage of Datasets', fontsize=30)
    plt.xticks(ind + 0.4, label, fontsize=25)
    plt.yticks(fontsize=17)

    plt.show()


if __name__ == "__main__":
    main(sys.argv[1:])
##
###############################################################################

import sys
import numpy as np

# Row order of time_loc_number.csv; also the drawing order of the bar series.
CITY_ORDER = ["NYC", "Kansas", "Seattle", "Chicago", "Baltimore", "SF",
              "Raleigh", "Edmonton", "Boston", "Austin", "All Cities"]


def getData():
    """Read time_loc_number.csv (one comma-separated row of floats per city).

    Returns [s, m] where s is the city display order and m maps each city
    name to its row of values.  Raises IndexError when the file holds fewer
    rows than CITY_ORDER (the original behaved the same way).
    """
    with open("time_loc_number.csv") as lines:
        rows = [[float(x) for x in line.strip("\n").split(",")]
                for line in lines]
    m = {city: rows[i] for i, city in enumerate(CITY_ORDER)}
    return [list(CITY_ORDER), m]


def main(argv):
    """Draw a grouped bar chart of location/time/number coverage per city."""
    # Imported lazily so getData() stays usable without a display backend.
    import matplotlib.pyplot as plt

    s, m = getData()
    label = ["Location", "Time", "Number"]  # Label for x axis
    _color = {
        "Baltimore": "#a6cee3",
        "Chicago": "#1f78b4",
        "Edmonton": "#b2df8a",
        "Kansas": "#33a02c",
        "Seattle": "#fb9a99",
        "SF": "#e31a1c",
        "NYC": "#fdbf6f",
        "Boston": "#ff7f00",
        "Austin": "#cab2d6",
        "Raleigh": "#6a3d9a",
        "All Cities": "#ffff99"}

    N = 3         # three attribute groups along the x axis
    width = 0.07  # width of one bar within a group
    ind = np.arange(N)
    fig, ax = plt.subplots()
    # Offset each city's bars by its position so groups do not overlap.
    for count, key in enumerate(s, start=1):
        ax.bar(ind + count * width, m[key], width, color=_color[key])
    plt.ylim(0, 1.0)

    plt.ylabel('Percentage of Datasets', fontsize=30)
    plt.xticks(ind + 0.5, label, fontsize=25)
    plt.tick_params(axis='y', labelsize=17)

    plt.show()


if __name__ == "__main__":
    main(sys.argv[1:])
15 | ## - Neither the name of New York University nor the names of its 16 | ## contributors may be used to endorse or promote products derived from 17 | ## this software without specific prior written permission. 18 | ## 19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 | ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." 
##
###############################################################################

import sys
import numpy as np

# Row order of time.csv; also the drawing order of the bar series.
CITY_ORDER = ["NYC", "Kansas", "Seattle", "Chicago", "Baltimore", "SF",
              "Raleigh", "Edmonton", "Boston", "Austin", "All Cities"]


def getData():
    """Read time.csv (one comma-separated row of floats per city).

    Returns [s, m] where s is the city display order and m maps each city
    name to its row of values.  Raises IndexError when the file holds fewer
    rows than CITY_ORDER (the original behaved the same way).
    """
    with open("time.csv") as lines:
        rows = [[float(x) for x in line.strip("\n").split(",")]
                for line in lines]
    m = {city: rows[i] for i, city in enumerate(CITY_ORDER)}
    return [list(CITY_ORDER), m]


def main(argv):
    """Draw a grouped bar chart of date/month/year attribute coverage per city."""
    # Imported lazily so getData() stays usable without a display backend.
    import matplotlib.pyplot as plt

    s, m = getData()
    label = ["Date", "Month", "Year"]  # Label for x axis
    _color = {
        "Baltimore": "#a6cee3",
        "Chicago": "#1f78b4",
        "Edmonton": "#b2df8a",
        "Kansas": "#33a02c",
        "Seattle": "#fb9a99",
        "SF": "#e31a1c",
        "NYC": "#fdbf6f",
        "Boston": "#ff7f00",
        "Austin": "#cab2d6",
        "Raleigh": "#6a3d9a",
        "All Cities": "#ffff99"}

    N = 3         # three attribute groups along the x axis
    width = 0.07  # width of one bar within a group
    ind = np.arange(N)
    fig, ax = plt.subplots()
    # Offset each city's bars by its position so groups do not overlap.
    for count, key in enumerate(s, start=1):
        ax.bar(ind + count * width, m[key], width, color=_color[key])

    plt.ylim(0, 1.0)
    plt.ylabel('Percentage of Datasets', fontsize=30)
    plt.xticks(ind + 0.4, label, fontsize=25)
    plt.tick_params(axis='y', labelsize=17)

    plt.show()


if __name__ == "__main__":
    main(sys.argv[1:])
plt.show() 112 | 113 | if __name__=="__main__": 114 | main(sys.argv[1:]) 115 | 116 | -------------------------------------------------------------------------------- /latlon_to_zipcode/main.cpp: -------------------------------------------------------------------------------- 1 | #include "KdTreeBB.hpp" 2 | #include "Neighborhoods.hpp" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | KdTreeBB::Item* loadItems(char* file, KdTreeBB::Item* &items, int &size) 10 | { 11 | std::ifstream in(file); 12 | std::string line; 13 | 14 | float left, right, top, bottom; 15 | int zipcode; 16 | 17 | std::getline(in, line); 18 | size = atoi(line.c_str()); //number of lines 19 | items = (KdTreeBB::Item*)malloc(sizeof(KdTreeBB::Item)*size); 20 | int *zipCodes = (int*)malloc(sizeof(int)*size); 21 | int index = 0; 22 | while(std::getline(in, line)) 23 | { 24 | std::stringstream lineStream(line); 25 | sscanf(line.c_str(), "%d %f %f %f %f", &zipcode, &left, &bottom, &right, &top); 26 | 27 | items[index].bbox[0][0] = left; 28 | items[index].bbox[0][1] = right; 29 | items[index].bbox[1][0] = bottom; 30 | items[index].bbox[1][1] = top; 31 | zipCodes[index] = zipcode; 32 | items[index].data = zipCodes+index; 33 | index ++; 34 | } 35 | return items; 36 | } 37 | 38 | void Initialize(Neighborhoods &nb, KdTreeBB &kdtree) 39 | { 40 | //Create KDTree 41 | int size; 42 | KdTreeBB::Item* items; 43 | loadItems("converted_shapefile/bboxes.csv", items, size); 44 | kdtree.createKdTree(items, size); 45 | 46 | nb.loadFromFile("converted_shapefile/point.txt"); 47 | } 48 | 49 | int searchZipCode(float lat, float lon, Neighborhoods &nb, const KdTreeBB &kdtree) 50 | { 51 | KdTreeBB::Query q; 52 | q.setViewport(lon, lat, lon, lat); 53 | KdTreeBB::QueryResult result; 54 | kdtree.query(q, result); 55 | 56 | for (int i=0; i zips;//Output 69 | 70 | std::string output = std::string(filename) + std::string(".zipcode"); 71 | std::ofstream outFile; 72 | outFile.open(output.c_str()); 73 | 74 | 75 | 
from os import walk  # NOTE(review): this module's import header is truncated in this dump; walk is required below


def get_all_schema(path):
    """Build {dataset_id: schema} for every *.json file directly under path.

    Relies on the module-level get_schema() helper (defined earlier in this
    file) to extract a single file's schema.  Returns None when the
    directory walk fails.
    """
    m = {}
    try:
        for (dirpath, dirnames, filenames) in walk(path):
            for filename in filenames:
                if filename[-4:] == 'json':
                    id = filename[:-5]  # strip the ".json" suffix
                    schema = get_schema(dirpath + filename)
                    if schema:
                        m[id] = schema
            # NOTE(review): the break's indentation is ambiguous in this dump;
            # breaking after the first walk() entry (top level only) is the
            # reading consistent with how run() uses this function — confirm.
            break
        return m
    except Exception as ex:
        print(ex)
        return None


def jaccard_similarity(schema1, schema2):
    """Jaccard similarity of two schema sets: |A & B| / |A | B|.

    Returns 0.0 when both sets are empty; the original raised
    ZeroDivisionError in that edge case.
    """
    union = schema1.union(schema2)
    if not union:
        return 0.0
    return len(schema1.intersection(schema2)) / float(len(union))


def run(city, in_path, out_path):
    """Write tab-separated pairwise schema similarities for one city.

    Output file: <out_path>/<city>_schema_similarity.txt.  Only pairs with
    positive similarity are recorded.
    """
    m = get_all_schema(in_path)
    if m is None:
        # get_all_schema() signals failure with None; the original crashed
        # here on None.keys().
        print("Could not collect schemas from " + in_path)
        return
    ids = list(m.keys())  # list() so ids are indexable under Python 3 too
    with open(out_path + "/" + city + "_schema_similarity.txt", "w") as f:
        for i in range(len(ids)):
            for j in range(i + 1, len(ids)):
                sim = jaccard_similarity(set(m[ids[i]]), set(m[ids[j]]))
                if sim > 0:
                    f.write(ids[i] + "\t" + ids[j] + "\t" + str(sim) + "\n")
def number(value):
    """Cast a string holding any Javascript number literal to the matching
    Python value: an int when the literal is integral, otherwise a Decimal."""
    try:
        parsed = int(value)
    except ValueError:
        parsed = Decimal(value)
    return parsed
_callback_data = [
    # Mapping of JSON parser events to callback C types and value converters.
    # Used to define the Callbacks structure and actual callback functions
    # inside the parse function.
    ('null', C_EMPTY, lambda: None),
    ('boolean', C_INT, lambda v: bool(v)),
    # "integer" and "double" aren't actually yielded by yajl since "number"
    # takes precedence if defined
    ('integer', C_LONG, lambda v, l: int(string_at(v, l))),
    ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))),
    ('number', C_STR, lambda v, l: number(b2s(string_at(v, l)))),
    ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')),
    ('start_map', C_EMPTY, lambda: None),
    ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))),
    ('end_map', C_EMPTY, lambda: None),
    ('start_array', C_EMPTY, lambda: None),
    ('end_array', C_EMPTY, lambda: None),
]

class Callbacks(Structure):
    # One C function-pointer field per yajl event, in the order yajl's
    # yajl_callbacks struct declares them (the order of _callback_data).
    _fields_ = [(name, type) for name, type, func in _callback_data]

class Config(Structure):
    # Mirrors yajl 1.x's yajl_parser_config struct: two unsigned flags.
    _fields_ = [
        ("allowComments", c_uint),
        ("checkUTF8", c_uint)
    ]

# yajl_status return codes of yajl_parse / yajl_parse_complete (yajl 1.x).
YAJL_OK = 0
YAJL_CANCELLED = 1
YAJL_INSUFFICIENT_DATA = 2
YAJL_ERROR = 3


def basic_parse(f, allow_comments=False, check_utf8=False, buf_size=64 * 1024):
    '''
    Iterator yielding unprefixed events.

    Parameters:

    - f: a readable file-like object with JSON input
    - allow_comments: tells parser to allow comments in JSON input
    - check_utf8: if True, parser will cause an error if input is invalid utf-8
    - buf_size: a size of an input buffer

    Raises common.JSONError on malformed input and
    common.IncompleteJSONError when the input ends mid-document.
    '''
    # Parsed events accumulate here; the C callbacks below close over it.
    events = []

    def callback(event, func_type, func):
        # Wrap a Python converter into a C callback that records
        # (event_name, converted_value) into `events`.
        def c_callback(context, *args):
            events.append((event, func(*args)))
            return 1  # non-zero tells yajl to keep parsing
        return func_type(c_callback)

    # The Callbacks instance must outlive the handle — it holds the only
    # references to the C function pointers.
    callbacks = Callbacks(*[callback(*data) for data in _callback_data])
    config = Config(allow_comments, check_utf8)
    handle = yajl.yajl_alloc(byref(callbacks), byref(config), None, None)
    try:
        while True:
            buffer = f.read(buf_size)
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                # End of input: let yajl flush any pending state.
                result = yajl.yajl_parse_complete(handle)
            if result == YAJL_ERROR:
                # Ask yajl for a verbose (second arg = 1) error message,
                # copy it out, then free the C-owned string.
                perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer))
                error = cast(perror, c_char_p).value
                yajl.yajl_free_error(handle, perror)
                raise common.JSONError(error)
            if not buffer and not events:
                if result == YAJL_INSUFFICIENT_DATA:
                    raise common.IncompleteJSONError()
                break

            for event in events:
                yield event
            # Rebind to a fresh list for the next chunk's callbacks.
            events = []
    finally:
        yajl.yajl_free(handle)

def parse(file, **kwargs):
    '''
    Backend-specific wrapper for ijson.common.parse.
    '''
    return common.parse(basic_parse(file, **kwargs))

def items(file, prefix):
    '''
    Backend-specific wrapper for ijson.common.items.
    '''
    return common.items(parse(file), prefix)
from ctypes import Structure, c_uint, c_ubyte, c_int, c_long, c_double, \
    c_void_p, c_char_p, CFUNCTYPE, POINTER, byref, string_at, cast , \
    cdll, util, c_char
from decimal import Decimal

from ijson import common, backends
from ijson.compat import b2s


# Locate and load the yajl 1.x shared library.
yajl = backends.find_yajl(1)

# Both functions return C pointers; declare restype so ctypes does not
# truncate the returned addresses to ints.
yajl.yajl_alloc.restype = POINTER(c_char)
yajl.yajl_get_error.restype = POINTER(c_char)

# C callback signatures for the yajl_callbacks struct fields below.
C_EMPTY = CFUNCTYPE(c_int, c_void_p)
C_INT = CFUNCTYPE(c_int, c_void_p, c_int)
C_LONG = CFUNCTYPE(c_int, c_void_p, c_long)
C_DOUBLE = CFUNCTYPE(c_int, c_void_p, c_double)
C_STR = CFUNCTYPE(c_int, c_void_p, POINTER(c_ubyte), c_uint)


def number(value):
    '''
    Helper function casting a string that represents any Javascript number
    into appropriate Python value: either int or Decimal.
    '''
    try:
        return int(value)
    except ValueError:
        return Decimal(value)

_callback_data = [
    # Mapping of JSON parser events to callback C types and value converters.
    # Used to define the Callbacks structure and actual callback functions
    # inside the parse function.
    ('null', C_EMPTY, lambda: None),
    ('boolean', C_INT, lambda v: bool(v)),
    # "integer" and "double" aren't actually yielded by yajl since "number"
    # takes precedence if defined
    ('integer', C_LONG, lambda v, l: int(string_at(v, l))),
    ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))),
    ('number', C_STR, lambda v, l: number(b2s(string_at(v, l)))),
    ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')),
    ('start_map', C_EMPTY, lambda: None),
    ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))),
    ('end_map', C_EMPTY, lambda: None),
    ('start_array', C_EMPTY, lambda: None),
    ('end_array', C_EMPTY, lambda: None),
]

class Callbacks(Structure):
    # One C function-pointer field per yajl event, in the order yajl's
    # yajl_callbacks struct declares them (the order of _callback_data).
    _fields_ = [(name, type) for name, type, func in _callback_data]

class Config(Structure):
    # Mirrors yajl 1.x's yajl_parser_config struct: two unsigned flags.
    _fields_ = [
        ("allowComments", c_uint),
        ("checkUTF8", c_uint)
    ]

# yajl_status return codes of yajl_parse / yajl_parse_complete (yajl 1.x).
YAJL_OK = 0
YAJL_CANCELLED = 1
YAJL_INSUFFICIENT_DATA = 2
YAJL_ERROR = 3


def basic_parse(f, allow_comments=False, check_utf8=False, buf_size=64 * 1024):
    '''
    Iterator yielding unprefixed events.

    Parameters:

    - f: a readable file-like object with JSON input
    - allow_comments: tells parser to allow comments in JSON input
    - check_utf8: if True, parser will cause an error if input is invalid utf-8
    - buf_size: a size of an input buffer

    Raises common.JSONError on malformed input and
    common.IncompleteJSONError when the input ends mid-document.
    '''
    # Parsed events accumulate here; the C callbacks below close over it.
    events = []

    def callback(event, func_type, func):
        # Wrap a Python converter into a C callback that records
        # (event_name, converted_value) into `events`.
        def c_callback(context, *args):
            events.append((event, func(*args)))
            return 1  # non-zero tells yajl to keep parsing
        return func_type(c_callback)

    # The Callbacks instance must outlive the handle — it holds the only
    # references to the C function pointers.
    callbacks = Callbacks(*[callback(*data) for data in _callback_data])
    config = Config(allow_comments, check_utf8)
    handle = yajl.yajl_alloc(byref(callbacks), byref(config), None, None)
    try:
        while True:
            buffer = f.read(buf_size)
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                # End of input: let yajl flush any pending state.
                result = yajl.yajl_parse_complete(handle)
            if result == YAJL_ERROR:
                # Ask yajl for a verbose (second arg = 1) error message,
                # copy it out, then free the C-owned string.
                perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer))
                error = cast(perror, c_char_p).value
                yajl.yajl_free_error(handle, perror)
                raise common.JSONError(error)
            if not buffer and not events:
                if result == YAJL_INSUFFICIENT_DATA:
                    raise common.IncompleteJSONError()
                break

            for event in events:
                yield event
            # Rebind to a fresh list for the next chunk's callbacks.
            events = []
    finally:
        yajl.yajl_free(handle)

def parse(file, **kwargs):
    '''
    Backend-specific wrapper for ijson.common.parse.
    '''
    return common.parse(basic_parse(file, **kwargs))

def items(file, prefix):
    '''
    Backend-specific wrapper for ijson.common.items.
    '''
    return common.items(parse(file), prefix)
from ctypes import Structure, c_uint, c_ubyte, c_int, c_long, c_double, \
    c_void_p, c_char_p, CFUNCTYPE, POINTER, byref, string_at, cast , \
    cdll, util, c_char
from decimal import Decimal

from ijson import common, backends
from ijson.compat import b2s


# Locate and load the yajl 2.x shared library.
yajl = backends.find_yajl(2)

# Both functions return C pointers; declare restype so ctypes does not
# truncate the returned addresses to ints.
yajl.yajl_alloc.restype = POINTER(c_char)
yajl.yajl_get_error.restype = POINTER(c_char)

# C callback signatures for the yajl_callbacks struct fields below.
C_EMPTY = CFUNCTYPE(c_int, c_void_p)
C_INT = CFUNCTYPE(c_int, c_void_p, c_int)
C_LONG = CFUNCTYPE(c_int, c_void_p, c_long)
C_DOUBLE = CFUNCTYPE(c_int, c_void_p, c_double)
C_STR = CFUNCTYPE(c_int, c_void_p, POINTER(c_ubyte), c_uint)


def number(value):
    '''
    Helper function casting a string that represents any Javascript number
    into appropriate Python value: either int or Decimal.
    '''
    try:
        return int(value)
    except ValueError:
        return Decimal(value)

_callback_data = [
    # Mapping of JSON parser events to callback C types and value converters.
    # Used to define the Callbacks structure and actual callback functions
    # inside the parse function.
    ('null', C_EMPTY, lambda: None),
    ('boolean', C_INT, lambda v: bool(v)),
    # "integer" and "double" aren't actually yielded by yajl since "number"
    # takes precedence if defined
    ('integer', C_LONG, lambda v, l: int(string_at(v, l))),
    ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))),
    ('number', C_STR, lambda v, l: number(b2s(string_at(v, l)))),
    ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')),
    ('start_map', C_EMPTY, lambda: None),
    ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))),
    ('end_map', C_EMPTY, lambda: None),
    ('start_array', C_EMPTY, lambda: None),
    ('end_array', C_EMPTY, lambda: None),
]

class Callbacks(Structure):
    # One C function-pointer field per yajl event, in the order yajl's
    # yajl_callbacks struct declares them (the order of _callback_data).
    _fields_ = [(name, type) for name, type, func in _callback_data]

# yajl_status return codes (yajl 2.x).
YAJL_OK = 0
YAJL_CANCELLED = 1
YAJL_INSUFFICIENT_DATA = 2
YAJL_ERROR = 3

# constants defined in yajl_parse.h
YAJL_ALLOW_COMMENTS = 1
YAJL_MULTIPLE_VALUES = 8


def basic_parse(f, allow_comments=False, buf_size=64 * 1024,
                multiple_values=False):
    '''
    Iterator yielding unprefixed events.

    Parameters:

    - f: a readable file-like object with JSON input
    - allow_comments: tells parser to allow comments in JSON input
    - buf_size: a size of an input buffer
    - multiple_values: allows the parser to parse multiple JSON objects

    Raises common.JSONError on malformed input and
    common.IncompleteJSONError when the input ends mid-document.
    '''
    # Parsed events accumulate here; the C callbacks below close over it.
    events = []

    def callback(event, func_type, func):
        # Wrap a Python converter into a C callback that records
        # (event_name, converted_value) into `events`.
        def c_callback(context, *args):
            events.append((event, func(*args)))
            return 1  # non-zero tells yajl to keep parsing
        return func_type(c_callback)

    # The Callbacks instance must outlive the handle — it holds the only
    # references to the C function pointers.
    callbacks = Callbacks(*[callback(*data) for data in _callback_data])
    # yajl 2.x drops the Config struct; options are set via yajl_config.
    handle = yajl.yajl_alloc(byref(callbacks), None, None)
    if allow_comments:
        yajl.yajl_config(handle, YAJL_ALLOW_COMMENTS, 1)
    if multiple_values:
        yajl.yajl_config(handle, YAJL_MULTIPLE_VALUES, 1)
    try:
        while True:
            buffer = f.read(buf_size)
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                # End of input: let yajl flush any pending state.
                result = yajl.yajl_complete_parse(handle)
            if result == YAJL_ERROR:
                # Ask yajl for a verbose (second arg = 1) error message,
                # copy it out, then free the C-owned string.
                perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer))
                error = cast(perror, c_char_p).value
                yajl.yajl_free_error(handle, perror)
                raise common.JSONError(error)
            if not buffer and not events:
                if result == YAJL_INSUFFICIENT_DATA:
                    raise common.IncompleteJSONError()
                break

            for event in events:
                yield event
            # Rebind to a fresh list for the next chunk's callbacks.
            events = []
    finally:
        yajl.yajl_free(handle)

def parse(file, **kwargs):
    '''
    Backend-specific wrapper for ijson.common.parse.
    '''
    return common.parse(basic_parse(file, **kwargs))

def items(file, prefix):
    '''
    Backend-specific wrapper for ijson.common.items.
    '''
    return common.items(parse(file), prefix)
40 | ('null', C_EMPTY, lambda: None), 41 | ('boolean', C_INT, lambda v: bool(v)), 42 | # "integer" and "double" aren't actually yielded by yajl since "number" 43 | # takes precedence if defined 44 | ('integer', C_LONG, lambda v, l: int(string_at(v, l))), 45 | ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))), 46 | ('number', C_STR, lambda v, l: number(b2s(string_at(v, l)))), 47 | ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')), 48 | ('start_map', C_EMPTY, lambda: None), 49 | ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))), 50 | ('end_map', C_EMPTY, lambda: None), 51 | ('start_array', C_EMPTY, lambda: None), 52 | ('end_array', C_EMPTY, lambda: None), 53 | ] 54 | 55 | class Callbacks(Structure): 56 | _fields_ = [(name, type) for name, type, func in _callback_data] 57 | 58 | YAJL_OK = 0 59 | YAJL_CANCELLED = 1 60 | YAJL_INSUFFICIENT_DATA = 2 61 | YAJL_ERROR = 3 62 | 63 | # constants defined in yajl_parse.h 64 | YAJL_ALLOW_COMMENTS = 1 65 | YAJL_MULTIPLE_VALUES = 8 66 | 67 | 68 | def basic_parse(f, allow_comments=False, buf_size=64 * 1024, 69 | multiple_values=False): 70 | ''' 71 | Iterator yielding unprefixed events. 
72 | 73 | Parameters: 74 | 75 | - f: a readable file-like object with JSON input 76 | - allow_comments: tells parser to allow comments in JSON input 77 | - buf_size: a size of an input buffer 78 | - multiple_values: allows the parser to parse multiple JSON objects 79 | ''' 80 | events = [] 81 | 82 | def callback(event, func_type, func): 83 | def c_callback(context, *args): 84 | events.append((event, func(*args))) 85 | return 1 86 | return func_type(c_callback) 87 | 88 | callbacks = Callbacks(*[callback(*data) for data in _callback_data]) 89 | handle = yajl.yajl_alloc(byref(callbacks), None, None) 90 | if allow_comments: 91 | yajl.yajl_config(handle, YAJL_ALLOW_COMMENTS, 1) 92 | if multiple_values: 93 | yajl.yajl_config(handle, YAJL_MULTIPLE_VALUES, 1) 94 | try: 95 | while True: 96 | buffer = f.read(buf_size) 97 | if buffer: 98 | result = yajl.yajl_parse(handle, buffer, len(buffer)) 99 | else: 100 | result = yajl.yajl_complete_parse(handle) 101 | if result == YAJL_ERROR: 102 | perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer)) 103 | error = cast(perror, c_char_p).value 104 | yajl.yajl_free_error(handle, perror) 105 | raise common.JSONError(error) 106 | if not buffer and not events: 107 | if result == YAJL_INSUFFICIENT_DATA: 108 | raise common.IncompleteJSONError() 109 | break 110 | 111 | for event in events: 112 | yield event 113 | events = [] 114 | finally: 115 | yajl.yajl_free(handle) 116 | 117 | def parse(file, **kwargs): 118 | ''' 119 | Backend-specific wrapper for ijson.common.parse. 120 | ''' 121 | return common.parse(basic_parse(file, **kwargs)) 122 | 123 | def items(file, prefix): 124 | ''' 125 | Backend-specific wrapper for ijson.common.items. 
126 | ''' 127 | return common.items(parse(file), prefix) 128 | -------------------------------------------------------------------------------- /heatmap/dbfUtils.py: -------------------------------------------------------------------------------- 1 | import struct, datetime, decimal, itertools 2 | # dbfUtils.py 3 | # By Raymond Hettinger 4 | # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/362715 5 | 6 | def dbfreader(f): 7 | """Returns an iterator over records in a Xbase DBF file. 8 | 9 | The first row returned contains the field names. 10 | The second row contains field specs: (type, size, decimal places). 11 | Subsequent rows contain the data records. 12 | If a record is marked as deleted, it is skipped. 13 | 14 | File should be opened for binary reads. 15 | 16 | """ 17 | # See DBF format spec at: 18 | # http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT 19 | 20 | numrec, lenheader = struct.unpack(') 28 | ('number', ) 29 | ('string', ) 30 | ('map_key', ) 31 | ('start_map', None) 32 | ('end_map', None) 33 | ('start_array', None) 34 | ('end_array', None) 35 | 36 | Prefixes represent the path to the nested elements from the root of the JSON 37 | document. 
For example, given this document:: 38 | 39 | { 40 | "array": [1, 2], 41 | "map": { 42 | "key": "value" 43 | } 44 | } 45 | 46 | the parser would yield events: 47 | 48 | ('', 'start_map', None) 49 | ('', 'map_key', 'array') 50 | ('array', 'start_array', None) 51 | ('array.item', 'number', 1) 52 | ('array.item', 'number', 2) 53 | ('array', 'end_array', None) 54 | ('', 'map_key', 'map') 55 | ('map', 'start_map', None) 56 | ('map', 'map_key', 'key') 57 | ('map.key', 'string', u'value') 58 | ('map', 'end_map', None) 59 | ('', 'end_map', None) 60 | 61 | ''' 62 | path = [] 63 | for event, value in basic_events: 64 | if event == 'map_key': 65 | prefix = '.'.join(path[:-1]) 66 | path[-1] = value 67 | elif event == 'start_map': 68 | prefix = '.'.join(path) 69 | path.append(None) 70 | elif event == 'end_map': 71 | path.pop() 72 | prefix = '.'.join(path) 73 | elif event == 'start_array': 74 | prefix = '.'.join(path) 75 | path.append('item') 76 | elif event == 'end_array': 77 | path.pop() 78 | prefix = '.'.join(path) 79 | else: # any scalar value 80 | prefix = '.'.join(path) 81 | 82 | yield prefix, event, value 83 | 84 | 85 | class ObjectBuilder(object): 86 | ''' 87 | Incrementally builds an object from JSON parser events. Events are passed 88 | into the `event` function that accepts two parameters: event type and 89 | value. The object being built is available at any time from the `value` 90 | attribute. 
91 | 92 | Example:: 93 | 94 | from StringIO import StringIO 95 | from ijson.parse import basic_parse 96 | from ijson.utils import ObjectBuilder 97 | 98 | builder = ObjectBuilder() 99 | f = StringIO('{"key": "value"}) 100 | for event, value in basic_parse(f): 101 | builder.event(event, value) 102 | print builder.value 103 | 104 | ''' 105 | def __init__(self): 106 | def initial_set(value): 107 | self.value = value 108 | self.containers = [initial_set] 109 | 110 | def event(self, event, value): 111 | if event == 'map_key': 112 | self.key = value 113 | elif event == 'start_map': 114 | map = {} 115 | self.containers[-1](map) 116 | def setter(value): 117 | map[self.key] = value 118 | self.containers.append(setter) 119 | elif event == 'start_array': 120 | array = [] 121 | self.containers[-1](array) 122 | self.containers.append(array.append) 123 | elif event == 'end_array' or event == 'end_map': 124 | self.containers.pop() 125 | else: 126 | self.containers[-1](value) 127 | 128 | def items(prefixed_events, prefix): 129 | ''' 130 | An iterator returning native Python objects constructed from the events 131 | under a given prefix. 132 | ''' 133 | prefixed_events = iter(prefixed_events) 134 | try: 135 | while True: 136 | current, event, value = next(prefixed_events) 137 | if current == prefix: 138 | if event in ('start_map', 'start_array'): 139 | builder = ObjectBuilder() 140 | end_event = event.replace('start', 'end') 141 | while (current, event) != (prefix, end_event): 142 | builder.event(event, value) 143 | current, event, value = next(prefixed_events) 144 | yield builder.value 145 | else: 146 | yield value 147 | except StopIteration: 148 | pass 149 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/ijson/common.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Backend independent higher level interfaces, common exceptions. 
3 | ''' 4 | 5 | class JSONError(Exception): 6 | ''' 7 | Base exception for all parsing errors. 8 | ''' 9 | pass 10 | 11 | class IncompleteJSONError(JSONError): 12 | ''' 13 | Raised when the parser expects data and it's not available. May be 14 | caused by malformed syntax or a broken source stream. 15 | ''' 16 | def __init__(self): 17 | super(IncompleteJSONError, self).__init__('Incomplete or empty JSON data') 18 | 19 | def parse(basic_events): 20 | ''' 21 | An iterator returning parsing events with the information about their location 22 | with the JSON object tree. Events are tuples ``(prefix, type, value)``. 23 | 24 | Available types and values are: 25 | 26 | ('null', None) 27 | ('boolean', ) 28 | ('number', ) 29 | ('string', ) 30 | ('map_key', ) 31 | ('start_map', None) 32 | ('end_map', None) 33 | ('start_array', None) 34 | ('end_array', None) 35 | 36 | Prefixes represent the path to the nested elements from the root of the JSON 37 | document. For example, given this document:: 38 | 39 | { 40 | "array": [1, 2], 41 | "map": { 42 | "key": "value" 43 | } 44 | } 45 | 46 | the parser would yield events: 47 | 48 | ('', 'start_map', None) 49 | ('', 'map_key', 'array') 50 | ('array', 'start_array', None) 51 | ('array.item', 'number', 1) 52 | ('array.item', 'number', 2) 53 | ('array', 'end_array', None) 54 | ('', 'map_key', 'map') 55 | ('map', 'start_map', None) 56 | ('map', 'map_key', 'key') 57 | ('map.key', 'string', u'value') 58 | ('map', 'end_map', None) 59 | ('', 'end_map', None) 60 | 61 | ''' 62 | path = [] 63 | for event, value in basic_events: 64 | if event == 'map_key': 65 | prefix = '.'.join(path[:-1]) 66 | path[-1] = value 67 | elif event == 'start_map': 68 | prefix = '.'.join(path) 69 | path.append(None) 70 | elif event == 'end_map': 71 | path.pop() 72 | prefix = '.'.join(path) 73 | elif event == 'start_array': 74 | prefix = '.'.join(path) 75 | path.append('item') 76 | elif event == 'end_array': 77 | path.pop() 78 | prefix = '.'.join(path) 79 | else: # any 
scalar value 80 | prefix = '.'.join(path) 81 | 82 | yield prefix, event, value 83 | 84 | 85 | class ObjectBuilder(object): 86 | ''' 87 | Incrementally builds an object from JSON parser events. Events are passed 88 | into the `event` function that accepts two parameters: event type and 89 | value. The object being built is available at any time from the `value` 90 | attribute. 91 | 92 | Example:: 93 | 94 | from StringIO import StringIO 95 | from ijson.parse import basic_parse 96 | from ijson.utils import ObjectBuilder 97 | 98 | builder = ObjectBuilder() 99 | f = StringIO('{"key": "value"}) 100 | for event, value in basic_parse(f): 101 | builder.event(event, value) 102 | print builder.value 103 | 104 | ''' 105 | def __init__(self): 106 | def initial_set(value): 107 | self.value = value 108 | self.containers = [initial_set] 109 | 110 | def event(self, event, value): 111 | if event == 'map_key': 112 | self.key = value 113 | elif event == 'start_map': 114 | map = {} 115 | self.containers[-1](map) 116 | def setter(value): 117 | map[self.key] = value 118 | self.containers.append(setter) 119 | elif event == 'start_array': 120 | array = [] 121 | self.containers[-1](array) 122 | self.containers.append(array.append) 123 | elif event == 'end_array' or event == 'end_map': 124 | self.containers.pop() 125 | else: 126 | self.containers[-1](value) 127 | 128 | def items(prefixed_events, prefix): 129 | ''' 130 | An iterator returning native Python objects constructed from the events 131 | under a given prefix. 
132 | ''' 133 | prefixed_events = iter(prefixed_events) 134 | try: 135 | while True: 136 | current, event, value = next(prefixed_events) 137 | if current == prefix: 138 | if event in ('start_map', 'start_array'): 139 | builder = ObjectBuilder() 140 | end_event = event.replace('start', 'end') 141 | while (current, event) != (prefix, end_event): 142 | builder.event(event, value) 143 | current, event, value = next(prefixed_events) 144 | yield builder.value 145 | else: 146 | yield value 147 | except StopIteration: 148 | pass 149 | -------------------------------------------------------------------------------- /metadata/get_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | #Input: 4 | #1) url, i.e: http://data.austintexas.gov 5 | #2) output path 6 | #Get number of datasets, metadata (name, description, tags) and schema using following APIs: 7 | #url + /api/views.json?count=True: COUNT 8 | #url + /api/views.json?limit=200&page=1: METADATA 9 | #url + /api/views/ + datasetid + .json: SCHEMA 10 | 11 | import json 12 | import urllib 13 | import codecs 14 | import sys 15 | import os.path 16 | 17 | def loadIDs(outpath): 18 | #Input 19 | filepath = outpath + '/metadata.csv' 20 | if not os.path.isfile(filepath): 21 | return set() 22 | ids = set([]) 23 | with open(filepath) as f: 24 | for line in f: 25 | _array = line.split('\t') 26 | ids.add(_array[0]) 27 | print 'Done loading IDs' 28 | return ids 29 | 30 | def main(argv): 31 | url = argv[0] #input 32 | city = argv[1] 33 | outpath = argv[2] #output 34 | #Get the number of datasets 35 | urlhandle = urllib.urlopen(url + '/api/views.json?count=True') 36 | content = urlhandle.read() 37 | js = json.loads(content) 38 | count = js['count'] 39 | 40 | #Output 41 | meta_f = codecs.open(outpath + '/' + city + '_metadata.csv', 'a', 'utf-8') 42 | tag_f = codecs.open(outpath + '/' + city + '_tags.csv', 'a', 'utf-8') 43 | schema_f = 
codecs.open(outpath + '/' + city + '_schema.csv', 'a', 'utf-8') 44 | id_tag_f = codecs.open(outpath + '/' + city + '_id_tag.csv', 'a', 'utf-8') 45 | id_downloadcount_f = codecs.open(outpath + '/' + city + '_id_downloadcount.csv', 'a', 'utf-8') 46 | id_viewcount_f = codecs.open(outpath + '/' + city + '_id_viewcount.csv', 'a', 'utf-8') 47 | id_date_f = open(outpath + '/' + city + '_id_date.csv', 'w') 48 | id_size_f = open(outpath + '/' + city + '_id_size.csv', 'w') 49 | 50 | #Load id of the datasets whose metadatas were already retrieved 51 | ids = loadIDs(outpath) 52 | 53 | #Get metadata of the all datasets 54 | #Metadata for one dataset is formated in one line. Each attribute value is seperated by tab character and empty value is replaced by the string 'Null' 55 | print 'Total: ' + str(count) 56 | pages = count/200 + 1 #total number of pages 57 | 58 | for i in range(1, pages+1): 59 | sys.stdout.write('Getting data from page ' + str(i) + ' ... ') 60 | urlhandle = urllib.urlopen(url + "/api/views.json?limit=200&page=" + str(i)) 61 | 62 | content = urlhandle.read() 63 | js = json.loads(content) 64 | for j in range(0, len(js)): 65 | #Check whether the metadata was already retrieved 66 | _id = js[j]['id'] 67 | if _id in ids: 68 | continue 69 | 70 | #Get metadata of each dataset 71 | #ID and NAME 72 | id = js[j]['id'] 73 | meta = id + '\t' + js[j]['name'] 74 | 75 | #DESCRIPTION 76 | if js[j].has_key('description'): 77 | meta_f.write(meta + "\t" + js[j]['description'].replace('\n', ' ') + "\n") 78 | #else: 79 | # meta_f.write(meta + '\t' + 'null\n') 80 | 81 | #View count 82 | if js[j].has_key('viewCount'): 83 | id_viewcount_f.write(id + "\t" + str(js[j]["viewCount"]) + "\n") 84 | else: 85 | id_viewcount_f.write(id + "\tnull\n") 86 | 87 | #Download count 88 | if js[j].has_key('downloadCount'): 89 | id_downloadcount_f.write(id + "\t" + str(js[j]["downloadCount"]) + "\n") 90 | else: 91 | id_downloadcount_f.write(id + "\tnull\n") 92 | 93 | #Publication date 94 | pdate = 
"None" 95 | cdate = "None" 96 | if js[j].has_key('publicationDate'): 97 | pdate = str(js[j]['publicationDate']) #Timestamp format 98 | #Created date 99 | if js[j].has_key('createdAt'): 100 | cdate = str(js[j]['createdAt']) #Timestamp format 101 | id_date_f.write(id + "\t" + cdate + "\t" + pdate + "\n") 102 | 103 | #blobsize 104 | bsize = "None" 105 | if js[j].has_key('blobFileSize'): 106 | bsize = str(js[j]['blobFileSize']) 107 | id_size_f.write(id + "\t" + bsize + "\n") 108 | 109 | #TAGS 110 | tag = '' 111 | if js[j].has_key('tags'): 112 | for t in js[j]['tags']: 113 | tag_f.write(t + '\n') 114 | tag = tag + ' ' + t 115 | id_tag_f.write(id + "\t" + tag + "\n") 116 | else: 117 | id_tag_f.write(id + "\tnull\n") 118 | 119 | #Get schema of each dataset 120 | schemaurl = url + "/api/views/" + js[j]['id'] + '.json' 121 | aJS = json.loads(urllib.urlopen(schemaurl).read()) 122 | if aJS.has_key('columns'): 123 | schema_js = aJS['columns'] 124 | schema = js[j]['id'] 125 | if schema_js != None: 126 | for field in schema_js: 127 | schema = schema + '\t' + field['fieldName'] 128 | schema_f.write(schema + '\n') 129 | else: 130 | print js[j]['id'] 131 | 132 | print 'Done' 133 | print 'Done' 134 | meta_f.close() 135 | tag_f.close() 136 | schema_f.close() 137 | id_tag_f.close() 138 | id_downloadcount_f.close() 139 | id_viewcount_f.close() 140 | id_date_f.close() 141 | id_size_f.close() 142 | 143 | if __name__ == "__main__": 144 | main(sys.argv[1:]) 145 | -------------------------------------------------------------------------------- /latlon_to_zipcode/Neighborhoods.hpp: -------------------------------------------------------------------------------- 1 | //############################################################################# 2 | //// 3 | //// Copyright (C) 2014, New York University. 4 | //// All rights reserved. 
5 | //// Contact: huy.vo@nyu.edu, kien.pham@nyu.edu 6 | //// 7 | //// "Redistribution and use in source and binary forms, with or without 8 | //// modification, are permitted provided that the following conditions are met: 9 | //// 10 | //// - Redistributions of source code must retain the above copyright notice, 11 | //// this list of conditions and the following disclaimer. 12 | //// - Redistributions in binary form must reproduce the above copyright 13 | //// notice, this list of conditions and the following disclaimer in the 14 | //// documentation and/or other materials provided with the distribution. 15 | //// - Neither the name of New York University nor the names of its 16 | //// contributors may be used to endorse or promote products derived from 17 | //// this software without specific prior written permission. 18 | //// 19 | //// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | //// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 | //// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 | //// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 23 | //// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | //// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | //// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 26 | //// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | //// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 | //// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | //// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." 
30 | //// 31 | ////############################################################################# 32 | #ifndef NEIGHBORHOODS_HPP 33 | #define NEIGHBORHOODS_HPP 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | class Neighborhoods 42 | { 43 | public: 44 | 45 | typedef std::vector< std::pair > Geometry; 46 | typedef boost::unordered_map GeometryMap; 47 | 48 | Neighborhoods() {} 49 | 50 | Neighborhoods(const char *filename) 51 | { 52 | this->loadFromFile(filename); 53 | } 54 | 55 | void loadFromFile(const char *filename) 56 | { 57 | char name[128]; 58 | int N, nPoly, nPoint; 59 | float lat, lon; 60 | Geometry poly; 61 | 62 | FILE *fi = fopen(filename, "r"); 63 | fscanf(fi, "%d", &N); 64 | fgets(name, sizeof(name), fi); 65 | 66 | this->geometries.clear(); 67 | for (int i=0; igeometries[atoi(name)] = poly; 80 | } 81 | fclose(fi); 82 | std::cout<<"File loaded! Number of elements: "<bounds[2]) bounds[2] = geom[i].first; 97 | if (geom[i].secondbounds[3]) bounds[3] = geom[i].second; 99 | } 100 | } 101 | } 102 | 103 | static bool isInside(int nvert, float *vert, float testx, float testy) 104 | { 105 | if (nvert<=0) return true; 106 | float firstX = vert[0]; 107 | float firstY = vert[1]; 108 | int i, j, c = 0; 109 | for (i = 1, j = 0; i < nvert; j = i++) { 110 | if ( ((vert[i*2+1]>testy) != (vert[j*2+1]>testy)) && 111 | (testx < (vert[j*2]-vert[i*2]) * (testy-vert[i*2+1]) / (vert[j*2+1]-vert[i*2+1]) + vert[i*2]) ) 112 | c = !c; 113 | if (vert[i*2]==firstX && vert[i*2+1]==firstY) { 114 | if (++ilon) != (vert[j].second>lon)) && 130 | (lat < (vert[j].first-vert[i].first) * (lon-vert[i].second) / (vert[j].second-vert[i].second) + vert[i].first) ) 131 | c = !c; 132 | } 133 | return c; 134 | } 135 | 136 | private: 137 | GeometryMap geometries; 138 | }; 139 | 140 | #endif 141 | -------------------------------------------------------------------------------- /type_detection/sample.py: 
-------------------------------------------------------------------------------- 1 | ############################################################################### 2 | ## 3 | ## Copyright (C) 2014, New York University. 4 | ## All rights reserved. 5 | ## Contact: kien.pham@nyu.edu 6 | ## 7 | ## "Redistribution and use in source and binary forms, with or without 8 | ## modification, are permitted provided that the following conditions are met: 9 | ## 10 | ## - Redistributions of source code must retain the above copyright notice, 11 | ## this list of conditions and the following disclaimer. 12 | ## - Redistributions in binary form must reproduce the above copyright 13 | ## notice, this list of conditions and the following disclaimer in the 14 | ## documentation and/or other materials provided with the distribution. 15 | ## - Neither the name of New York University nor the names of its 16 | ## contributors may be used to endorse or promote products derived from 17 | ## this software without specific prior written permission. 18 | ## 19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 | ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." 
30 | ## 31 | ############################################################################### 32 | 33 | import sys 34 | import ijson 35 | import os.path 36 | import re 37 | 38 | def get_schema(filename): 39 | ''' 40 | Extract column names of a given dataset from JSON file 41 | ''' 42 | schema = [] 43 | schema_set = set() 44 | try: 45 | if filename[-4:]=='json': 46 | count = 0 47 | with open(filename) as lines: 48 | for line in lines: 49 | if re.search("\"data\" :", line) is None: 50 | if line[-2] == ',': 51 | kv = line.strip("\n ,").split(" : ") 52 | if len(kv)==2: 53 | k = kv[0].strip("\"") 54 | v = kv[1].strip("\"") 55 | if k=="fieldName": 56 | count += 1 57 | if count>8: 58 | if v not in schema_set: 59 | schema.append(v) 60 | schema_set.add(v) 61 | else: 62 | break 63 | else: 64 | print filename + ' is not JSON file' 65 | except Exception as ex: 66 | print ex 67 | print "Error line: " + str(sys.exc_traceback.tb_lineno) 68 | return schema 69 | 70 | def is_none(item): 71 | ''' 72 | Check if an object is None or list of None items 73 | ''' 74 | if item == None: 75 | return True 76 | if type(item) == list: 77 | for e in item: 78 | if e != None: 79 | return False 80 | return True 81 | else: 82 | return False 83 | 84 | def tostr(obj): 85 | ''' 86 | Convert an object to string, lower case and remove end line characters 87 | ''' 88 | if type(obj) is unicode: 89 | return obj.encode('utf-8').replace("\n", "").lower() 90 | else: 91 | return str(obj).replace("\n", " ").lower() 92 | 93 | def sample(data_path, id, output_path, city, max, MAX): 94 | filename = data_path + "/" + id + ".json" 95 | if not os.path.isfile(filename): 96 | return 97 | 98 | output_file = output_path + "/" + city + "_" + id + ".txt" 99 | if os.path.isfile(output_file): 100 | print "File " + output_file + " is existed." 
101 | return 102 | output = open(output_file, "w") 103 | schema = get_schema(filename) 104 | 105 | count = 0 106 | item = [] 107 | try: 108 | filehandle = open(filename) 109 | data = ijson.items(filehandle, "data.item") 110 | values_list = [] 111 | for atb in schema: 112 | values_list.append([atb]) 113 | for item in data: 114 | count += 1 115 | if count == MAX: 116 | break 117 | item = item[8:] 118 | if count == 1: #only do this once 119 | values_list = values_list[0:len(item)] 120 | for i in range(len(item)): 121 | if (len(values_list[i]) max: 68 | max = m[zipcode] 69 | unit = max/8 70 | print max 71 | 72 | for i in range(0,len(shpRecords)): 73 | # x and y are empty lists to be populated with the coordinates of each geometry. 74 | x = [] 75 | y = [] 76 | for j in range(0,len(shpRecords[i]['shp_data']['parts'][0]['points'])): 77 | # This is the number of vertices in the ith geometry. 78 | # The parts list is [0] as it is singlepart. 79 | # get x and y coordinates. 80 | tempx = float(shpRecords[i]['shp_data']['parts'][0]['points'][j]['x']) 81 | tempy = float(shpRecords[i]['shp_data']['parts'][0]['points'][j]['y']) 82 | x.append(tempx) 83 | y.append(tempy) # Populate the lists 84 | 85 | # Creates a polygon in matplotlib for each geometry in the shapefile 86 | zipcode = shpRecords[i]["dbf_data"]["ZIP"] 87 | if m.has_key(zipcode): 88 | colour = colours[m[zipcode]/unit] 89 | else: 90 | colour = colours[0] 91 | plt.fill(x, y, fc=colour, ec='0.7', lw=0.1) 92 | 93 | #Create legend 94 | p0 = plt.Rectangle((0, 0), 1, 1, fc=colours[0]) 95 | p1 = plt.Rectangle((0, 0), 1, 1, fc=colours[1]) 96 | p2 = plt.Rectangle((0, 0), 1, 1, fc=colours[2]) 97 | p3 = plt.Rectangle((0, 0), 1, 1, fc=colours[3]) 98 | p4 = plt.Rectangle((0, 0), 1, 1, fc=colours[4]) 99 | p5 = plt.Rectangle((0, 0), 1, 1, fc=colours[5]) 100 | p6 = plt.Rectangle((0, 0), 1, 1, fc=colours[6]) 101 | p7 = plt.Rectangle((0, 0), 1, 1, fc=colours[7]) 102 | p8 = plt.Rectangle((0, 0), 1, 1, fc=colours[8]) 103 | extra = 
plt.Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0) 104 | 105 | plt.legend([extra, p0,p1,p2,p3,p4,p5,p6,p7,p8],\ 106 | ["Number of records",\ 107 | "0-%d records" %((1*unit-1)),\ 108 | "%d-%d records" %(1*unit,(2*unit-1)),\ 109 | "%d-%d records" %(2*unit,(3*unit-1)),\ 110 | "%d-%d records" %(3*unit,(4*unit-1)),\ 111 | "%d-%d records" %(4*unit,(5*unit-1)),\ 112 | "%d-%d records" %(5*unit,(6*unit-1)),\ 113 | "%d-%d records" %(6*unit,(7*unit-1)),\ 114 | "%d-%d records" %(7*unit,(8*unit-1)),\ 115 | ">%d records" %(8*unit)],\ 116 | prop={'size':10}, loc = 1) 117 | 118 | plt.legend([extra, p0,p1,p2,p3,p4,p5,p6,p7,p8],\ 119 | ["Number of records",\ 120 | "0-%dk" %((1*unit/1000-1)),\ 121 | "%dk-%dk" %(1*unit/1000,(2*unit/1000-1)),\ 122 | "%dk-%dk" %(2*unit/1000,(3*unit/1000-1)),\ 123 | "%dk-%dk" %(3*unit/1000,(4*unit/1000-1)),\ 124 | "%dk-%dk" %(4*unit/1000,(5*unit/1000-1)),\ 125 | "%dk-%dk" %(5*unit/1000,(6*unit/1000-1)),\ 126 | "%dk-%dk" %(6*unit/1000,(7*unit/1000-1)),\ 127 | "%dk-%dk" %(7*unit/1000,(8*unit/1000-1)),\ 128 | ">%dk" %(8*unit/1000)],\ 129 | prop={'size':10.6}, loc = 3) 130 | 131 | #plt.title("Chicago ZipCode Overlap") 132 | plt.axis('off') 133 | plt.savefig('chicagoallzipcode.jpg', format='jpg', dpi=700) 134 | plt.show() 135 | 136 | -------------------------------------------------------------------------------- /heatmap/nyc.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | ## 3 | ## Copyright (C) 2014, New York University. 4 | ## All rights reserved. 5 | ## Contact: kien.pham@nyu.edu 6 | ## 7 | ## "Redistribution and use in source and binary forms, with or without 8 | ## modification, are permitted provided that the following conditions are met: 9 | ## 10 | ## - Redistributions of source code must retain the above copyright notice, 11 | ## this list of conditions and the following disclaimer. 
12 | ## - Redistributions in binary form must reproduce the above copyright 13 | ## notice, this list of conditions and the following disclaimer in the 14 | ## documentation and/or other materials provided with the distribution. 15 | ## - Neither the name of New York University nor the names of its 16 | ## contributors may be used to endorse or promote products derived from 17 | ## this software without specific prior written permission. 18 | ## 19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 | ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." 
30 | ## 31 | ############################################################################### 32 | 33 | 34 | import shpUtils 35 | import matplotlib.pyplot as plt 36 | 37 | 38 | def loadZipcode(zipcodefile): 39 | m = {} 40 | with open(zipcodefile) as lines: 41 | for line in lines: 42 | a = line.strip("\n").split("\t") 43 | zipcode = a[0] 44 | occurencyNumb = int(a[1]) 45 | m[zipcode] = occurencyNumb 46 | return m 47 | 48 | #Declare inputs 49 | zipcodefile = "nyc.csv" 50 | shapefile = "shapefile/nyc_zipcta.shp" 51 | #define colours 52 | #colours = {0:"#F7FCF0", 1:"#E0F3DB", 2:"#CCEBC5", 3:"#A8DDB5", 4:"#7BCCC4", 5:"#4EB3D3", 6:"#2B8CBE", 7:"#0868AC", 8:"#084081"} 53 | colours = {0:"#ffffff", 1:"#fcfcff", 2:"#ebecff", 3:"#ebecff", 4:"#dadcff", 5:"#c9ccff", 6:"#b8bcff", 7:"#a7acff", 8:"#969cff", 9:"#858cff", 10:"#747cff", 11:"#636cff", 12:"#525dff", 13:"#414dff", 14:"#303dff", 15:"#1f2dff", 16:"#0e1dff", 17:"#0010fc", 18:"#000feb", 19:"#000eda", 20:"#000dc9", 21:"#000bb8", 22:"#000aa7"} 54 | #colours = {0:"#F7FCF0", 1:"#F7FCF0", 2:"#E0F3DB", 3:"#E0F3DB", 4:"#CCEBC5", 5:"#CCEBC5", 6:"#A8DDB5", 7:"#7BCCC4", 8:"#4EB3D3", 9:"#2B8CBE", 10:"#0868AC", 11:"#084081"} 55 | #colours = {0:"", 1:"", 2:"", 3:"", 4:"", 5:"", 6:"", 7:"", 8:""} 56 | #colours = {0:"#FFF7EC", 1:"#FEE8C8", 2:"#FDD49E", 3:"#FDBB84", 4:"#FC8D59", 5:"#EF6548", 6:"#D7301F", 7:"#B30000", 8:"#7F0000"} 57 | 58 | # load the shapefile 59 | shpRecords = shpUtils.loadShapefile(shapefile) 60 | # load zipcodefile 61 | m = loadZipcode(zipcodefile) 62 | max = 0 63 | min = 1000 64 | for i in range(0,len(shpRecords)): 65 | zipcode = shpRecords[i]["dbf_data"]["ZCTA5CE00"] 66 | if m[zipcode] > max: 67 | max = m[zipcode] 68 | if m[zipcode] < min: 69 | min = m[zipcode] 70 | unit = (max-min)/22 71 | print max 72 | print min 73 | for key in m.keys(): 74 | m[key] = m[key] - min 75 | 76 | for i in range(0,len(shpRecords)): 77 | # x and y are empty lists to be populated with the coordinates of each geometry. 
78 | x = [] 79 | y = [] 80 | for j in range(0,len(shpRecords[i]['shp_data']['parts'][0]['points'])): 81 | # This is the number of vertices in the ith geometry. 82 | # The parts list is [0] as it is singlepart. 83 | # get x and y coordinates. 84 | tempx = float(shpRecords[i]['shp_data']['parts'][0]['points'][j]['x']) 85 | tempy = float(shpRecords[i]['shp_data']['parts'][0]['points'][j]['y']) 86 | x.append(tempx) 87 | y.append(tempy) # Populate the lists 88 | 89 | # Creates a polygon in matplotlib for each geometry in the shapefile 90 | zipcode = shpRecords[i]["dbf_data"]["ZCTA5CE00"] 91 | if m.has_key(zipcode): 92 | colour = colours[m[zipcode]/unit] 93 | else: 94 | colour = colours[0] 95 | plt.fill(x, y, fc=colour, ec='0.7', lw=0.1) 96 | 97 | #Create legend 98 | p0 = plt.Rectangle((0, 0), 1, 1, fc=colours[0]) 99 | p1 = plt.Rectangle((0, 0), 1, 1, fc=colours[2]) 100 | p2 = plt.Rectangle((0, 0), 1, 1, fc=colours[4]) 101 | p3 = plt.Rectangle((0, 0), 1, 1, fc=colours[6]) 102 | p4 = plt.Rectangle((0, 0), 1, 1, fc=colours[8]) 103 | p5 = plt.Rectangle((0, 0), 1, 1, fc=colours[10]) 104 | p6 = plt.Rectangle((0, 0), 1, 1, fc=colours[12]) 105 | p7 = plt.Rectangle((0, 0), 1, 1, fc=colours[14]) 106 | p8 = plt.Rectangle((0, 0), 1, 1, fc=colours[16]) 107 | p9 = plt.Rectangle((0, 0), 1, 1, fc=colours[18]) 108 | p10 = plt.Rectangle((0, 0), 1, 1, fc=colours[20]) 109 | p11 = plt.Rectangle((0, 0), 1, 1, fc=colours[22]) 110 | extra = plt.Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0) 111 | #This legend will show the exact number 112 | plt.legend([p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11],\ 113 | ["0-%d records" %((2*unit-1)),\ 114 | "%d-%d records" %(2*unit,(4*unit-1)),\ 115 | "%d-%d records" %(4*unit,(6*unit-1)),\ 116 | "%d-%d records" %(6*unit,(8*unit-1)),\ 117 | "%d-%d records" %(8*unit,(10*unit-1)),\ 118 | "%d-%d records" %(10*unit,(12*unit-1)),\ 119 | "%d-%d records" %(12*unit,(14*unit-1)),\ 120 | "%d-%d records" %(14*unit,(16*unit-1)),\ 121 | "%d-%d 
records" %(16*unit,(18*unit-1)),\ 122 | "%d-%d records" %(18*unit,(20*unit-1)),\ 123 | "%d-%d records" %(20*unit,(22*unit-1)),\ 124 | ">%d records" %(22*unit)],\ 125 | prop={'size':8}, loc = 2) 126 | 127 | #The short version of legend: 128 | plt.legend([extra, p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11],\ 129 | ["Number of records",\ 130 | "0-%dk" %((2*unit/1000-1)),\ 131 | "%dk-%dk" %(2*unit/1000,(4*unit/1000-1)),\ 132 | "%dk-%dk" %(4*unit/1000,(6*unit/1000-1)),\ 133 | "%dk-%dk" %(6*unit/1000,(8*unit/1000-1)),\ 134 | "%dk-%dk" %(8*unit/1000,(10*unit/1000-1)),\ 135 | "%dk-%dk" %(10*unit/1000,(12*unit/1000-1)),\ 136 | "%dk-%dk" %(12*unit/1000,(14*unit/1000-1)),\ 137 | "%dk-%dk" %(14*unit/1000,(16*unit/1000-1)),\ 138 | "%dk-%dk" %(16*unit/1000,(18*unit/1000-1)),\ 139 | "%dk-%dk" %(18*unit/1000,(20*unit/1000-1)),\ 140 | "%dk-%dk" %(20*unit/1000,(22*unit/1000-1)),\ 141 | ">%dk" %(22*unit/1000)],\ 142 | prop={'size':10.6}, loc = 2) 143 | 144 | #plt.title("NYC ZipCode Overlap") 145 | plt.axis('off') 146 | plt.savefig('nycallzipcodeoverlap.jpg', format='jpg', dpi=700) 147 | plt.show() 148 | 149 | -------------------------------------------------------------------------------- /download/ids/edmonton_ids.txt: -------------------------------------------------------------------------------- 1 | sthd-gad4 2 | 62wr-c9ye 3 | cggb-hzzm 4 | yutc-c5ju 5 | 7gh5-bnbs 6 | htrg-yirr 7 | 5zeu-wkpv 8 | jyra-si4k 9 | ickf-52qg 10 | 65fr-66s6 11 | 7yqh-39tz 12 | h9sn-ds8j 13 | id8i-xwha 14 | 2d5a-esdr 15 | w4cu-waf8 16 | k29i-u25u 17 | qh6u-3haa 18 | febx-sqja 19 | ebvt-eg97 20 | ehbr-emhe 21 | qsmn-3w99 22 | c4nr-3quz 23 | 25b5-e682 24 | f2bf-5wqc 25 | 2n87-9vjk 26 | ih46-vxeq 27 | ieti-capx 28 | jn25-zspi 29 | ia7a-fuzq 30 | 83ud-pzgi 31 | gem5-v2v7 32 | zxf6-rv9a 33 | rm66-tzy5 34 | 7ndn-2x34 35 | xt3a-rjpj 36 | 8muu-jq8v 37 | mqxt-kvm4 38 | nmpv-7vqg 39 | 6avx-8i8e 40 | prfy-5m97 41 | nfvw-n5k3 42 | xjvb-fthb 43 | eecg-fc54 44 | 2rpu-peb3 45 | f6w2-hzex 46 | auxe-iahd 47 | yznh-9n2u 48 | 
kcuu-2yxg 49 | dpcz-nupn 50 | kaps-rk7z 51 | ykpk-qyvg 52 | eaag-ra2c 53 | 9j6k-uzig 54 | bwpb-bppt 55 | 2976-zu3f 56 | mbxg-7qmu 57 | kyhu-r33m 58 | mu9e-3mkm 59 | y29c-39y9 60 | nc6t-tngg 61 | a56e-tkzk 62 | znrg-tr8s 63 | 7isy-c8ka 64 | n7fj-2yfs 65 | ysgw-us2d 66 | v6pz-ntae 67 | gp5m-pueh 68 | 4vt2-8zrq 69 | gw7p-ee8r 70 | iysa-xg87 71 | hubs-ug3y 72 | dnbn-a9ny 73 | j7nr-ekvn 74 | 8xti-zu8p 75 | f6ne-aqna 76 | g57i-jmvw 77 | u6ue-anax 78 | e7aq-scxv 79 | gy8c-eq6u 80 | uwuq-3zq3 81 | 72aw-fq6m 82 | 6mre-4inz 83 | 9et8-3bg5 84 | es6z-qyb7 85 | 2mxf-2xyw 86 | 46fr-szhj 87 | c7ev-7y4u 88 | mst8-di4d 89 | 82ij-m8wd 90 | 3yki-4nkh 91 | ctwr-tvrd 92 | bfsu-5y9w 93 | h5ce-uubp 94 | qzrg-q4nv 95 | d577-xky7 96 | xsrd-4894 97 | 7fus-qa4r 98 | hnhf-yaps 99 | knai-w28i 100 | 5ayy-wxat 101 | vaiq-ubi5 102 | gajw-anue 103 | 2hvz-kffj 104 | fqnx-qdsk 105 | px79-vegt 106 | bqmh-j34s 107 | shh7-vzch 108 | punv-um3q 109 | si3b-qk24 110 | mvbx-i64u 111 | 2crc-aced 112 | gdd9-eqv9 113 | 9khp-yjaa 114 | b4y7-zhnz 115 | scjj-9qzz 116 | uytp-iqga 117 | 5xcn-q5zx 118 | 9hwi-bdju 119 | i32t-b6vf 120 | ei4u-794h 121 | wr39-6xm9 122 | j6uz-tjg8 123 | 9nqb-w48x 124 | tm8z-k466 125 | y7yb-jtjn 126 | dknb-ctqa 127 | bnuf-aarq 128 | 84rh-tbam 129 | 7rap-ipwf 130 | xzjj-3r8e 131 | xydx-f66g 132 | qdcb-svkz 133 | 4adt-tbf8 134 | beu6-4urm 135 | y4r5-xpku 136 | 9edk-tayw 137 | egz6-jdf3 138 | ggak-65yh 139 | jr4d-a5zs 140 | vmrg-prz6 141 | 6hiw-nih3 142 | iwjg-nq4w 143 | jruw-eduf 144 | t37t-2z9n 145 | xhpv-2y2t 146 | q78x-bpvt 147 | pwa8-zn84 148 | 375q-qr55 149 | 98hy-qgun 150 | 6gjh-jny3 151 | b58q-nxjr 152 | 57bz-3nsb 153 | fbs7-uv7y 154 | rjq9-ge5s 155 | 55vi-yv73 156 | g2ht-buzt 157 | 33re-ygv7 158 | ggrb-bzbj 159 | 4agm-wt9h 160 | 8h3r-5ys2 161 | f9i7-hdjq 162 | ks8u-4a6b 163 | ud9b-q65g 164 | p26r-vxf7 165 | nrf5-ighq 166 | x8j3-j3q3 167 | x2gc-xf9e 168 | 7yt8-7467 169 | tzuj-jcfp 170 | j5gy-p5g8 171 | my5s-s4eh 172 | 7njw-4e49 173 | 9isd-fcah 174 | df5n-u36u 175 | s2tr-4vrj 176 | 
r45z-99vd 177 | rgfm-xqag 178 | cjgx-qcup 179 | f2sy-bth7 180 | 692u-9tuj 181 | xgwu-c37w 182 | rh82-ntt9 183 | kffi-kb6e 184 | ddqk-i2ey 185 | gds5-6aiq 186 | nqs8-f6fc 187 | zmac-3mxq 188 | pkdk-9dwc 189 | jir4-uuhx 190 | myu9-qngm 191 | a76p-ee58 192 | bnfb-vu2t 193 | vbdj-jxmq 194 | npym-f8ef 195 | qt34-hwip 196 | bdis-rq5p 197 | djyx-z2bv 198 | b2ak-ut7u 199 | 87u8-3yfv 200 | qrrq-9bsx 201 | ykrg-bwt7 202 | paqn-uf4u 203 | mnwt-r49h 204 | kiry-88gi 205 | 65xu-w8px 206 | 2694-uced 207 | achm-af7d 208 | nekk-97cp 209 | ngd7-ejms 210 | nna3-34af 211 | 8svr-ivxz 212 | em7g-s625 213 | hk82-7wj7 214 | gjev-z5ji 215 | 8peh-czku 216 | igwr-fbps 217 | 3nma-d8m3 218 | tizy-3vkk 219 | nzgd-btmv 220 | mvki-i26q 221 | 4ev8-6z47 222 | 4mm2-9j7w 223 | 9s54-maam 224 | uwg3-tppw 225 | mpd9-nhrp 226 | i73h-tmq7 227 | da5j-rtis 228 | icpf-mjv6 229 | fftr-j9wv 230 | a5ef-84t3 231 | x7r5-d4hj 232 | wans-jfwv 233 | mgii-4cth 234 | 6w5t-p7n9 235 | 5md5-bka8 236 | kdc2-xufp 237 | 8s6g-dibx 238 | y9rm-5xha 239 | abvn-qkj9 240 | i2gf-u5vm 241 | h69z-r89y 242 | egfg-bqgy 243 | vp2f-wzyp 244 | ybx9-858a 245 | zh7a-ng4h 246 | uqac-iii2 247 | me84-5r83 248 | bms6-cuv9 249 | g4vh-839k 250 | pa5u-sgw4 251 | 3n37-c973 252 | 3qef-bvyj 253 | 63px-qewx 254 | 48sf-3wbh 255 | yyj6-c972 256 | 3dsx-jp6d 257 | csvn-zna4 258 | d54a-rnqn 259 | srfv-kt6f 260 | 62em-cru9 261 | rk7f-7aur 262 | n3b6-y2as 263 | uqbc-s85r 264 | 4bzd-pbh9 265 | n3qd-yamd 266 | 86gx-6tq9 267 | ike9-p5uk 268 | u6tw-de8t 269 | 7afr-kaaj 270 | 33jk-s9st 271 | btu7-6eug 272 | 2q9r-ang2 273 | 59j9-dv23 274 | 3qga-tuus 275 | 5ezz-fzka 276 | khtg-hjme 277 | wsq9-3pi7 278 | pubm-q7h7 279 | 7wta-wwze 280 | vni7-52xi 281 | vizs-ak74 282 | inrc-3sdk 283 | dep6-aymg 284 | u943-cjsi 285 | q9ik-kfw5 286 | kdh3-uyqv 287 | qujq-hb7j 288 | 4xre-g8wq 289 | vtgf-yvms 290 | 9j7i-47f8 291 | 3e3e-bpvz 292 | nzvx-6q5z 293 | chad-mx22 294 | u9uc-sdkn 295 | 7tzq-etnt 296 | ug6h-umbi 297 | hcdm-kjgn 298 | ht22-msjv 299 | ktbb-ft9x 300 | kbbe-7sj5 301 | 
y354-d5xg 302 | k5sh-vdtn 303 | qs7f-tjd2 304 | a5i7-qmwv 305 | 6wmn-5es3 306 | aszk-8atd 307 | xiut-24bw 308 | tjkh-4hzc 309 | 6sba-d8sc 310 | d8qh-bhiz 311 | fyy2-btx2 312 | 3b6m-fezs 313 | 595d-uejx 314 | n39a-jdzb 315 | i5mb-yzrn 316 | 62zc-p5yh 317 | qn6v-4dbh 318 | muf4-te7d 319 | whpx-8wpg 320 | dbkf-sdrd 321 | 8cqf-3hmz 322 | tgy2-3bts 323 | xxuz-7axm 324 | 4fa9-6zye 325 | xvh3-c5hd 326 | fvmf-h4id 327 | a225-i9ns 328 | w2ms-kfus 329 | ciib-ux6u 330 | 8ae2-m26m 331 | 4v56-4hak 332 | sgnx-e3u2 333 | 4ee7-dsf6 334 | gnse-cass 335 | mjk8-4ukd 336 | fjvg-9ez2 337 | 6x5f-8hj7 338 | cax4-b3pr 339 | htu3-4pz3 340 | c89k-cwcz 341 | isug-45sj 342 | cfh4-sh93 343 | ydv5-y4pu 344 | s993-cqfv 345 | afax-7r8v 346 | 7t29-dqaa 347 | 7yns-bcn4 348 | 2gjm-4p4u 349 | b6ng-fzk2 350 | cz52-j8kf 351 | bia2-hxpv 352 | ttdf-sm74 353 | p9vx-6egn 354 | rvwz-r2uq 355 | 7bc3-xv6c 356 | fib4-gufy 357 | 9xcd-grar 358 | 5h4y-jwsi 359 | bn8p-vmrc 360 | 6fc6-nb9b 361 | eua2-4g54 362 | i378-jjyk 363 | inh5-kwtp 364 | jtyk-xyvk 365 | iu2r-7x7v 366 | kq82-ivk4 367 | qhsy-xvcf 368 | 8z24-x5k8 369 | frxt-9vri 370 | dqag-kq5r 371 | q2gy-8hxm 372 | gqet-eavx 373 | fwq5-ux79 374 | prdj-dgnz 375 | y4rx-kdcn 376 | mhj4-e4bq 377 | vbxz-36ag 378 | smk2-dtnx 379 | mner-asqn 380 | qri7-6kh2 381 | tbpd-v3xm 382 | 2zm2-j9bx 383 | 3ang-jx8p 384 | 4adh-4bvw 385 | 5c4w-qa5z 386 | 8559-68u4 387 | 8dgt-s4f6 388 | 8ykz-eevg 389 | bbbg-3rfk 390 | bpwx-u8fh 391 | bvkc-z2zh 392 | c7fz-ay4i 393 | dxb6-mnqt 394 | e6xv-4pu8 395 | fvzv-dxut 396 | gzey-cwe5 397 | h9v5-2eis 398 | hedd-bhp6 399 | kga2-r2kk 400 | knbn-7s9q 401 | mqpu-hu3f 402 | nc9g-y3uy 403 | qa2a-xevx 404 | s32g-v2f9 405 | swue-sa5z 406 | um2u-tk2u 407 | vhdz-3ngk 408 | vkvi-vkp5 409 | x9fe-c4x6 410 | xyw3-igvf 411 | kgyy-nc79 412 | ref6-s87x 413 | iaa7-x8kk 414 | jy6g-t358 415 | 8ys2-jbnc 416 | tkwe-shaj 417 | c2gf-23xz 418 | heq4-cm4h 419 | jwmg-yacn 420 | mcgy-76ui 421 | gdcb-e7r7 422 | 5up9-65nx 423 | 4b36-t359 424 | ufaw-xtbh 425 | r52a-rz3g 426 | 
mg8q-us7m 427 | dvei-8sdh 428 | gv7r-t5a7 429 | 99wm-kyny 430 | 3pi9-b3rf 431 | enev-ph2s 432 | gv6h-cphf 433 | 7wwb-zx48 434 | g2nn-qd2k 435 | nqq8-ixbd 436 | s5d6-6x7q 437 | 7eaf-pe73 438 | 3qt5-kz7y 439 | vgvc-bpwz 440 | hhjj-czpq 441 | q4c4-5fu4 442 | wap3-zzbk 443 | s2r9-4htf 444 | tqgb-ivff 445 | m3rm-9ij3 446 | -------------------------------------------------------------------------------- /type_detection/ids/edmonton_ids.txt: -------------------------------------------------------------------------------- 1 | sthd-gad4 2 | 62wr-c9ye 3 | cggb-hzzm 4 | yutc-c5ju 5 | 7gh5-bnbs 6 | htrg-yirr 7 | 5zeu-wkpv 8 | jyra-si4k 9 | ickf-52qg 10 | 65fr-66s6 11 | 7yqh-39tz 12 | h9sn-ds8j 13 | id8i-xwha 14 | 2d5a-esdr 15 | w4cu-waf8 16 | k29i-u25u 17 | qh6u-3haa 18 | febx-sqja 19 | ebvt-eg97 20 | ehbr-emhe 21 | qsmn-3w99 22 | c4nr-3quz 23 | 25b5-e682 24 | f2bf-5wqc 25 | 2n87-9vjk 26 | ih46-vxeq 27 | ieti-capx 28 | jn25-zspi 29 | ia7a-fuzq 30 | 83ud-pzgi 31 | gem5-v2v7 32 | zxf6-rv9a 33 | rm66-tzy5 34 | 7ndn-2x34 35 | xt3a-rjpj 36 | 8muu-jq8v 37 | mqxt-kvm4 38 | nmpv-7vqg 39 | 6avx-8i8e 40 | prfy-5m97 41 | nfvw-n5k3 42 | xjvb-fthb 43 | eecg-fc54 44 | 2rpu-peb3 45 | f6w2-hzex 46 | auxe-iahd 47 | yznh-9n2u 48 | kcuu-2yxg 49 | dpcz-nupn 50 | kaps-rk7z 51 | ykpk-qyvg 52 | eaag-ra2c 53 | 9j6k-uzig 54 | bwpb-bppt 55 | 2976-zu3f 56 | mbxg-7qmu 57 | kyhu-r33m 58 | mu9e-3mkm 59 | y29c-39y9 60 | nc6t-tngg 61 | a56e-tkzk 62 | znrg-tr8s 63 | 7isy-c8ka 64 | n7fj-2yfs 65 | ysgw-us2d 66 | v6pz-ntae 67 | gp5m-pueh 68 | 4vt2-8zrq 69 | gw7p-ee8r 70 | iysa-xg87 71 | hubs-ug3y 72 | dnbn-a9ny 73 | j7nr-ekvn 74 | 8xti-zu8p 75 | f6ne-aqna 76 | g57i-jmvw 77 | u6ue-anax 78 | e7aq-scxv 79 | gy8c-eq6u 80 | uwuq-3zq3 81 | 72aw-fq6m 82 | 6mre-4inz 83 | 9et8-3bg5 84 | es6z-qyb7 85 | 2mxf-2xyw 86 | 46fr-szhj 87 | c7ev-7y4u 88 | mst8-di4d 89 | 82ij-m8wd 90 | 3yki-4nkh 91 | ctwr-tvrd 92 | bfsu-5y9w 93 | h5ce-uubp 94 | qzrg-q4nv 95 | d577-xky7 96 | xsrd-4894 97 | 7fus-qa4r 98 | hnhf-yaps 99 | knai-w28i 100 
| 5ayy-wxat 101 | vaiq-ubi5 102 | gajw-anue 103 | 2hvz-kffj 104 | fqnx-qdsk 105 | px79-vegt 106 | bqmh-j34s 107 | shh7-vzch 108 | punv-um3q 109 | si3b-qk24 110 | mvbx-i64u 111 | 2crc-aced 112 | gdd9-eqv9 113 | 9khp-yjaa 114 | b4y7-zhnz 115 | scjj-9qzz 116 | uytp-iqga 117 | 5xcn-q5zx 118 | 9hwi-bdju 119 | i32t-b6vf 120 | ei4u-794h 121 | wr39-6xm9 122 | j6uz-tjg8 123 | 9nqb-w48x 124 | tm8z-k466 125 | y7yb-jtjn 126 | dknb-ctqa 127 | bnuf-aarq 128 | 84rh-tbam 129 | 7rap-ipwf 130 | xzjj-3r8e 131 | xydx-f66g 132 | qdcb-svkz 133 | 4adt-tbf8 134 | beu6-4urm 135 | y4r5-xpku 136 | 9edk-tayw 137 | egz6-jdf3 138 | ggak-65yh 139 | jr4d-a5zs 140 | vmrg-prz6 141 | 6hiw-nih3 142 | iwjg-nq4w 143 | jruw-eduf 144 | t37t-2z9n 145 | xhpv-2y2t 146 | q78x-bpvt 147 | pwa8-zn84 148 | 375q-qr55 149 | 98hy-qgun 150 | 6gjh-jny3 151 | b58q-nxjr 152 | 57bz-3nsb 153 | fbs7-uv7y 154 | rjq9-ge5s 155 | 55vi-yv73 156 | g2ht-buzt 157 | 33re-ygv7 158 | ggrb-bzbj 159 | 4agm-wt9h 160 | 8h3r-5ys2 161 | f9i7-hdjq 162 | ks8u-4a6b 163 | ud9b-q65g 164 | p26r-vxf7 165 | nrf5-ighq 166 | x8j3-j3q3 167 | x2gc-xf9e 168 | 7yt8-7467 169 | tzuj-jcfp 170 | j5gy-p5g8 171 | my5s-s4eh 172 | 7njw-4e49 173 | 9isd-fcah 174 | df5n-u36u 175 | s2tr-4vrj 176 | r45z-99vd 177 | rgfm-xqag 178 | cjgx-qcup 179 | f2sy-bth7 180 | 692u-9tuj 181 | xgwu-c37w 182 | rh82-ntt9 183 | kffi-kb6e 184 | ddqk-i2ey 185 | gds5-6aiq 186 | nqs8-f6fc 187 | zmac-3mxq 188 | pkdk-9dwc 189 | jir4-uuhx 190 | myu9-qngm 191 | a76p-ee58 192 | bnfb-vu2t 193 | vbdj-jxmq 194 | npym-f8ef 195 | qt34-hwip 196 | bdis-rq5p 197 | djyx-z2bv 198 | b2ak-ut7u 199 | 87u8-3yfv 200 | qrrq-9bsx 201 | ykrg-bwt7 202 | paqn-uf4u 203 | mnwt-r49h 204 | kiry-88gi 205 | 65xu-w8px 206 | 2694-uced 207 | achm-af7d 208 | nekk-97cp 209 | ngd7-ejms 210 | nna3-34af 211 | 8svr-ivxz 212 | em7g-s625 213 | hk82-7wj7 214 | gjev-z5ji 215 | 8peh-czku 216 | igwr-fbps 217 | 3nma-d8m3 218 | tizy-3vkk 219 | nzgd-btmv 220 | mvki-i26q 221 | 4ev8-6z47 222 | 4mm2-9j7w 223 | 9s54-maam 224 | uwg3-tppw 225 
| mpd9-nhrp 226 | i73h-tmq7 227 | da5j-rtis 228 | icpf-mjv6 229 | fftr-j9wv 230 | a5ef-84t3 231 | x7r5-d4hj 232 | wans-jfwv 233 | mgii-4cth 234 | 6w5t-p7n9 235 | 5md5-bka8 236 | kdc2-xufp 237 | 8s6g-dibx 238 | y9rm-5xha 239 | abvn-qkj9 240 | i2gf-u5vm 241 | h69z-r89y 242 | egfg-bqgy 243 | vp2f-wzyp 244 | ybx9-858a 245 | zh7a-ng4h 246 | uqac-iii2 247 | me84-5r83 248 | bms6-cuv9 249 | g4vh-839k 250 | pa5u-sgw4 251 | 3n37-c973 252 | 3qef-bvyj 253 | 63px-qewx 254 | 48sf-3wbh 255 | yyj6-c972 256 | 3dsx-jp6d 257 | csvn-zna4 258 | d54a-rnqn 259 | srfv-kt6f 260 | 62em-cru9 261 | rk7f-7aur 262 | n3b6-y2as 263 | uqbc-s85r 264 | 4bzd-pbh9 265 | n3qd-yamd 266 | 86gx-6tq9 267 | ike9-p5uk 268 | u6tw-de8t 269 | 7afr-kaaj 270 | 33jk-s9st 271 | btu7-6eug 272 | 2q9r-ang2 273 | 59j9-dv23 274 | 3qga-tuus 275 | 5ezz-fzka 276 | khtg-hjme 277 | wsq9-3pi7 278 | pubm-q7h7 279 | 7wta-wwze 280 | vni7-52xi 281 | vizs-ak74 282 | inrc-3sdk 283 | dep6-aymg 284 | u943-cjsi 285 | q9ik-kfw5 286 | kdh3-uyqv 287 | qujq-hb7j 288 | 4xre-g8wq 289 | vtgf-yvms 290 | 9j7i-47f8 291 | 3e3e-bpvz 292 | nzvx-6q5z 293 | chad-mx22 294 | u9uc-sdkn 295 | 7tzq-etnt 296 | ug6h-umbi 297 | hcdm-kjgn 298 | ht22-msjv 299 | ktbb-ft9x 300 | kbbe-7sj5 301 | y354-d5xg 302 | k5sh-vdtn 303 | qs7f-tjd2 304 | a5i7-qmwv 305 | 6wmn-5es3 306 | aszk-8atd 307 | xiut-24bw 308 | tjkh-4hzc 309 | 6sba-d8sc 310 | d8qh-bhiz 311 | fyy2-btx2 312 | 3b6m-fezs 313 | 595d-uejx 314 | n39a-jdzb 315 | i5mb-yzrn 316 | 62zc-p5yh 317 | qn6v-4dbh 318 | muf4-te7d 319 | whpx-8wpg 320 | dbkf-sdrd 321 | 8cqf-3hmz 322 | tgy2-3bts 323 | xxuz-7axm 324 | 4fa9-6zye 325 | xvh3-c5hd 326 | fvmf-h4id 327 | a225-i9ns 328 | w2ms-kfus 329 | ciib-ux6u 330 | 8ae2-m26m 331 | 4v56-4hak 332 | sgnx-e3u2 333 | 4ee7-dsf6 334 | gnse-cass 335 | mjk8-4ukd 336 | fjvg-9ez2 337 | 6x5f-8hj7 338 | cax4-b3pr 339 | htu3-4pz3 340 | c89k-cwcz 341 | isug-45sj 342 | cfh4-sh93 343 | ydv5-y4pu 344 | s993-cqfv 345 | afax-7r8v 346 | 7t29-dqaa 347 | 7yns-bcn4 348 | 2gjm-4p4u 349 | b6ng-fzk2 350 
| cz52-j8kf 351 | bia2-hxpv 352 | ttdf-sm74 353 | p9vx-6egn 354 | rvwz-r2uq 355 | 7bc3-xv6c 356 | fib4-gufy 357 | 9xcd-grar 358 | 5h4y-jwsi 359 | bn8p-vmrc 360 | 6fc6-nb9b 361 | eua2-4g54 362 | i378-jjyk 363 | inh5-kwtp 364 | jtyk-xyvk 365 | iu2r-7x7v 366 | kq82-ivk4 367 | qhsy-xvcf 368 | 8z24-x5k8 369 | frxt-9vri 370 | dqag-kq5r 371 | q2gy-8hxm 372 | gqet-eavx 373 | fwq5-ux79 374 | prdj-dgnz 375 | y4rx-kdcn 376 | mhj4-e4bq 377 | vbxz-36ag 378 | smk2-dtnx 379 | mner-asqn 380 | qri7-6kh2 381 | tbpd-v3xm 382 | 2zm2-j9bx 383 | 3ang-jx8p 384 | 4adh-4bvw 385 | 5c4w-qa5z 386 | 8559-68u4 387 | 8dgt-s4f6 388 | 8ykz-eevg 389 | bbbg-3rfk 390 | bpwx-u8fh 391 | bvkc-z2zh 392 | c7fz-ay4i 393 | dxb6-mnqt 394 | e6xv-4pu8 395 | fvzv-dxut 396 | gzey-cwe5 397 | h9v5-2eis 398 | hedd-bhp6 399 | kga2-r2kk 400 | knbn-7s9q 401 | mqpu-hu3f 402 | nc9g-y3uy 403 | qa2a-xevx 404 | s32g-v2f9 405 | swue-sa5z 406 | um2u-tk2u 407 | vhdz-3ngk 408 | vkvi-vkp5 409 | x9fe-c4x6 410 | xyw3-igvf 411 | kgyy-nc79 412 | ref6-s87x 413 | iaa7-x8kk 414 | jy6g-t358 415 | 8ys2-jbnc 416 | tkwe-shaj 417 | c2gf-23xz 418 | heq4-cm4h 419 | jwmg-yacn 420 | mcgy-76ui 421 | gdcb-e7r7 422 | 5up9-65nx 423 | 4b36-t359 424 | ufaw-xtbh 425 | r52a-rz3g 426 | mg8q-us7m 427 | dvei-8sdh 428 | gv7r-t5a7 429 | 99wm-kyny 430 | 3pi9-b3rf 431 | enev-ph2s 432 | gv6h-cphf 433 | 7wwb-zx48 434 | g2nn-qd2k 435 | nqq8-ixbd 436 | s5d6-6x7q 437 | 7eaf-pe73 438 | 3qt5-kz7y 439 | vgvc-bpwz 440 | hhjj-czpq 441 | q4c4-5fu4 442 | wap3-zzbk 443 | s2r9-4htf 444 | tqgb-ivff 445 | m3rm-9ij3 446 | -------------------------------------------------------------------------------- /type_detection/ijson/backends/python.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Pure-python parsing backend. 
3 | ''' 4 | from __future__ import unicode_literals 5 | from decimal import Decimal 6 | import re 7 | from codecs import unicode_escape_decode 8 | 9 | from ijson import common 10 | from ijson.compat import chr 11 | 12 | 13 | BUFSIZE = 16 * 1024 14 | NONWS = re.compile(r'\S') 15 | LEXTERM = re.compile(r'[^a-z0-9\.+-]') 16 | 17 | 18 | class UnexpectedSymbol(common.JSONError): 19 | def __init__(self, symbol, reader): 20 | super(UnexpectedSymbol, self).__init__('Unexpected symbol "%s" at %d' % (symbol[0], reader.pos - len(symbol))) 21 | 22 | class Lexer(object): 23 | ''' 24 | JSON lexer. Supports iterator interface. 25 | ''' 26 | def __init__(self, f): 27 | self.f = f 28 | 29 | def __iter__(self): 30 | self.buffer = '' 31 | self.pos = 0 32 | return self 33 | 34 | def __next__(self): 35 | while True: 36 | match = NONWS.search(self.buffer, self.pos) 37 | if match: 38 | self.pos = match.start() 39 | char = self.buffer[self.pos] 40 | if 'a' <= char <= 'z' or '0' <= char <= '9' or char == '-': 41 | return self.lexem() 42 | elif char == '"': 43 | return self.stringlexem() 44 | else: 45 | self.pos += 1 46 | return char 47 | self.buffer = self.f.read(BUFSIZE).decode('utf-8') 48 | self.pos = 0 49 | if not len(self.buffer): 50 | raise StopIteration 51 | next = __next__ 52 | 53 | def lexem(self): 54 | current = self.pos 55 | while True: 56 | match = LEXTERM.search(self.buffer, current) 57 | if match: 58 | current = match.start() 59 | break 60 | else: 61 | current = len(self.buffer) 62 | self.buffer += self.f.read(BUFSIZE).decode('utf-8') 63 | if len(self.buffer) == current: 64 | break 65 | result = self.buffer[self.pos:current] 66 | self.pos = current 67 | if self.pos > BUFSIZE: 68 | self.buffer = self.buffer[self.pos:] 69 | self.pos = 0 70 | return result 71 | 72 | def stringlexem(self): 73 | start = self.pos + 1 74 | while True: 75 | try: 76 | end = self.buffer.index('"', start) 77 | escpos = end - 1 78 | while self.buffer[escpos] == '\\': 79 | escpos -= 1 80 | if (end - 
escpos) % 2 == 0: 81 | start = end + 1 82 | else: 83 | result = self.buffer[self.pos:end + 1] 84 | self.pos = end + 1 85 | return result 86 | except ValueError: 87 | old_len = len(self.buffer) 88 | self.buffer += self.f.read(BUFSIZE).decode('utf-8') 89 | if len(self.buffer) == old_len: 90 | raise common.IncompleteJSONError() 91 | 92 | def unescape(s): 93 | start = 0 94 | while start < len(s): 95 | pos = s.find('\\', start) 96 | if pos == -1: 97 | yield s[start:] 98 | break 99 | yield s[start:pos] 100 | pos += 1 101 | esc = s[pos] 102 | if esc == 'b': 103 | yield '\b' 104 | elif esc == 'f': 105 | yield '\f' 106 | elif esc == 'n': 107 | yield '\n' 108 | elif esc == 'r': 109 | yield '\r' 110 | elif esc == 't': 111 | yield '\t' 112 | elif esc == 'u': 113 | yield chr(int(s[pos + 1:pos + 5], 16)) 114 | pos += 4 115 | else: 116 | yield esc 117 | start = pos + 1 118 | 119 | def parse_value(lexer, symbol=None): 120 | try: 121 | if symbol is None: 122 | symbol = next(lexer) 123 | if symbol == 'null': 124 | yield ('null', None) 125 | elif symbol == 'true': 126 | yield ('boolean', True) 127 | elif symbol == 'false': 128 | yield ('boolean', False) 129 | elif symbol == '[': 130 | for event in parse_array(lexer): 131 | yield event 132 | elif symbol == '{': 133 | for event in parse_object(lexer): 134 | yield event 135 | elif symbol[0] == '"': 136 | yield ('string', ''.join(unescape(symbol[1:-1]))) 137 | else: 138 | try: 139 | number = Decimal(symbol) if '.' 
in symbol else int(symbol) 140 | yield ('number', number) 141 | except ValueError: 142 | raise UnexpectedSymbol(symbol, lexer) 143 | except StopIteration: 144 | raise common.IncompleteJSONError() 145 | 146 | def parse_array(lexer): 147 | yield ('start_array', None) 148 | symbol = next(lexer) 149 | if symbol != ']': 150 | while True: 151 | for event in parse_value(lexer, symbol): 152 | yield event 153 | symbol = next(lexer) 154 | if symbol == ']': 155 | break 156 | if symbol != ',': 157 | raise UnexpectedSymbol(symbol, lexer) 158 | symbol = next(lexer) 159 | yield ('end_array', None) 160 | 161 | def parse_object(lexer): 162 | yield ('start_map', None) 163 | symbol = next(lexer) 164 | if symbol != '}': 165 | while True: 166 | if symbol[0] != '"': 167 | raise UnexpectedSymbol(symbol, lexer) 168 | yield ('map_key', symbol[1:-1]) 169 | symbol = next(lexer) 170 | if symbol != ':': 171 | raise UnexpectedSymbol(symbol, lexer) 172 | for event in parse_value(lexer): 173 | yield event 174 | symbol = next(lexer) 175 | if symbol == '}': 176 | break 177 | if symbol != ',': 178 | raise UnexpectedSymbol(symbol, lexer) 179 | symbol = next(lexer) 180 | yield ('end_map', None) 181 | 182 | def basic_parse(file): 183 | ''' 184 | Iterator yielding unprefixed events. 185 | 186 | Parameters: 187 | 188 | - file: a readable file-like object with JSON input 189 | ''' 190 | lexer = iter(Lexer(file)) 191 | for value in parse_value(lexer): 192 | yield value 193 | try: 194 | next(lexer) 195 | except StopIteration: 196 | pass 197 | else: 198 | raise common.JSONError('Additional data') 199 | 200 | def parse(file): 201 | ''' 202 | Backend-specific wrapper for ijson.common.parse. 203 | ''' 204 | return common.parse(basic_parse(file)) 205 | 206 | def items(file, prefix): 207 | ''' 208 | Backend-specific wrapper for ijson.common.items. 
209 | ''' 210 | return common.items(parse(file), prefix) 211 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/ijson/backends/python.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Pure-python parsing backend. 3 | ''' 4 | from __future__ import unicode_literals 5 | from decimal import Decimal 6 | import re 7 | from codecs import unicode_escape_decode 8 | 9 | from ijson import common 10 | from ijson.compat import chr 11 | 12 | 13 | BUFSIZE = 16 * 1024 14 | NONWS = re.compile(r'\S') 15 | LEXTERM = re.compile(r'[^a-z0-9\.+-]') 16 | 17 | 18 | class UnexpectedSymbol(common.JSONError): 19 | def __init__(self, symbol, reader): 20 | super(UnexpectedSymbol, self).__init__('Unexpected symbol "%s" at %d' % (symbol[0], reader.pos - len(symbol))) 21 | 22 | class Lexer(object): 23 | ''' 24 | JSON lexer. Supports iterator interface. 25 | ''' 26 | def __init__(self, f): 27 | self.f = f 28 | 29 | def __iter__(self): 30 | self.buffer = '' 31 | self.pos = 0 32 | return self 33 | 34 | def __next__(self): 35 | while True: 36 | match = NONWS.search(self.buffer, self.pos) 37 | if match: 38 | self.pos = match.start() 39 | char = self.buffer[self.pos] 40 | if 'a' <= char <= 'z' or '0' <= char <= '9' or char == '-': 41 | return self.lexem() 42 | elif char == '"': 43 | return self.stringlexem() 44 | else: 45 | self.pos += 1 46 | return char 47 | self.buffer = self.f.read(BUFSIZE).decode('utf-8') 48 | self.pos = 0 49 | if not len(self.buffer): 50 | raise StopIteration 51 | next = __next__ 52 | 53 | def lexem(self): 54 | current = self.pos 55 | while True: 56 | match = LEXTERM.search(self.buffer, current) 57 | if match: 58 | current = match.start() 59 | break 60 | else: 61 | current = len(self.buffer) 62 | self.buffer += self.f.read(BUFSIZE).decode('utf-8') 63 | if len(self.buffer) == current: 64 | break 65 | result = self.buffer[self.pos:current] 66 | self.pos = current 67 | if self.pos > 
BUFSIZE: 68 | self.buffer = self.buffer[self.pos:] 69 | self.pos = 0 70 | return result 71 | 72 | def stringlexem(self): 73 | start = self.pos + 1 74 | while True: 75 | try: 76 | end = self.buffer.index('"', start) 77 | escpos = end - 1 78 | while self.buffer[escpos] == '\\': 79 | escpos -= 1 80 | if (end - escpos) % 2 == 0: 81 | start = end + 1 82 | else: 83 | result = self.buffer[self.pos:end + 1] 84 | self.pos = end + 1 85 | return result 86 | except ValueError: 87 | old_len = len(self.buffer) 88 | self.buffer += self.f.read(BUFSIZE).decode('utf-8') 89 | if len(self.buffer) == old_len: 90 | raise common.IncompleteJSONError() 91 | 92 | def unescape(s): 93 | start = 0 94 | while start < len(s): 95 | pos = s.find('\\', start) 96 | if pos == -1: 97 | yield s[start:] 98 | break 99 | yield s[start:pos] 100 | pos += 1 101 | esc = s[pos] 102 | if esc == 'b': 103 | yield '\b' 104 | elif esc == 'f': 105 | yield '\f' 106 | elif esc == 'n': 107 | yield '\n' 108 | elif esc == 'r': 109 | yield '\r' 110 | elif esc == 't': 111 | yield '\t' 112 | elif esc == 'u': 113 | yield chr(int(s[pos + 1:pos + 5], 16)) 114 | pos += 4 115 | else: 116 | yield esc 117 | start = pos + 1 118 | 119 | def parse_value(lexer, symbol=None): 120 | try: 121 | if symbol is None: 122 | symbol = next(lexer) 123 | if symbol == 'null': 124 | yield ('null', None) 125 | elif symbol == 'true': 126 | yield ('boolean', True) 127 | elif symbol == 'false': 128 | yield ('boolean', False) 129 | elif symbol == '[': 130 | for event in parse_array(lexer): 131 | yield event 132 | elif symbol == '{': 133 | for event in parse_object(lexer): 134 | yield event 135 | elif symbol[0] == '"': 136 | yield ('string', ''.join(unescape(symbol[1:-1]))) 137 | else: 138 | try: 139 | number = Decimal(symbol) if '.' 
in symbol else int(symbol) 140 | yield ('number', number) 141 | except ValueError: 142 | raise UnexpectedSymbol(symbol, lexer) 143 | except StopIteration: 144 | raise common.IncompleteJSONError() 145 | 146 | def parse_array(lexer): 147 | yield ('start_array', None) 148 | symbol = next(lexer) 149 | if symbol != ']': 150 | while True: 151 | for event in parse_value(lexer, symbol): 152 | yield event 153 | symbol = next(lexer) 154 | if symbol == ']': 155 | break 156 | if symbol != ',': 157 | raise UnexpectedSymbol(symbol, lexer) 158 | symbol = next(lexer) 159 | yield ('end_array', None) 160 | 161 | def parse_object(lexer): 162 | yield ('start_map', None) 163 | symbol = next(lexer) 164 | if symbol != '}': 165 | while True: 166 | if symbol[0] != '"': 167 | raise UnexpectedSymbol(symbol, lexer) 168 | yield ('map_key', symbol[1:-1]) 169 | symbol = next(lexer) 170 | if symbol != ':': 171 | raise UnexpectedSymbol(symbol, lexer) 172 | for event in parse_value(lexer): 173 | yield event 174 | symbol = next(lexer) 175 | if symbol == '}': 176 | break 177 | if symbol != ',': 178 | raise UnexpectedSymbol(symbol, lexer) 179 | symbol = next(lexer) 180 | yield ('end_map', None) 181 | 182 | def basic_parse(file): 183 | ''' 184 | Iterator yielding unprefixed events. 185 | 186 | Parameters: 187 | 188 | - file: a readable file-like object with JSON input 189 | ''' 190 | lexer = iter(Lexer(file)) 191 | for value in parse_value(lexer): 192 | yield value 193 | try: 194 | next(lexer) 195 | except StopIteration: 196 | pass 197 | else: 198 | raise common.JSONError('Additional data') 199 | 200 | def parse(file): 201 | ''' 202 | Backend-specific wrapper for ijson.common.parse. 203 | ''' 204 | return common.parse(basic_parse(file)) 205 | 206 | def items(file, prefix): 207 | ''' 208 | Backend-specific wrapper for ijson.common.items. 
209 | ''' 210 | return common.items(parse(file), prefix) 211 | -------------------------------------------------------------------------------- /extract_zipcode_latlon/collect_data.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | ## 3 | ## Copyright (C) 2014, New York University. 4 | ## All rights reserved. 5 | ## Contact: kien.pham@nyu.edu 6 | ## 7 | ## "Redistribution and use in source and binary forms, with or without 8 | ## modification, are permitted provided that the following conditions are met: 9 | ## 10 | ## - Redistributions of source code must retain the above copyright notice, 11 | ## this list of conditions and the following disclaimer. 12 | ## - Redistributions in binary form must reproduce the above copyright 13 | ## notice, this list of conditions and the following disclaimer in the 14 | ## documentation and/or other materials provided with the distribution. 15 | ## - Neither the name of New York University nor the names of its 16 | ## contributors may be used to endorse or promote products derived from 17 | ## this software without specific prior written permission. 18 | ## 19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 | ## PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." 30 | ## 31 | ############################################################################### 32 | 33 | # -*- coding:utf-8 -*- 34 | from __future__ import unicode_literals 35 | import json 36 | import sys 37 | import ijson 38 | import codecs 39 | import re 40 | import os.path 41 | 42 | def get_zipcode(f, id2index): 43 | with open(f) as lines: 44 | for line in lines: 45 | a = line.strip("\n").split("\t") 46 | id = a[0] 47 | index_list = [] 48 | for i in range(1,len(a)): 49 | if i%2 == 0: 50 | index_list.append(int(a[i])) 51 | if id2index.has_key(id): 52 | id2index[id][0] = index_list 53 | else: 54 | id2index[id] = [index_list, [], []] #first list contains zipcode, second list contains latlon, third list contains time 55 | 56 | def get_latlon(f, id2index): 57 | lat = ["latitude", "x", "lat_dd_wgs84", "location_x", "centroidx", "coordinates", "lat", "location", "_lit_lat", "_south", "stop_lat", "building_latitude", "centroid_latitude", "intptlat", "intptlat10", "xpos", "_47_564727820", "x"] 58 | lon = ["longitude", "y", "lon_dd_wgs84", "location_y", "centroidy", "coordinates", "lon", "location", "_lit_lon", "_west", "stop_lon", "building_longitude", "centroid_longitude", "intptlon", "intptlat10", "ypos", "_122_363840492", "y"] 59 | with open(f) as lines: 60 | for line in lines: 61 | a = line.strip("\n").split("\t") 62 | id = a[0] 63 | index_list = [-1, -1] #first element is lat, second one is lon 64 | 
pre_item = "" # 65 | for i in range(1,len(a)): 66 | if (index_list[0] != -1) & (index_list[1] != -1): #if we already found latitude and longitude index => stop checking 67 | break 68 | if pre_item == "": 69 | if a[i] in lat : 70 | pre_item = "latitude" 71 | continue 72 | if a[i] in lon: 73 | pre_item = "longitude" 74 | continue 75 | if pre_item == "latitude": 76 | index_list[0] = int(a[i]) 77 | pre_item = "" 78 | continue 79 | if pre_item == "longitude": 80 | index_list[1] = int(a[i]) 81 | pre_item = "" 82 | continue 83 | 84 | if (index_list[0] != -1) & (index_list[1] != -1): 85 | if id2index.has_key(id): 86 | id2index[id][1] = index_list 87 | else: 88 | id2index[id] = [[], index_list, []] #first list contains zipcode, second list contains latlon, third list contains time 89 | 90 | def get_data(output_path, data_path, city, id, id2index): 91 | #Open files to write 92 | #OUTPUT: *zipcode.csv and *latlon.csv store list of zipcode and lat/long respectively 93 | zipcode_file = open(output_path + "/" + city + "_" + id + "-zipcode.txt", "w") 94 | latlon_file = open(output_path + "/" + city + "_" + id + "-latlon.txt", "w") 95 | 96 | #Initialize sets. 
Set contains distinct values 97 | zipcode_set = [] 98 | latlon_set = [] 99 | 100 | index = id2index[id] 101 | zipcode_index = index[0] 102 | latlon_index = index[1] 103 | 104 | content = open(data_path + "/" + id + ".json") 105 | data = ijson.items(content, 'data.item') 106 | Zipcode = re.compile('([\d]{5})') 107 | try: 108 | for item in data: 109 | item = item[8:] 110 | if len(zipcode_index) > 0: #If there is zipcode attribute 111 | for i in zipcode_index:#for each dataset, there could be more than row containing zipcode 112 | if item[i]: #if value is not None 113 | match_zipcode = Zipcode.search(str(item[i])) 114 | if match_zipcode: 115 | zipcode = match_zipcode.group(1) 116 | zipcode_set.append(zipcode) 117 | elif len(latlon_index) == 2: #if there are lat/lon attributes 118 | lat = item[latlon_index[0]] 119 | lon = item[latlon_index[1]] 120 | if (lat != None) & (lon != None): # if values are not None 121 | latlon = lat + "," + lon 122 | latlon_set.append(latlon) 123 | except: 124 | print id + "\tException" 125 | #Write to file 126 | if len(zipcode_set) > 0: 127 | for item in zipcode_set: 128 | try: 129 | zipcode_file.write(item + "\n") 130 | except: 131 | print id + "\tException" 132 | continue 133 | if len(latlon_set) > 0: 134 | for item in latlon_set: 135 | try: 136 | latlon_file.write(item + "\n") 137 | except: 138 | print id + "\tException" 139 | continue 140 | zipcode_file.close() 141 | latlon_file.close() 142 | 143 | def main(argv): 144 | if len(argv) != 3: 145 | print "The program takes 3 arguments, " + str(len(argv)) + " is given." 
146 | return 147 | 148 | city = argv[0] #City name 149 | data_path = argv[1] #Directory that store JSON files 150 | output_path = argv[2] #Directory that stores result 151 | 152 | zipcode_file = "index/" + city + "_zipcode_index.txt" 153 | latlon_file = "index/" + city + "_latlon_index.txt" 154 | print zipcode_file 155 | print latlon_file 156 | if (os.path.isfile(zipcode_file)) & (os.path.isfile(latlon_file)): 157 | id2index = {} #Each id is mapped to 3 lists. first list contains zipcode, second list contains latlon, third list contains time 158 | get_zipcode(zipcode_file, id2index) 159 | get_latlon(latlon_file, id2index) 160 | for id in id2index: 161 | get_data(output_path, data_path, city, id, id2index) 162 | 163 | if __name__=="__main__": 164 | main(sys.argv[1:]) 165 | -------------------------------------------------------------------------------- /latlon_to_zipcode/KdTreeBB.hpp: -------------------------------------------------------------------------------- 1 | //############################################################################# 2 | // 3 | // Copyright (C) 2014, New York University. 4 | // All rights reserved. 5 | // Contact: huy.vo@nyu.edu, kien.pham@nyu.edu 6 | // 7 | // "Redistribution and use in source and binary forms, with or without 8 | // modification, are permitted provided that the following conditions are met: 9 | // 10 | // - Redistributions of source code must retain the above copyright notice, 11 | // this list of conditions and the following disclaimer. 12 | // - Redistributions in binary form must reproduce the above copyright 13 | // notice, this list of conditions and the following disclaimer in the 14 | // documentation and/or other materials provided with the distribution. 15 | // - Neither the name of New York University nor the names of its 16 | // contributors may be used to endorse or promote products derived from 17 | // this software without specific prior written permission. 
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
//
//#############################################################################

// NOTE(review): this header appears to have been damaged by text extraction:
// every "<...>" sequence (#include targets, template arguments, and "<"
// comparisons with what followed them) has been stripped.  The code below is
// reproduced as-is with the truncated statements flagged inline; restore the
// missing text from the original file before compiling.

#ifndef KD_TREE_BB_HPP
#define KD_TREE_BB_HPP

// NOTE(review): all eight #include directives lost their header names.
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

// A 2-D kd-tree built over axis-aligned bounding boxes; query() collects the
// payloads of the items whose box intersects a rectangular viewport.
class KdTreeBB
{
public:
#pragma pack(push, 1)
  // One indexed element: a 2-D bounding box (bbox[dim][min,max]) plus an
  // opaque payload pointer.
  struct Item {
    float bbox[2][2];
    void *data;
  };

  // Packed tree node.  child_node: -1 marks an empty subtree, 0 marks a
  // leaf, any other value is the index of the first of two children.  In a
  // leaf, leftBounds holds the item's interval on the split axis and
  // rightBounds is reused as raw storage for the payload pointer
  // (see buildKdTree / searchKdTree).
  struct KdNode {
    uint32_t child_node;
    float leftBounds[2];
    float rightBounds[2];
  };
#pragma pack(pop)

  // True when the closed intervals a=[a0,a1] and b=[b0,b1] overlap.
  static bool dimIntersect(const float a[2], const float b[2]) {
    return std::max(a[0], b[0])<=std::min(a[1], b[1]);
  }

  // Rectangular query window; the default covers the whole plane.
  struct Query
  {
    Query() {
      for (int i=0; i<2; i++) {
        this->bounds[i][0] = -FLT_MAX;
        this->bounds[i][1] = FLT_MAX;
      }
    }

    // NOTE(review): bounds[1] is stored as [top, bottom] while dimIntersect
    // treats intervals as [min, max]; this looks inverted unless callers
    // pass top < bottom — confirm against the call sites.
    void setViewport(float left, float bottom, float right, float top)
    {
      this->bounds[0][0] = left;
      this->bounds[0][1] = right;
      this->bounds[1][0] = top;
      this->bounds[1][1] = bottom;
    }

    // True when the item's box overlaps the window on both axes.
    bool isMatched(const Item *item) const
    {
      for (int i=0; i<2; i++)
        if (!dimIntersect(item->bbox[i], this->bounds[i]))
          return false;
      return true;
    }

    float bounds[2][2];
  };

  // NOTE(review): template argument stripped; searchKdTree pushes the int
  // pointed to by the leaf payload, so this was presumably std::vector<int>.
  typedef std::vector QueryResult;

public:

  KdTreeBB()
  {
  }

  // Collect into `result` every stored payload whose subtree interval
  // intersects the query window.
  void query(const Query &q, QueryResult &result) const {
    searchKdTree(this->nodes.data(), 0, 0, q, result);
  }

  // Build the tree over items[0..n).  `items` is reordered during the build
  // and `tmp` is scratch space for median selection.  Node storage is
  // over-allocated to 4*n entries (at least 1).
  void createKdTree(Item *items, int n) {
    this->maxDepth = 0;
    this->nodes.resize(std::max(n*4,1));
    float *tmp = (float*)malloc(sizeof(float)*n);
    int freeNode = 1;
    buildKdTree(this->nodes.data(), tmp, items, n, 0, 0, freeNode);
    fprintf(stderr, "Created a Kd tree for bounding boxes with %d nodes and depth %d.\n", freeNode, this->maxDepth);
    free(tmp);
  }

private:
  // NOTE(review): template argument stripped; presumably std::vector<KdNode>.
  std::vector nodes;
  int maxDepth;   // deepest leaf seen while building (reported in the log)

  // Recursively build the subtree rooted at nodes[thisNode] from items[0..n),
  // alternating the split axis with depth.  `freeNode` is a bump allocator
  // for node slots; children are always allocated in pairs.
  void buildKdTree(KdNode *nodes, float *tmp, Item *items, int n, int depth, int thisNode, int &freeNode) {
    KdNode *node = nodes + thisNode;
    int keyIndex = depth%2;
    if (n==0) {
      node->child_node = -1;   // empty subtree
      return;
    }
    if (n<2) {
      // Leaf: keep the split-axis interval and smuggle the payload pointer
      // into the rightBounds storage.
      node->child_node = 0;
      node->leftBounds[0] = items->bbox[keyIndex][0];
      node->leftBounds[1] = items->bbox[keyIndex][1];
      *((void**)(node->rightBounds)) = items->data;
      // NOTE(review): truncated — presumably
      //   if (this->maxDepth < depth) this->maxDepth = depth;
      if (this->maxDepthmaxDepth = depth;
      return;
    }
    int medianIndex = n/2-1;
    // NOTE(review): truncated — the code that fills tmp[] with the split-axis
    // minima, selects the median value, and partitions items[] around it is
    // missing between the next two statements.
    for (size_t i=0; i=0 && items[r].bbox[keyIndex][0]>median) r--;
    // NOTE(review): truncated — presumably "if (l < r) swap and continue",
    // followed by the bounds initialization.
    if (lleftBounds[0] = node->rightBounds[0] = FLT_MAX;
    node->leftBounds[1] = node->rightBounds[1] = -FLT_MAX;

    // Left child's covering interval on the split axis.
    for (unsigned i=0; i<=medianIndex; i++) {
      // NOTE(review): truncated comparison; presumably
      //   "< node->leftBounds[0]".
      if (items[i].bbox[keyIndex][0]leftBounds[0])
        node->leftBounds[0] = items[i].bbox[keyIndex][0];
      if (items[i].bbox[keyIndex][1]>node->leftBounds[1])
        node->leftBounds[1] = items[i].bbox[keyIndex][1];
    }

    // Right child's covering interval; the loop header and first comparison
    // are truncated (likely "i<n" and "< node->rightBounds[0]").
    for (unsigned i=medianIndex+1; irightBounds[0])
      node->rightBounds[0] = items[i].bbox[keyIndex][0];
      if (items[i].bbox[keyIndex][1]>node->rightBounds[1])
        node->rightBounds[1] = items[i].bbox[keyIndex][1];
    }

    // Recurse into the left half [0, medianIndex], then the right half.
    node->child_node = freeNode;
    freeNode += 2;
    buildKdTree(nodes, tmp, items, medianIndex+1, depth+1, node->child_node, freeNode);
    // NOTE(review): truncated — presumably the right-half recursion guarded
    // by "if (medianIndex < n-1)".
    if (medianIndexchild_node+1, freeNode);
    else
      nodes[node->child_node+1].child_node = -1;
  }

  // Depth-first search: prune any subtree whose covering interval on the
  // current axis does not meet the query; at a leaf, emit the payload.
  void searchKdTree(const KdNode *nodes, uint32_t root, int depth, const Query &query, QueryResult &result) const {
    const KdNode *node = nodes + root;
    int rangeIndex = depth%2;

    if (node->child_node==-1) {
      return;
    }
    if (node->child_node==0) {
      if (dimIntersect(node->leftBounds, query.bounds[rangeIndex]))
      {
        //fprintf(stderr, "%d\n", *(*(int**)(node->rightBounds)));
        result.push_back(*(*(int**)(node->rightBounds)));
      }
      //report(*((void**)(node->rightBounds)));
      return;
    }
    if (dimIntersect(node->leftBounds, query.bounds[rangeIndex]))
      searchKdTree(nodes, node->child_node, depth+1, query, result);
    if (dimIntersect(node->rightBounds, query.bounds[rangeIndex]))
      searchKdTree(nodes, node->child_node+1, depth+1, query, result);
  }
};

#endif
--------------------------------------------------------------------------------