├── metadata
├── README.txt
├── run.sh
├── urls.txt
└── get_metadata.py
├── extract_zipcode_latlon
├── README.txt
├── index
│ ├── raleigh_zipcode_index.txt
│ ├── boston_latlon_index.txt
│ ├── seattle_zipcode_index.txt
│ ├── kcmo_latlon_index.txt
│ ├── austin_latlon_index.txt
│ ├── austin_zipcode_index.txt
│ ├── sf_latlon_index.txt
│ ├── nyc_latlon_index.txt
│ ├── boston_zipcode_index.txt
│ ├── baltimore_zipcode_index.txt
│ ├── chicago_latlon_index.txt
│ ├── chicago_zipcode_index.txt
│ ├── sf_zipcode_index.txt
│ └── kcmo_zipcode_index.txt
├── ijson
│ ├── compat.py
│ ├── backends
│ │ ├── __init__.py
│ │ ├── yajl.py
│ │ ├── yajl2.py
│ │ └── python.py
│ ├── __init__.py
│ ├── utils.py
│ └── common.py
├── run.sh
├── city_list.txt
└── collect_data.py
├── tagcloud
├── data
│ └── skipwords.txt
├── README.txt
└── tagcloud.r
├── latlon_to_zipcode
├── Makefile
├── zipcode
├── convert.sh
├── convert_points.py
├── README.txt
├── convert_shapefile_to_bboxes.py
├── main.cpp
├── Neighborhoods.hpp
└── KdTreeBB.hpp
├── linechart
├── timeline.png
├── timeline_year.png
├── date2count.csv
├── timeline.py
└── timeline_year.py
├── heatmap
├── shapefile
│ ├── nyc_zipcta.dbf
│ ├── nyc_zipcta.sbn
│ ├── nyc_zipcta.sbx
│ ├── nyc_zipcta.shp
│ ├── nyc_zipcta.shx
│ ├── Neighboorhoods.dbf
│ ├── Neighboorhoods.sbn
│ ├── Neighboorhoods.sbx
│ ├── Neighboorhoods.shp
│ ├── Neighboorhoods.shx
│ ├── Neighboorhoods.prj
│ └── nyc_zipcta.prj
├── README.txt
├── dbfUtils.py
├── chicago.py
└── nyc.py
├── download
├── ids
│ ├── deleon_ids.txt
│ ├── redmond_ids.txt
│ ├── wellington_ids.txt
│ ├── honolulu_ids.txt
│ ├── nola_ids.txt
│ ├── slc_ids.txt
│ ├── madison_ids.txt
│ ├── weatherford_ids.txt
│ ├── oaklandnet_ids.txt
│ ├── somervillema_ids.txt
│ ├── boston_ids.txt
│ ├── austin_ids.txt
│ └── edmonton_ids.txt
├── download_json_sf.sh
├── download_json_kcmo.sh
├── download_json_nola.sh
├── download_json_slc.sh
├── download_json_nyc.sh
├── download_json_redmond.sh
├── download_json_seattle.sh
├── download_json_austintexas.sh
├── download_json_boston.sh
├── download_json_chicago.sh
├── download_json_deleon.sh
├── download_json_edmonton.sh
├── download_json_honolulu.sh
├── download_json_raleighnc.sh
├── download_json_madison.sh
├── download_json_oaklandnet.sh
├── download_json_wellingtonfl.sh
├── download_json_baltimorecity.sh
├── download_json_somervillema.sh
├── download_json_weatherfordtx.sh
└── README.txt
├── type_detection
├── ids
│ ├── deleon_ids.txt
│ ├── redmond_ids.txt
│ ├── wellington_ids.txt
│ ├── honolulu_ids.txt
│ ├── nola_ids.txt
│ ├── slc_ids.txt
│ ├── madison_ids.txt
│ ├── weatherford_ids.txt
│ ├── oaklandnet_ids.txt
│ ├── somervillema_ids.txt
│ ├── boston_ids.txt
│ ├── austin_ids.txt
│ └── edmonton_ids.txt
├── README.txt
├── ijson
│ ├── compat.py
│ ├── backends
│ │ ├── __init__.py
│ │ ├── yajl.py
│ │ ├── yajl2.py
│ │ └── python.py
│ ├── __init__.py
│ ├── utils.py
│ └── common.py
├── city_list.txt
├── run.sh
├── detect.py
├── zipcode.txt
└── sample.py
├── barchart
├── README.txt
├── time.csv
├── loc.csv
├── time_loc_number.csv
├── barchart_loc.py
├── barchart_time_loc_num.py
└── barchart_time.py
├── schema_similarity
├── run.sh
├── city_list.txt
└── schema_similarity.py
├── README.md
└── matrix_heatmap
├── boston.html
└── nyc_no311.html
/metadata/README.txt:
--------------------------------------------------------------------------------
1 | HOWTORUN:
2 | $./run.sh
3 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/README.txt:
--------------------------------------------------------------------------------
1 | HOWTORUN:
2 | $./run.sh
3 |
--------------------------------------------------------------------------------
/tagcloud/data/skipwords.txt:
--------------------------------------------------------------------------------
1 | tif
2 | kml
3 | kmz
4 | gis
5 |
--------------------------------------------------------------------------------
/latlon_to_zipcode/Makefile:
--------------------------------------------------------------------------------
# Build the lat/lon -> zipcode lookup tool.
CXX ?= g++
CXXFLAGS ?= -O2 -Wall

all: zipcode

# Real file target: relink only when the source or headers change
# (the old phony 'all' recompiled on every invocation).
zipcode: main.cpp Neighborhoods.hpp KdTreeBB.hpp
	$(CXX) $(CXXFLAGS) -o zipcode main.cpp

clean:
	rm -f zipcode

.PHONY: all clean
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/raleigh_zipcode_index.txt:
--------------------------------------------------------------------------------
1 | 4uh5-z7g3 addrzipcod 20
2 |
--------------------------------------------------------------------------------
/linechart/timeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/linechart/timeline.png
--------------------------------------------------------------------------------
/latlon_to_zipcode/zipcode:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/latlon_to_zipcode/zipcode
--------------------------------------------------------------------------------
/linechart/timeline_year.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/linechart/timeline_year.png
--------------------------------------------------------------------------------
/heatmap/shapefile/nyc_zipcta.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/nyc_zipcta.dbf
--------------------------------------------------------------------------------
/heatmap/shapefile/nyc_zipcta.sbn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/nyc_zipcta.sbn
--------------------------------------------------------------------------------
/heatmap/shapefile/nyc_zipcta.sbx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/nyc_zipcta.sbx
--------------------------------------------------------------------------------
/heatmap/shapefile/nyc_zipcta.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/nyc_zipcta.shp
--------------------------------------------------------------------------------
/heatmap/shapefile/nyc_zipcta.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/nyc_zipcta.shx
--------------------------------------------------------------------------------
/heatmap/shapefile/Neighboorhoods.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/Neighboorhoods.dbf
--------------------------------------------------------------------------------
/heatmap/shapefile/Neighboorhoods.sbn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/Neighboorhoods.sbn
--------------------------------------------------------------------------------
/heatmap/shapefile/Neighboorhoods.sbx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/Neighboorhoods.sbx
--------------------------------------------------------------------------------
/heatmap/shapefile/Neighboorhoods.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/Neighboorhoods.shp
--------------------------------------------------------------------------------
/heatmap/shapefile/Neighboorhoods.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VIDA-NYU/urban-data-study/HEAD/heatmap/shapefile/Neighboorhoods.shx
--------------------------------------------------------------------------------
/latlon_to_zipcode/convert.sh:
--------------------------------------------------------------------------------
# Build the bounding-box data consumed by the lat/lon -> zipcode lookup:
# first convert the shapefile to bboxes, then convert the points.
# -p: do not fail if the directory is left over from a previous run.
mkdir -p converted_shapefile
python convert_shapefile_to_bboxes.py
python convert_points.py
--------------------------------------------------------------------------------
/heatmap/README.txt:
--------------------------------------------------------------------------------
1 | HOWTORUN:
2 | + $python nyc.py
3 | Generate the heatmap for NYC
4 | + $python chicago.py
5 | Generate the heatmap for Chicago
6 |
--------------------------------------------------------------------------------
/download/ids/deleon_ids.txt:
--------------------------------------------------------------------------------
1 | rv3w-5qsf
2 | igce-t68e
3 | 2z9y-pnc7
4 | cts4-q984
5 | ehs9-mzvg
6 | xbf3-af3b
7 | k37x-psqp
8 | 2vcj-mxa2
9 | 299w-4uik
10 |
--------------------------------------------------------------------------------
/type_detection/ids/deleon_ids.txt:
--------------------------------------------------------------------------------
1 | rv3w-5qsf
2 | igce-t68e
3 | 2z9y-pnc7
4 | cts4-q984
5 | ehs9-mzvg
6 | xbf3-af3b
7 | k37x-psqp
8 | 2vcj-mxa2
9 | 299w-4uik
10 |
--------------------------------------------------------------------------------
/metadata/run.sh:
--------------------------------------------------------------------------------
# Fetch metadata for every portal URL listed in urls.txt into $OUTPUT_PATH.
OUTPUT_PATH="data"
# -p: do not fail when the directory already exists (safe re-runs).
mkdir -p "$OUTPUT_PATH"
# read -r: keep backslashes in URLs literal; redirect avoids a useless cat.
while read -r LINE
do
    echo "$LINE"
    python get_metadata.py "$LINE" "$OUTPUT_PATH"
done < urls.txt
--------------------------------------------------------------------------------
/type_detection/README.txt:
--------------------------------------------------------------------------------
1 | HOWTORUN:
2 | + Add information to city_list.txt, which stores city names and paths to JSON files. The format of each line is cityname + ";" + path (e.g. "austin;./austin")
3 | + Run $./run.sh
4 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/boston_latlon_index.txt:
--------------------------------------------------------------------------------
1 | 9yb5-8pvg location_x 23 location_y 24
2 | 9tfg-3jic coordinates 4
3 | snj3-z8hh coordinates 5
4 | 23yb-cufe centroidx 2 centroidy 3
5 | 7idu-4tds coordinates 4
6 | ekiy-2qmz coordinates 5
7 |
--------------------------------------------------------------------------------
/barchart/README.txt:
--------------------------------------------------------------------------------
1 | HOWTORUN:
2 | + run "$python barchart_time_loc_num.py" to generate the bar chart of generic types
3 | + run "$python barchart_loc.py" to generate the bar chart of location types
4 | + run "$python barchart_time.py" to generate the bar chart of time types
5 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/seattle_zipcode_index.txt:
--------------------------------------------------------------------------------
1 | khxu-spqg zip 9
2 | n3gw-htbc zipcode 12
3 | m6tf-bxss zip 10
4 | 4enm-t3vn zip 10
5 | qmtq-5rpt city_state_zip 9
6 | zn5m-qb7h zip 12
7 | r9tj-tvtt city_state_zip 9
8 | c3ri-wwcn city_state_zip 8
9 | 5iir-m2en zip 4 zipcode 5
10 | evxh-x3jp city_state_zip 8
11 |
--------------------------------------------------------------------------------
/type_detection/ijson/compat.py:
--------------------------------------------------------------------------------
'''
Python2/Python3 compatibility utilities.
'''

import sys


# True when running under a Python 2.x interpreter.
IS_PY2 = sys.version_info[0] < 3


if IS_PY2:
    # On Python 2 a byte string is already str, so no conversion is needed.
    b2s = lambda s: s
    # Re-export unichr so callers get a chr() that accepts wide code points.
    chr = unichr
else:
    def b2s(b):
        # Decode UTF-8 bytes into a text string.
        return b.decode('utf-8')
    # Python 3 chr already handles Unicode; re-bind it so importers can use
    # "from compat import chr" regardless of interpreter version.
    chr = chr
--------------------------------------------------------------------------------
/extract_zipcode_latlon/ijson/compat.py:
--------------------------------------------------------------------------------
'''
Python2/Python3 compatibility utilities.
'''

import sys


# True when running under a Python 2.x interpreter.
IS_PY2 = sys.version_info[0] < 3


if IS_PY2:
    # On Python 2 a byte string is already str, so no conversion is needed.
    b2s = lambda s: s
    # Re-export unichr so callers get a chr() that accepts wide code points.
    chr = unichr
else:
    def b2s(b):
        # Decode UTF-8 bytes into a text string.
        return b.decode('utf-8')
    # Python 3 chr already handles Unicode; re-bind it so importers can use
    # "from compat import chr" regardless of interpreter version.
    chr = chr
--------------------------------------------------------------------------------
/download/download_json_sf.sh:
--------------------------------------------------------------------------------
# Download every San Francisco Socrata dataset listed in $IDS as JSON.
path="sf"
url="https://data.sfgov.org"
IDS="ids/sf_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_kcmo.sh:
--------------------------------------------------------------------------------
# Download every Kansas City, MO Socrata dataset listed in $IDS as JSON.
path="kcmo"
url="https://data.kcmo.org"
IDS="ids/kcmo_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_nola.sh:
--------------------------------------------------------------------------------
# Download every New Orleans Socrata dataset listed in $IDS as JSON.
path="nola"
url="http://data.nola.gov"
IDS="ids/nola_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_slc.sh:
--------------------------------------------------------------------------------
# Download every Salt Lake City Socrata dataset listed in $IDS as JSON.
path="slc"
url="http://data.slcgov.com"
IDS="ids/slc_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_nyc.sh:
--------------------------------------------------------------------------------
# Download every NYC Socrata dataset listed in $IDS as JSON.
path="nyc"
url="http://nycopendata.socrata.com"
# Renamed ids -> IDS for consistency with the sibling download scripts.
IDS="ids/nyc_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_redmond.sh:
--------------------------------------------------------------------------------
# Download every Redmond Socrata dataset listed in $IDS as JSON.
path="redmond"
url="http://data.redmond.gov"
IDS="ids/redmond_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_seattle.sh:
--------------------------------------------------------------------------------
# Download every Seattle Socrata dataset listed in $IDS as JSON.
path="seattle"
url="http://data.seattle.gov"
IDS="ids/seattle_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_austintexas.sh:
--------------------------------------------------------------------------------
# Download every Austin Socrata dataset listed in $IDS as JSON.
path="austin"
url="http://data.austintexas.gov"
# Renamed ids -> IDS for consistency with the sibling download scripts.
IDS="ids/austin_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_boston.sh:
--------------------------------------------------------------------------------
# Download every Boston Socrata dataset listed in $IDS as JSON.
path="boston"
url="http://data.cityofboston.gov"
IDS="ids/boston_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_chicago.sh:
--------------------------------------------------------------------------------
# Download every Chicago Socrata dataset listed in $IDS as JSON.
path="chicago"
url="http://data.cityofchicago.org"
IDS="ids/chicago_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_deleon.sh:
--------------------------------------------------------------------------------
# Download every De Leon Socrata dataset listed in $IDS as JSON.
path="deleon"
url="http://data.cityofdeleon.org"
IDS="ids/deleon_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_edmonton.sh:
--------------------------------------------------------------------------------
# Download every Edmonton Socrata dataset listed in $IDS as JSON.
path="edmonton"
url="http://data.edmonton.ca"
IDS="ids/edmonton_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_honolulu.sh:
--------------------------------------------------------------------------------
# Download every Honolulu Socrata dataset listed in $IDS as JSON.
path="honolulu"
url="https://data.honolulu.gov"
IDS="ids/honolulu_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_raleighnc.sh:
--------------------------------------------------------------------------------
# Download every Raleigh Socrata dataset listed in $IDS as JSON.
path="raleigh"
url="https://data.raleighnc.gov"
IDS="ids/raleigh_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/schema_similarity/run.sh:
--------------------------------------------------------------------------------
OUT="./similarity" # Directory that stores similarity scores
# -p: do not fail when the directory already exists (safe re-runs).
mkdir -p "$OUT"

# Each line of city_list.txt is "cityname;path-to-json-dir".
while read -r LINE
do
    arr=(${LINE//;/ })      # split on ';' (assumes paths contain no spaces)
    JSON_PATH=${arr[1]}     # Path to the directory that contains JSON files
    CITY=${arr[0]}          # City name
    python schema_similarity.py "$CITY" "$JSON_PATH" "$OUT"
done < city_list.txt
# NOTE: a stray 'break' used to stop this loop after the first city;
# removed so every city in city_list.txt is processed.
--------------------------------------------------------------------------------
/download/download_json_madison.sh:
--------------------------------------------------------------------------------
# Download every Madison Socrata dataset listed in $IDS as JSON.
path="madison"
url="https://data.cityofmadison.com"
IDS="ids/madison_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_oaklandnet.sh:
--------------------------------------------------------------------------------
# Download every Oakland Socrata dataset listed in $IDS as JSON.
path="oaklandnet"
url="https://data.oaklandnet.com"
IDS="ids/oaklandnet_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_wellingtonfl.sh:
--------------------------------------------------------------------------------
# Download every Wellington, FL Socrata dataset listed in $IDS as JSON.
path="wellington"
url="http://data.wellingtonfl.gov"
IDS="ids/wellington_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_baltimorecity.sh:
--------------------------------------------------------------------------------
# Download every Baltimore Socrata dataset listed in $IDS as JSON.
path="baltimore"
url="https://data.baltimorecity.gov"
IDS="ids/baltimore_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_somervillema.sh:
--------------------------------------------------------------------------------
# Download every Somerville, MA Socrata dataset listed in $IDS as JSON.
path="somervillema"
url="http://data.somervillema.gov"
IDS="ids/somervillema_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/download_json_weatherfordtx.sh:
--------------------------------------------------------------------------------
# Download every Weatherford, TX Socrata dataset listed in $IDS as JSON.
path="weatherford"
url="https://data.weatherfordtx.gov"
IDS="ids/weatherford_ids.txt"
# -p: do not fail when the directory already exists (resumable runs).
mkdir -p "$path"

# Skip ids already downloaded so interrupted runs can be resumed.
while read -r LINE
do
    if [ ! -f "$path/$LINE.json" ]
    then
        wget -t 1 --output-document="$path/$LINE.json" --timeout=10 "$url/api/views/$LINE/rows.json?accessType=DOWNLOAD"
    fi
done < "$IDS"
--------------------------------------------------------------------------------
/download/ids/redmond_ids.txt:
--------------------------------------------------------------------------------
1 | 7v22-4z3a
2 | 4xwk-j2qj
3 | vqdz-eefx
4 | 7zus-64fj
5 | h54f-2ybz
6 | tugv-zk5z
7 | 9nf4-5b5t
8 | 7wz2-cdjk
9 | bs2q-ismz
10 | cp7w-w9h6
11 | wzgk-dadm
12 | xxwc-wtzp
13 | 3imt-pe6h
14 | 7vm8-w63z
15 | 3b7t-empc
16 | 5vmk-ujkk
17 | gw4v-ktut
18 | wv5k-d6vv
19 | 58qf-bc4p
20 | erks-xyhk
21 | gdzn-64j8
22 | jax4-9jsz
23 | nfhm-aphc
24 | yhjf-fsue
25 | yp53-gb6d
26 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/run.sh:
--------------------------------------------------------------------------------
OUTPUT="data" # Directory to store the result
# -p: do not fail when the directory already exists (safe re-runs).
mkdir -p "$OUTPUT"

# Each line of city_list.txt is "cityname;path-to-json-dir".
while read -r LINE
do
    arr=(${LINE//;/ })      # split on ';' (assumes paths contain no spaces)
    JSON_PATH=${arr[1]}     # Path to the directory that contains JSON files
    CITY=${arr[0]}          # City name
    echo "$CITY" "$JSON_PATH" "$OUTPUT"
    python collect_data.py "$CITY" "$JSON_PATH" "$OUTPUT"
done < city_list.txt
# NOTE: a stray 'break' used to stop this loop after the first city;
# removed so every city in city_list.txt is processed.
--------------------------------------------------------------------------------
/type_detection/ids/redmond_ids.txt:
--------------------------------------------------------------------------------
1 | 7v22-4z3a
2 | 4xwk-j2qj
3 | vqdz-eefx
4 | 7zus-64fj
5 | h54f-2ybz
6 | tugv-zk5z
7 | 9nf4-5b5t
8 | 7wz2-cdjk
9 | bs2q-ismz
10 | cp7w-w9h6
11 | wzgk-dadm
12 | xxwc-wtzp
13 | 3imt-pe6h
14 | 7vm8-w63z
15 | 3b7t-empc
16 | 5vmk-ujkk
17 | gw4v-ktut
18 | wv5k-d6vv
19 | 58qf-bc4p
20 | erks-xyhk
21 | gdzn-64j8
22 | jax4-9jsz
23 | nfhm-aphc
24 | yhjf-fsue
25 | yp53-gb6d
26 |
--------------------------------------------------------------------------------
/tagcloud/README.txt:
--------------------------------------------------------------------------------
1 | HOWTORUN:
2 |
3 | $Rscript tagcloud.r data/allcities.txt 500 0.5 10
4 | $Rscript tagcloud.r data/chicago.txt 100 1 5
5 | $Rscript tagcloud.r data/kansas.txt
6 | $Rscript tagcloud.r data/nyc.txt 500 1 10
7 | $Rscript tagcloud.r data/seattle.txt 500 1 10
8 | $Rscript tagcloud.r data/top1000.txt 200 0.3 7
9 | $Rscript tagcloud.r data/top100.txt 200 0.3 7
10 | $Rscript tagcloud.r data/top500.txt 200 0.3 7
11 |
--------------------------------------------------------------------------------
/download/ids/wellington_ids.txt:
--------------------------------------------------------------------------------
1 | duvw-hfu5
2 | 3xrt-ting
3 | ezn9-g8km
4 | 9yb5-p9fa
5 | sa92-xi27
6 | gria-8rsx
7 | 6aei-bxzm
8 | 6snd-rfqw
9 | wkzt-vhm6
10 | fykz-53hw
11 | g9js-e7hn
12 | fg2a-eh5h
13 | 3wx5-9qcg
14 | ku3p-pbxj
15 | pnde-vucq
16 | 2eiw-55u9
17 | neth-2qv6
18 | e5tv-z73h
19 | hcqt-5rjv
20 | appr-veui
21 | sb7m-xbq5
22 | njsp-pbsx
23 | vz2v-akh2
24 | nrbt-ch4g
25 | nhev-i6ea
26 | ckeq-6y99
27 | zg84-4xj4
28 | sxvy-f7ph
29 | yukt-pbhp
30 |
--------------------------------------------------------------------------------
/type_detection/ids/wellington_ids.txt:
--------------------------------------------------------------------------------
1 | duvw-hfu5
2 | 3xrt-ting
3 | ezn9-g8km
4 | 9yb5-p9fa
5 | sa92-xi27
6 | gria-8rsx
7 | 6aei-bxzm
8 | 6snd-rfqw
9 | wkzt-vhm6
10 | fykz-53hw
11 | g9js-e7hn
12 | fg2a-eh5h
13 | 3wx5-9qcg
14 | ku3p-pbxj
15 | pnde-vucq
16 | 2eiw-55u9
17 | neth-2qv6
18 | e5tv-z73h
19 | hcqt-5rjv
20 | appr-veui
21 | sb7m-xbq5
22 | njsp-pbsx
23 | vz2v-akh2
24 | nrbt-ch4g
25 | nhev-i6ea
26 | ckeq-6y99
27 | zg84-4xj4
28 | sxvy-f7ph
29 | yukt-pbhp
30 |
--------------------------------------------------------------------------------
/latlon_to_zipcode/convert_points.py:
--------------------------------------------------------------------------------
# Convert the tab-separated "zipcode<TAB>lon,lat<TAB>lon,lat..." rows in
# points.csv into the text format read by the lat/lon -> zipcode lookup:
# per zipcode, the code, a literal "1", the point count, then one
# "lon lat" line per point.
# Use a context manager so the output file is always flushed and closed,
# even if a malformed line raises (the original handle was never closed).
with open("converted_shapefile/point.txt", "w") as output:
    # NOTE(review): 33144 looks like a total record count expected by the
    # consumer (main.cpp) -- confirm before changing.
    output.write("33144\n")
    with open("converted_shapefile/points.csv") as lines:
        for line in lines:
            fields = line.strip("\n").split("\t")
            # fields[0] is the zipcode; the remaining fields are "lon,lat" pairs.
            output.write(fields[0] + "\n" + "1" + "\n" + str(len(fields) - 1) + "\n")
            for latlon in fields[1:]:
                parts = latlon.split(",")
                # parts beyond [1] (if any) are ignored, as in the original.
                output.write(parts[0] + " " + parts[1] + "\n")
--------------------------------------------------------------------------------
/type_detection/city_list.txt:
--------------------------------------------------------------------------------
1 | austin;./austin
2 | nyc;./nyc
3 | baltimore;./baltimore
4 | boston;./boston
5 | chicago;./chicago
6 | deleon;./deleon
7 | madison;./madison
8 | edmonton;./edmonton
9 | honolulu;./honolulu
10 | kcmo;./kcmo
11 | nola;./nola
12 | oaklandnet;./oaklandnet
13 | raleigh;./raleigh
14 | redmond;./redmond
15 | seattle;./seattle
16 | sf;./sf
17 | slc;./slc
18 | somervillema;./somervillema
19 | weatherford;./weatherford
20 | wellington;./wellington
21 |
--------------------------------------------------------------------------------
/schema_similarity/city_list.txt:
--------------------------------------------------------------------------------
1 | austin;./austin
2 | nyc;./nyc
3 | baltimore;./baltimore
4 | boston;./boston
5 | chicago;./chicago
6 | deleon;./deleon
7 | madison;./madison
8 | edmonton;./edmonton
9 | honolulu;./honolulu
10 | kcmo;./kcmo
11 | nola;./nola
12 | oaklandnet;./oaklandnet
13 | raleigh;./raleigh
14 | redmond;./redmond
15 | seattle;./seattle
16 | sf;./sf
17 | slc;./slc
18 | somervillema;./somervillema
19 | weatherford;./weatherford
20 | wellington;./wellington
21 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/city_list.txt:
--------------------------------------------------------------------------------
1 | austin;./austin
2 | nyc;./nyc
3 | baltimore;./baltimore
4 | boston;./boston
5 | chicago;./chicago
6 | deleon;./deleon
7 | madison;./madison
8 | edmonton;./edmonton
9 | honolulu;./honolulu
10 | kcmo;./kcmo
11 | nola;./nola
12 | oaklandnet;./oaklandnet
13 | raleigh;./raleigh
14 | redmond;./redmond
15 | seattle;./seattle
16 | sf;./sf
17 | slc;./slc
18 | somervillema;./somervillema
19 | weatherford;./weatherford
20 | wellington;./wellington
21 |
--------------------------------------------------------------------------------
/heatmap/shapefile/Neighboorhoods.prj:
--------------------------------------------------------------------------------
1 | PROJCS["NAD_1983_StatePlane_Illinois_East_FIPS_1201_Feet",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",984250.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-88.33333333333333],PARAMETER["Scale_Factor",0.999975],PARAMETER["Latitude_Of_Origin",36.66666666666666],UNIT["Foot_US",0.3048006096012192]]
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/kcmo_latlon_index.txt:
--------------------------------------------------------------------------------
1 | 4bd6-gqwq building_latitude 15 building_longitude 16
2 | d6ps-dq2e intptlat10 16 intptlon10 17
3 | wbx5-smj6 intptlat 11 intptlon 12
4 | 8ejy-sj4q intptlat 7 intptlon 8
5 | isk8-6s6i intptlat10 16 intptlon10 17
6 | byps-gsbw centroid_latitude 8 centroid_longitude 9
7 | rtst-p7t3 intptlat 16 intptlon 17
8 | 2mjh-qv84 building_latitude 15 building_longitude 16
9 | ex28-gm4e building_latitude 15 building_longitude 16
10 | miam-vibb centroid_latitude 8 centroid_longitude 9
11 |
--------------------------------------------------------------------------------
/barchart/time.csv:
--------------------------------------------------------------------------------
1 | 0.36827458256,0.0162337662338,0.481910946197
2 | 0.28855721393,0.0348258706468,0.44776119403
3 | 0.58950617284,0.0,0.524691358025
4 | 0.308089500861,0.0430292598967,0.447504302926
5 | 0.586261980831,0.0,0.20607028754
6 | 0.570135746606,0.00452488687783,0.497737556561
7 | 0.114285714286,0.00879120879121,0.920879120879
8 | 0.173913043478,0.0461956521739,0.383152173913
9 | 0.533980582524,0.00970873786408,0.466019417476
10 | 0.302158273381,0.0503597122302,0.467625899281
11 | 0.403606311044,0.0177310293013,0.48369646882
12 |
--------------------------------------------------------------------------------
/barchart/loc.csv:
--------------------------------------------------------------------------------
1 | 0.422077922078,0.433209647495,0.439239332096
2 | 0.445273631841,0.13184079602,0.440298507463
3 | 0.616255144033,0.512345679012,0.0236625514403
4 | 0.240963855422,0.294320137694,0.173838209983
5 | 0.329073482428,0.562300319489,0.258785942492
6 | 0.359728506787,0.285067873303,0.235294117647
7 | 0.134065934066,0.0131868131868,0.021978021978
8 | 0.440217391304,0.277173913043,0.0
9 | 0.718446601942,0.679611650485,0.708737864078
10 | 0.359712230216,0.237410071942,0.280575539568
11 | 0.401352366642,0.365890308039,0.254545454545
12 |
--------------------------------------------------------------------------------
/download/ids/honolulu_ids.txt:
--------------------------------------------------------------------------------
1 | std8-yakc
2 | ix32-iw26
3 | a96q-gyhq
4 | fdx8-nih6
5 | yef5-h88r
6 | dcdf-43kn
7 | cdq8-ccz7
8 | 6qpe-gunp
9 | dcm2-4u9j
10 | 3dxw-z8rr
11 | 5fhm-vea5
12 | ab7c-s2jr
13 | iz58-35eb
14 | necy-6u7t
15 | ifzd-2k3p
16 | akkw-prc5
17 | pvti-pwka
18 | 3duq-5rzf
19 | k2yj-i4jp
20 | 2swm-eusf
21 | gp9s-unfc
22 | 6x78-edqg
23 | w4ir-s4fd
24 | sbdw-8u88
25 | a3ah-kpkr
26 | 7kck-y29a
27 | nrsx-ip5q
28 | rh9s-z3mn
29 | t6ff-mewd
30 | smuq-xtz4
31 | g5bc-jnuv
32 | uvv2-62xi
33 | 84fd-3fzf
34 | vf2g-cf6g
35 | ef93-z5du
36 |
--------------------------------------------------------------------------------
/type_detection/ids/honolulu_ids.txt:
--------------------------------------------------------------------------------
1 | std8-yakc
2 | ix32-iw26
3 | a96q-gyhq
4 | fdx8-nih6
5 | yef5-h88r
6 | dcdf-43kn
7 | cdq8-ccz7
8 | 6qpe-gunp
9 | dcm2-4u9j
10 | 3dxw-z8rr
11 | 5fhm-vea5
12 | ab7c-s2jr
13 | iz58-35eb
14 | necy-6u7t
15 | ifzd-2k3p
16 | akkw-prc5
17 | pvti-pwka
18 | 3duq-5rzf
19 | k2yj-i4jp
20 | 2swm-eusf
21 | gp9s-unfc
22 | 6x78-edqg
23 | w4ir-s4fd
24 | sbdw-8u88
25 | a3ah-kpkr
26 | 7kck-y29a
27 | nrsx-ip5q
28 | rh9s-z3mn
29 | t6ff-mewd
30 | smuq-xtz4
31 | g5bc-jnuv
32 | uvv2-62xi
33 | 84fd-3fzf
34 | vf2g-cf6g
35 | ef93-z5du
36 |
--------------------------------------------------------------------------------
/barchart/time_loc_number.csv:
--------------------------------------------------------------------------------
1 | 0.589981447124,0.513914656772,0.842764378479
2 | 0.57960199005,0.475124378109,0.902985074627
3 | 0.67695473251,0.609053497942,0.862139917695
4 | 0.339070567986,0.471600688468,0.879518072289
5 | 0.672523961661,0.591054313099,0.811501597444
6 | 0.441176470588,0.676470588235,0.889140271493
7 | 0.134065934066,0.940659340659,0.984615384615
8 | 0.470108695652,0.426630434783,0.959239130435
9 | 0.815533980583,0.533980582524,0.854368932039
10 | 0.467625899281,0.575539568345,0.841726618705
11 | 0.529376408715,0.56664162284,0.863260706236
12 |
--------------------------------------------------------------------------------
/heatmap/shapefile/nyc_zipcta.prj:
--------------------------------------------------------------------------------
1 | PROJCS["NAD_1983_StatePlane_New_York_Long_Island_FIPS_3104_Feet",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",984250.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-74.0],PARAMETER["Standard_Parallel_1",40.66666666666666],PARAMETER["Standard_Parallel_2",41.03333333333333],PARAMETER["Latitude_Of_Origin",40.16666666666666],UNIT["Foot_US",0.3048006096012192]]
--------------------------------------------------------------------------------
/latlon_to_zipcode/README.txt:
--------------------------------------------------------------------------------
1 | - Download US Shapefile: ftp://ftp2.census.gov/geo/tiger/TIGER2013/ZCTA5/tl_2013_us_zcta510.zip
2 | - Extract to ./shapefile/
- If point.txt and bbox.csv do not exist in converted_shapefile/
4 | + Run $./convert.sh to convert original shapefile to point.txt and bbox.csv
5 | - Compile: run $make
6 | - Run a test:
7 |
8 | $./zipcode 40.667098 -73.982363
9 |
10 | - Run a full conversion:
11 |
12 | $./zipcode
13 |
+ Input: latlon.txt: each line in this file refers to a file that contains lat/lon data. Each line of a lat/lon file has the format: lat,lon
15 |
--------------------------------------------------------------------------------
/type_detection/ijson/backends/__init__.py:
--------------------------------------------------------------------------------
1 | from ctypes import util, cdll
2 |
class YAJLImportError(ImportError):
    '''Raised when the yajl shared library cannot be found or loaded.'''
    pass
5 |
def find_yajl(required):
    '''Locate and load the yajl shared library, verifying its major version.

    Parameters:
        required: the major version number the caller needs.

    Returns:
        The loaded ctypes library handle.

    Raises:
        YAJLImportError: if the library is not found or its major
        version differs from ``required``.
    '''
    library_name = util.find_library('yajl')
    if library_name is None:
        raise YAJLImportError('YAJL shared object not found.')
    library = cdll.LoadLibrary(library_name)
    # yajl_version() packs the version as major*10000 + minor*100 + micro.
    version = library.yajl_version()
    major = version // 10000
    minor = (version % 10000) // 100
    micro = version % 100
    if major != required:
        raise YAJLImportError('YAJL version %s.x required, found %s.%s.%s' % (required, major, minor, micro))
    return library
16 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/ijson/backends/__init__.py:
--------------------------------------------------------------------------------
1 | from ctypes import util, cdll
2 |
class YAJLImportError(ImportError):
    '''Raised when the yajl shared library cannot be found or loaded.'''
    pass
5 |
def find_yajl(required):
    '''Locate and load the yajl shared library, verifying its major version.

    Parameters:
        required: the major version number the caller needs.

    Returns:
        The loaded ctypes library handle.

    Raises:
        YAJLImportError: if the library is not found or its major
        version differs from ``required``.
    '''
    library_name = util.find_library('yajl')
    if library_name is None:
        raise YAJLImportError('YAJL shared object not found.')
    library = cdll.LoadLibrary(library_name)
    # yajl_version() packs the version as major*10000 + minor*100 + micro.
    version = library.yajl_version()
    major = version // 10000
    minor = (version % 10000) // 100
    micro = version % 100
    if major != required:
        raise YAJLImportError('YAJL version %s.x required, found %s.%s.%s' % (required, major, minor, micro))
    return library
16 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/austin_latlon_index.txt:
--------------------------------------------------------------------------------
1 | b4y9-5x39 longitude 6 latitude 7
2 | 64cq-wf5u y 2 x 3
3 | nq9x-w8sx lat_dd_wgs84 19 lon_dd_wgs84 20
4 | szku-46rx y 2 x 3
5 | jbaf-xebm lat_dd_wgs84 19 lon_dd_wgs84 20
6 | r6sg-xka2 longitude 6 latitude 7
7 | gr59-ids7 longitude 6 latitude 7
8 | r5kt-xq3y lat_dd_wgs84 5 lon_dd_wgs84 6
9 | 3gc4-g537 latitude 6 longitude 7
10 | ei2n-fehk longitude 6 latitude 7
11 | 4c6h-tv2y longitude 6 latitude 7
12 | ga9y-ypai lat_dd_wgs84 19 lon_dd_wgs84 20
13 | tx8s-62r6 lat_dd_wgs84 19 lon_dd_wgs84 20
14 | 5gjn-nmcf latitude 5 longitude 6
15 | b6cd-bhbk lat_dd_wgs84 5 lon_dd_wgs84 6
16 |
--------------------------------------------------------------------------------
/download/ids/nola_ids.txt:
--------------------------------------------------------------------------------
1 | 2mq3-p3xc
2 | aexs-y2ma
3 | j4pt-mz93
4 | mesf-89bm
5 | 5fn8-vtui
6 | mbxb-ejdy
7 | r82n-4xx7
8 | rv3g-ypg7
9 | 3utr-tkrh
10 | 4uek-d54m
11 | 4ts9-u65y
12 | 28ec-c8d6
13 | e3wd-h7q2
14 | hpm5-48nj
15 | d9hd-x6nn
16 | 65t6-gi32
17 | rcm3-fn58
18 | kpc9-4t3j
19 | dasg-fxyv
20 | 5ktx-e9wc
21 | a6tx-de8c
22 | cba3-mptn
23 | 8tsm-38gz
24 | utqx-f83p
25 | pqgq-8it9
26 | sgfw-jy2v
27 | 8pqz-ftzc
28 | uh5a-f7uw
29 | 44ct-56tr
30 | u6yx-v2tw
31 | aned-jbk9
32 | q4nv-wks6
33 | mce3-wqh4
34 | ypza-44w8
35 | jsyu-nz5r
36 | raeu-276s
37 | kg5e-js8i
38 | 347f-j9w7
39 | vgrg-et3t
40 | 5hq5-im7i
41 | d2is-2r79
42 | 4d8g-jra3
43 |
--------------------------------------------------------------------------------
/type_detection/ids/nola_ids.txt:
--------------------------------------------------------------------------------
1 | 2mq3-p3xc
2 | aexs-y2ma
3 | j4pt-mz93
4 | mesf-89bm
5 | 5fn8-vtui
6 | mbxb-ejdy
7 | r82n-4xx7
8 | rv3g-ypg7
9 | 3utr-tkrh
10 | 4uek-d54m
11 | 4ts9-u65y
12 | 28ec-c8d6
13 | e3wd-h7q2
14 | hpm5-48nj
15 | d9hd-x6nn
16 | 65t6-gi32
17 | rcm3-fn58
18 | kpc9-4t3j
19 | dasg-fxyv
20 | 5ktx-e9wc
21 | a6tx-de8c
22 | cba3-mptn
23 | 8tsm-38gz
24 | utqx-f83p
25 | pqgq-8it9
26 | sgfw-jy2v
27 | 8pqz-ftzc
28 | uh5a-f7uw
29 | 44ct-56tr
30 | u6yx-v2tw
31 | aned-jbk9
32 | q4nv-wks6
33 | mce3-wqh4
34 | ypza-44w8
35 | jsyu-nz5r
36 | raeu-276s
37 | kg5e-js8i
38 | 347f-j9w7
39 | vgrg-et3t
40 | 5hq5-im7i
41 | d2is-2r79
42 | 4d8g-jra3
43 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/austin_zipcode_index.txt:
--------------------------------------------------------------------------------
1 | u3yy-shmz association_zip_code 3 primary_contact_zipcode 8 secondary_contact_zipcode 13
2 | hqa6-stx4 zip_code 1
3 | 9bpw-2ysw zip_code 13
4 | ur6a-fvpc zip 9
5 | ajpy-mwjj zip_code 1
6 | gzyt-t2by zip_code 1
7 | rfif-mmvg zip 9
8 | gt3n-akq9 zip4 297 zip5 298
9 | nmp9-45v2 giinstalledzip 8
10 | 3ebq-e9iz zip 18
11 | rb6p-jsp4 zip 9
12 | 3w87-zbw7 zip 9
13 | ecmv-9xxi zip_code 1
14 | nynz-w2da zip 9
15 | b73m-kiye zip_code 1
16 | ct7f-fbbn primary_contact_zipcode 9 secondary_contact_zipcode 14
17 | qzi7-nx8g zip_code 13
18 | g9bx-8meu zip_code 1
19 | iuw2-kwij zipcode 0
20 | 8zu2-guks zip 9
21 |
--------------------------------------------------------------------------------
/download/README.txt:
--------------------------------------------------------------------------------
1 | HOWTORUN:
2 |
3 | $./download_json_austintexas.sh
4 | $./download_json_baltimorecity.sh
5 | $./download_json_boston.sh
6 | $./download_json_chicago.sh
7 | $./download_json_deleon.sh
8 | $./download_json_edmonton.sh
9 | $./download_json_honolulu.sh
10 | $./download_json_kcmo.sh
11 | $./download_json_madison.sh
12 | $./download_json_nola.sh
13 | $./download_json_oaklandnet.sh
14 | $./download_json_raleighnc.sh
15 | $./download_json_redmond.sh
16 | $./download_json_seattle.sh
17 | $./download_json_sf.sh
18 | $./download_json_slc.sh
19 | $./download_json_somervillema.sh
20 | $./download_json_weatherfordtx.sh
21 | $./download_json_wellingtonfl.sh
22 |
--------------------------------------------------------------------------------
/download/ids/slc_ids.txt:
--------------------------------------------------------------------------------
1 | syic-a6rq
2 | g5ni-ehfe
3 | myq9-p4zu
4 | rtcx-we7f
5 | duwd-wq3e
6 | 7faz-pyum
7 | s62m-p2ci
8 | qcea-2qur
9 | p2dy-h2sr
10 | 9b2y-pidk
11 | usi3-xfks
12 | q9gq-vb9z
13 | wng6-vv2r
14 | vhm2-rnvr
15 | fthp-f7h3
16 | 5h33-khmk
17 | kz9n-dpay
18 | dait-ivxs
19 | uh9c-a9zt
20 | 3kgt-vcwy
21 | tn2w-p83j
22 | 5f5e-rfen
23 | s79j-pjmr
24 | ugfz-sxyz
25 | 5ate-q28a
26 | jww7-nxe8
27 | un62-z97s
28 | vytj-hddx
29 | e82v-m3sg
30 | epu4-hi64
31 | m8iz-py6s
32 | smri-mj5y
33 | 3auw-s6ah
34 | 5gsj-w587
35 | 79jz-dibw
36 | 8m2r-p53k
37 | 92gv-x3hr
38 | jphp-kas7
39 | agjx-fggm
40 | wrtx-pisx
41 | sx9e-aefu
42 | 7vfv-qtsf
43 | k35s-9qmi
44 | i98d-m2z6
45 |
--------------------------------------------------------------------------------
/type_detection/ids/slc_ids.txt:
--------------------------------------------------------------------------------
1 | syic-a6rq
2 | g5ni-ehfe
3 | myq9-p4zu
4 | rtcx-we7f
5 | duwd-wq3e
6 | 7faz-pyum
7 | s62m-p2ci
8 | qcea-2qur
9 | p2dy-h2sr
10 | 9b2y-pidk
11 | usi3-xfks
12 | q9gq-vb9z
13 | wng6-vv2r
14 | vhm2-rnvr
15 | fthp-f7h3
16 | 5h33-khmk
17 | kz9n-dpay
18 | dait-ivxs
19 | uh9c-a9zt
20 | 3kgt-vcwy
21 | tn2w-p83j
22 | 5f5e-rfen
23 | s79j-pjmr
24 | ugfz-sxyz
25 | 5ate-q28a
26 | jww7-nxe8
27 | un62-z97s
28 | vytj-hddx
29 | e82v-m3sg
30 | epu4-hi64
31 | m8iz-py6s
32 | smri-mj5y
33 | 3auw-s6ah
34 | 5gsj-w587
35 | 79jz-dibw
36 | 8m2r-p53k
37 | 92gv-x3hr
38 | jphp-kas7
39 | agjx-fggm
40 | wrtx-pisx
41 | sx9e-aefu
42 | 7vfv-qtsf
43 | k35s-9qmi
44 | i98d-m2z6
45 |
--------------------------------------------------------------------------------
/metadata/urls.txt:
--------------------------------------------------------------------------------
1 | http://data.austintexas.gov austin
2 | https://data.baltimorecity.gov baltimore
3 | http://data.cityofchicago.org chicago
4 | http://data.cityofdeleon.org deleon
5 | http://data.edmonton.ca edmonton
6 | http://data.nola.gov nola
7 | https://data.sfgov.org sf
8 | http://data.seattle.gov seattle
9 | http://nycopendata.socrata.com nyc
10 | https://data.honolulu.gov honolulu
11 | http://data.somervillema.gov somervillema
12 | https://data.cityofboston.gov boston
13 | http://data.slcgov.com slc
14 | https://data.oaklandnet.com oaklandnet
15 | https://data.cityofmadison.com madison
16 | https://data.kcmo.org kcmo
17 | https://data.raleighnc.gov raleigh
18 | https://data.redmond.gov redmond
19 | https://data.weatherfordtx.gov weatherford
20 | http://data.wellingtonfl.gov wellington
21 |
--------------------------------------------------------------------------------
/latlon_to_zipcode/convert_shapefile_to_bboxes.py:
--------------------------------------------------------------------------------
"""Convert the Census ZCTA shapefile into two tab-separated text files.

- converted_shapefile/bboxes.csv: one line per shape:
  zipcode<TAB>bbox[0]<TAB>bbox[1]<TAB>bbox[2]<TAB>bbox[3]
  (bbox values are in shapefile order: xmin, ymin, xmax, ymax)
- converted_shapefile/points.csv: one line per shape: the zipcode
  followed by every polygon vertex as a "x,y" field.
"""
import shapefile

sf = shapefile.Reader("shapefile/tl_2013_us_zcta510.shp")
shapes = sf.shapes()
records = sf.records()

# Context managers guarantee the output files are flushed and closed
# even if a malformed shape raises midway through the loop.
with open("converted_shapefile/bboxes.csv", "w") as bboxes, \
     open("converted_shapefile/points.csv", "w") as points:
    # Emit the bounding box and the vertex list for every shape;
    # records[i][0] holds the ZCTA (zipcode) identifier.
    for i in range(len(shapes)):
        bbox_fields = [str(records[i][0])] + [str(v) for v in shapes[i].bbox]
        bboxes.write("\t".join(bbox_fields) + "\n")

        point_fields = [str(records[i][0])]
        point_fields.extend(str(p[0]) + "," + str(p[1]) for p in shapes[i].points)
        points.write("\t".join(point_fields) + "\n")
24 |
25 |
--------------------------------------------------------------------------------
/linechart/date2count.csv:
--------------------------------------------------------------------------------
1 | 201009 17
2 | 201008 16
3 | 201110 341
4 | 201403 546
5 | 201402 498
6 | 201401 474
7 | 201407 276
8 | 201406 400
9 | 201405 514
10 | 201404 682
11 | 201312 477
12 | 201311 1003
13 | 201310 361
14 | 201205 145
15 | 201204 116
16 | 201207 294
17 | 201206 126
18 | 201201 168
19 | 201203 130
20 | 201202 226
21 | 201010 13
22 | 201011 15
23 | 201012 45
24 | 201111 121
25 | 201208 350
26 | 201112 189
27 | 201108 152
28 | 201109 491
29 | 201003 6
30 | 201002 3
31 | 201209 238
32 | 201007 7
33 | 201006 4
34 | 201005 19
35 | 201004 95
36 | 201308 231
37 | 201309 318
38 | 201304 375
39 | 201305 278
40 | 201306 500
41 | 201307 227
42 | 201301 385
43 | 201302 503
44 | 201303 871
45 | 201212 229
46 | 201210 186
47 | 201211 176
48 | 201106 83
49 | 201107 115
50 | 201104 182
51 | 201105 169
52 | 201102 50
53 | 201103 13
54 | 201101 89
55 |
--------------------------------------------------------------------------------
/type_detection/run.sh:
--------------------------------------------------------------------------------
a1="./sample_result"    # Directory to store sampling results (OUTPUT)
a2="./detection_result" # Directory to store detection results (OUTPUT)
a3="./ids"              # Directory that stores dataset ids (INPUT)

# Create the output directories once, up front; -p makes this a no-op
# when they already exist (the old script re-ran mkdir on every loop
# iteration, emitting "File exists" errors after the first city).
mkdir -p "$a1" "$a2"

# Each line of city_list.txt has the form: CITY;JSON_PATH
while read LINE
do
    arr=(${LINE//;/ })
    JSON_PATH=${arr[1]} # Path to the directory that contains JSON files
    CITY=${arr[0]}      # City name
    python sample.py "$JSON_PATH" "$a3/${CITY}_ids.txt" "$a1" "$CITY" # Sampling data
    python detect.py "$a1" "$a3/${CITY}_ids.txt" "$a2" "$CITY" # Detect type based on sampled data
    # break
done < city_list.txt

#Collect information to generate the barchart
a4="generic.csv" #(Name of output file)
a5="loc.csv" #(Name of output file)
a6="time.csv" #(Name of output file)
JSON_PATH="./" # Base path passed to collect.py (was mislabeled as an output file)
echo "$a3" "$JSON_PATH" "$a4" "$a5" "$a6"
python collect.py "$a4" "$a5" "$a6" "$a3" "$JSON_PATH" "$a2"
24 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/sf_latlon_index.txt:
--------------------------------------------------------------------------------
1 | f3fp-qypd x 9 y 10
2 | akvp-jmwa x 9 y 10
3 | 5q3n-q6kw x 8 y 9
4 | snsg-xkfg x 9 y 10
5 | fa4n-5inm latitude 15 longitude 16
6 | tkzw-k3nq latitude 15 longitude 16
7 | 337t-q2b4 latitude 15 longitude 16
8 | 3fig-nit3 x 8 y 9
9 | rwxz-qq2e x 9 y 10
10 | hqjf-mpne x 9 y 10
11 | rqzj-sfat latitude 14 longitude 15
12 | fi3h-6q7h latitude 14 longitude 15
13 | ytdu-3kte x 9 y 10
14 | 3nwz-3n68 x 9 y 10
15 | te8q-3pjv x 9 y 10
16 | u563-z39k x 9 y 10
17 | 3hay-yzem x 9 y 10
18 | 5wbp-dwzt latitude 10 longitude 11
19 | gxxq-x39z x 9 y 10
20 | di4e-7emh x 9 y 10
21 | 7ybj-xpju x 6 y 7
22 | px6q-wjh5 latitude 14 longitude 15
23 | sf93-6dmr latitude 22 longitude 23
24 | xu5w-5kgd latitude 7 longitude 8
25 | xtjp-rjug latitude 7 longitude 8
26 | 99js-dqmz x 9 y 10
27 | 3twj-ueew x 9 y 10
28 | kaw6-dfy2 x 9 y 10
29 | uh2u-53ta x 2 y 3
30 | yani-faij x 9 y 10
31 | 4ang-frd3 x 9 y 10
32 | tmnf-yvry x 9 y 10
33 | sh6e-276z x 9 y 10
34 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/nyc_latlon_index.txt:
--------------------------------------------------------------------------------
1 | txfh-8uny latitude 4 longitude 5
2 | ckr8-miyf latitude 49 longitude 50
3 | 8ne5-dgau latitude 45 longitude 46
4 | iy29-ps3i latitude 49 longitude 50
5 | aiep-cw6w latitude 49 longitude 50
6 | bfxz-fd5f latitude 49 longitude 50
7 | nbh5-finw latitude 45 longitude 46
8 | xgwb-peav latitude 49 longitude 50
9 | iru4-p66v latitude 2 longitude 3
10 | cwr9-upi8 latitude 45 longitude 46
11 | my38-3fq2 latitude 49 longitude 50
12 | w7w5-eh7d latitude 26 longitude 27
13 | sa3i-xbm2 latitude 8 longitude 9
14 | 6wrh-b4p8 latitude 49 longitude 50
15 | mfbr-gvpd latitude 49 longitude 50
16 | q5vx-2yhj latitude 10 longitude 11
17 | vwdc-epd2 latitude 21 longitude 22
18 | wwjt-8agi latitude 49 longitude 50
19 | qvr2-gw69 latitude 49 longitude 50
20 | jhqa-6dzr latitude 8 longitude 9
21 | ypm5-ig5p latitude 6 longitude 7
22 | gbih-sbdw latitude 49 longitude 50
23 | sxx4-xhzg latitude 4 longitude 5
24 | anfv-hhsi latitude 10 longitude 11
25 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/boston_zipcode_index.txt:
--------------------------------------------------------------------------------
1 | pvrp-csfj location_zipcode 22
2 | 7wt6-9hdh location_zipcode 22
3 | v6fi-4hdu location_zipcode 22
4 | qz58-xbtz location_zipcode 22
5 | rtbk-4hc4 location_zipcode 22
6 | uqjh-rsbj location_zipcode 22
7 | f4ev-s6tx location_zipcode 22
8 | j2a7-cdyk location_zipcode 22
9 | x8in-twjt zip_code 11
10 | enuq-8kmn location_zipcode 22
11 | ehda-cg39 location_zipcode 22
12 | k9pj-rna9 location_zipcode 22
13 | dtud-qyw9 location_zipcode 22
14 | gfvf-83vt location_zipcode 22
15 | csea-5edd zip 11
16 | c7cs-bcq5 location_zipcode 13
17 | yfam-b7bg location_zipcode 22
18 | c3yg-bknc location_zipcode 22
19 | dp5b-mgir location_zipcode 22
20 | w6u4-3pp8 location_zipcode 22
21 | mwxg-8ix6 location_zipcode 13
22 | mbdv-4g6k location_zipcode 22
23 | d5jd-s3az location_zipcode 5
24 | vivu-bt5s location_zipcode 22
25 | ynt4-n6g9 location_zipcode 22
26 | effb-uspk zip 11
27 | 4kc2-vxvv location_zipcode 22
28 | hkne-4xqd location_zipcode 22
29 |
--------------------------------------------------------------------------------
/type_detection/ijson/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Iterative JSON parser.
3 |
4 | Main API:
5 |
6 | - ``ijson.parse``: iterator returning parsing events with the object tree context,
7 | see ``ijson.common.parse`` for docs.
8 |
9 | - ``ijson.items``: iterator returning Python objects found under a specified prefix,
10 | see ``ijson.common.items`` for docs.
11 |
12 | Top-level ``ijson`` module tries to automatically find and import a suitable
13 | parsing backend. You can also explicitly import a required backend from
14 | ``ijson.backends``.
15 | '''
16 |
from ijson.common import JSONError, IncompleteJSONError, ObjectBuilder
from ijson.backends import YAJLImportError

# Select a parsing backend: prefer the yajl2 bindings, then yajl, and
# fall back to the pure-Python backend when no yajl shared library can
# be loaded (each binding raises YAJLImportError on failure).
try:
    import ijson.backends.yajl2 as backend
except YAJLImportError:
    try:
        import ijson.backends.yajl as backend
    except YAJLImportError:
        import ijson.backends.python as backend


# Expose the selected backend's entry points as the package-level API.
basic_parse = backend.basic_parse
parse = backend.parse
items = backend.items
32 |
--------------------------------------------------------------------------------
/download/ids/madison_ids.txt:
--------------------------------------------------------------------------------
1 | 4ng4-nf3c
2 | u7ns-6d4x
3 | 4gss-84dk
4 | 3kgn-2bpa
5 | 99g9-p6ki
6 | d686-rvcw
7 | wwtc-pw9p
8 | gxhk-44q9
9 | rtyh-6ucr
10 | spu7-hym6
11 | p4au-pwd2
12 | kfv2-f9ss
13 | svr6-6gvb
14 | f5sy-kcer
15 | iig4-49xp
16 | hb5z-buaz
17 | fvxz-66tr
18 | t5vc-2fm7
19 | 7dbz-yi8h
20 | 9u47-9h3u
21 | 6ym2-385s
22 | b7xj-5uyg
23 | f4km-tx65
24 | vf3w-yibt
25 | hwdm-jhzj
26 | t89i-9tka
27 | geuk-tayq
28 | 32m2-fqa2
29 | efhs-2ube
30 | 2tcz-87nc
31 | jbpd-4xxj
32 | tbc5-gynu
33 | 2a9g-qge2
34 | cq85-dipd
35 | 7shu-mkhv
36 | kbjz-kaud
37 | qcfn-n3we
38 | gqa8-dxgc
39 | 9wf4-wytn
40 | pveg-u4zq
41 | ipd7-scz8
42 | j4t2-fn7a
43 | jvs7-37vw
44 | 38c6-a8m4
45 | 3a6w-jfnq
46 | r98k-9799
47 | hi83-zfb3
48 | r7yp-j3t7
49 | mser-b9tq
50 | 3syz-mw6z
51 | s9b5-pi49
52 | 5eh3-n3ms
53 | bgrv-wya2
54 | mz4p-68jd
55 | wz7i-taa5
56 | vb36-v77y
57 | t6n5-id96
58 | q5sg-7k3x
59 | 4kgp-uj2z
60 | 84vr-dpbk
61 | miyr-ap4j
62 | qdb8-htgr
63 | b8bg-px3e
64 | kfv3-7qjn
65 | pi5r-e26i
66 | wf2u-ezp8
67 | m8qu-5gbp
68 | r6nk-wjfh
69 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/ijson/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Iterative JSON parser.
3 |
4 | Main API:
5 |
6 | - ``ijson.parse``: iterator returning parsing events with the object tree context,
7 | see ``ijson.common.parse`` for docs.
8 |
9 | - ``ijson.items``: iterator returning Python objects found under a specified prefix,
10 | see ``ijson.common.items`` for docs.
11 |
12 | Top-level ``ijson`` module tries to automatically find and import a suitable
13 | parsing backend. You can also explicitly import a required backend from
14 | ``ijson.backends``.
15 | '''
16 |
from ijson.common import JSONError, IncompleteJSONError, ObjectBuilder
from ijson.backends import YAJLImportError

# Select a parsing backend: prefer the yajl2 bindings, then yajl, and
# fall back to the pure-Python backend when no yajl shared library can
# be loaded (each binding raises YAJLImportError on failure).
try:
    import ijson.backends.yajl2 as backend
except YAJLImportError:
    try:
        import ijson.backends.yajl as backend
    except YAJLImportError:
        import ijson.backends.python as backend


# Expose the selected backend's entry points as the package-level API.
basic_parse = backend.basic_parse
parse = backend.parse
items = backend.items
32 |
--------------------------------------------------------------------------------
/type_detection/ids/madison_ids.txt:
--------------------------------------------------------------------------------
1 | 4ng4-nf3c
2 | u7ns-6d4x
3 | 4gss-84dk
4 | 3kgn-2bpa
5 | 99g9-p6ki
6 | d686-rvcw
7 | wwtc-pw9p
8 | gxhk-44q9
9 | rtyh-6ucr
10 | spu7-hym6
11 | p4au-pwd2
12 | kfv2-f9ss
13 | svr6-6gvb
14 | f5sy-kcer
15 | iig4-49xp
16 | hb5z-buaz
17 | fvxz-66tr
18 | t5vc-2fm7
19 | 7dbz-yi8h
20 | 9u47-9h3u
21 | 6ym2-385s
22 | b7xj-5uyg
23 | f4km-tx65
24 | vf3w-yibt
25 | hwdm-jhzj
26 | t89i-9tka
27 | geuk-tayq
28 | 32m2-fqa2
29 | efhs-2ube
30 | 2tcz-87nc
31 | jbpd-4xxj
32 | tbc5-gynu
33 | 2a9g-qge2
34 | cq85-dipd
35 | 7shu-mkhv
36 | kbjz-kaud
37 | qcfn-n3we
38 | gqa8-dxgc
39 | 9wf4-wytn
40 | pveg-u4zq
41 | ipd7-scz8
42 | j4t2-fn7a
43 | jvs7-37vw
44 | 38c6-a8m4
45 | 3a6w-jfnq
46 | r98k-9799
47 | hi83-zfb3
48 | r7yp-j3t7
49 | mser-b9tq
50 | 3syz-mw6z
51 | s9b5-pi49
52 | 5eh3-n3ms
53 | bgrv-wya2
54 | mz4p-68jd
55 | wz7i-taa5
56 | vb36-v77y
57 | t6n5-id96
58 | q5sg-7k3x
59 | 4kgp-uj2z
60 | 84vr-dpbk
61 | miyr-ap4j
62 | qdb8-htgr
63 | b8bg-px3e
64 | kfv3-7qjn
65 | pi5r-e26i
66 | wf2u-ezp8
67 | m8qu-5gbp
68 | r6nk-wjfh
69 |
--------------------------------------------------------------------------------
/tagcloud/tagcloud.r:
--------------------------------------------------------------------------------
require("tm");
require("wordcloud");

# Usage: Rscript tagcloud.r <tags_file> [<max_word> <min_scale> <max_scale>]
input <- commandArgs(trailingOnly = TRUE);

# Defaults used when only the tags file is supplied.
min_scale = 1;
max_scale = 10;
max_word = 700;
# Only read the optional arguments when all three are present.
# The original guard (`length(input) != 1`) also fired for 2- or
# 3-argument calls, turning the missing slots into NA.
if (length(input) >= 4)
{
    max_word = as.numeric(input[2]);
    min_scale = as.numeric(input[3]);
    max_scale = as.numeric(input[4]);
}
print(min_scale);
print(max_word);
#tags <- Corpus (DirSource(input));
tags <- Corpus(VectorSource(readLines(input[1])));
tags <- tm_map(tags, stripWhitespace);
tags <- tm_map(tags, tolower);
tags <- tm_map(tags, removeWords, stopwords("english"));
skipwords <- as.character(readLines("data/skipwords.txt"));
tags <- tm_map(tags, removeWords, skipwords);
#par(mfrow=c(3,1))
#wordcloud(tags, scale=c(10,0.3), max.words=5000, random.order=FALSE, rot.per=0.35, use.r.layout=TRUE, colors=brewer.pal(8, "Dark2"));
wordcloud(tags, scale=c(max_scale,min_scale), max.words=max_word, random.order=FALSE, rot.per=0.35, use.r.layout=TRUE, colors=brewer.pal(8, "Dark2"));
25 |
--------------------------------------------------------------------------------
/download/ids/weatherford_ids.txt:
--------------------------------------------------------------------------------
1 | hybg-vty2
2 | ve8y-5avw
3 | 2ek5-qq7s
4 | 8bm3-mh2f
5 | j2k9-jf7m
6 | pi2r-w2wn
7 | scg7-wbcw
8 | rmvj-bpp5
9 | d6ka-5zdp
10 | memn-fv5t
11 | 4kr8-nw7w
12 | kyrg-v24v
13 | x7ik-kbby
14 | e68q-zdjm
15 | bu65-w3ez
16 | 3n5h-hdsi
17 | bgjw-54en
18 | 3bed-i88z
19 | gpjj-upqz
20 | n4m9-h86u
21 | 6edd-iufq
22 | cfuq-zji7
23 | ny9a-t4pz
24 | fq45-73gh
25 | kwwr-agj9
26 | 5rrs-sgue
27 | 3aqf-4m7m
28 | qekk-5pfa
29 | 7tdp-p5kk
30 | s2dc-5w34
31 | 8bte-7cqp
32 | 3xxn-pcj5
33 | bg9q-v7x9
34 | gax8-krdx
35 | c4zr-3y7x
36 | dngh-t9qr
37 | cuz9-rcre
38 | vy7g-yivs
39 | xzd8-2b3e
40 | vhv4-pkrx
41 | hphz-3y4w
42 | t7t8-t82j
43 | a9tx-k4s7
44 | ysfs-8f2v
45 | u2u7-hf87
46 | t6i6-pnn2
47 | mvy8-6q2t
48 | uhdw-jeqx
49 | bpma-ut4v
50 | idj9-c9dm
51 | 88pd-2kqk
52 | rmsq-r7j2
53 | c68f-eup2
54 | 32ak-r84i
55 | q7xu-xtzf
56 | v6eu-rt9x
57 | 39gt-rxzc
58 | 5t7p-7njb
59 | cdyu-igpi
60 | 3usd-zinv
61 | ce3q-vytn
62 | v44e-g82x
63 | vrhc-6z87
64 | dy5q-p5dt
65 | k7b9-7zjb
66 | 648e-teft
67 | 8m3c-9aap
68 | catd-f4rf
69 | d5tp-wn69
70 | fgmy-jv95
71 | ivjg-v96d
72 | cb27-ccqz
73 |
--------------------------------------------------------------------------------
/type_detection/ids/weatherford_ids.txt:
--------------------------------------------------------------------------------
1 | hybg-vty2
2 | ve8y-5avw
3 | 2ek5-qq7s
4 | 8bm3-mh2f
5 | j2k9-jf7m
6 | pi2r-w2wn
7 | scg7-wbcw
8 | rmvj-bpp5
9 | d6ka-5zdp
10 | memn-fv5t
11 | 4kr8-nw7w
12 | kyrg-v24v
13 | x7ik-kbby
14 | e68q-zdjm
15 | bu65-w3ez
16 | 3n5h-hdsi
17 | bgjw-54en
18 | 3bed-i88z
19 | gpjj-upqz
20 | n4m9-h86u
21 | 6edd-iufq
22 | cfuq-zji7
23 | ny9a-t4pz
24 | fq45-73gh
25 | kwwr-agj9
26 | 5rrs-sgue
27 | 3aqf-4m7m
28 | qekk-5pfa
29 | 7tdp-p5kk
30 | s2dc-5w34
31 | 8bte-7cqp
32 | 3xxn-pcj5
33 | bg9q-v7x9
34 | gax8-krdx
35 | c4zr-3y7x
36 | dngh-t9qr
37 | cuz9-rcre
38 | vy7g-yivs
39 | xzd8-2b3e
40 | vhv4-pkrx
41 | hphz-3y4w
42 | t7t8-t82j
43 | a9tx-k4s7
44 | ysfs-8f2v
45 | u2u7-hf87
46 | t6i6-pnn2
47 | mvy8-6q2t
48 | uhdw-jeqx
49 | bpma-ut4v
50 | idj9-c9dm
51 | 88pd-2kqk
52 | rmsq-r7j2
53 | c68f-eup2
54 | 32ak-r84i
55 | q7xu-xtzf
56 | v6eu-rt9x
57 | 39gt-rxzc
58 | 5t7p-7njb
59 | cdyu-igpi
60 | 3usd-zinv
61 | ce3q-vytn
62 | v44e-g82x
63 | vrhc-6z87
64 | dy5q-p5dt
65 | k7b9-7zjb
66 | 648e-teft
67 | 8m3c-9aap
68 | catd-f4rf
69 | d5tp-wn69
70 | fgmy-jv95
71 | ivjg-v96d
72 | cb27-ccqz
73 |
--------------------------------------------------------------------------------
/download/ids/oaklandnet_ids.txt:
--------------------------------------------------------------------------------
1 | kzer-wcj5
2 | ym6k-rx7a
3 | qyh9-i9dw
4 | fw6y-ui8e
5 | hfn8-32wd
6 | 6nxw-pzj5
7 | e4gx-8458
8 | ajaj-fa72
9 | uq9e-ncfu
10 | uyih-vzuc
11 | kq8i-6bzk
12 | qezs-bkz9
13 | 7dcq-8atp
14 | dutj-j949
15 | sduu-bfki
16 | muvj-xztc
17 | va73-j3gz
18 | 3y2t-a5mc
19 | t35d-4vyj
20 | x678-6ymc
21 | kx4s-uqgi
22 | j4xf-2t25
23 | dxdg-872h
24 | creu-dzki
25 | 4jcx-enxf
26 | b8mb-8tti
27 | quth-gb8e
28 | wm75-yhqe
29 | 65yj-mc7w
30 | erq5-ht9e
31 | aahx-6i3p
32 | h2rc-b7xm
33 | dnd6-8ry2
34 | ncmw-m42x
35 | 58ik-33wk
36 | wakt-xmha
37 | sr5q-rm7d
38 | 56xf-w7yc
39 | yra4-ynr5
40 | un3r-mf7q
41 | j9qk-t2ht
42 | trbj-7f28
43 | pvzf-dbpc
44 | vrkv-jmjc
45 | b9mi-cs4z
46 | hqcd-z3hu
47 | spgt-auvy
48 | rbqz-eaj4
49 | 8jcq-6ucy
50 | 7quj-zssa
51 | tt6i-5mkh
52 | az7b-di6w
53 | fzzu-umm5
54 | 67wz-betr
55 | i2cv-32w5
56 | vpjp-6gdf
57 | j4eu-nx3y
58 | 4rrq-475h
59 | 3bum-78vz
60 | 4k8k-rw55
61 | 5afy-hx65
62 | 7u2h-e4rx
63 | c9h9-wdx3
64 | dcit-4sk8
65 | g4ft-bk9f
66 | guag-xf4x
67 | hxu6-rrid
68 | kezn-d3a8
69 | qfcb-d6ux
70 | tcde-a2rg
71 | wau4-95ys
72 | y9sn-rk9p
73 | vyhb-nqtw
74 | fvtg-s7gp
75 | qsv2-89sf
76 | udg4-vz9p
77 | 68fg-z9fi
78 | 4yez-5h4p
79 | vi6t-i2f3
80 | 6qwi-azmw
81 | geib-kan6
82 | ejsa-p6i4
83 | kzkk-c7a4
84 |
--------------------------------------------------------------------------------
/type_detection/ids/oaklandnet_ids.txt:
--------------------------------------------------------------------------------
1 | kzer-wcj5
2 | ym6k-rx7a
3 | qyh9-i9dw
4 | fw6y-ui8e
5 | hfn8-32wd
6 | 6nxw-pzj5
7 | e4gx-8458
8 | ajaj-fa72
9 | uq9e-ncfu
10 | uyih-vzuc
11 | kq8i-6bzk
12 | qezs-bkz9
13 | 7dcq-8atp
14 | dutj-j949
15 | sduu-bfki
16 | muvj-xztc
17 | va73-j3gz
18 | 3y2t-a5mc
19 | t35d-4vyj
20 | x678-6ymc
21 | kx4s-uqgi
22 | j4xf-2t25
23 | dxdg-872h
24 | creu-dzki
25 | 4jcx-enxf
26 | b8mb-8tti
27 | quth-gb8e
28 | wm75-yhqe
29 | 65yj-mc7w
30 | erq5-ht9e
31 | aahx-6i3p
32 | h2rc-b7xm
33 | dnd6-8ry2
34 | ncmw-m42x
35 | 58ik-33wk
36 | wakt-xmha
37 | sr5q-rm7d
38 | 56xf-w7yc
39 | yra4-ynr5
40 | un3r-mf7q
41 | j9qk-t2ht
42 | trbj-7f28
43 | pvzf-dbpc
44 | vrkv-jmjc
45 | b9mi-cs4z
46 | hqcd-z3hu
47 | spgt-auvy
48 | rbqz-eaj4
49 | 8jcq-6ucy
50 | 7quj-zssa
51 | tt6i-5mkh
52 | az7b-di6w
53 | fzzu-umm5
54 | 67wz-betr
55 | i2cv-32w5
56 | vpjp-6gdf
57 | j4eu-nx3y
58 | 4rrq-475h
59 | 3bum-78vz
60 | 4k8k-rw55
61 | 5afy-hx65
62 | 7u2h-e4rx
63 | c9h9-wdx3
64 | dcit-4sk8
65 | g4ft-bk9f
66 | guag-xf4x
67 | hxu6-rrid
68 | kezn-d3a8
69 | qfcb-d6ux
70 | tcde-a2rg
71 | wau4-95ys
72 | y9sn-rk9p
73 | vyhb-nqtw
74 | fvtg-s7gp
75 | qsv2-89sf
76 | udg4-vz9p
77 | 68fg-z9fi
78 | 4yez-5h4p
79 | vi6t-i2f3
80 | 6qwi-azmw
81 | geib-kan6
82 | ejsa-p6i4
83 | kzkk-c7a4
84 |
--------------------------------------------------------------------------------
/download/ids/somervillema_ids.txt:
--------------------------------------------------------------------------------
1 | tp6j-gpfj
2 | 7u5v-yw4j
3 | kja3-3jiv
4 | pjhx-dusc
5 | 9cwr-3jjr
6 | 5peg-3mcc
7 | 8y4j-ucsg
8 | 6x93-dy4s
9 | sebz-uihb
10 | hwvc-m8fm
11 | dtkn-fv7f
12 | dqe2-eu72
13 | u6u9-gmux
14 | vfqx-2vkk
15 | vcmv-r7ky
16 | 9p7s-uyz7
17 | ssw2-4kcp
18 | 5qt4-dip4
19 | iye3-wp6v
20 | 8g7d-pg76
21 | q5g3-jif5
22 | mny4-tj6m
23 | vpdq-svp4
24 | 2y56-m77e
25 | 9gy9-2p5u
26 | j545-qb59
27 | w5r4-iy52
28 | 5pvr-cpn3
29 | 3qwf-fgnz
30 | 635v-aavc
31 | 8x35-9ng3
32 | 9uzy-4h8m
33 | 2g5h-2e8r
34 | kdby-j7rs
35 | tkit-6b73
36 | ty6m-bn6q
37 | vw3t-2xhg
38 | 4uyb-gfsm
39 | it9v-824j
40 | i427-734p
41 | p44d-dqzq
42 | w62m-jxtq
43 | qr7g-u54h
44 | dcp6-gcay
45 | m7ah-26yy
46 | j95z-kira
47 | tgqx-pv5x
48 | cpci-gw44
49 | caa8-adi3
50 | jyc2-yxnj
51 | 7w39-s85f
52 | quyn-7i4y
53 | szji-58dd
54 | 8na6-jytu
55 | htg6-e8ia
56 | 33fn-xnzu
57 | x332-bdd7
58 | pgsb-2rr6
59 | ckr3-jqgv
60 | c2xz-m2g7
61 | 6ssq-xzqu
62 | rb6v-e8zn
63 | wppa-gx6f
64 | hr39-b39y
65 | 69vm-7n6w
66 | 6pwe-s49m
67 | xjt9-vc89
68 | 7g35-yebv
69 | r9cu-f8pg
70 | ypxj-qtcw
71 | 4n2x-t8ew
72 | 3tkv-xx4f
73 | a6cq-eqmq
74 | qkid-icys
75 | tfzf-bzmb
76 | a2xm-guu9
77 | wqq6-wyhr
78 | ecmw-4hgh
79 | 97w8-xar9
80 | rzd3-6sat
81 | 4wyh-gtfb
82 | qa92-wva4
83 | 4xpt-vaa8
84 | 37ik-ii75
85 | j38e-t8aq
86 |
--------------------------------------------------------------------------------
/type_detection/ids/somervillema_ids.txt:
--------------------------------------------------------------------------------
1 | tp6j-gpfj
2 | 7u5v-yw4j
3 | kja3-3jiv
4 | pjhx-dusc
5 | 9cwr-3jjr
6 | 5peg-3mcc
7 | 8y4j-ucsg
8 | 6x93-dy4s
9 | sebz-uihb
10 | hwvc-m8fm
11 | dtkn-fv7f
12 | dqe2-eu72
13 | u6u9-gmux
14 | vfqx-2vkk
15 | vcmv-r7ky
16 | 9p7s-uyz7
17 | ssw2-4kcp
18 | 5qt4-dip4
19 | iye3-wp6v
20 | 8g7d-pg76
21 | q5g3-jif5
22 | mny4-tj6m
23 | vpdq-svp4
24 | 2y56-m77e
25 | 9gy9-2p5u
26 | j545-qb59
27 | w5r4-iy52
28 | 5pvr-cpn3
29 | 3qwf-fgnz
30 | 635v-aavc
31 | 8x35-9ng3
32 | 9uzy-4h8m
33 | 2g5h-2e8r
34 | kdby-j7rs
35 | tkit-6b73
36 | ty6m-bn6q
37 | vw3t-2xhg
38 | 4uyb-gfsm
39 | it9v-824j
40 | i427-734p
41 | p44d-dqzq
42 | w62m-jxtq
43 | qr7g-u54h
44 | dcp6-gcay
45 | m7ah-26yy
46 | j95z-kira
47 | tgqx-pv5x
48 | cpci-gw44
49 | caa8-adi3
50 | jyc2-yxnj
51 | 7w39-s85f
52 | quyn-7i4y
53 | szji-58dd
54 | 8na6-jytu
55 | htg6-e8ia
56 | 33fn-xnzu
57 | x332-bdd7
58 | pgsb-2rr6
59 | ckr3-jqgv
60 | c2xz-m2g7
61 | 6ssq-xzqu
62 | rb6v-e8zn
63 | wppa-gx6f
64 | hr39-b39y
65 | 69vm-7n6w
66 | 6pwe-s49m
67 | xjt9-vc89
68 | 7g35-yebv
69 | r9cu-f8pg
70 | ypxj-qtcw
71 | 4n2x-t8ew
72 | 3tkv-xx4f
73 | a6cq-eqmq
74 | qkid-icys
75 | tfzf-bzmb
76 | a2xm-guu9
77 | wqq6-wyhr
78 | ecmw-4hgh
79 | 97w8-xar9
80 | rzd3-6sat
81 | 4wyh-gtfb
82 | qa92-wva4
83 | 4xpt-vaa8
84 | 37ik-ii75
85 | j38e-t8aq
86 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/baltimore_zipcode_index.txt:
--------------------------------------------------------------------------------
1 | 53u2-uece zipcode 1
2 | rep9-vf9x zip 6
3 | cpd3-yi9b zipcode 1
4 | auqk-g78m addrzip 17
5 | 5vwi-a39d addrzip 17
6 | fexy-c3hs zip 6
7 | 98gi-te7t zipcode 2
8 | h774-6wsz zip 6
9 | szmc-i2rj zip 6
10 | npj6-ttes zip 6
11 | kjyz-iyjf zip 6
12 | wasd-qc7e zipcode 1
13 | g9ck-7zns zipcode 2
14 | r4ur-u5nm zipcode 3
15 | u7bw-gha5 zipcode 3
16 | 4d7j-z8em zip 6
17 | btz4-brkj zip 6
18 | 8hgq-9pi6 zipcode 1
19 | 9agw-sxsr zip 6
20 | tpag-zk4d zipcode 8
21 | cpxf-kxp3 zipcode 1
22 | nf24-syy3 addrzip 17
23 | zdgj-m9f8 addrzip 11
24 | uds6-qsb6 zip 6
25 | v77m-e78p zip 5
26 | vvxf-wiyc zipcode 3
27 | xv8d-bwgi addrzip 17
28 | tgtv-wr5u zipcode 1
29 | ik5a-kimj addrzip 17
30 | q974-nn4i zip 6
31 | 6kkw-bck6 zipcode 2
32 | x3dq-8uhg zip 6
33 | uuwk-975y zipcode 2
34 | jhbg-n8w2 zipcode 3
35 | 3ah4-gcgf zipcode 2
36 | g244-i383 zipcode 1
37 | dmje-2r3h zip 6
38 | k4km-9d4r zipcode 2
39 | kbdc-bpw3 zipcode 2
40 | 253h-2qmt zip 6
41 | q2vm-e9dp zipcode 1
42 | wdpa-2rxb addrzip 17
43 | h77s-araf zipcode 2
44 | us2p-bijb zipcode 3
45 | rzct-w9hm zip 3
46 | 35wi-jfre addrzip 17
47 | bcxw-m234 addrzip 11
48 | 2kb9-5zeh zip 6
49 | 2js8-vxjk zip 6
50 | bin3-c64n zip 6
51 | 53js-3bkd zipcode 1
52 | eehw-fgh8 zip 6
53 | ejc5-uinh addrzip 17
54 | k5ry-ef3g zipcode 1
55 | yc75-xbrv zipcode 2
56 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Open Data Analysis
2 | =================
3 | Please read the README.txt in each directory to find the instructions for running the source code.
4 | Below is the brief description of each directory:
5 | ## download
6 | * Download datasets in JSON format. (Shell script)
7 |
8 | ## schema_similarity
9 | * Compute the similarity between schemata. (Python)
10 |
11 | ## matrix_heatmap
12 | * (Figure 8) Generate matrix heatmap using schema similarity scores. (JavaScript)
13 |
14 | ## metadata
15 | * Retrieve metadata including tags, schema, description using Socrata APIs. (Python)
16 |
17 | ## tagcloud
18 | * (Figure 5) Generate tag cloud using tags associated with the dataset. (R)
19 | * Require result from metadata
20 |
21 | ## type_detection
22 | * Detect attribute type (Python)
23 |
24 | ## barchart (Require result from type_detection)
25 | * (Figure 9) Generate a barchart of data type ratio across cities. (Python)
26 |
27 | ## extract_zipcode_latlon
28 | * Read result from type_detection and extract all lat/lon, zipcode values. (Python)
29 | * Require result from type_detection
30 |
31 | ## latlon_to_zipcode
32 | * Convert lat/long to zipcode. (C++)
33 |
34 | ## heatmap
35 | * (Figure 12) Generate a heat map of geographical coverage based on zip code values in NYC and Chicago. (Python)
36 | * Require result from extract_zipcode_latlon and latlon_to_zipcode
37 |
--------------------------------------------------------------------------------
/matrix_heatmap/boston.html:
--------------------------------------------------------------------------------
1 |
3 |
4 |
5 |
54 |
--------------------------------------------------------------------------------
/matrix_heatmap/nyc_no311.html:
--------------------------------------------------------------------------------
1 |
3 |
4 |
5 |
54 |
--------------------------------------------------------------------------------
/download/ids/boston_ids.txt:
--------------------------------------------------------------------------------
1 | awu8-dc52
2 | rtbk-4hc4
3 | 7cdf-6fgx
4 | c3yg-bknc
5 | qndu-wx8w
6 | effb-uspk
7 | csea-5edd
8 | uqjh-rsbj
9 | j2a7-cdyk
10 | gb6y-34cq
11 | 5b2m-jtw4
12 | dtsk-jcvs
13 | r3qt-vrtj
14 | vrhg-954w
15 | w6u4-3pp8
16 | e29s-ympv
17 | 7wt6-9hdh
18 | mbdv-4g6k
19 | 3a6m-dwve
20 | enuq-8kmn
21 | c7cz-29ak
22 | msk6-43c6
23 | gqai-h7bg
24 | rtqb-8pht
25 | 3j3e-cr8p
26 | vjvb-2kg6
27 | 6yws-tqu3
28 | rzdm-34k2
29 | 23yb-cufe
30 | byxy-288e
31 | cr3i-jj7v
32 | mwxg-8ix6
33 | c7cs-bcq5
34 | ehda-cg39
35 | ekiy-2qmz
36 | 7xqx-zy2t
37 | 4vcu-nshu
38 | thm8-kfjj
39 | cich-iivi
40 | f4ev-s6tx
41 | 8igg-7sbf
42 | c7vc-ep7b
43 | qz58-xbtz
44 | 46f7-2snz
45 | snj3-z8hh
46 | 2tib-uhic
47 | 9tfg-3jic
48 | qqwn-zzmv
49 | 984s-h86s
50 | pmdu-upcu
51 | wv26-euyd
52 | efzp-pcmd
53 | 7ygz-72yc
54 | 742w-2qkx
55 | p9yd-36dn
56 | rww2-sqpe
57 | rvw3-dget
58 | sb5j-x59u
59 | xqmb-ucdr
60 | sgf2-btru
61 | ipwb-93aq
62 | cxb7-aa9j
63 | d5jd-s3az
64 | dp5b-mgir
65 | ciur-a7cc
66 | udwx-qxp4
67 | dvjg-bv4z
68 | axcy-y39t
69 | vivu-bt5s
70 | 7idu-4tds
71 | eymz-pqcb
72 | h64y-whx9
73 | idz9-gdbc
74 | i59n-zrgu
75 | esg7-pz3n
76 | euq9-fuzw
77 | q4vk-zgiq
78 | r5wd-vzpa
79 | su77-pn2k
80 | 4kc2-vxvv
81 | 9yb5-8pvg
82 | dtud-qyw9
83 | f6t4-vve7
84 | gfvf-83vt
85 | hkne-4xqd
86 | k9pj-rna9
87 | pvrp-csfj
88 | v6fi-4hdu
89 | yfam-b7bg
90 | ynt4-n6g9
91 | 52xb-ggdw
92 | j7zj-cq7e
93 | tvzm-wwrp
94 | vwgc-k7be
95 | t85d-b449
96 | 89gv-qm3p
97 | uwfh-jrgi
98 | x8in-twjt
99 | 6uv6-kxqp
100 | viyp-qdei
101 | b6jk-5x5h
102 | tma6-pdxu
103 | bcnb-bux2
104 | tvvb-g9ni
105 | 354i-aiec
106 | xkfj-zz8i
107 | 7wih-gq3k
108 | fhku-uixf
109 | 8sq6-p7et
110 | 755x-x44q
111 | evkj-7j3w
112 | 9j5j-ped2
113 | av6t-57nx
114 | wivc-syw7
115 | api6-u3fp
116 | fb8c-dnd3
117 | wqbg-exmn
118 | 9rag-2mng
119 | sb57-rjn9
120 | cd7h-u9nu
121 | krii-vyri
122 |
--------------------------------------------------------------------------------
/type_detection/ids/boston_ids.txt:
--------------------------------------------------------------------------------
1 | awu8-dc52
2 | rtbk-4hc4
3 | 7cdf-6fgx
4 | c3yg-bknc
5 | qndu-wx8w
6 | effb-uspk
7 | csea-5edd
8 | uqjh-rsbj
9 | j2a7-cdyk
10 | gb6y-34cq
11 | 5b2m-jtw4
12 | dtsk-jcvs
13 | r3qt-vrtj
14 | vrhg-954w
15 | w6u4-3pp8
16 | e29s-ympv
17 | 7wt6-9hdh
18 | mbdv-4g6k
19 | 3a6m-dwve
20 | enuq-8kmn
21 | c7cz-29ak
22 | msk6-43c6
23 | gqai-h7bg
24 | rtqb-8pht
25 | 3j3e-cr8p
26 | vjvb-2kg6
27 | 6yws-tqu3
28 | rzdm-34k2
29 | 23yb-cufe
30 | byxy-288e
31 | cr3i-jj7v
32 | mwxg-8ix6
33 | c7cs-bcq5
34 | ehda-cg39
35 | ekiy-2qmz
36 | 7xqx-zy2t
37 | 4vcu-nshu
38 | thm8-kfjj
39 | cich-iivi
40 | f4ev-s6tx
41 | 8igg-7sbf
42 | c7vc-ep7b
43 | qz58-xbtz
44 | 46f7-2snz
45 | snj3-z8hh
46 | 2tib-uhic
47 | 9tfg-3jic
48 | qqwn-zzmv
49 | 984s-h86s
50 | pmdu-upcu
51 | wv26-euyd
52 | efzp-pcmd
53 | 7ygz-72yc
54 | 742w-2qkx
55 | p9yd-36dn
56 | rww2-sqpe
57 | rvw3-dget
58 | sb5j-x59u
59 | xqmb-ucdr
60 | sgf2-btru
61 | ipwb-93aq
62 | cxb7-aa9j
63 | d5jd-s3az
64 | dp5b-mgir
65 | ciur-a7cc
66 | udwx-qxp4
67 | dvjg-bv4z
68 | axcy-y39t
69 | vivu-bt5s
70 | 7idu-4tds
71 | eymz-pqcb
72 | h64y-whx9
73 | idz9-gdbc
74 | i59n-zrgu
75 | esg7-pz3n
76 | euq9-fuzw
77 | q4vk-zgiq
78 | r5wd-vzpa
79 | su77-pn2k
80 | 4kc2-vxvv
81 | 9yb5-8pvg
82 | dtud-qyw9
83 | f6t4-vve7
84 | gfvf-83vt
85 | hkne-4xqd
86 | k9pj-rna9
87 | pvrp-csfj
88 | v6fi-4hdu
89 | yfam-b7bg
90 | ynt4-n6g9
91 | 52xb-ggdw
92 | j7zj-cq7e
93 | tvzm-wwrp
94 | vwgc-k7be
95 | t85d-b449
96 | 89gv-qm3p
97 | uwfh-jrgi
98 | x8in-twjt
99 | 6uv6-kxqp
100 | viyp-qdei
101 | b6jk-5x5h
102 | tma6-pdxu
103 | bcnb-bux2
104 | tvvb-g9ni
105 | 354i-aiec
106 | xkfj-zz8i
107 | 7wih-gq3k
108 | fhku-uixf
109 | 8sq6-p7et
110 | 755x-x44q
111 | evkj-7j3w
112 | 9j5j-ped2
113 | av6t-57nx
114 | wivc-syw7
115 | api6-u3fp
116 | fb8c-dnd3
117 | wqbg-exmn
118 | 9rag-2mng
119 | sb57-rjn9
120 | cd7h-u9nu
121 | krii-vyri
122 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/chicago_latlon_index.txt:
--------------------------------------------------------------------------------
1 | ys2m-44iv latitude 14 longitude 15
2 | tdab-kixi latitude 6 longitude 7
3 | hwmb-iu8j latitude 12 longitude 13 location 14
4 | qnrb-dui6 latitude 16 longitude 17
5 | mh59-yhwm latitude 15 longitude 16
6 | n4j6-wkkf start_lon 8 _lif_lat 9 _lit_lon 10 _lit_lat 11
7 | a95h-gwzm latitude 19 longitude 20
8 | 7wwb-3vgm latitude 15 longitude 16
9 | hu6v-hsqb latitude 10 longitude 11
10 | pa8e-mhbh lat 6 lon 7
11 | c6au-zpvv latitude 17 longitude 18
12 | 7rz2-h8u9 latitude 10 longitude 11
13 | cbyb-69xx latitude 6 longitude 7 f12 9 f13 10
14 | kmt9-pg57 latitude 9 longitude 10
15 | pfsx-4n4m latitude 6 longitude 7
16 | q3z3-udcz latitude 12 longitude 13
17 | qqw2-hwkh latitude 15 longitude 16
18 | awnt-66py latitude 15 longitude 16
19 | 4ndg-wq3w latitude 6 longitude 7
20 | hx8q-mf9v latitude 18 longitude 20
21 | xa2r-bcfc latitude 15 longitude 16
22 | c4ep-ee5m latitude 15 longitude 16
23 | 4ywc-hr3a latitude 6 longitude 7 f12 9 f13 10
24 | mw4h-s8xu longitude 6 latitude 7
25 | 4x56-dvnp latitude 9 longitude 10
26 | q4de-h6yq latitude 15 longitude 16
27 | 58td-isfp latitude 10 longitude 11
28 | dfnk-7re6 latitude 14 longitude 15
29 | pf56-35rv latitude 6 longitude 7
30 | 37g7-p8eh latitude 9 longitude 10
31 | t2qc-9pjd _west 2 _east 3 _south 4 _north 5
32 | 5cq6-qygt longitude 6 latitude 7
33 | 2u2y-n6dm location 12
34 | zjqd-uvky latitude 15 longitude 16
35 | vfd5-f3kt latitude 9 longitude 10
36 | 9i8j-865n latitude 10 longitude 11
37 | 8v97-unyc latitude 15 longitude 16
38 | i6k7-i6md latitude 15 longitude 16
39 | habu-n236 latitude 6 longitude 7
40 | bj7p-98q2 latitude 17 longitude 18
41 | zgvr-7yfd latitude 9 longitude 10
42 | x2n5-8w5q latitude 14 longitude 15
43 | vaxn-3ims latitude 15 longitude 16
44 | u23m-pa73 latitude 12 longitude 13
45 | 4guy-sfss latitude 15 longitude 16
46 | atzs-u7pv latitude 10 longitude 11
47 | i8y3-ytj4 latitude 10 longitude 11
48 | ag7u-gr9m lat 6 lon 7
49 | g9qy-h66j latitude 15 longitude 16
50 |
--------------------------------------------------------------------------------
/type_detection/ijson/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | from functools import wraps
3 |
4 |
def coroutine(func):
    """
    Decorator for generator functions meant to be driven purely via .send().

    The wrapper only primes the generator: it calls next() once so the
    generator advances to its first ``yield``, which the generator protocol
    requires before any value can be sent in.
    """
    @wraps(func)
    def primed(*args, **kwargs):
        gen = func(*args, **kwargs)
        next(gen)  # advance to the first yield
        return gen
    return primed
17 |
@coroutine
def foreach(coroutine_func):
    '''
    Dispatches each JSON array item to a handler coroutine. A fresh
    coroutine is created for every item by calling the `coroutine_func`
    callable; each handler receives (prefix, event, value) tuples as
    produced by the rich JSON parser.

    The first event received by foreach must be a "start_array" event.
    '''
    handler = None
    base, event, value = yield
    if event != 'start_array':
        raise Exception('foreach requires "start_array" as the first event, got %s' % repr((base, event, value)))
    ITEM_OPENERS = frozenset(['start_map', 'start_array', 'null', 'boolean', 'number', 'string'])
    itemprefix = base + '.item' if base else 'item'
    while True:
        prefix, event, value = yield
        # An item-opening event exactly at the item prefix starts a new handler.
        if prefix == itemprefix and event in ITEM_OPENERS:
            handler = coroutine_func()
        # Forward everything except the enclosing array's own closing event.
        if (prefix, event) != (base, 'end_array'):
            handler.send((prefix, event, value))
40 |
@coroutine
def dispatcher(targets):
    '''
    Routes JSON parser events to handler coroutines by prefix.

    ``targets`` is a list of (base_prefix, coroutine) pairs. Each incoming
    (prefix, event, value) tuple is forwarded to the first coroutine whose
    base_prefix is a string prefix of the event's prefix; later targets are
    not consulted once a match is found.
    '''
    while True:
        item = yield
        event_prefix = item[0]
        for base_prefix, handler in targets:
            if event_prefix.startswith(base_prefix):
                handler.send(item)
                break
56 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/ijson/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | from functools import wraps
3 |
4 |
def coroutine(func):
    """
    Decorator for generator functions meant to be driven purely via .send().

    The wrapper only primes the generator: it calls next() once so the
    generator advances to its first ``yield``, which the generator protocol
    requires before any value can be sent in.
    """
    @wraps(func)
    def primed(*args, **kwargs):
        gen = func(*args, **kwargs)
        next(gen)  # advance to the first yield
        return gen
    return primed
17 |
@coroutine
def foreach(coroutine_func):
    '''
    Dispatches each JSON array item to a handler coroutine. A fresh
    coroutine is created for every item by calling the `coroutine_func`
    callable; each handler receives (prefix, event, value) tuples as
    produced by the rich JSON parser.

    The first event received by foreach must be a "start_array" event.
    '''
    handler = None
    base, event, value = yield
    if event != 'start_array':
        raise Exception('foreach requires "start_array" as the first event, got %s' % repr((base, event, value)))
    ITEM_OPENERS = frozenset(['start_map', 'start_array', 'null', 'boolean', 'number', 'string'])
    itemprefix = base + '.item' if base else 'item'
    while True:
        prefix, event, value = yield
        # An item-opening event exactly at the item prefix starts a new handler.
        if prefix == itemprefix and event in ITEM_OPENERS:
            handler = coroutine_func()
        # Forward everything except the enclosing array's own closing event.
        if (prefix, event) != (base, 'end_array'):
            handler.send((prefix, event, value))
40 |
@coroutine
def dispatcher(targets):
    '''
    Routes JSON parser events to handler coroutines by prefix.

    ``targets`` is a list of (base_prefix, coroutine) pairs. Each incoming
    (prefix, event, value) tuple is forwarded to the first coroutine whose
    base_prefix is a string prefix of the event's prefix; later targets are
    not consulted once a match is found.
    '''
    while True:
        item = yield
        event_prefix = item[0]
        for base_prefix, handler in targets:
            if event_prefix.startswith(base_prefix):
                handler.send(item)
                break
56 |
--------------------------------------------------------------------------------
/linechart/timeline.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import sys
3 | from os import walk
4 | import re
5 | import datetime
6 | import os.path
7 |
def add_date(filename, date2count):
    # Accumulate dataset counts per publication month.
    # `filename` holds tab-separated "id<TAB>pub_ts<TAB>created_ts" rows;
    # keys of `date2count` are "YYYYMM" strings.
    with open(filename) as lines:
        for line in lines:
            dataset_id, pdate, cdate = line.strip("\n").split("\t")
            # Only the publication date is used.
            published = datetime.datetime.fromtimestamp(int(pdate))
            year_month = str(published.year) + "%02d" % published.month
            date2count[year_month] = date2count.get(year_month, 0) + 1
    return date2count
23 |
def prepare_data(path):
    """Scan `path` for id_date files and cache per-month dataset counts.

    Only the top level of `path` is examined (the loop breaks after the
    first os.walk() tuple). Counts are accumulated with add_date() and
    written to date2count.csv in the current directory, one
    "YYYYMM<TAB>count" line per month.
    """
    date2count = {}  # "YYYYMM" -> number of datasets published that month
    for (dirpath, dirnames, filenames) in walk(path):
        for filename in filenames:
            if re.search("id_date", filename):
                # os.path.join is safe whether or not `path` ends in a slash,
                # unlike the previous bare string concatenation.
                full_path = os.path.join(path, filename)
                print(full_path)
                date2count = add_date(full_path, date2count)
        break  # only the top-level directory is scanned
    print(date2count)
    # `with` guarantees the cache file is closed even if a write fails.
    with open("date2count.csv", "w") as out:
        for date in date2count.keys():
            out.write(date + "\t" + str(date2count[date]) + "\n")
37 |
def get_data(path):
    # Build the cache file on first use; later runs read it directly.
    if not os.path.isfile("date2count.csv"):
        prepare_data(path)

    pairs = []
    with open("date2count.csv") as cache:
        for row in cache:
            year_month, count = row.strip("\n").split("\t")
            pairs.append([year_month, count])
    # Chronological order ("YYYYMM" strings sort lexicographically).
    return sorted(pairs, key=lambda pair: pair[0])
49 |
def main(argv):
    # Plot the cumulative number of tables over time, sampled every 4 months.
    date_count = get_data("../metadata/data/")
    positions = []  # x coordinates: every 4th month index
    labels = []     # matching "YYYY/MM" tick labels
    totals = []     # cumulative number of tables at each sampled month
    running = 0
    for position, (year_month, count) in enumerate(date_count, 1):
        running += int(count)
        if position % 4 == 0:
            positions.append(position)
            labels.append(year_month[:4] + "/" + year_month[4:])
            totals.append(running)
    plt.xticks(positions, labels)
    plt.xticks(rotation=50)
    plt.plot(positions, totals)
    plt.xlabel('Timeline')
    plt.ylabel('Number of tables')
    plt.grid()
    plt.show()

if __name__=="__main__":
    main(sys.argv[1:])
76 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/chicago_zipcode_index.txt:
--------------------------------------------------------------------------------
1 | 7eyu-q9ez lobbyist_zip 10 employer_zip 21 correspondent_zip 33 client_zip 45
2 | azpf-uc4s zip_code 5
3 | hcqp-hyqa zip 8
4 | wwy2-k7b3 zip 3
5 | ddxq-pdr6 zip_code 4
6 | jsdv-pwf2 zip 3
7 | e4sp-itvq zip_code 9
8 | 7nii-7srd zip_code 14
9 | nrmj-3kcf zip_code 7
10 | 97t6-zrhs zip_code 11
11 | msrk-w9ih zip 6
12 | meks-hp6f zip_code 8
13 | 53t8-wyrc zip_code 6
14 | ngxm-jbc3 zip_code 3
15 | cnfp-tsxc zip 8
16 | 2ft4-4uik lobbyist_zip 4
17 | htai-wnw4 zipcode 5 city_hall_zipcode 13
18 | ydr8-5enu contractor_2_zipcode 35 contractor_3_zipcode 42 contractor_4_zipcode 49 contractor_5_zipcode 56 contractor_6_zipcode 63 contractor_7_zipcode 70 contractor_8_zipcode 77
19 | zh3n-jtnt zip 3
20 | qhfc-4cw2 zip 6
21 | 65vt-ydgw zipcode 5
22 | xxwy-zyzu zip_code 3
23 | pvu3-9dfs lobbyist_zip 6
24 | g85x-gwmp zipcode 5
25 | h243-v2q5 zip 6
26 | cpva-49fs zip 3
27 | wrhz-xeta zip_code 6
28 | vazh-t57q zip_code_or_aggregate_ 0
29 | uxic-zsuj zip_code 7
30 | gkur-vufi zip 4
31 | rsxa-ify5 zip 15
32 | 9zqv-3uhs zip_code 10
33 | me59-5fac zip_code 7
34 | ypez-j3yg lobbyist_zip 10 employer_zip 21 correspondent_zip 33
35 | r5kz-chrr zip_code 9
36 | iq3c-68ew zip 7
37 | 9xs2-f89t zip_code 6
38 | tfmt-mmy2 zip_code 3
39 | 8bap-6xg8 zip_code 7
40 | mab8-y9h3 zip_code 9
41 | 5gdk-uk7w zip 7
42 | 2eaw-bdhe zip 3
43 | 495s-83kj zip_code 6
44 | y93d-d9e3 zip_code 4
45 | p97q-qace zip 3
46 | vekt-28b5 zip_code_or_aggregate 0
47 | 9ksk-na4q zip_code 9
48 | s6ha-ppgi zip_code 5
49 | t28b-ys7j zip_code 6
50 | zuxi-7xem zip_code 6
51 | 4jy7-7m68 zip_code 7
52 | r23p-6uic zip 6
53 | spxm-tnai zip_code 6
54 | uupf-x98q zip_code 7
55 | 4u6w-irs9 zip_code 6
56 | wryv-d7zf zip_code 4
57 | 7fu8-t497 zip 3
58 | 6uah-qehh zip_code 3
59 | hp65-bcxv zip 4
60 | 8ayb-6mjs zip 6
61 | tpf5-fgtw lobbyist_zip 7
62 | 8k9i-ia3x zip 7
63 | 28km-gtjn zip 4
64 | z8bn-74gv zip 4
65 | vuf2-qfik zip_code 3
66 | x8fc-8rcq zip 7
67 | 3r8a-9kby zip_code 7
68 | f7f2-ggz5 zip 7
69 | hxh5-e8eh zip_code 12
70 | d9re-tmpw zip_code 14
71 | 4ijn-s7e5 zip 9
72 | nen3-vcxj zip 5
73 | 3c9v-pnva zip_code 12
74 | 7as2-ds3y zip 9
75 | 3aav-uy2v zip_code 6
76 | egku-46f2 zip 3
77 | vf9u-9xcm lobbyist_zip 7
78 | 7pb7-6889 zip 3
79 | qrxi-q28n zip 3
80 | ti44-vee7 zip 3
81 | hec5-y4x5 zip_code 8
82 | dgeh-7h9y zip 3
83 | 8yti-tif4 zipcode 5 city_hall_zipcode 13
84 | x74m-smqb zip 3
85 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/sf_zipcode_index.txt:
--------------------------------------------------------------------------------
1 | v22h-ujnv zip_code 1
2 | mupm-43n6 tran_zip4 22
3 | 6iqh-u3hk payee_zip4 22
4 | k78b-bnzt business_zip 5 mailing_city_state_zip_code 14
5 | funx-qxxn business_zip 7 mailing_city_state_zip_code 16
6 | dzre-gd7q business_zip 4 mailing_city_state_zip_code 13
7 | qwgb-tu3g tran_zip4 22
8 | ujme-i5np zip_code 0
9 | e2x8-npup business_zip 4 mailing_city_state_zip_code 13
10 | 6ssu-ewuc tran_zip4 22
11 | hmda-uywf business_zip 7 mailing_city_state_zip_code 16
12 | z76i-7s65 zipcode 6
13 | phrj-5yd5 business_zip 3 mailing_city_state_zip_code 12
14 | fp2p-prbg tran_zip4 22
15 | hbjm-s5ay business_zip 3 mailing_city_state_zip_code 12
16 | n65z-9iyj business_zip 4 mailing_city_state_zip_code 13
17 | whm4-mn72 business_zip 7 mailing_city_state_zip_code 16
18 | 5ayi-he6v zip 7
19 | pyxa-3r7p payee_zip4 22
20 | v456-mgti tran_zip4 22
21 | 4phr-3hrm loan_zip4 22
22 | rx2w-v8zb mailing_address_city_state_and_zip_code 6 business_address_city_state_and_zip_code 8
23 | bgq6-5mf8 business_zip 4 mailing_city_state_zip_code 13
24 | nvpi-vhb7 business_zip 7 mailing_city_state_zip_code 16
25 | b6tj-gt35 permit_zipcode 11
26 | w3ep-wixv tran_zip4 22
27 | tr8k-7cit payee_zip4 22
28 | rzvw-zvmg tran_zip4 22
29 | efrz-5mfq business_zip 7 mailing_city_state_zip_code 16
30 | anpk-hx6u zip 9
31 | mz4g-xxwd business_zip 7 mailing_city_state_zip_code 16
32 | 6jj7-u7ax zip_code 11
33 | ntkt-myzv tran_zip4 22
34 | q66q-d2tr tran_zip4 22
35 | 8ud4-mc82 business_zip 4 mailing_city_state_zip_code 13
36 | rynu-4e44 tran_zip4 22
37 | p5kp-5mtp client_zip 8
38 | vsy2-vybn tran_zip4 22
39 | aayf-qzg2 business_zip 3 mailing_city_state_zip_code 12
40 | gk2f-isrp payee_zip4 22
41 | iuv4-tqzq zip_code 11
42 | hc26-j9if payee_zip4 22
43 | k4ji-djiq business_zip 7 mailing_city_state_zip_code 16
44 | 86nq-bynj payee_zip4 22 bus_zip4 69
45 | 4zbw-xuig payee_zip4 16
46 | jyag-jj92 tran_zip4 22 intr_zip4 50
47 | by7b-r76m tran_zip4 22
48 | dg5s-2n6f business_zip 7 mailing_city_state_zip_code 16
49 | 62ex-d3qk loan_zip4 22
50 | crqn-k9bw zip 9
51 | wy9w-f6fu zipcode 6
52 | wrjq-a6r8 business_zip 4 mailing_city_state_zip_code 13
53 | 2tgf-pc6f business_zip 7 mailing_city_state_zip_code 16
54 | s57h-9wm9 mailing_address_city_state_and_zip_code 6 business_address_city_state_and_zip_code 8
55 | 4q92-gm9f vendor_zip 8
56 | gz8r-ag83 payee_zip4 22
57 | pn39-4xw4 tran_zip4 22
58 | y8r8-8ptg tran_zip4 22
59 | p4sp-es3b tran_zip4 22
60 | k76b-4yme tran_zip4 22
61 | dvrf-izet tran_zip4 22
62 | p3r9-xbpg tran_zip4 22
63 | ehdn-tx7u tran_zip4 22
64 | u4y3-k4vs client_zip 8
65 | capd-mzck tran_zip4 22
66 | 4vqi-vw9j tran_zip4 10
67 | nn8w-ruis permit_zipcode 11
68 | ec3y-6ty9 loan_zip4 22
69 |
--------------------------------------------------------------------------------
/linechart/timeline_year.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import sys
3 | from os import walk
4 | import re
5 | import datetime
6 | import os.path
7 | from matplotlib.ticker import AutoMinorLocator
8 |
def add_date(filename, date2count):
    # Accumulate dataset counts per publication month.
    # `filename` holds tab-separated "id<TAB>pub_ts<TAB>created_ts" rows;
    # keys of `date2count` are "YYYYMM" strings.
    with open(filename) as lines:
        for line in lines:
            dataset_id, pdate, cdate = line.strip("\n").split("\t")
            # Only the publication date is used.
            published = datetime.datetime.fromtimestamp(int(pdate))
            year_month = str(published.year) + "%02d" % published.month
            date2count[year_month] = date2count.get(year_month, 0) + 1
    return date2count
24 |
def prepare_data(path):
    """Scan `path` for id_date files and cache per-month dataset counts.

    Only the top level of `path` is examined (the loop breaks after the
    first os.walk() tuple). Counts are accumulated with add_date() and
    written to date2count.csv in the current directory, one
    "YYYYMM<TAB>count" line per month.
    """
    date2count = {}  # "YYYYMM" -> number of datasets published that month
    for (dirpath, dirnames, filenames) in walk(path):
        for filename in filenames:
            if re.search("id_date", filename):
                # os.path.join is safe whether or not `path` ends in a slash,
                # unlike the previous bare string concatenation.
                full_path = os.path.join(path, filename)
                print(full_path)
                date2count = add_date(full_path, date2count)
        break  # only the top-level directory is scanned
    print(date2count)
    # `with` guarantees the cache file is closed even if a write fails.
    with open("date2count.csv", "w") as out:
        for date in date2count.keys():
            out.write(date + "\t" + str(date2count[date]) + "\n")
38 |
def get_data(path):
    # Build the cache file on first use; later runs read it directly.
    if not os.path.isfile("date2count.csv"):
        prepare_data(path)

    pairs = []
    with open("date2count.csv") as cache:
        for row in cache:
            year_month, count = row.strip("\n").split("\t")
            pairs.append([year_month, count])
    # Chronological order ("YYYYMM" strings sort lexicographically).
    return sorted(pairs, key=lambda pair: pair[0])
50 |
def main(argv):
    # Plot the cumulative number of tables over time: a tick every 6 months,
    # with a year label on every 12th month and blank labels in between.
    date_count = get_data("../metadata/data/")
    positions = []  # x coordinates: every 6th month index
    labels = []     # year label on every 12th month, "" otherwise
    totals = []     # cumulative number of tables at each sampled month
    running = 0
    for position, (year_month, count) in enumerate(date_count, 1):
        running += int(count)
        label = year_month[:4] if position % 12 == 0 else ""
        if position % 6 == 0:
            positions.append(position)
            labels.append(label)
            totals.append(running)

    plt.xticks(positions, labels)
    plt.plot(positions, totals)
    plt.xlabel('Timeline')
    plt.ylabel('Number of tables')
    plt.grid()
    plt.show()

if __name__=="__main__":
    main(sys.argv[1:])
89 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/index/kcmo_zipcode_index.txt:
--------------------------------------------------------------------------------
1 | wzjw-x2er zip_code 1
2 | mzdm-6rvx zip_code 9
3 | yyhn-562y zipcode 6
4 | xth3-vdqe zip_code 9
5 | yfgx-yvnj facility_zip 7
6 | svsn-4cui zip_code 9
7 | qayy-erge zip_code 1
8 | hikg-9gqg zipcode 7
9 | 94ay-nuwv facility_zip 7
10 | d9fr-pncw zipcode 8
11 | dmn8-83j9 zipcode 8
12 | n3tx-eq5q zip_code 14
13 | fkt2-8smh zip_code 1
14 | cmk4-vs7v zip_code 9
15 | p5hs-w2pi zipcode 6
16 | hsus-bft8 zipcode 8
17 | 7qxw-drid zip_code 9
18 | 9p66-paw4 zip_code 1
19 | byd5-svc3 zip 5
20 | ch8a-uu5r zip_code 9
21 | siaf-2z3j zip_code 9
22 | 3uzv-gzzp zip_code 2
23 | i4wi-hu6e zip_code 14
24 | nead-3ngg zip_code 1
25 | kfzc-riej zipcode 9
26 | przf-icvh zip_code 9
27 | sz9c-c5ux facility_zip 7
28 | ru3v-3yc4 zip_code 9
29 | c2gn-nkss zipcode 5
30 | hbmv-rqk9 zip_code 21
31 | kbwn-nqvd zipcode 10
32 | r843-p6xb zipcode 6
33 | j8id-nv5v zipcode 6
34 | dcx5-vae4 zip_code 1
35 | wsnb-g5wd zip_code 9
36 | tn7g-ce9i zip_code 9
37 | 4kqe-4ud4 zip_code 9
38 | p2ie-br32 zipcode 5
39 | 2h8d-4rhx zip_code 9
40 | mgvq-ati4 zipcode 3
41 | cgbu-k38b zip 6
42 | fbte-5u6g zipcode 11
43 | 83tc-w4pj zip_code 9
44 | ykyy-vsei zipcode 10
45 | r3ef-ysd4 zipcode 10
46 | dipa-3cie zip 7
47 | 4vt8-kfnu zip_code 1
48 | ks2s-yguy zipcode 3
49 | 4vq7-4p6a zip_code 2
50 | sy5m-x8pe zip_code 9
51 | aket-wpiw zip_code 9
52 | uza4-2fj4 zipcode 6
53 | 7i6x-m8sc zipcode 9
54 | 5sfv-g7jd zip_code 9
55 | 5349-7a9t zip_code 9
56 | ay5t-zph8 zip_code 1
57 | ntq9-ups2 zipcode 6
58 | 8aqp-4djd zipcode 6
59 | ctk7-ig49 zip_code 9
60 | qp8i-q9d5 zipcode 7
61 | nytw-fmz3 zip_code 14
62 | p333-ufv5 zipcode 5
63 | aez3-wg7q zipcode 6
64 | grzb-gufz zip_code 9
65 | 8sve-3g9n facility_zip 7
66 | 8h39-q9hm zipcode 9
67 | jkbq-7h2i zip 5
68 | ehh5-32d3 zip_code 9
69 | dqet-j5bc zipcode 8
70 | dfuy-hga5 zip_code 9
71 | txik-ccii zip_code 9
72 | jx2x-bscc facility_zip 7
73 | ir3u-dv6q zipcode 9
74 | hcyh-6t9m zipcode 7
75 | adrn-rsvg zipcode 6
76 | mmy6-sscw zip_code 9
77 | h29t-a3ti zip_code 9
78 | ei27-98n4 zip_code 9
79 | tbcn-7xmb zipcode 9
80 | tgwe-hk6n zipcode 9
81 | cgpa-pjva zipcode 9
82 | heym-2frk zipcode 5
83 | gx5v-53hg zipcode 6
84 | uavk-68tt zip_code 9
85 | g5up-449w zip_code 9
86 | hywd-y4vv groupzip 8
87 | x5sp-av9v zipcode 9
88 | 2ezj-prfz zip_code 9
89 | j3vd-wims zip_code 1
90 | x5mb-q9se zip_code 21
91 | 8kfr-ui65 zip_code 1
92 | nypf-bz5b zipcode 5
93 | yi2v-3ssg zip_code 9
94 | 7kv2-vpry zipcode 6
95 | gj9g-ucun zip_code 9
96 | xw5s-y9pu zipcode 9
97 | p2vx-ev69 zipcode 9
98 | sm5y-c9pj zip 6
99 | 2cun-c2xr zip_code 9
100 | 6445-ctvs zip_code 9
101 | 839a-34uw zip_code 9
102 | fvhn-5vsv zip 5
103 | ibbi-sirw zip_code 9
104 | 9d9w-zre9 zip_code 1
105 | dymb-xy5c zip_code 14
106 | wu22-kvdm zipcode 8
107 | im7g-fucq zip_code 1
108 | xnn5-tna4 zipcode 10
109 | wzhv-ftxn zipcode 7
110 | js3d-4ga8 zip_code 9
111 | mmn5-wy78 zip_code 1
112 | 6pi7-rfgq zipcode 9
113 | djv7-4q5r zip_code 9
114 | 9zpd-u4mp zipcode 11
115 | 5n8b-dbbb zipcode 10
116 | pzip-wwk6 zip 5
117 |
--------------------------------------------------------------------------------
/download/ids/austin_ids.txt:
--------------------------------------------------------------------------------
1 | ri75-pahg
2 | 8jyt-x94k
3 | ecmv-9xxi
4 | 5tye-7ray
5 | h3i4-5e5v
6 | hqa6-stx4
7 | szku-46rx
8 | x442-h34c
9 | trxj-f8br
10 | 8c6z-qnmj
11 | q37s-pqpu
12 | wrwk-skv6
13 | 4c6h-tv2y
14 | d6z4-s3ex
15 | 88dg-7xxd
16 | siyu-szxn
17 | bqav-9x6a
18 | 64cq-wf5u
19 | b4y9-5x39
20 | c6ja-7mhw
21 | gzyt-t2by
22 | 4i8t-nckg
23 | q7wj-9ws7
24 | gr59-ids7
25 | ga9y-ypai
26 | jbaf-xebm
27 | dv3q-tn2r
28 | hek3-kuva
29 | amh5-bifm
30 | 8aah-diw2
31 | hut9-4n8t
32 | fhca-e5je
33 | s4tf-m9g2
34 | 5tx2-pk4n
35 | tx8s-62r6
36 | gqmc-bxs4
37 | b6cd-bhbk
38 | cr7p-ssq7
39 | 84ih-p28j
40 | nq9x-w8sx
41 | 5gjn-nmcf
42 | r5kt-xq3y
43 | rfif-mmvg
44 | scqk-petw
45 | mwqa-epx5
46 | 5fnu-ngjq
47 | ykw4-j3aj
48 | gx7t-wzxw
49 | 4sf2-s9as
50 | g9bx-8meu
51 | 959k-a8yh
52 | r6sg-xka2
53 | a6pm-qynf
54 | de95-4khj
55 | uszv-p75d
56 | nttt-2a35
57 | iuw2-kwij
58 | nmp9-45v2
59 | xj3h-ppw2
60 | kidc-knry
61 | yqxj-7evp
62 | zzix-yxi4
63 | wr7f-jdtu
64 | fsgj-5xyt
65 | 2ds5-jyca
66 | awqv-vbfj
67 | 3gc4-g537
68 | gt3n-akq9
69 | wd9d-2jf3
70 | 4u75-seeq
71 | 3qu3-nwxj
72 | ei2n-fehk
73 | ab9p-kxqp
74 | ec78-i9z5
75 | b73m-kiye
76 | ajpy-mwjj
77 | cusd-m48y
78 | f7fd-4st5
79 | a9hv-5z8i
80 | i6dj-uuqe
81 | 9jwp-y89b
82 | ahj3-w5hk
83 | sswp-u5uh
84 | u3yy-shmz
85 | hdpu-g3yy
86 | jmp6-p8e2
87 | chv7-cszp
88 | gwrj-cykm
89 | vwcu-h3qu
90 | uqcn-typ6
91 | rxia-etc2
92 | 6yeq-zz6u
93 | kxm4-pr4y
94 | n63c-e24q
95 | uber-bhwe
96 | vgkf-yny7
97 | 7j64-2qf8
98 | s6n4-3bq2
99 | xcd2-xf2f
100 | nynz-w2da
101 | gxwy-g5wa
102 | 8zu2-guks
103 | w6f6-d2ag
104 | yj9d-ajag
105 | utnt-hag5
106 | m9jn-qzir
107 | ergh-7g8p
108 | dtkn-v97q
109 | 3e38-4hji
110 | hh3n-3s7c
111 | h4as-bnn4
112 | fksj-fw68
113 | uvma-gv9c
114 | ur6a-fvpc
115 | 3ebq-e9iz
116 | 8uvp-rwpt
117 | sasb-f978
118 | paa2-kvza
119 | sute-ma6h
120 | k8rc-mjrt
121 | 3w87-zbw7
122 | rb6p-jsp4
123 | 5brd-nqzg
124 | 54hu-zyfw
125 | cpdm-pgcz
126 | e5iz-h53i
127 | eqg5-tgc4
128 | jbk5-567r
129 | ysk2-5se4
130 | 3rv7-26gf
131 | 7d7e-riap
132 | c6vs-wub4
133 | eqas-3yai
134 | h8vx-fici
135 | k58w-wc5b
136 | r5a8-wp8c
137 | s78c-gi5b
138 | gj8a-7w2i
139 | ctpt-q8h6
140 | h2ns-nnc3
141 | 92xm-uf99
142 | 4wv7-h5ag
143 | 54j7-ewrt
144 | cffj-ydng
145 | 7dis-buys
146 | 558y-rgv6
147 | d7k5-jyb8
148 | kp2i-ttw8
149 | xa6d-gfkg
150 | ct7f-fbbn
151 | 9bpw-2ysw
152 | qzi7-nx8g
153 | 8u76-ei8i
154 | e8fp-i3ts
155 | eqcz-7qvc
156 | m38i-k8s7
157 | i26j-ai4z
158 | wstj-t8me
159 | xwdj-i9he
160 | c5ah-7mah
161 | 7s8g-vgat
162 | 9brw-ikmh
163 | cutd-edeq
164 | w2wa-sfs6
165 | aimq-hsia
166 | d5qe-8uyf
167 | e3jj-bj6e
168 | p6kk-bbf5
169 | 42ix-g4e3
170 | yann-xf22
171 | n9gm-fzdc
172 | c69b-fkfx
173 | u8uw-t2sm
174 | md9p-6y8z
175 | 4gv8-96x2
176 | uqe6-trgb
177 | yh8u-4rgy
178 | bqki-3pkf
179 | rsp7-azrf
180 | wu45-d3h5
181 | 7it9-7pjx
182 | efix-ampv
183 | gw3x-dtde
184 | whim-t39w
185 | m44u-tdna
186 | uwbz-byyt
187 | pe4x-g4qi
188 | kfeh-ue8m
189 | tz7h-nvqd
190 | efz6-47ik
191 | p47h-h7ra
192 | dtqa-6pjt
193 | 8ruh-ty5d
194 | xbbp-8bw7
195 | 8pvq-5pcm
196 | 2yh2-pequ
197 | yur9-jspm
198 | v7cg-67vv
199 | 36xs-z29u
200 | iwgt-862p
201 | epxw-n458
202 | yrpa-wmth
203 | 3ghn-wv5a
204 | vzty-yezt
205 | 4sv9-5zm8
206 | ba4t-mrbm
207 | 74y5-wjkf
208 | 567b-4d24
209 | ps5c-8d86
210 | eg8t-399m
211 | ykzu-pxxq
212 | qmwp-kjjs
213 | ydem-x7j5
214 | nqv2-nbrj
215 |
--------------------------------------------------------------------------------
/type_detection/ids/austin_ids.txt:
--------------------------------------------------------------------------------
1 | ri75-pahg
2 | 8jyt-x94k
3 | ecmv-9xxi
4 | 5tye-7ray
5 | h3i4-5e5v
6 | hqa6-stx4
7 | szku-46rx
8 | x442-h34c
9 | trxj-f8br
10 | 8c6z-qnmj
11 | q37s-pqpu
12 | wrwk-skv6
13 | 4c6h-tv2y
14 | d6z4-s3ex
15 | 88dg-7xxd
16 | siyu-szxn
17 | bqav-9x6a
18 | 64cq-wf5u
19 | b4y9-5x39
20 | c6ja-7mhw
21 | gzyt-t2by
22 | 4i8t-nckg
23 | q7wj-9ws7
24 | gr59-ids7
25 | ga9y-ypai
26 | jbaf-xebm
27 | dv3q-tn2r
28 | hek3-kuva
29 | amh5-bifm
30 | 8aah-diw2
31 | hut9-4n8t
32 | fhca-e5je
33 | s4tf-m9g2
34 | 5tx2-pk4n
35 | tx8s-62r6
36 | gqmc-bxs4
37 | b6cd-bhbk
38 | cr7p-ssq7
39 | 84ih-p28j
40 | nq9x-w8sx
41 | 5gjn-nmcf
42 | r5kt-xq3y
43 | rfif-mmvg
44 | scqk-petw
45 | mwqa-epx5
46 | 5fnu-ngjq
47 | ykw4-j3aj
48 | gx7t-wzxw
49 | 4sf2-s9as
50 | g9bx-8meu
51 | 959k-a8yh
52 | r6sg-xka2
53 | a6pm-qynf
54 | de95-4khj
55 | uszv-p75d
56 | nttt-2a35
57 | iuw2-kwij
58 | nmp9-45v2
59 | xj3h-ppw2
60 | kidc-knry
61 | yqxj-7evp
62 | zzix-yxi4
63 | wr7f-jdtu
64 | fsgj-5xyt
65 | 2ds5-jyca
66 | awqv-vbfj
67 | 3gc4-g537
68 | gt3n-akq9
69 | wd9d-2jf3
70 | 4u75-seeq
71 | 3qu3-nwxj
72 | ei2n-fehk
73 | ab9p-kxqp
74 | ec78-i9z5
75 | b73m-kiye
76 | ajpy-mwjj
77 | cusd-m48y
78 | f7fd-4st5
79 | a9hv-5z8i
80 | i6dj-uuqe
81 | 9jwp-y89b
82 | ahj3-w5hk
83 | sswp-u5uh
84 | u3yy-shmz
85 | hdpu-g3yy
86 | jmp6-p8e2
87 | chv7-cszp
88 | gwrj-cykm
89 | vwcu-h3qu
90 | uqcn-typ6
91 | rxia-etc2
92 | 6yeq-zz6u
93 | kxm4-pr4y
94 | n63c-e24q
95 | uber-bhwe
96 | vgkf-yny7
97 | 7j64-2qf8
98 | s6n4-3bq2
99 | xcd2-xf2f
100 | nynz-w2da
101 | gxwy-g5wa
102 | 8zu2-guks
103 | w6f6-d2ag
104 | yj9d-ajag
105 | utnt-hag5
106 | m9jn-qzir
107 | ergh-7g8p
108 | dtkn-v97q
109 | 3e38-4hji
110 | hh3n-3s7c
111 | h4as-bnn4
112 | fksj-fw68
113 | uvma-gv9c
114 | ur6a-fvpc
115 | 3ebq-e9iz
116 | 8uvp-rwpt
117 | sasb-f978
118 | paa2-kvza
119 | sute-ma6h
120 | k8rc-mjrt
121 | 3w87-zbw7
122 | rb6p-jsp4
123 | 5brd-nqzg
124 | 54hu-zyfw
125 | cpdm-pgcz
126 | e5iz-h53i
127 | eqg5-tgc4
128 | jbk5-567r
129 | ysk2-5se4
130 | 3rv7-26gf
131 | 7d7e-riap
132 | c6vs-wub4
133 | eqas-3yai
134 | h8vx-fici
135 | k58w-wc5b
136 | r5a8-wp8c
137 | s78c-gi5b
138 | gj8a-7w2i
139 | ctpt-q8h6
140 | h2ns-nnc3
141 | 92xm-uf99
142 | 4wv7-h5ag
143 | 54j7-ewrt
144 | cffj-ydng
145 | 7dis-buys
146 | 558y-rgv6
147 | d7k5-jyb8
148 | kp2i-ttw8
149 | xa6d-gfkg
150 | ct7f-fbbn
151 | 9bpw-2ysw
152 | qzi7-nx8g
153 | 8u76-ei8i
154 | e8fp-i3ts
155 | eqcz-7qvc
156 | m38i-k8s7
157 | i26j-ai4z
158 | wstj-t8me
159 | xwdj-i9he
160 | c5ah-7mah
161 | 7s8g-vgat
162 | 9brw-ikmh
163 | cutd-edeq
164 | w2wa-sfs6
165 | aimq-hsia
166 | d5qe-8uyf
167 | e3jj-bj6e
168 | p6kk-bbf5
169 | 42ix-g4e3
170 | yann-xf22
171 | n9gm-fzdc
172 | c69b-fkfx
173 | u8uw-t2sm
174 | md9p-6y8z
175 | 4gv8-96x2
176 | uqe6-trgb
177 | yh8u-4rgy
178 | bqki-3pkf
179 | rsp7-azrf
180 | wu45-d3h5
181 | 7it9-7pjx
182 | efix-ampv
183 | gw3x-dtde
184 | whim-t39w
185 | m44u-tdna
186 | uwbz-byyt
187 | pe4x-g4qi
188 | kfeh-ue8m
189 | tz7h-nvqd
190 | efz6-47ik
191 | p47h-h7ra
192 | dtqa-6pjt
193 | 8ruh-ty5d
194 | xbbp-8bw7
195 | 8pvq-5pcm
196 | 2yh2-pequ
197 | yur9-jspm
198 | v7cg-67vv
199 | 36xs-z29u
200 | iwgt-862p
201 | epxw-n458
202 | yrpa-wmth
203 | 3ghn-wv5a
204 | vzty-yezt
205 | 4sv9-5zm8
206 | ba4t-mrbm
207 | 74y5-wjkf
208 | 567b-4d24
209 | ps5c-8d86
210 | eg8t-399m
211 | ykzu-pxxq
212 | qmwp-kjjs
213 | ydem-x7j5
214 | nqv2-nbrj
215 |
--------------------------------------------------------------------------------
/type_detection/detect.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | ##
3 | ## Copyright (C) 2014, New York University.
4 | ## All rights reserved.
5 | ## Contact: kien.pham@nyu.edu
6 | ##
7 | ## "Redistribution and use in source and binary forms, with or without
8 | ## modification, are permitted provided that the following conditions are met:
9 | ##
10 | ## - Redistributions of source code must retain the above copyright notice,
11 | ## this list of conditions and the following disclaimer.
12 | ## - Redistributions in binary form must reproduce the above copyright
13 | ## notice, this list of conditions and the following disclaimer in the
14 | ## documentation and/or other materials provided with the distribution.
15 | ## - Neither the name of New York University nor the names of its
16 | ## contributors may be used to endorse or promote products derived from
17 | ## this software without specific prior written permission.
18 | ##
19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 | ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
30 | ##
31 | ###############################################################################
32 |
33 | import sys
34 | import regex
35 | import os.path
36 | import re
37 |
38 | def detect_type(sample_path, output_path, city, id):
39 | filename = sample_path + "/" + city + "_" + id + ".txt"
40 | if not os.path.isfile(filename):
41 | return
42 |
43 | output_detail_file = output_path + "/" + city + "_" + id + ".txt"
44 | # output_detail_file = "detail_type/" + city + "_" + id + ".txt"
45 |
46 | if os.path.isfile(output_detail_file):
47 | print "File " + output_detail_file + " is existed"
48 | return
49 |
50 | output = open(output_detail_file, "w")
51 | types = {}
52 | for t in regex.Type:
53 | types[t] = 0
54 | with open(filename) as lines:
55 | for line in lines:
56 | a = line.strip("\n").split("\t")
57 | if len(a) < 1:
58 | continue
59 | column = a[0]
60 | values = a[1:]
61 | type = regex.detect(column, values)
62 | if len(type) > 0:
63 | for t in type:
64 | output.write(column + "\t" + t + "\n")
65 | output.close()
66 |
67 | def main(argv):
68 | '''
69 | First Argument: path to directory containing sampling values of all data sets
70 | Second Argument: path to the file containing all data set ids.
71 | Third Argument: path to directory containing detection results.
72 | Fourth Argument: city name, which is used as a prefix for output files.
73 | '''
74 | if len(argv) != 4:
75 | print "The program takes 4 arguments, " + str(len(argv)) + " is given."
76 | return
77 | sample_path = argv[0]
78 | ids_file = argv[1]
79 | output_path = argv[2]
80 | city = argv[3]
81 |
82 | with open(ids_file) as lines:
83 | for line in lines:
84 | id = line.strip("\n")
85 | detect_type(sample_path, output_path, city, id)
86 |
87 | if __name__=="__main__":
88 | main(sys.argv[1:])
89 |
--------------------------------------------------------------------------------
/barchart/barchart_loc.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | ##
3 | ## Copyright (C) 2014, New York University.
4 | ## All rights reserved.
5 | ## Contact: kien.pham@nyu.edu
6 | ##
7 | ## "Redistribution and use in source and binary forms, with or without
8 | ## modification, are permitted provided that the following conditions are met:
9 | ##
10 | ## - Redistributions of source code must retain the above copyright notice,
11 | ## this list of conditions and the following disclaimer.
12 | ## - Redistributions in binary form must reproduce the above copyright
13 | ## notice, this list of conditions and the following disclaimer in the
14 | ## documentation and/or other materials provided with the distribution.
15 | ## - Neither the name of New York University nor the names of its
16 | ## contributors may be used to endorse or promote products derived from
17 | ## this software without specific prior written permission.
18 | ##
19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 | ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
30 | ##
31 | ###############################################################################
32 | import sys
33 | import matplotlib.pyplot as plt
34 | import numpy as np
35 |
def getData():
    """Read loc.csv into [city order, city -> list of floats].

    Row i of the CSV holds the values for the i-th city in the fixed
    order below; the final row is the aggregate over all cities.
    Raises IndexError if the file has fewer than 11 rows.
    """
    cities = ["NYC", "Kansas", "Seattle", "Chicago", "Baltimore", "SF",
              "Raleigh", "Edmonton", "Boston", "Austin", "All Cities"]
    with open("loc.csv") as lines:
        rows = [[float(x) for x in line.strip("\n").split(",")] for line in lines]
    m = {}
    for i, city in enumerate(cities):
        m[city] = rows[i]
    return [cities, m]
70 |
71 |
def main(argv):
    """Draw a grouped bar chart: per city, the fraction of data sets that
    contain each location attribute type (Lat/Lon, Address, Zipcode).

    Reads the fractions via getData() from loc.csv and shows the chart
    interactively; argv is unused.
    """
    s, m = getData()
    label = ["Lat/Lon", "Address", "Zipcode"]  # Label for x axis
    # Fixed color per city, kept consistent across the barchart scripts.
    _color = {
        "Baltimore": "#a6cee3",
        "Chicago": "#1f78b4",
        "Edmonton": "#b2df8a",
        "Kansas": "#33a02c",
        "Seattle": "#fb9a99",
        "SF": "#e31a1c",
        "NYC": "#fdbf6f",
        "Boston": "#ff7f00",
        "Austin": "#cab2d6",
        "Raleigh": "#6a3d9a",
        "All Cities": "#ffff99"}

    N = 3          # one group of bars per attribute type
    width = 0.07   # width of a single bar
    ind = np.arange(N)
    fig, ax = plt.subplots()
    # Offset each city's bars by one bar-width inside every group.
    count = 1
    for key in s:
        ax.bar(ind + count * width, m[key], width, color=_color[key])
        count += 1

    plt.ylim(0, 1.0)
    plt.ylabel('Percentage of Datasets', fontsize=30)
    plt.xticks(ind + 0.4, label, fontsize=25)
    plt.yticks(fontsize=17)
    # ax.legend(s,prop={'size':10.5},loc=1)

    plt.show()

if __name__ == "__main__":
    main(sys.argv[1:])
112 |
113 |
--------------------------------------------------------------------------------
/barchart/barchart_time_loc_num.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | ##
3 | ## Copyright (C) 2014, New York University.
4 | ## All rights reserved.
5 | ## Contact: kien.pham@nyu.edu
6 | ##
7 | ## "Redistribution and use in source and binary forms, with or without
8 | ## modification, are permitted provided that the following conditions are met:
9 | ##
10 | ## - Redistributions of source code must retain the above copyright notice,
11 | ## this list of conditions and the following disclaimer.
12 | ## - Redistributions in binary form must reproduce the above copyright
13 | ## notice, this list of conditions and the following disclaimer in the
14 | ## documentation and/or other materials provided with the distribution.
15 | ## - Neither the name of New York University nor the names of its
16 | ## contributors may be used to endorse or promote products derived from
17 | ## this software without specific prior written permission.
18 | ##
19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 | ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
30 | ##
31 | ###############################################################################
32 |
33 | import sys
34 | import matplotlib.pyplot as plt
35 | import numpy as np
36 |
def getData():
    """Read time_loc_number.csv into [city order, city -> list of floats].

    Row i of the CSV holds the values for the i-th city in the fixed
    order below; the final row is the aggregate over all cities.
    Raises IndexError if the file has fewer than 11 rows.
    """
    cities = ["NYC", "Kansas", "Seattle", "Chicago", "Baltimore", "SF",
              "Raleigh", "Edmonton", "Boston", "Austin", "All Cities"]
    with open("time_loc_number.csv") as lines:
        rows = [[float(x) for x in line.strip("\n").split(",")] for line in lines]
    m = {}
    for i, city in enumerate(cities):
        m[city] = rows[i]
    return [cities, m]
71 |
def main(argv):
    """Draw a grouped bar chart: per city, the fraction of data sets that
    contain each high-level attribute type (Location, Time, Number).

    Reads the fractions via getData() from time_loc_number.csv and shows
    the chart interactively; argv is unused.
    """
    s, m = getData()
    label = ["Location", "Time", "Number"]  # Label for x axis
    # Fixed color per city, kept consistent across the barchart scripts.
    _color = {
        "Baltimore": "#a6cee3",
        "Chicago": "#1f78b4",
        "Edmonton": "#b2df8a",
        "Kansas": "#33a02c",
        "Seattle": "#fb9a99",
        "SF": "#e31a1c",
        "NYC": "#fdbf6f",
        "Boston": "#ff7f00",
        "Austin": "#cab2d6",
        "Raleigh": "#6a3d9a",
        "All Cities": "#ffff99"}

    N = 3          # one group of bars per attribute type
    width = 0.07   # width of a single bar
    ind = np.arange(N)
    fig, ax = plt.subplots()
    # Offset each city's bars by one bar-width inside every group.
    count = 1
    for key in s:
        ax.bar(ind + count * width, m[key], width, color=_color[key])
        count += 1
    plt.ylim(0, 1.0)

    plt.ylabel('Percentage of Datasets', fontsize=30)
    plt.xticks(ind + 0.5, label, fontsize=25)
    plt.tick_params(axis='y', labelsize=17)
    # ax.legend(s, loc=1,prop={'size':13})

    plt.show()

if __name__ == "__main__":
    main(sys.argv[1:])
111 |
112 |
--------------------------------------------------------------------------------
/barchart/barchart_time.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | ##
3 | ## Copyright (C) 2014, New York University.
4 | ## All rights reserved.
5 | ## Contact: kien.pham@nyu.edu
6 | ##
7 | ## "Redistribution and use in source and binary forms, with or without
8 | ## modification, are permitted provided that the following conditions are met:
9 | ##
10 | ## - Redistributions of source code must retain the above copyright notice,
11 | ## this list of conditions and the following disclaimer.
12 | ## - Redistributions in binary form must reproduce the above copyright
13 | ## notice, this list of conditions and the following disclaimer in the
14 | ## documentation and/or other materials provided with the distribution.
15 | ## - Neither the name of New York University nor the names of its
16 | ## contributors may be used to endorse or promote products derived from
17 | ## this software without specific prior written permission.
18 | ##
19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 | ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
30 | ##
31 | ###############################################################################
32 |
33 | import sys
34 | import matplotlib.pyplot as plt
35 | import numpy as np
36 |
def getData():
    """Read time.csv into [city order, city -> list of floats].

    Row i of the CSV holds the values for the i-th city in the fixed
    order below; the final row is the aggregate over all cities.
    Raises IndexError if the file has fewer than 11 rows.
    """
    cities = ["NYC", "Kansas", "Seattle", "Chicago", "Baltimore", "SF",
              "Raleigh", "Edmonton", "Boston", "Austin", "All Cities"]
    with open("time.csv") as lines:
        rows = [[float(x) for x in line.strip("\n").split(",")] for line in lines]
    m = {}
    for i, city in enumerate(cities):
        m[city] = rows[i]
    return [cities, m]
73 |
74 |
def main(argv):
    """Draw a grouped bar chart: per city, the fraction of data sets that
    contain each temporal attribute granularity (Date, Month, Year).

    Reads the fractions via getData() from time.csv and shows the chart
    interactively; argv is unused.
    """
    s, m = getData()
    label = ["Date", "Month", "Year"]  # Label for x axis
    # Fixed color per city, kept consistent across the barchart scripts.
    _color = {
        "Baltimore": "#a6cee3",
        "Chicago": "#1f78b4",
        "Edmonton": "#b2df8a",
        "Kansas": "#33a02c",
        "Seattle": "#fb9a99",
        "SF": "#e31a1c",
        "NYC": "#fdbf6f",
        "Boston": "#ff7f00",
        "Austin": "#cab2d6",
        "Raleigh": "#6a3d9a",
        "All Cities": "#ffff99"}

    N = 3          # one group of bars per attribute type
    width = 0.07   # width of a single bar
    ind = np.arange(N)
    fig, ax = plt.subplots()
    # Offset each city's bars by one bar-width inside every group.
    count = 1
    for key in s:
        ax.bar(ind + count * width, m[key], width, color=_color[key])
        count += 1

    plt.ylim(0, 1.0)
    plt.ylabel('Percentage of Datasets', fontsize=30)
    plt.xticks(ind + 0.4, label, fontsize=25)
    plt.tick_params(axis='y', labelsize=17)
    # ax.legend(s,prop={'size':10.5},loc=1)

    plt.show()

if __name__ == "__main__":
    main(sys.argv[1:])
115 |
116 |
--------------------------------------------------------------------------------
/latlon_to_zipcode/main.cpp:
--------------------------------------------------------------------------------
1 | #include "KdTreeBB.hpp"
2 | #include "Neighborhoods.hpp"
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | KdTreeBB::Item* loadItems(char* file, KdTreeBB::Item* &items, int &size)
10 | {
11 | std::ifstream in(file);
12 | std::string line;
13 |
14 | float left, right, top, bottom;
15 | int zipcode;
16 |
17 | std::getline(in, line);
18 | size = atoi(line.c_str()); //number of lines
19 | items = (KdTreeBB::Item*)malloc(sizeof(KdTreeBB::Item)*size);
20 | int *zipCodes = (int*)malloc(sizeof(int)*size);
21 | int index = 0;
22 | while(std::getline(in, line))
23 | {
24 | std::stringstream lineStream(line);
25 | sscanf(line.c_str(), "%d %f %f %f %f", &zipcode, &left, &bottom, &right, &top);
26 |
27 | items[index].bbox[0][0] = left;
28 | items[index].bbox[0][1] = right;
29 | items[index].bbox[1][0] = bottom;
30 | items[index].bbox[1][1] = top;
31 | zipCodes[index] = zipcode;
32 | items[index].data = zipCodes+index;
33 | index ++;
34 | }
35 | return items;
36 | }
37 |
// Build the lookup structures used by the rest of the program:
// a kd-tree over the zipcode bounding boxes loaded from bboxes.csv, plus
// the Neighborhoods data loaded from point.txt.
// NOTE(review): both paths are relative to the working directory — run
// the binary from the directory containing converted_shapefile/.
void Initialize(Neighborhoods &nb, KdTreeBB &kdtree)
{
    //Create KDTree
    int size;
    KdTreeBB::Item* items;
    loadItems("converted_shapefile/bboxes.csv", items, size);
    kdtree.createKdTree(items, size);

    // Load the neighborhood point data used alongside the kd-tree.
    nb.loadFromFile("converted_shapefile/point.txt");
}
48 |
49 | int searchZipCode(float lat, float lon, Neighborhoods &nb, const KdTreeBB &kdtree)
50 | {
51 | KdTreeBB::Query q;
52 | q.setViewport(lon, lat, lon, lat);
53 | KdTreeBB::QueryResult result;
54 | kdtree.query(q, result);
55 |
56 | for (int i=0; i zips;//Output
69 |
70 | std::string output = std::string(filename) + std::string(".zipcode");
71 | std::ofstream outFile;
72 | outFile.open(output.c_str());
73 |
74 |
75 | float lat, lon;
76 | while(std::getline(in, line))
77 | {
78 | try
79 | {
80 | sscanf(line.c_str(), "%f,%f", &lat, &lon);
81 | int zip = searchZipCode(lat, lon, nb, kdtree);
82 | if (zip != -1)
83 | zips.push_back(zip);
84 | outFile<::iterator it=zips.begin(); it!=zips.end(); ++it)
97 | // if (*it > 0)
98 | // outFile <<*it<8:
61 | schema.append(v)
62 | else:
63 | break
64 | else:
65 | print filepath + ' is not json file'
66 | except Exception as ex:
67 | print ex
68 | print "Error line: " + str(sys.exc_traceback.tb_lineno)
69 | return schema
70 |
71 | def get_all_schema(path):
72 | m = {} #Map between id and schema
73 | try:
74 | for (dirpath, dirnames, filenames) in walk(path):
75 | for filename in filenames:
76 | if filename[-4:] == 'json':
77 | id = filename[:-5]
78 | schema = get_schema(dirpath + filename)
79 | if schema:
80 | m[id] = schema
81 | break
82 | return m
83 | except Exception as ex:
84 | print ex
85 | return None
86 |
87 | return m
88 |
def jaccard_similarity(schema1, schema2):
    """Return the Jaccard similarity |A∩B| / |A∪B| of two sets, in [0, 1].

    Returns 0.0 when both sets are empty instead of raising
    ZeroDivisionError.
    """
    union_size = len(schema1.union(schema2))
    if union_size == 0:
        return 0.0
    return len(schema1.intersection(schema2)) / float(union_size)
91 |
def run(city, in_path, out_path):
    """Compute pairwise schema similarity for one city's data sets.

    Writes tab-separated lines 'id1<TAB>id2<TAB>similarity' — only pairs
    with similarity > 0 — to <out_path>/<city>_schema_similarity.txt.
    """
    # Compute the schemas before creating the output file, so a failed
    # walk does not leave an empty result file behind.
    m = get_all_schema(in_path)
    ids = list(m.keys())  # list() keeps the index access Py2/Py3 safe
    # 'with' guarantees the output file is closed even on error.
    with open(out_path + "/" + city + "_schema_similarity.txt", "w") as f:
        for i in range(len(ids)):
            for j in range(i + 1, len(ids)):
                sim = jaccard_similarity(set(m[ids[i]]), set(m[ids[j]]))
                if sim > 0:
                    f.write(ids[i] + "\t" + ids[j] + "\t" + str(sim) + "\n")
102 |
103 | def main(argv):
104 | if len(argv) != 3:
105 | print "The program takes 3 arguments, " + str(len(argv)) + " is given."
106 | return
107 | city = argv[0]
108 | in_path = argv[1] + "/"
109 | out_path = argv[2]
110 | #path = "data/"
111 | run(city, in_path, out_path)
112 |
113 | if __name__=="__main__":
114 | main(sys.argv[1:])
115 |
--------------------------------------------------------------------------------
/type_detection/ijson/backends/yajl.py:
--------------------------------------------------------------------------------
1 | '''
2 | Wrapper for YAJL C library version 1.x.
3 | '''
4 |
5 | from ctypes import Structure, c_uint, c_ubyte, c_int, c_long, c_double, \
6 | c_void_p, c_char_p, CFUNCTYPE, POINTER, byref, string_at, cast , \
7 | cdll, util, c_char
8 | from decimal import Decimal
9 |
10 | from ijson import common, backends
11 | from ijson.compat import b2s
12 |
13 |
14 | yajl = backends.find_yajl(1)
15 |
16 | yajl.yajl_alloc.restype = POINTER(c_char)
17 | yajl.yajl_get_error.restype = POINTER(c_char)
18 |
19 | C_EMPTY = CFUNCTYPE(c_int, c_void_p)
20 | C_INT = CFUNCTYPE(c_int, c_void_p, c_int)
21 | C_LONG = CFUNCTYPE(c_int, c_void_p, c_long)
22 | C_DOUBLE = CFUNCTYPE(c_int, c_void_p, c_double)
23 | C_STR = CFUNCTYPE(c_int, c_void_p, POINTER(c_ubyte), c_uint)
24 |
25 |
def number(value):
    '''
    Convert a string holding any Javascript number into the matching
    Python value: int when the text is integral, Decimal otherwise.
    '''
    try:
        converted = int(value)
    except ValueError:
        converted = Decimal(value)
    return converted
35 |
_callback_data = [
    # Mapping of JSON parser events to callback C types and value converters.
    # Used to define the Callbacks structure and actual callback functions
    # inside the parse function.
    # Each entry: (event name, CFUNCTYPE signature, converter from the raw
    # C callback arguments to a Python value).
    ('null', C_EMPTY, lambda: None),
    ('boolean', C_INT, lambda v: bool(v)),
    # "integer" and "double" aren't actually yielded by yajl since "number"
    # takes precedence if defined
    ('integer', C_LONG, lambda v, l: int(string_at(v, l))),
    ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))),
    ('number', C_STR, lambda v, l: number(b2s(string_at(v, l)))),
    ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')),
    ('start_map', C_EMPTY, lambda: None),
    ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))),
    ('end_map', C_EMPTY, lambda: None),
    ('start_array', C_EMPTY, lambda: None),
    ('end_array', C_EMPTY, lambda: None),
]
54 |
class Callbacks(Structure):
    # C struct of callback function pointers passed to yajl_alloc, with
    # fields in the exact order listed in _callback_data.
    _fields_ = [(name, type) for name, type, func in _callback_data]
57 |
class Config(Structure):
    # Parser configuration struct passed to yajl_alloc; each flag is an
    # unsigned int used as a boolean by the C library.
    _fields_ = [
        ("allowComments", c_uint),
        ("checkUTF8", c_uint)
    ]
63 |
# Status codes returned by yajl_parse / yajl_parse_complete.
YAJL_OK = 0
YAJL_CANCELLED = 1
YAJL_INSUFFICIENT_DATA = 2
YAJL_ERROR = 3
68 |
69 |
def basic_parse(f, allow_comments=False, check_utf8=False, buf_size=64 * 1024):
    '''
    Iterator yielding unprefixed events.

    Parameters:

    - f: a readable file-like object with JSON input
    - allow_comments: tells parser to allow comments in JSON input
    - check_utf8: if True, parser will cause an error if input is invalid utf-8
    - buf_size: a size of an input buffer
    '''
    # Events accumulated by the C callbacks during one parse call, drained
    # (yielded) after every chunk.
    events = []

    def callback(event, func_type, func):
        # Wrap a Python converter into a ctypes callback for one event type.
        # Returning 1 tells yajl to keep parsing (0 would cancel).
        def c_callback(context, *args):
            events.append((event, func(*args)))
            return 1
        return func_type(c_callback)

    callbacks = Callbacks(*[callback(*data) for data in _callback_data])
    config = Config(allow_comments, check_utf8)
    handle = yajl.yajl_alloc(byref(callbacks), byref(config), None, None)
    try:
        while True:
            buffer = f.read(buf_size)
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                # EOF reached: ask yajl to flush any buffered state.
                result = yajl.yajl_parse_complete(handle)
            if result == YAJL_ERROR:
                perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer))
                error = cast(perror, c_char_p).value
                yajl.yajl_free_error(handle, perror)
                raise common.JSONError(error)
            if not buffer and not events:
                # Nothing left to read and nothing left to yield.
                if result == YAJL_INSUFFICIENT_DATA:
                    raise common.IncompleteJSONError()
                break

            for event in events:
                yield event
            events = []
    finally:
        # Always release the C parser handle, even if the consumer stops early.
        yajl.yajl_free(handle)
114 |
def parse(file, **kwargs):
    '''
    Backend-specific wrapper for ijson.common.parse.
    '''
    event_stream = basic_parse(file, **kwargs)
    return common.parse(event_stream)
120 |
def items(file, prefix):
    '''
    Backend-specific wrapper for ijson.common.items.
    '''
    parsed_events = parse(file)
    return common.items(parsed_events, prefix)
126 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/ijson/backends/yajl.py:
--------------------------------------------------------------------------------
1 | '''
2 | Wrapper for YAJL C library version 1.x.
3 | '''
4 |
5 | from ctypes import Structure, c_uint, c_ubyte, c_int, c_long, c_double, \
6 | c_void_p, c_char_p, CFUNCTYPE, POINTER, byref, string_at, cast , \
7 | cdll, util, c_char
8 | from decimal import Decimal
9 |
10 | from ijson import common, backends
11 | from ijson.compat import b2s
12 |
13 |
14 | yajl = backends.find_yajl(1)
15 |
16 | yajl.yajl_alloc.restype = POINTER(c_char)
17 | yajl.yajl_get_error.restype = POINTER(c_char)
18 |
19 | C_EMPTY = CFUNCTYPE(c_int, c_void_p)
20 | C_INT = CFUNCTYPE(c_int, c_void_p, c_int)
21 | C_LONG = CFUNCTYPE(c_int, c_void_p, c_long)
22 | C_DOUBLE = CFUNCTYPE(c_int, c_void_p, c_double)
23 | C_STR = CFUNCTYPE(c_int, c_void_p, POINTER(c_ubyte), c_uint)
24 |
25 |
def number(value):
    '''
    Convert a string holding any Javascript number into the matching
    Python value: int when the text is integral, Decimal otherwise.
    '''
    try:
        converted = int(value)
    except ValueError:
        converted = Decimal(value)
    return converted
35 |
_callback_data = [
    # Mapping of JSON parser events to callback C types and value converters.
    # Used to define the Callbacks structure and actual callback functions
    # inside the parse function.
    # Each entry: (event name, CFUNCTYPE signature, converter from the raw
    # C callback arguments to a Python value).
    ('null', C_EMPTY, lambda: None),
    ('boolean', C_INT, lambda v: bool(v)),
    # "integer" and "double" aren't actually yielded by yajl since "number"
    # takes precedence if defined
    ('integer', C_LONG, lambda v, l: int(string_at(v, l))),
    ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))),
    ('number', C_STR, lambda v, l: number(b2s(string_at(v, l)))),
    ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')),
    ('start_map', C_EMPTY, lambda: None),
    ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))),
    ('end_map', C_EMPTY, lambda: None),
    ('start_array', C_EMPTY, lambda: None),
    ('end_array', C_EMPTY, lambda: None),
]
54 |
class Callbacks(Structure):
    # C struct of callback function pointers passed to yajl_alloc, with
    # fields in the exact order listed in _callback_data.
    _fields_ = [(name, type) for name, type, func in _callback_data]
57 |
class Config(Structure):
    # Parser configuration struct passed to yajl_alloc; each flag is an
    # unsigned int used as a boolean by the C library.
    _fields_ = [
        ("allowComments", c_uint),
        ("checkUTF8", c_uint)
    ]
63 |
# Status codes returned by yajl_parse / yajl_parse_complete.
YAJL_OK = 0
YAJL_CANCELLED = 1
YAJL_INSUFFICIENT_DATA = 2
YAJL_ERROR = 3
68 |
69 |
def basic_parse(f, allow_comments=False, check_utf8=False, buf_size=64 * 1024):
    '''
    Iterator yielding unprefixed events.

    Parameters:

    - f: a readable file-like object with JSON input
    - allow_comments: tells parser to allow comments in JSON input
    - check_utf8: if True, parser will cause an error if input is invalid utf-8
    - buf_size: a size of an input buffer
    '''
    # Events accumulated by the C callbacks during one parse call, drained
    # (yielded) after every chunk.
    events = []

    def callback(event, func_type, func):
        # Wrap a Python converter into a ctypes callback for one event type.
        # Returning 1 tells yajl to keep parsing (0 would cancel).
        def c_callback(context, *args):
            events.append((event, func(*args)))
            return 1
        return func_type(c_callback)

    callbacks = Callbacks(*[callback(*data) for data in _callback_data])
    config = Config(allow_comments, check_utf8)
    handle = yajl.yajl_alloc(byref(callbacks), byref(config), None, None)
    try:
        while True:
            buffer = f.read(buf_size)
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                # EOF reached: ask yajl to flush any buffered state.
                result = yajl.yajl_parse_complete(handle)
            if result == YAJL_ERROR:
                perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer))
                error = cast(perror, c_char_p).value
                yajl.yajl_free_error(handle, perror)
                raise common.JSONError(error)
            if not buffer and not events:
                # Nothing left to read and nothing left to yield.
                if result == YAJL_INSUFFICIENT_DATA:
                    raise common.IncompleteJSONError()
                break

            for event in events:
                yield event
            events = []
    finally:
        # Always release the C parser handle, even if the consumer stops early.
        yajl.yajl_free(handle)
114 |
def parse(file, **kwargs):
    '''
    Backend-specific wrapper for ijson.common.parse.
    '''
    event_stream = basic_parse(file, **kwargs)
    return common.parse(event_stream)
120 |
def items(file, prefix, **kwargs):
    '''
    Backend-specific wrapper for ijson.common.items.

    Extra keyword arguments (e.g. buf_size, allow_comments) are forwarded to
    the underlying parser; previously no parser options could be passed.
    '''
    return common.items(parse(file, **kwargs), prefix)
126 |
--------------------------------------------------------------------------------
/type_detection/ijson/backends/yajl2.py:
--------------------------------------------------------------------------------
1 | '''
2 | Wrapper for YAJL C library version 2.x.
3 | '''
4 |
5 | from ctypes import Structure, c_uint, c_ubyte, c_int, c_long, c_double, \
6 | c_void_p, c_char_p, CFUNCTYPE, POINTER, byref, string_at, cast , \
7 | cdll, util, c_char
8 | from decimal import Decimal
9 |
10 | from ijson import common, backends
11 | from ijson.compat import b2s
12 |
13 |
yajl = backends.find_yajl(2)  # locate and load the yajl 2.x shared library

# Declare pointer-returning functions explicitly: ctypes assumes a C int
# return type by default, which would mangle the opaque handle pointers.
yajl.yajl_alloc.restype = POINTER(c_char)
yajl.yajl_get_error.restype = POINTER(c_char)

# ctypes callback signatures matching yajl's callback prototypes; the first
# argument is the opaque context pointer (unused by this backend).
C_EMPTY = CFUNCTYPE(c_int, c_void_p)
C_INT = CFUNCTYPE(c_int, c_void_p, c_int)
C_LONG = CFUNCTYPE(c_int, c_void_p, c_long)
C_DOUBLE = CFUNCTYPE(c_int, c_void_p, c_double)
C_STR = CFUNCTYPE(c_int, c_void_p, POINTER(c_ubyte), c_uint)
24 |
25 |
def number(value):
    '''
    Convert the textual representation of a JSON number into the matching
    Python value: an int when the text parses as one, a Decimal otherwise.
    '''
    try:
        result = int(value)
    except ValueError:
        result = Decimal(value)
    return result
35 |
_callback_data = [
    # Mapping of JSON parser events to callback C types and value converters.
    # Used to define the Callbacks structure and actual callback functions
    # inside the parse function.
    ('null', C_EMPTY, lambda: None),
    ('boolean', C_INT, lambda v: bool(v)),
    # "integer" and "double" aren't actually yielded by yajl since "number"
    # takes precedence if defined
    ('integer', C_LONG, lambda v, l: int(string_at(v, l))),
    ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))),
    # "number" arrives as raw text (pointer + length); converted to
    # int/Decimal in Python via number() above.
    ('number', C_STR, lambda v, l: number(b2s(string_at(v, l)))),
    ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')),
    ('start_map', C_EMPTY, lambda: None),
    ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))),
    ('end_map', C_EMPTY, lambda: None),
    ('start_array', C_EMPTY, lambda: None),
    ('end_array', C_EMPTY, lambda: None),
]
54 |
class Callbacks(Structure):
    # ctypes structure handed to yajl_alloc; fields are generated from
    # _callback_data so they line up with the callback definitions.
    # Loop variables renamed so they no longer shadow the builtin `type`.
    _fields_ = [(name, ftype) for name, ftype, _func in _callback_data]
57 |
# Parser status codes compared against the return values of
# yajl_parse / yajl_complete_parse.
YAJL_OK = 0
YAJL_CANCELLED = 1
YAJL_INSUFFICIENT_DATA = 2
YAJL_ERROR = 3

# constants defined in yajl_parse.h
YAJL_ALLOW_COMMENTS = 1
YAJL_MULTIPLE_VALUES = 8
66 |
67 |
def basic_parse(f, allow_comments=False, buf_size=64 * 1024,
                multiple_values=False):
    '''
    Iterator yielding unprefixed events.

    Parameters:

    - f: a readable file-like object with JSON input
    - allow_comments: tells parser to allow comments in JSON input
    - buf_size: a size of an input buffer
    - multiple_values: allows the parser to parse multiple JSON objects
    '''
    # Events accumulated by the C callbacks while one buffer is parsed;
    # drained after each yajl_parse call.
    events = []

    def callback(event, func_type, func):
        # Wrap a Python converter in a ctypes callback of the signature yajl
        # expects; returning 1 tells yajl to keep parsing.
        def c_callback(context, *args):
            events.append((event, func(*args)))
            return 1
        return func_type(c_callback)

    # NOTE(review): `callbacks` is passed by reference, so it must stay alive
    # for as long as `handle` is used -- it does, as a local.
    callbacks = Callbacks(*[callback(*data) for data in _callback_data])
    handle = yajl.yajl_alloc(byref(callbacks), None, None)
    if allow_comments:
        yajl.yajl_config(handle, YAJL_ALLOW_COMMENTS, 1)
    if multiple_values:
        yajl.yajl_config(handle, YAJL_MULTIPLE_VALUES, 1)
    try:
        while True:
            buffer = f.read(buf_size)
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                # EOF reached: ask yajl to flush any buffered state.
                result = yajl.yajl_complete_parse(handle)
            if result == YAJL_ERROR:
                perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer))
                error = cast(perror, c_char_p).value
                yajl.yajl_free_error(handle, perror)
                raise common.JSONError(error)
            if not buffer and not events:
                if result == YAJL_INSUFFICIENT_DATA:
                    raise common.IncompleteJSONError()
                break

            # Rebinding `events` also redirects the callbacks' closure to the
            # fresh list, so no event is delivered twice.
            for event in events:
                yield event
            events = []
    finally:
        yajl.yajl_free(handle)
116 |
def parse(file, **kwargs):
    '''
    Thin wrapper producing prefixed events for this backend via
    ijson.common.parse; keyword arguments are passed to basic_parse.
    '''
    event_stream = basic_parse(file, **kwargs)
    return common.parse(event_stream)
122 |
def items(file, prefix, **kwargs):
    '''
    Backend-specific wrapper for ijson.common.items.

    Extra keyword arguments (e.g. buf_size, allow_comments, multiple_values)
    are forwarded to the underlying parser; previously no parser options
    could be passed.
    '''
    return common.items(parse(file, **kwargs), prefix)
128 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/ijson/backends/yajl2.py:
--------------------------------------------------------------------------------
1 | '''
2 | Wrapper for YAJL C library version 2.x.
3 | '''
4 |
5 | from ctypes import Structure, c_uint, c_ubyte, c_int, c_long, c_double, \
6 | c_void_p, c_char_p, CFUNCTYPE, POINTER, byref, string_at, cast , \
7 | cdll, util, c_char
8 | from decimal import Decimal
9 |
10 | from ijson import common, backends
11 | from ijson.compat import b2s
12 |
13 |
yajl = backends.find_yajl(2)  # locate and load the yajl 2.x shared library

# Declare pointer-returning functions explicitly: ctypes assumes a C int
# return type by default, which would mangle the opaque handle pointers.
yajl.yajl_alloc.restype = POINTER(c_char)
yajl.yajl_get_error.restype = POINTER(c_char)

# ctypes callback signatures matching yajl's callback prototypes; the first
# argument is the opaque context pointer (unused by this backend).
C_EMPTY = CFUNCTYPE(c_int, c_void_p)
C_INT = CFUNCTYPE(c_int, c_void_p, c_int)
C_LONG = CFUNCTYPE(c_int, c_void_p, c_long)
C_DOUBLE = CFUNCTYPE(c_int, c_void_p, c_double)
C_STR = CFUNCTYPE(c_int, c_void_p, POINTER(c_ubyte), c_uint)
24 |
25 |
def number(value):
    '''
    Convert the textual representation of a JSON number into the matching
    Python value: an int when the text parses as one, a Decimal otherwise.
    '''
    try:
        result = int(value)
    except ValueError:
        result = Decimal(value)
    return result
35 |
_callback_data = [
    # Mapping of JSON parser events to callback C types and value converters.
    # Used to define the Callbacks structure and actual callback functions
    # inside the parse function.
    ('null', C_EMPTY, lambda: None),
    ('boolean', C_INT, lambda v: bool(v)),
    # "integer" and "double" aren't actually yielded by yajl since "number"
    # takes precedence if defined
    ('integer', C_LONG, lambda v, l: int(string_at(v, l))),
    ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))),
    # "number" arrives as raw text (pointer + length); converted to
    # int/Decimal in Python via number() above.
    ('number', C_STR, lambda v, l: number(b2s(string_at(v, l)))),
    ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')),
    ('start_map', C_EMPTY, lambda: None),
    ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))),
    ('end_map', C_EMPTY, lambda: None),
    ('start_array', C_EMPTY, lambda: None),
    ('end_array', C_EMPTY, lambda: None),
]
54 |
class Callbacks(Structure):
    # ctypes structure handed to yajl_alloc; fields are generated from
    # _callback_data so they line up with the callback definitions.
    # Loop variables renamed so they no longer shadow the builtin `type`.
    _fields_ = [(name, ftype) for name, ftype, _func in _callback_data]
57 |
# Parser status codes compared against the return values of
# yajl_parse / yajl_complete_parse.
YAJL_OK = 0
YAJL_CANCELLED = 1
YAJL_INSUFFICIENT_DATA = 2
YAJL_ERROR = 3

# constants defined in yajl_parse.h
YAJL_ALLOW_COMMENTS = 1
YAJL_MULTIPLE_VALUES = 8
66 |
67 |
def basic_parse(f, allow_comments=False, buf_size=64 * 1024,
                multiple_values=False):
    '''
    Iterator yielding unprefixed events.

    Parameters:

    - f: a readable file-like object with JSON input
    - allow_comments: tells parser to allow comments in JSON input
    - buf_size: a size of an input buffer
    - multiple_values: allows the parser to parse multiple JSON objects
    '''
    # Events accumulated by the C callbacks while one buffer is parsed;
    # drained after each yajl_parse call.
    events = []

    def callback(event, func_type, func):
        # Wrap a Python converter in a ctypes callback of the signature yajl
        # expects; returning 1 tells yajl to keep parsing.
        def c_callback(context, *args):
            events.append((event, func(*args)))
            return 1
        return func_type(c_callback)

    # NOTE(review): `callbacks` is passed by reference, so it must stay alive
    # for as long as `handle` is used -- it does, as a local.
    callbacks = Callbacks(*[callback(*data) for data in _callback_data])
    handle = yajl.yajl_alloc(byref(callbacks), None, None)
    if allow_comments:
        yajl.yajl_config(handle, YAJL_ALLOW_COMMENTS, 1)
    if multiple_values:
        yajl.yajl_config(handle, YAJL_MULTIPLE_VALUES, 1)
    try:
        while True:
            buffer = f.read(buf_size)
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                # EOF reached: ask yajl to flush any buffered state.
                result = yajl.yajl_complete_parse(handle)
            if result == YAJL_ERROR:
                perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer))
                error = cast(perror, c_char_p).value
                yajl.yajl_free_error(handle, perror)
                raise common.JSONError(error)
            if not buffer and not events:
                if result == YAJL_INSUFFICIENT_DATA:
                    raise common.IncompleteJSONError()
                break

            # Rebinding `events` also redirects the callbacks' closure to the
            # fresh list, so no event is delivered twice.
            for event in events:
                yield event
            events = []
    finally:
        yajl.yajl_free(handle)
116 |
def parse(file, **kwargs):
    '''
    Thin wrapper producing prefixed events for this backend via
    ijson.common.parse; keyword arguments are passed to basic_parse.
    '''
    event_stream = basic_parse(file, **kwargs)
    return common.parse(event_stream)
122 |
def items(file, prefix, **kwargs):
    '''
    Backend-specific wrapper for ijson.common.items.

    Extra keyword arguments (e.g. buf_size, allow_comments, multiple_values)
    are forwarded to the underlying parser; previously no parser options
    could be passed.
    '''
    return common.items(parse(file, **kwargs), prefix)
128 |
--------------------------------------------------------------------------------
/heatmap/dbfUtils.py:
--------------------------------------------------------------------------------
1 | import struct, datetime, decimal, itertools
2 | # dbfUtils.py
3 | # By Raymond Hettinger
4 | # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/362715
5 |
6 | def dbfreader(f):
7 | """Returns an iterator over records in a Xbase DBF file.
8 |
9 | The first row returned contains the field names.
10 | The second row contains field specs: (type, size, decimal places).
11 | Subsequent rows contain the data records.
12 | If a record is marked as deleted, it is skipped.
13 |
14 | File should be opened for binary reads.
15 |
16 | """
17 | # See DBF format spec at:
18 | # http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT
19 |
20 | numrec, lenheader = struct.unpack(')
28 | ('number', )
29 | ('string', )
30 | ('map_key', )
31 | ('start_map', None)
32 | ('end_map', None)
33 | ('start_array', None)
34 | ('end_array', None)
35 |
36 | Prefixes represent the path to the nested elements from the root of the JSON
37 | document. For example, given this document::
38 |
39 | {
40 | "array": [1, 2],
41 | "map": {
42 | "key": "value"
43 | }
44 | }
45 |
46 | the parser would yield events:
47 |
48 | ('', 'start_map', None)
49 | ('', 'map_key', 'array')
50 | ('array', 'start_array', None)
51 | ('array.item', 'number', 1)
52 | ('array.item', 'number', 2)
53 | ('array', 'end_array', None)
54 | ('', 'map_key', 'map')
55 | ('map', 'start_map', None)
56 | ('map', 'map_key', 'key')
57 | ('map.key', 'string', u'value')
58 | ('map', 'end_map', None)
59 | ('', 'end_map', None)
60 |
61 | '''
62 | path = []
63 | for event, value in basic_events:
64 | if event == 'map_key':
65 | prefix = '.'.join(path[:-1])
66 | path[-1] = value
67 | elif event == 'start_map':
68 | prefix = '.'.join(path)
69 | path.append(None)
70 | elif event == 'end_map':
71 | path.pop()
72 | prefix = '.'.join(path)
73 | elif event == 'start_array':
74 | prefix = '.'.join(path)
75 | path.append('item')
76 | elif event == 'end_array':
77 | path.pop()
78 | prefix = '.'.join(path)
79 | else: # any scalar value
80 | prefix = '.'.join(path)
81 |
82 | yield prefix, event, value
83 |
84 |
class ObjectBuilder(object):
    '''
    Incrementally builds an object from JSON parser events. Events are passed
    into the `event` function that accepts two parameters: event type and
    value. The object being built is available at any time from the `value`
    attribute.

    Example::

        from StringIO import StringIO
        from ijson.parse import basic_parse
        from ijson.utils import ObjectBuilder

        builder = ObjectBuilder()
        f = StringIO('{"key": "value"}')
        for event, value in basic_parse(f):
            builder.event(event, value)
        print builder.value

    '''
    def __init__(self):
        # `containers` is a stack of setter callables; the top one receives
        # the next completed value.
        def initial_set(value):
            self.value = value
        self.containers = [initial_set]

    def event(self, event, value):
        if event == 'map_key':
            self.key = value
        elif event == 'start_map':
            # `mapping` (renamed from `map` to avoid shadowing the builtin)
            # is installed into the current container, then becomes the
            # target for subsequent key/value pairs.
            mapping = {}
            self.containers[-1](mapping)
            def setter(value):
                mapping[self.key] = value
            self.containers.append(setter)
        elif event == 'start_array':
            array = []
            self.containers[-1](array)
            self.containers.append(array.append)
        elif event == 'end_array' or event == 'end_map':
            self.containers.pop()
        else:
            self.containers[-1](value)
127 |
def items(prefixed_events, prefix):
    '''
    An iterator returning native Python objects constructed from the events
    under a given prefix.
    '''
    stream = iter(prefixed_events)
    try:
        while True:
            current, event, value = next(stream)
            if current != prefix:
                continue
            if event in ('start_map', 'start_array'):
                # Feed every event up to the matching end marker into a
                # builder, then hand back the completed object.
                builder = ObjectBuilder()
                end_event = event.replace('start', 'end')
                while (current, event) != (prefix, end_event):
                    builder.event(event, value)
                    current, event, value = next(stream)
                yield builder.value
            else:
                yield value
    except StopIteration:
        pass
149 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/ijson/common.py:
--------------------------------------------------------------------------------
1 | '''
2 | Backend independent higher level interfaces, common exceptions.
3 | '''
4 |
class JSONError(Exception):
    '''
    Base class for every error reported by the JSON parser.
    '''
10 |
class IncompleteJSONError(JSONError):
    '''
    Raised when the parser expects data and it's not available. May be
    caused by malformed syntax or a broken source stream.
    '''
    def __init__(self, msg='Incomplete or empty JSON data'):
        # The default keeps the historical message, so existing callers and
        # tests are unaffected; new callers may attach context.
        super(IncompleteJSONError, self).__init__(msg)
18 |
def parse(basic_events):
    '''
    An iterator returning parsing events with the information about their
    location within the JSON object tree. Events are tuples
    ``(prefix, type, value)``.

    Available types and values are:

    ('null', None)
    ('boolean', <True or False>)
    ('number', <int or Decimal>)
    ('string', <unicode string>)
    ('map_key', <str>)
    ('start_map', None)
    ('end_map', None)
    ('start_array', None)
    ('end_array', None)

    Prefixes represent the path to the nested elements from the root of the
    JSON document. For example, given this document::

        {
          "array": [1, 2],
          "map": {
            "key": "value"
          }
        }

    the parser would yield events:

        ('', 'start_map', None)
        ('', 'map_key', 'array')
        ('array', 'start_array', None)
        ('array.item', 'number', 1)
        ('array.item', 'number', 2)
        ('array', 'end_array', None)
        ('', 'map_key', 'map')
        ('map', 'start_map', None)
        ('map', 'map_key', 'key')
        ('map.key', 'string', u'value')
        ('map', 'end_map', None)
        ('', 'end_map', None)

    '''
    stack = []
    for event, value in basic_events:
        if event in ('end_map', 'end_array'):
            stack.pop()
            prefix = '.'.join(stack)
        elif event == 'map_key':
            # The prefix of a key excludes the placeholder on top of the
            # stack, which is then replaced by the key itself.
            prefix = '.'.join(stack[:-1])
            stack[-1] = value
        elif event == 'start_map':
            prefix = '.'.join(stack)
            stack.append(None)
        elif event == 'start_array':
            prefix = '.'.join(stack)
            stack.append('item')
        else:  # any scalar value
            prefix = '.'.join(stack)

        yield prefix, event, value
83 |
84 |
class ObjectBuilder(object):
    '''
    Incrementally builds an object from JSON parser events. Events are passed
    into the `event` function that accepts two parameters: event type and
    value. The object being built is available at any time from the `value`
    attribute.

    Example::

        from StringIO import StringIO
        from ijson.parse import basic_parse
        from ijson.utils import ObjectBuilder

        builder = ObjectBuilder()
        f = StringIO('{"key": "value"}')
        for event, value in basic_parse(f):
            builder.event(event, value)
        print builder.value

    '''
    def __init__(self):
        # `containers` is a stack of setter callables; the top one receives
        # the next completed value.
        def initial_set(value):
            self.value = value
        self.containers = [initial_set]

    def event(self, event, value):
        if event == 'map_key':
            self.key = value
        elif event == 'start_map':
            # `mapping` (renamed from `map` to avoid shadowing the builtin)
            # is installed into the current container, then becomes the
            # target for subsequent key/value pairs.
            mapping = {}
            self.containers[-1](mapping)
            def setter(value):
                mapping[self.key] = value
            self.containers.append(setter)
        elif event == 'start_array':
            array = []
            self.containers[-1](array)
            self.containers.append(array.append)
        elif event == 'end_array' or event == 'end_map':
            self.containers.pop()
        else:
            self.containers[-1](value)
127 |
def items(prefixed_events, prefix):
    '''
    An iterator returning native Python objects constructed from the events
    under a given prefix.
    '''
    stream = iter(prefixed_events)
    try:
        while True:
            current, event, value = next(stream)
            if current != prefix:
                continue
            if event in ('start_map', 'start_array'):
                # Feed every event up to the matching end marker into a
                # builder, then hand back the completed object.
                builder = ObjectBuilder()
                end_event = event.replace('start', 'end')
                while (current, event) != (prefix, end_event):
                    builder.event(event, value)
                    current, event, value = next(stream)
                yield builder.value
            else:
                yield value
    except StopIteration:
        pass
149 |
--------------------------------------------------------------------------------
/metadata/get_metadata.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | #Input:
4 | #1) url, i.e: http://data.austintexas.gov
5 | #2) output path
6 | #Get number of datasets, metadata (name, description, tags) and schema using following APIs:
7 | #url + /api/views.json?count=True: COUNT
8 | #url + /api/views.json?limit=200&page=1: METADATA
9 | #url + /api/views/ + datasetid + .json: SCHEMA
10 |
11 | import json
12 | import urllib
13 | import codecs
14 | import sys
15 | import os.path
16 |
17 | def loadIDs(outpath):
18 | #Input
19 | filepath = outpath + '/metadata.csv'
20 | if not os.path.isfile(filepath):
21 | return set()
22 | ids = set([])
23 | with open(filepath) as f:
24 | for line in f:
25 | _array = line.split('\t')
26 | ids.add(_array[0])
27 | print 'Done loading IDs'
28 | return ids
29 |
def main(argv):
    #Entry point. argv: [0] Socrata portal base URL, [1] city name used to
    #prefix output files, [2] output directory. Fetches dataset count,
    #metadata, tags and schemas and appends them to per-city tab-separated
    #CSV files.
    url = argv[0] #input
    city = argv[1]
    outpath = argv[2] #output
    #Get the number of datasets
    urlhandle = urllib.urlopen(url + '/api/views.json?count=True')
    content = urlhandle.read()
    js = json.loads(content)
    count = js['count']

    #Output
    meta_f = codecs.open(outpath + '/' + city + '_metadata.csv', 'a', 'utf-8')
    tag_f = codecs.open(outpath + '/' + city + '_tags.csv', 'a', 'utf-8')
    schema_f = codecs.open(outpath + '/' + city + '_schema.csv', 'a', 'utf-8')
    id_tag_f = codecs.open(outpath + '/' + city + '_id_tag.csv', 'a', 'utf-8')
    id_downloadcount_f = codecs.open(outpath + '/' + city + '_id_downloadcount.csv', 'a', 'utf-8')
    id_viewcount_f = codecs.open(outpath + '/' + city + '_id_viewcount.csv', 'a', 'utf-8')
    id_date_f = open(outpath + '/' + city + '_id_date.csv', 'w')
    id_size_f = open(outpath + '/' + city + '_id_size.csv', 'w')

    #Load id of the datasets whose metadatas were already retrieved
    #NOTE(review): loadIDs reads outpath/metadata.csv while this function
    #writes <city>_metadata.csv -- confirm the resume logic matches.
    ids = loadIDs(outpath)

    #Get metadata of the all datasets
    #Metadata for one dataset is formated in one line. Each attribute value is seperated by tab character and empty value is replaced by the string 'Null'
    print 'Total: ' + str(count)
    pages = count/200 + 1 #total number of pages

    for i in range(1, pages+1):
        sys.stdout.write('Getting data from page ' + str(i) + ' ... ')
        urlhandle = urllib.urlopen(url + "/api/views.json?limit=200&page=" + str(i))

        content = urlhandle.read()
        js = json.loads(content)
        for j in range(0, len(js)):
            #Check whether the metadata was already retrieved
            _id = js[j]['id']
            if _id in ids:
                continue

            #Get metadata of each dataset
            #ID and NAME
            id = js[j]['id'] #NOTE(review): shadows the builtin `id`
            meta = id + '\t' + js[j]['name']

            #DESCRIPTION
            if js[j].has_key('description'):
                meta_f.write(meta + "\t" + js[j]['description'].replace('\n', ' ') + "\n")
            #else:
            #    meta_f.write(meta + '\t' + 'null\n')

            #View count
            if js[j].has_key('viewCount'):
                id_viewcount_f.write(id + "\t" + str(js[j]["viewCount"]) + "\n")
            else:
                id_viewcount_f.write(id + "\tnull\n")

            #Download count
            if js[j].has_key('downloadCount'):
                id_downloadcount_f.write(id + "\t" + str(js[j]["downloadCount"]) + "\n")
            else:
                id_downloadcount_f.write(id + "\tnull\n")

            #Publication date
            pdate = "None"
            cdate = "None"
            if js[j].has_key('publicationDate'):
                pdate = str(js[j]['publicationDate']) #Timestamp format
            #Created date
            if js[j].has_key('createdAt'):
                cdate = str(js[j]['createdAt']) #Timestamp format
            id_date_f.write(id + "\t" + cdate + "\t" + pdate + "\n")

            #blobsize
            bsize = "None"
            if js[j].has_key('blobFileSize'):
                bsize = str(js[j]['blobFileSize'])
            id_size_f.write(id + "\t" + bsize + "\n")

            #TAGS
            tag = ''
            if js[j].has_key('tags'):
                for t in js[j]['tags']:
                    tag_f.write(t + '\n')
                    tag = tag + ' ' + t
                id_tag_f.write(id + "\t" + tag + "\n")
            else:
                id_tag_f.write(id + "\tnull\n")

            #Get schema of each dataset
            schemaurl = url + "/api/views/" + js[j]['id'] + '.json'
            aJS = json.loads(urllib.urlopen(schemaurl).read())
            if aJS.has_key('columns'):
                schema_js = aJS['columns']
                schema = js[j]['id']
                if schema_js != None:
                    for field in schema_js:
                        schema = schema + '\t' + field['fieldName']
                    schema_f.write(schema + '\n')
            else:
                print js[j]['id']

        print 'Done'
    print 'Done'
    meta_f.close()
    tag_f.close()
    schema_f.close()
    id_tag_f.close()
    id_downloadcount_f.close()
    id_viewcount_f.close()
    id_date_f.close()
    id_size_f.close()
142 |
#Command-line usage: get_metadata.py <portal_url> <city> <output_dir>
if __name__ == "__main__":
    main(sys.argv[1:])
145 |
--------------------------------------------------------------------------------
/latlon_to_zipcode/Neighborhoods.hpp:
--------------------------------------------------------------------------------
1 | //#############################################################################
2 | ////
3 | //// Copyright (C) 2014, New York University.
4 | //// All rights reserved.
5 | //// Contact: huy.vo@nyu.edu, kien.pham@nyu.edu
6 | ////
7 | //// "Redistribution and use in source and binary forms, with or without
8 | //// modification, are permitted provided that the following conditions are met:
9 | ////
10 | //// - Redistributions of source code must retain the above copyright notice,
11 | //// this list of conditions and the following disclaimer.
12 | //// - Redistributions in binary form must reproduce the above copyright
13 | //// notice, this list of conditions and the following disclaimer in the
14 | //// documentation and/or other materials provided with the distribution.
15 | //// - Neither the name of New York University nor the names of its
16 | //// contributors may be used to endorse or promote products derived from
17 | //// this software without specific prior written permission.
18 | ////
19 | //// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | //// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | //// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 | //// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23 | //// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | //// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | //// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 | //// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 | //// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 | //// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 | //// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
30 | ////
31 | ////#############################################################################
32 | #ifndef NEIGHBORHOODS_HPP
33 | #define NEIGHBORHOODS_HPP
34 |
35 | #include
36 | #include
37 | #include
38 | #include
39 | #include
40 |
41 | class Neighborhoods
42 | {
43 | public:
44 |
45 | typedef std::vector< std::pair > Geometry;
46 | typedef boost::unordered_map GeometryMap;
47 |
48 | Neighborhoods() {}
49 |
50 | Neighborhoods(const char *filename)
51 | {
52 | this->loadFromFile(filename);
53 | }
54 |
55 | void loadFromFile(const char *filename)
56 | {
57 | char name[128];
58 | int N, nPoly, nPoint;
59 | float lat, lon;
60 | Geometry poly;
61 |
62 | FILE *fi = fopen(filename, "r");
63 | fscanf(fi, "%d", &N);
64 | fgets(name, sizeof(name), fi);
65 |
66 | this->geometries.clear();
67 | for (int i=0; igeometries[atoi(name)] = poly;
80 | }
81 | fclose(fi);
82 | std::cout<<"File loaded! Number of elements: "<bounds[2]) bounds[2] = geom[i].first;
97 | if (geom[i].secondbounds[3]) bounds[3] = geom[i].second;
99 | }
100 | }
101 | }
102 |
103 | static bool isInside(int nvert, float *vert, float testx, float testy)
104 | {
105 | if (nvert<=0) return true;
106 | float firstX = vert[0];
107 | float firstY = vert[1];
108 | int i, j, c = 0;
109 | for (i = 1, j = 0; i < nvert; j = i++) {
110 | if ( ((vert[i*2+1]>testy) != (vert[j*2+1]>testy)) &&
111 | (testx < (vert[j*2]-vert[i*2]) * (testy-vert[i*2+1]) / (vert[j*2+1]-vert[i*2+1]) + vert[i*2]) )
112 | c = !c;
113 | if (vert[i*2]==firstX && vert[i*2+1]==firstY) {
114 | if (++ilon) != (vert[j].second>lon)) &&
130 | (lat < (vert[j].first-vert[i].first) * (lon-vert[i].second) / (vert[j].second-vert[i].second) + vert[i].first) )
131 | c = !c;
132 | }
133 | return c;
134 | }
135 |
136 | private:
137 | GeometryMap geometries;
138 | };
139 |
140 | #endif
141 |
--------------------------------------------------------------------------------
/type_detection/sample.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | ##
3 | ## Copyright (C) 2014, New York University.
4 | ## All rights reserved.
5 | ## Contact: kien.pham@nyu.edu
6 | ##
7 | ## "Redistribution and use in source and binary forms, with or without
8 | ## modification, are permitted provided that the following conditions are met:
9 | ##
10 | ## - Redistributions of source code must retain the above copyright notice,
11 | ## this list of conditions and the following disclaimer.
12 | ## - Redistributions in binary form must reproduce the above copyright
13 | ## notice, this list of conditions and the following disclaimer in the
14 | ## documentation and/or other materials provided with the distribution.
15 | ## - Neither the name of New York University nor the names of its
16 | ## contributors may be used to endorse or promote products derived from
17 | ## this software without specific prior written permission.
18 | ##
19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 | ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
30 | ##
31 | ###############################################################################
32 |
33 | import sys
34 | import ijson
35 | import os.path
36 | import re
37 |
def get_schema(filename):
    '''
    Extract column names of a given dataset from JSON file
    '''
    #Scans the file line by line (without a full JSON parse) and collects
    #the values of "fieldName" entries until the "data" section starts.
    schema = []
    schema_set = set()  # de-duplication aid for `schema`
    try:
        if filename[-4:]=='json':
            count = 0
            with open(filename) as lines:
                for line in lines:
                    if re.search("\"data\" :", line) is None:
                        #Only lines ending in ',' hold "key : value" pairs in
                        #the pretty-printed Socrata export this targets.
                        if line[-2] == ',':
                            kv = line.strip("\n ,").split(" : ")
                            if len(kv)==2:
                                k = kv[0].strip("\"")
                                v = kv[1].strip("\"")
                                if k=="fieldName":
                                    count += 1
                                    #NOTE(review): the first 8 fieldName
                                    #entries are skipped -- presumably the
                                    #portal's bookkeeping columns; confirm.
                                    if count>8:
                                        if v not in schema_set:
                                            schema.append(v)
                                            schema_set.add(v)
                    else:
                        break
        else:
            print filename + ' is not JSON file'
    except Exception as ex:
        print ex
        #NOTE(review): sys.exc_traceback is a deprecated Python 2 attribute.
        print "Error line: " + str(sys.exc_traceback.tb_lineno)
    return schema
69 |
def is_none(item):
    '''
    Check if an object is None or a list whose elements are all None.

    An empty list returns True, matching the original loop-based behaviour.
    '''
    if item is None:
        return True
    if isinstance(item, list):
        return all(e is None for e in item)
    return False
83 |
def tostr(obj):
    '''
    Convert an object to string, lower case and remove end line characters
    '''
    #NOTE(review): `unicode` exists only on Python 2 -- this module targets
    #Python 2. Also, newlines are replaced by '' in the unicode branch but
    #by ' ' in the str branch; confirm whether that asymmetry is intended.
    if type(obj) is unicode:
        return obj.encode('utf-8').replace("\n", "").lower()
    else:
        return str(obj).replace("\n", " ").lower()
92 |
93 | def sample(data_path, id, output_path, city, max, MAX):
94 | filename = data_path + "/" + id + ".json"
95 | if not os.path.isfile(filename):
96 | return
97 |
98 | output_file = output_path + "/" + city + "_" + id + ".txt"
99 | if os.path.isfile(output_file):
100 | print "File " + output_file + " is existed."
101 | return
102 | output = open(output_file, "w")
103 | schema = get_schema(filename)
104 |
105 | count = 0
106 | item = []
107 | try:
108 | filehandle = open(filename)
109 | data = ijson.items(filehandle, "data.item")
110 | values_list = []
111 | for atb in schema:
112 | values_list.append([atb])
113 | for item in data:
114 | count += 1
115 | if count == MAX:
116 | break
117 | item = item[8:]
118 | if count == 1: #only do this once
119 | values_list = values_list[0:len(item)]
120 | for i in range(len(item)):
121 | if (len(values_list[i]) max:
68 | max = m[zipcode]
69 | unit = max/8
70 | print max
71 |
# Draw one filled polygon per shapefile record, coloured by the number of
# records counted for its zipcode in `m` (bucketed into 8 colour steps of
# size `unit`).
for i in range(0,len(shpRecords)):
    # x and y are empty lists to be populated with the coordinates of each geometry.
    x = []
    y = []
    for j in range(0,len(shpRecords[i]['shp_data']['parts'][0]['points'])):
        # This is the number of vertices in the ith geometry.
        # The parts list is [0] as it is singlepart.
        # get x and y coordinates.
        tempx = float(shpRecords[i]['shp_data']['parts'][0]['points'][j]['x'])
        tempy = float(shpRecords[i]['shp_data']['parts'][0]['points'][j]['y'])
        x.append(tempx)
        y.append(tempy) # Populate the lists

    # Creates a polygon in matplotlib for each geometry in the shapefile
    zipcode = shpRecords[i]["dbf_data"]["ZIP"]
    # NOTE(review): dict.has_key exists only in Python 2; this script will
    # not run under Python 3 as written.
    if m.has_key(zipcode):
        # NOTE(review): unit = max/8 floors, so m[zipcode]/unit can reach 9
        # or more for the largest counts (e.g. max=9 -> unit=1 -> index 9);
        # colours only has keys 0-8, so this can raise KeyError.
        colour = colours[m[zipcode]/unit]
    else:
        # Zipcodes absent from the counts file get the lightest colour.
        colour = colours[0]
    plt.fill(x, y, fc=colour, ec='0.7', lw=0.1)

#Create legend
# Proxy rectangles, one per colour bucket, used only as legend handles.
p0 = plt.Rectangle((0, 0), 1, 1, fc=colours[0])
p1 = plt.Rectangle((0, 0), 1, 1, fc=colours[1])
p2 = plt.Rectangle((0, 0), 1, 1, fc=colours[2])
p3 = plt.Rectangle((0, 0), 1, 1, fc=colours[3])
p4 = plt.Rectangle((0, 0), 1, 1, fc=colours[4])
p5 = plt.Rectangle((0, 0), 1, 1, fc=colours[5])
p6 = plt.Rectangle((0, 0), 1, 1, fc=colours[6])
p7 = plt.Rectangle((0, 0), 1, 1, fc=colours[7])
p8 = plt.Rectangle((0, 0), 1, 1, fc=colours[8])
# Invisible rectangle that gives the legend a caption-style first entry.
extra = plt.Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0)

# Legend variant with exact per-bucket record counts.
plt.legend([extra, p0,p1,p2,p3,p4,p5,p6,p7,p8],\
    ["Number of records",\
    "0-%d records" %((1*unit-1)),\
    "%d-%d records" %(1*unit,(2*unit-1)),\
    "%d-%d records" %(2*unit,(3*unit-1)),\
    "%d-%d records" %(3*unit,(4*unit-1)),\
    "%d-%d records" %(4*unit,(5*unit-1)),\
    "%d-%d records" %(5*unit,(6*unit-1)),\
    "%d-%d records" %(6*unit,(7*unit-1)),\
    "%d-%d records" %(7*unit,(8*unit-1)),\
    ">%d records" %(8*unit)],\
    prop={'size':10}, loc = 1)

# NOTE(review): this second plt.legend call replaces the legend created just
# above, so only the thousands ("k") variant appears in the saved figure.
# Its labels floor unit/1000 and so degenerate when unit < 1000.
plt.legend([extra, p0,p1,p2,p3,p4,p5,p6,p7,p8],\
    ["Number of records",\
    "0-%dk" %((1*unit/1000-1)),\
    "%dk-%dk" %(1*unit/1000,(2*unit/1000-1)),\
    "%dk-%dk" %(2*unit/1000,(3*unit/1000-1)),\
    "%dk-%dk" %(3*unit/1000,(4*unit/1000-1)),\
    "%dk-%dk" %(4*unit/1000,(5*unit/1000-1)),\
    "%dk-%dk" %(5*unit/1000,(6*unit/1000-1)),\
    "%dk-%dk" %(6*unit/1000,(7*unit/1000-1)),\
    "%dk-%dk" %(7*unit/1000,(8*unit/1000-1)),\
    ">%dk" %(8*unit/1000)],\
    prop={'size':10.6}, loc = 3)

#plt.title("Chicago ZipCode Overlap")
plt.axis('off')
plt.savefig('chicagoallzipcode.jpg', format='jpg', dpi=700)
plt.show()
135 |
136 |
--------------------------------------------------------------------------------
/heatmap/nyc.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | ##
3 | ## Copyright (C) 2014, New York University.
4 | ## All rights reserved.
5 | ## Contact: kien.pham@nyu.edu
6 | ##
7 | ## "Redistribution and use in source and binary forms, with or without
8 | ## modification, are permitted provided that the following conditions are met:
9 | ##
10 | ## - Redistributions of source code must retain the above copyright notice,
11 | ## this list of conditions and the following disclaimer.
12 | ## - Redistributions in binary form must reproduce the above copyright
13 | ## notice, this list of conditions and the following disclaimer in the
14 | ## documentation and/or other materials provided with the distribution.
15 | ## - Neither the name of New York University nor the names of its
16 | ## contributors may be used to endorse or promote products derived from
17 | ## this software without specific prior written permission.
18 | ##
19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 | ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
30 | ##
31 | ###############################################################################
32 |
33 |
34 | import shpUtils
35 | import matplotlib.pyplot as plt
36 |
37 |
def loadZipcode(zipcodefile):
    """Load a tab-separated zipcode/count file into a dict.

    Parameters:
        zipcodefile: path to a text file with one "zipcode<TAB>count"
            row per line.

    Returns:
        dict mapping zipcode (str) -> number of records (int).
    """
    m = {}
    with open(zipcodefile) as lines:
        for line in lines:
            a = line.strip("\n").split("\t")
            # Skip blank or malformed rows instead of raising IndexError
            # on a[1] (e.g. a trailing empty line in the CSV).
            if len(a) < 2 or not a[0]:
                continue
            zipcode = a[0]
            occurencyNumb = int(a[1])
            m[zipcode] = occurencyNumb
    return m
47 |
#Declare inputs
# nyc.csv: tab-separated "<zipcode>\t<count>" rows produced upstream.
zipcodefile = "nyc.csv"
shapefile = "shapefile/nyc_zipcta.shp"
#define colours
#colours = {0:"#F7FCF0", 1:"#E0F3DB", 2:"#CCEBC5", 3:"#A8DDB5", 4:"#7BCCC4", 5:"#4EB3D3", 6:"#2B8CBE", 7:"#0868AC", 8:"#084081"}
# 23-step white-to-blue ramp; keys 0-22 are indexed by count bucket below.
colours = {0:"#ffffff", 1:"#fcfcff", 2:"#ebecff", 3:"#ebecff", 4:"#dadcff", 5:"#c9ccff", 6:"#b8bcff", 7:"#a7acff", 8:"#969cff", 9:"#858cff", 10:"#747cff", 11:"#636cff", 12:"#525dff", 13:"#414dff", 14:"#303dff", 15:"#1f2dff", 16:"#0e1dff", 17:"#0010fc", 18:"#000feb", 19:"#000eda", 20:"#000dc9", 21:"#000bb8", 22:"#000aa7"}
#colours = {0:"#F7FCF0", 1:"#F7FCF0", 2:"#E0F3DB", 3:"#E0F3DB", 4:"#CCEBC5", 5:"#CCEBC5", 6:"#A8DDB5", 7:"#7BCCC4", 8:"#4EB3D3", 9:"#2B8CBE", 10:"#0868AC", 11:"#084081"}
#colours = {0:"", 1:"", 2:"", 3:"", 4:"", 5:"", 6:"", 7:"", 8:""}
#colours = {0:"#FFF7EC", 1:"#FEE8C8", 2:"#FDD49E", 3:"#FDBB84", 4:"#FC8D59", 5:"#EF6548", 6:"#D7301F", 7:"#B30000", 8:"#7F0000"}

# load the shapefile
shpRecords = shpUtils.loadShapefile(shapefile)
# load zipcodefile
m = loadZipcode(zipcodefile)
# NOTE(review): max/min shadow the Python builtins of the same name; the
# initial min of 1000 also assumes some zipcode has fewer than 1000 records.
max = 0
min = 1000
for i in range(0,len(shpRecords)):
    zipcode = shpRecords[i]["dbf_data"]["ZCTA5CE00"]
    # NOTE(review): unlike the has_key-guarded lookup in the drawing loop
    # below, this raises KeyError if a shapefile zipcode is absent from m.
    if m[zipcode] > max:
        max = m[zipcode]
    if m[zipcode] < min:
        min = m[zipcode]
# NOTE(review): Python 2 floor division; unit is 0 whenever max-min < 22,
# and m[zipcode]/unit below then raises ZeroDivisionError.
unit = (max-min)/22
print max
print min
# Shift counts so the smallest observed count maps to bucket 0.
for key in m.keys():
    m[key] = m[key] - min

# Draw one filled polygon per shapefile record, coloured by its bucket.
for i in range(0,len(shpRecords)):
    # x and y are empty lists to be populated with the coordinates of each geometry.
    x = []
    y = []
    for j in range(0,len(shpRecords[i]['shp_data']['parts'][0]['points'])):
        # This is the number of vertices in the ith geometry.
        # The parts list is [0] as it is singlepart.
        # get x and y coordinates.
        tempx = float(shpRecords[i]['shp_data']['parts'][0]['points'][j]['x'])
        tempy = float(shpRecords[i]['shp_data']['parts'][0]['points'][j]['y'])
        x.append(tempx)
        y.append(tempy) # Populate the lists

    # Creates a polygon in matplotlib for each geometry in the shapefile
    zipcode = shpRecords[i]["dbf_data"]["ZCTA5CE00"]
    # NOTE(review): dict.has_key is Python 2 only; this script cannot run
    # under Python 3 as written. Also m[zipcode]/unit can exceed 22 for the
    # largest shifted counts (unit is floored), risking KeyError.
    if m.has_key(zipcode):
        colour = colours[m[zipcode]/unit]
    else:
        colour = colours[0]
    plt.fill(x, y, fc=colour, ec='0.7', lw=0.1)

#Create legend
# Proxy rectangles for every second colour bucket, used as legend handles.
p0 = plt.Rectangle((0, 0), 1, 1, fc=colours[0])
p1 = plt.Rectangle((0, 0), 1, 1, fc=colours[2])
p2 = plt.Rectangle((0, 0), 1, 1, fc=colours[4])
p3 = plt.Rectangle((0, 0), 1, 1, fc=colours[6])
p4 = plt.Rectangle((0, 0), 1, 1, fc=colours[8])
p5 = plt.Rectangle((0, 0), 1, 1, fc=colours[10])
p6 = plt.Rectangle((0, 0), 1, 1, fc=colours[12])
p7 = plt.Rectangle((0, 0), 1, 1, fc=colours[14])
p8 = plt.Rectangle((0, 0), 1, 1, fc=colours[16])
p9 = plt.Rectangle((0, 0), 1, 1, fc=colours[18])
p10 = plt.Rectangle((0, 0), 1, 1, fc=colours[20])
p11 = plt.Rectangle((0, 0), 1, 1, fc=colours[22])
# Invisible rectangle that gives the legend a caption-style first entry.
extra = plt.Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0)
#This legend will show the exact number
plt.legend([p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11],\
    ["0-%d records" %((2*unit-1)),\
    "%d-%d records" %(2*unit,(4*unit-1)),\
    "%d-%d records" %(4*unit,(6*unit-1)),\
    "%d-%d records" %(6*unit,(8*unit-1)),\
    "%d-%d records" %(8*unit,(10*unit-1)),\
    "%d-%d records" %(10*unit,(12*unit-1)),\
    "%d-%d records" %(12*unit,(14*unit-1)),\
    "%d-%d records" %(14*unit,(16*unit-1)),\
    "%d-%d records" %(16*unit,(18*unit-1)),\
    "%d-%d records" %(18*unit,(20*unit-1)),\
    "%d-%d records" %(20*unit,(22*unit-1)),\
    ">%d records" %(22*unit)],\
    prop={'size':8}, loc = 2)

#The short version of legend:
# NOTE(review): this second plt.legend call replaces the one above, so only
# the thousands ("k") legend appears in the saved figure; its labels floor
# unit/1000 and degenerate when unit < 1000.
plt.legend([extra, p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11],\
    ["Number of records",\
    "0-%dk" %((2*unit/1000-1)),\
    "%dk-%dk" %(2*unit/1000,(4*unit/1000-1)),\
    "%dk-%dk" %(4*unit/1000,(6*unit/1000-1)),\
    "%dk-%dk" %(6*unit/1000,(8*unit/1000-1)),\
    "%dk-%dk" %(8*unit/1000,(10*unit/1000-1)),\
    "%dk-%dk" %(10*unit/1000,(12*unit/1000-1)),\
    "%dk-%dk" %(12*unit/1000,(14*unit/1000-1)),\
    "%dk-%dk" %(14*unit/1000,(16*unit/1000-1)),\
    "%dk-%dk" %(16*unit/1000,(18*unit/1000-1)),\
    "%dk-%dk" %(18*unit/1000,(20*unit/1000-1)),\
    "%dk-%dk" %(20*unit/1000,(22*unit/1000-1)),\
    ">%dk" %(22*unit/1000)],\
    prop={'size':10.6}, loc = 2)

#plt.title("NYC ZipCode Overlap")
plt.axis('off')
plt.savefig('nycallzipcodeoverlap.jpg', format='jpg', dpi=700)
plt.show()
148 |
149 |
--------------------------------------------------------------------------------
/download/ids/edmonton_ids.txt:
--------------------------------------------------------------------------------
1 | sthd-gad4
2 | 62wr-c9ye
3 | cggb-hzzm
4 | yutc-c5ju
5 | 7gh5-bnbs
6 | htrg-yirr
7 | 5zeu-wkpv
8 | jyra-si4k
9 | ickf-52qg
10 | 65fr-66s6
11 | 7yqh-39tz
12 | h9sn-ds8j
13 | id8i-xwha
14 | 2d5a-esdr
15 | w4cu-waf8
16 | k29i-u25u
17 | qh6u-3haa
18 | febx-sqja
19 | ebvt-eg97
20 | ehbr-emhe
21 | qsmn-3w99
22 | c4nr-3quz
23 | 25b5-e682
24 | f2bf-5wqc
25 | 2n87-9vjk
26 | ih46-vxeq
27 | ieti-capx
28 | jn25-zspi
29 | ia7a-fuzq
30 | 83ud-pzgi
31 | gem5-v2v7
32 | zxf6-rv9a
33 | rm66-tzy5
34 | 7ndn-2x34
35 | xt3a-rjpj
36 | 8muu-jq8v
37 | mqxt-kvm4
38 | nmpv-7vqg
39 | 6avx-8i8e
40 | prfy-5m97
41 | nfvw-n5k3
42 | xjvb-fthb
43 | eecg-fc54
44 | 2rpu-peb3
45 | f6w2-hzex
46 | auxe-iahd
47 | yznh-9n2u
48 | kcuu-2yxg
49 | dpcz-nupn
50 | kaps-rk7z
51 | ykpk-qyvg
52 | eaag-ra2c
53 | 9j6k-uzig
54 | bwpb-bppt
55 | 2976-zu3f
56 | mbxg-7qmu
57 | kyhu-r33m
58 | mu9e-3mkm
59 | y29c-39y9
60 | nc6t-tngg
61 | a56e-tkzk
62 | znrg-tr8s
63 | 7isy-c8ka
64 | n7fj-2yfs
65 | ysgw-us2d
66 | v6pz-ntae
67 | gp5m-pueh
68 | 4vt2-8zrq
69 | gw7p-ee8r
70 | iysa-xg87
71 | hubs-ug3y
72 | dnbn-a9ny
73 | j7nr-ekvn
74 | 8xti-zu8p
75 | f6ne-aqna
76 | g57i-jmvw
77 | u6ue-anax
78 | e7aq-scxv
79 | gy8c-eq6u
80 | uwuq-3zq3
81 | 72aw-fq6m
82 | 6mre-4inz
83 | 9et8-3bg5
84 | es6z-qyb7
85 | 2mxf-2xyw
86 | 46fr-szhj
87 | c7ev-7y4u
88 | mst8-di4d
89 | 82ij-m8wd
90 | 3yki-4nkh
91 | ctwr-tvrd
92 | bfsu-5y9w
93 | h5ce-uubp
94 | qzrg-q4nv
95 | d577-xky7
96 | xsrd-4894
97 | 7fus-qa4r
98 | hnhf-yaps
99 | knai-w28i
100 | 5ayy-wxat
101 | vaiq-ubi5
102 | gajw-anue
103 | 2hvz-kffj
104 | fqnx-qdsk
105 | px79-vegt
106 | bqmh-j34s
107 | shh7-vzch
108 | punv-um3q
109 | si3b-qk24
110 | mvbx-i64u
111 | 2crc-aced
112 | gdd9-eqv9
113 | 9khp-yjaa
114 | b4y7-zhnz
115 | scjj-9qzz
116 | uytp-iqga
117 | 5xcn-q5zx
118 | 9hwi-bdju
119 | i32t-b6vf
120 | ei4u-794h
121 | wr39-6xm9
122 | j6uz-tjg8
123 | 9nqb-w48x
124 | tm8z-k466
125 | y7yb-jtjn
126 | dknb-ctqa
127 | bnuf-aarq
128 | 84rh-tbam
129 | 7rap-ipwf
130 | xzjj-3r8e
131 | xydx-f66g
132 | qdcb-svkz
133 | 4adt-tbf8
134 | beu6-4urm
135 | y4r5-xpku
136 | 9edk-tayw
137 | egz6-jdf3
138 | ggak-65yh
139 | jr4d-a5zs
140 | vmrg-prz6
141 | 6hiw-nih3
142 | iwjg-nq4w
143 | jruw-eduf
144 | t37t-2z9n
145 | xhpv-2y2t
146 | q78x-bpvt
147 | pwa8-zn84
148 | 375q-qr55
149 | 98hy-qgun
150 | 6gjh-jny3
151 | b58q-nxjr
152 | 57bz-3nsb
153 | fbs7-uv7y
154 | rjq9-ge5s
155 | 55vi-yv73
156 | g2ht-buzt
157 | 33re-ygv7
158 | ggrb-bzbj
159 | 4agm-wt9h
160 | 8h3r-5ys2
161 | f9i7-hdjq
162 | ks8u-4a6b
163 | ud9b-q65g
164 | p26r-vxf7
165 | nrf5-ighq
166 | x8j3-j3q3
167 | x2gc-xf9e
168 | 7yt8-7467
169 | tzuj-jcfp
170 | j5gy-p5g8
171 | my5s-s4eh
172 | 7njw-4e49
173 | 9isd-fcah
174 | df5n-u36u
175 | s2tr-4vrj
176 | r45z-99vd
177 | rgfm-xqag
178 | cjgx-qcup
179 | f2sy-bth7
180 | 692u-9tuj
181 | xgwu-c37w
182 | rh82-ntt9
183 | kffi-kb6e
184 | ddqk-i2ey
185 | gds5-6aiq
186 | nqs8-f6fc
187 | zmac-3mxq
188 | pkdk-9dwc
189 | jir4-uuhx
190 | myu9-qngm
191 | a76p-ee58
192 | bnfb-vu2t
193 | vbdj-jxmq
194 | npym-f8ef
195 | qt34-hwip
196 | bdis-rq5p
197 | djyx-z2bv
198 | b2ak-ut7u
199 | 87u8-3yfv
200 | qrrq-9bsx
201 | ykrg-bwt7
202 | paqn-uf4u
203 | mnwt-r49h
204 | kiry-88gi
205 | 65xu-w8px
206 | 2694-uced
207 | achm-af7d
208 | nekk-97cp
209 | ngd7-ejms
210 | nna3-34af
211 | 8svr-ivxz
212 | em7g-s625
213 | hk82-7wj7
214 | gjev-z5ji
215 | 8peh-czku
216 | igwr-fbps
217 | 3nma-d8m3
218 | tizy-3vkk
219 | nzgd-btmv
220 | mvki-i26q
221 | 4ev8-6z47
222 | 4mm2-9j7w
223 | 9s54-maam
224 | uwg3-tppw
225 | mpd9-nhrp
226 | i73h-tmq7
227 | da5j-rtis
228 | icpf-mjv6
229 | fftr-j9wv
230 | a5ef-84t3
231 | x7r5-d4hj
232 | wans-jfwv
233 | mgii-4cth
234 | 6w5t-p7n9
235 | 5md5-bka8
236 | kdc2-xufp
237 | 8s6g-dibx
238 | y9rm-5xha
239 | abvn-qkj9
240 | i2gf-u5vm
241 | h69z-r89y
242 | egfg-bqgy
243 | vp2f-wzyp
244 | ybx9-858a
245 | zh7a-ng4h
246 | uqac-iii2
247 | me84-5r83
248 | bms6-cuv9
249 | g4vh-839k
250 | pa5u-sgw4
251 | 3n37-c973
252 | 3qef-bvyj
253 | 63px-qewx
254 | 48sf-3wbh
255 | yyj6-c972
256 | 3dsx-jp6d
257 | csvn-zna4
258 | d54a-rnqn
259 | srfv-kt6f
260 | 62em-cru9
261 | rk7f-7aur
262 | n3b6-y2as
263 | uqbc-s85r
264 | 4bzd-pbh9
265 | n3qd-yamd
266 | 86gx-6tq9
267 | ike9-p5uk
268 | u6tw-de8t
269 | 7afr-kaaj
270 | 33jk-s9st
271 | btu7-6eug
272 | 2q9r-ang2
273 | 59j9-dv23
274 | 3qga-tuus
275 | 5ezz-fzka
276 | khtg-hjme
277 | wsq9-3pi7
278 | pubm-q7h7
279 | 7wta-wwze
280 | vni7-52xi
281 | vizs-ak74
282 | inrc-3sdk
283 | dep6-aymg
284 | u943-cjsi
285 | q9ik-kfw5
286 | kdh3-uyqv
287 | qujq-hb7j
288 | 4xre-g8wq
289 | vtgf-yvms
290 | 9j7i-47f8
291 | 3e3e-bpvz
292 | nzvx-6q5z
293 | chad-mx22
294 | u9uc-sdkn
295 | 7tzq-etnt
296 | ug6h-umbi
297 | hcdm-kjgn
298 | ht22-msjv
299 | ktbb-ft9x
300 | kbbe-7sj5
301 | y354-d5xg
302 | k5sh-vdtn
303 | qs7f-tjd2
304 | a5i7-qmwv
305 | 6wmn-5es3
306 | aszk-8atd
307 | xiut-24bw
308 | tjkh-4hzc
309 | 6sba-d8sc
310 | d8qh-bhiz
311 | fyy2-btx2
312 | 3b6m-fezs
313 | 595d-uejx
314 | n39a-jdzb
315 | i5mb-yzrn
316 | 62zc-p5yh
317 | qn6v-4dbh
318 | muf4-te7d
319 | whpx-8wpg
320 | dbkf-sdrd
321 | 8cqf-3hmz
322 | tgy2-3bts
323 | xxuz-7axm
324 | 4fa9-6zye
325 | xvh3-c5hd
326 | fvmf-h4id
327 | a225-i9ns
328 | w2ms-kfus
329 | ciib-ux6u
330 | 8ae2-m26m
331 | 4v56-4hak
332 | sgnx-e3u2
333 | 4ee7-dsf6
334 | gnse-cass
335 | mjk8-4ukd
336 | fjvg-9ez2
337 | 6x5f-8hj7
338 | cax4-b3pr
339 | htu3-4pz3
340 | c89k-cwcz
341 | isug-45sj
342 | cfh4-sh93
343 | ydv5-y4pu
344 | s993-cqfv
345 | afax-7r8v
346 | 7t29-dqaa
347 | 7yns-bcn4
348 | 2gjm-4p4u
349 | b6ng-fzk2
350 | cz52-j8kf
351 | bia2-hxpv
352 | ttdf-sm74
353 | p9vx-6egn
354 | rvwz-r2uq
355 | 7bc3-xv6c
356 | fib4-gufy
357 | 9xcd-grar
358 | 5h4y-jwsi
359 | bn8p-vmrc
360 | 6fc6-nb9b
361 | eua2-4g54
362 | i378-jjyk
363 | inh5-kwtp
364 | jtyk-xyvk
365 | iu2r-7x7v
366 | kq82-ivk4
367 | qhsy-xvcf
368 | 8z24-x5k8
369 | frxt-9vri
370 | dqag-kq5r
371 | q2gy-8hxm
372 | gqet-eavx
373 | fwq5-ux79
374 | prdj-dgnz
375 | y4rx-kdcn
376 | mhj4-e4bq
377 | vbxz-36ag
378 | smk2-dtnx
379 | mner-asqn
380 | qri7-6kh2
381 | tbpd-v3xm
382 | 2zm2-j9bx
383 | 3ang-jx8p
384 | 4adh-4bvw
385 | 5c4w-qa5z
386 | 8559-68u4
387 | 8dgt-s4f6
388 | 8ykz-eevg
389 | bbbg-3rfk
390 | bpwx-u8fh
391 | bvkc-z2zh
392 | c7fz-ay4i
393 | dxb6-mnqt
394 | e6xv-4pu8
395 | fvzv-dxut
396 | gzey-cwe5
397 | h9v5-2eis
398 | hedd-bhp6
399 | kga2-r2kk
400 | knbn-7s9q
401 | mqpu-hu3f
402 | nc9g-y3uy
403 | qa2a-xevx
404 | s32g-v2f9
405 | swue-sa5z
406 | um2u-tk2u
407 | vhdz-3ngk
408 | vkvi-vkp5
409 | x9fe-c4x6
410 | xyw3-igvf
411 | kgyy-nc79
412 | ref6-s87x
413 | iaa7-x8kk
414 | jy6g-t358
415 | 8ys2-jbnc
416 | tkwe-shaj
417 | c2gf-23xz
418 | heq4-cm4h
419 | jwmg-yacn
420 | mcgy-76ui
421 | gdcb-e7r7
422 | 5up9-65nx
423 | 4b36-t359
424 | ufaw-xtbh
425 | r52a-rz3g
426 | mg8q-us7m
427 | dvei-8sdh
428 | gv7r-t5a7
429 | 99wm-kyny
430 | 3pi9-b3rf
431 | enev-ph2s
432 | gv6h-cphf
433 | 7wwb-zx48
434 | g2nn-qd2k
435 | nqq8-ixbd
436 | s5d6-6x7q
437 | 7eaf-pe73
438 | 3qt5-kz7y
439 | vgvc-bpwz
440 | hhjj-czpq
441 | q4c4-5fu4
442 | wap3-zzbk
443 | s2r9-4htf
444 | tqgb-ivff
445 | m3rm-9ij3
446 |
--------------------------------------------------------------------------------
/type_detection/ids/edmonton_ids.txt:
--------------------------------------------------------------------------------
1 | sthd-gad4
2 | 62wr-c9ye
3 | cggb-hzzm
4 | yutc-c5ju
5 | 7gh5-bnbs
6 | htrg-yirr
7 | 5zeu-wkpv
8 | jyra-si4k
9 | ickf-52qg
10 | 65fr-66s6
11 | 7yqh-39tz
12 | h9sn-ds8j
13 | id8i-xwha
14 | 2d5a-esdr
15 | w4cu-waf8
16 | k29i-u25u
17 | qh6u-3haa
18 | febx-sqja
19 | ebvt-eg97
20 | ehbr-emhe
21 | qsmn-3w99
22 | c4nr-3quz
23 | 25b5-e682
24 | f2bf-5wqc
25 | 2n87-9vjk
26 | ih46-vxeq
27 | ieti-capx
28 | jn25-zspi
29 | ia7a-fuzq
30 | 83ud-pzgi
31 | gem5-v2v7
32 | zxf6-rv9a
33 | rm66-tzy5
34 | 7ndn-2x34
35 | xt3a-rjpj
36 | 8muu-jq8v
37 | mqxt-kvm4
38 | nmpv-7vqg
39 | 6avx-8i8e
40 | prfy-5m97
41 | nfvw-n5k3
42 | xjvb-fthb
43 | eecg-fc54
44 | 2rpu-peb3
45 | f6w2-hzex
46 | auxe-iahd
47 | yznh-9n2u
48 | kcuu-2yxg
49 | dpcz-nupn
50 | kaps-rk7z
51 | ykpk-qyvg
52 | eaag-ra2c
53 | 9j6k-uzig
54 | bwpb-bppt
55 | 2976-zu3f
56 | mbxg-7qmu
57 | kyhu-r33m
58 | mu9e-3mkm
59 | y29c-39y9
60 | nc6t-tngg
61 | a56e-tkzk
62 | znrg-tr8s
63 | 7isy-c8ka
64 | n7fj-2yfs
65 | ysgw-us2d
66 | v6pz-ntae
67 | gp5m-pueh
68 | 4vt2-8zrq
69 | gw7p-ee8r
70 | iysa-xg87
71 | hubs-ug3y
72 | dnbn-a9ny
73 | j7nr-ekvn
74 | 8xti-zu8p
75 | f6ne-aqna
76 | g57i-jmvw
77 | u6ue-anax
78 | e7aq-scxv
79 | gy8c-eq6u
80 | uwuq-3zq3
81 | 72aw-fq6m
82 | 6mre-4inz
83 | 9et8-3bg5
84 | es6z-qyb7
85 | 2mxf-2xyw
86 | 46fr-szhj
87 | c7ev-7y4u
88 | mst8-di4d
89 | 82ij-m8wd
90 | 3yki-4nkh
91 | ctwr-tvrd
92 | bfsu-5y9w
93 | h5ce-uubp
94 | qzrg-q4nv
95 | d577-xky7
96 | xsrd-4894
97 | 7fus-qa4r
98 | hnhf-yaps
99 | knai-w28i
100 | 5ayy-wxat
101 | vaiq-ubi5
102 | gajw-anue
103 | 2hvz-kffj
104 | fqnx-qdsk
105 | px79-vegt
106 | bqmh-j34s
107 | shh7-vzch
108 | punv-um3q
109 | si3b-qk24
110 | mvbx-i64u
111 | 2crc-aced
112 | gdd9-eqv9
113 | 9khp-yjaa
114 | b4y7-zhnz
115 | scjj-9qzz
116 | uytp-iqga
117 | 5xcn-q5zx
118 | 9hwi-bdju
119 | i32t-b6vf
120 | ei4u-794h
121 | wr39-6xm9
122 | j6uz-tjg8
123 | 9nqb-w48x
124 | tm8z-k466
125 | y7yb-jtjn
126 | dknb-ctqa
127 | bnuf-aarq
128 | 84rh-tbam
129 | 7rap-ipwf
130 | xzjj-3r8e
131 | xydx-f66g
132 | qdcb-svkz
133 | 4adt-tbf8
134 | beu6-4urm
135 | y4r5-xpku
136 | 9edk-tayw
137 | egz6-jdf3
138 | ggak-65yh
139 | jr4d-a5zs
140 | vmrg-prz6
141 | 6hiw-nih3
142 | iwjg-nq4w
143 | jruw-eduf
144 | t37t-2z9n
145 | xhpv-2y2t
146 | q78x-bpvt
147 | pwa8-zn84
148 | 375q-qr55
149 | 98hy-qgun
150 | 6gjh-jny3
151 | b58q-nxjr
152 | 57bz-3nsb
153 | fbs7-uv7y
154 | rjq9-ge5s
155 | 55vi-yv73
156 | g2ht-buzt
157 | 33re-ygv7
158 | ggrb-bzbj
159 | 4agm-wt9h
160 | 8h3r-5ys2
161 | f9i7-hdjq
162 | ks8u-4a6b
163 | ud9b-q65g
164 | p26r-vxf7
165 | nrf5-ighq
166 | x8j3-j3q3
167 | x2gc-xf9e
168 | 7yt8-7467
169 | tzuj-jcfp
170 | j5gy-p5g8
171 | my5s-s4eh
172 | 7njw-4e49
173 | 9isd-fcah
174 | df5n-u36u
175 | s2tr-4vrj
176 | r45z-99vd
177 | rgfm-xqag
178 | cjgx-qcup
179 | f2sy-bth7
180 | 692u-9tuj
181 | xgwu-c37w
182 | rh82-ntt9
183 | kffi-kb6e
184 | ddqk-i2ey
185 | gds5-6aiq
186 | nqs8-f6fc
187 | zmac-3mxq
188 | pkdk-9dwc
189 | jir4-uuhx
190 | myu9-qngm
191 | a76p-ee58
192 | bnfb-vu2t
193 | vbdj-jxmq
194 | npym-f8ef
195 | qt34-hwip
196 | bdis-rq5p
197 | djyx-z2bv
198 | b2ak-ut7u
199 | 87u8-3yfv
200 | qrrq-9bsx
201 | ykrg-bwt7
202 | paqn-uf4u
203 | mnwt-r49h
204 | kiry-88gi
205 | 65xu-w8px
206 | 2694-uced
207 | achm-af7d
208 | nekk-97cp
209 | ngd7-ejms
210 | nna3-34af
211 | 8svr-ivxz
212 | em7g-s625
213 | hk82-7wj7
214 | gjev-z5ji
215 | 8peh-czku
216 | igwr-fbps
217 | 3nma-d8m3
218 | tizy-3vkk
219 | nzgd-btmv
220 | mvki-i26q
221 | 4ev8-6z47
222 | 4mm2-9j7w
223 | 9s54-maam
224 | uwg3-tppw
225 | mpd9-nhrp
226 | i73h-tmq7
227 | da5j-rtis
228 | icpf-mjv6
229 | fftr-j9wv
230 | a5ef-84t3
231 | x7r5-d4hj
232 | wans-jfwv
233 | mgii-4cth
234 | 6w5t-p7n9
235 | 5md5-bka8
236 | kdc2-xufp
237 | 8s6g-dibx
238 | y9rm-5xha
239 | abvn-qkj9
240 | i2gf-u5vm
241 | h69z-r89y
242 | egfg-bqgy
243 | vp2f-wzyp
244 | ybx9-858a
245 | zh7a-ng4h
246 | uqac-iii2
247 | me84-5r83
248 | bms6-cuv9
249 | g4vh-839k
250 | pa5u-sgw4
251 | 3n37-c973
252 | 3qef-bvyj
253 | 63px-qewx
254 | 48sf-3wbh
255 | yyj6-c972
256 | 3dsx-jp6d
257 | csvn-zna4
258 | d54a-rnqn
259 | srfv-kt6f
260 | 62em-cru9
261 | rk7f-7aur
262 | n3b6-y2as
263 | uqbc-s85r
264 | 4bzd-pbh9
265 | n3qd-yamd
266 | 86gx-6tq9
267 | ike9-p5uk
268 | u6tw-de8t
269 | 7afr-kaaj
270 | 33jk-s9st
271 | btu7-6eug
272 | 2q9r-ang2
273 | 59j9-dv23
274 | 3qga-tuus
275 | 5ezz-fzka
276 | khtg-hjme
277 | wsq9-3pi7
278 | pubm-q7h7
279 | 7wta-wwze
280 | vni7-52xi
281 | vizs-ak74
282 | inrc-3sdk
283 | dep6-aymg
284 | u943-cjsi
285 | q9ik-kfw5
286 | kdh3-uyqv
287 | qujq-hb7j
288 | 4xre-g8wq
289 | vtgf-yvms
290 | 9j7i-47f8
291 | 3e3e-bpvz
292 | nzvx-6q5z
293 | chad-mx22
294 | u9uc-sdkn
295 | 7tzq-etnt
296 | ug6h-umbi
297 | hcdm-kjgn
298 | ht22-msjv
299 | ktbb-ft9x
300 | kbbe-7sj5
301 | y354-d5xg
302 | k5sh-vdtn
303 | qs7f-tjd2
304 | a5i7-qmwv
305 | 6wmn-5es3
306 | aszk-8atd
307 | xiut-24bw
308 | tjkh-4hzc
309 | 6sba-d8sc
310 | d8qh-bhiz
311 | fyy2-btx2
312 | 3b6m-fezs
313 | 595d-uejx
314 | n39a-jdzb
315 | i5mb-yzrn
316 | 62zc-p5yh
317 | qn6v-4dbh
318 | muf4-te7d
319 | whpx-8wpg
320 | dbkf-sdrd
321 | 8cqf-3hmz
322 | tgy2-3bts
323 | xxuz-7axm
324 | 4fa9-6zye
325 | xvh3-c5hd
326 | fvmf-h4id
327 | a225-i9ns
328 | w2ms-kfus
329 | ciib-ux6u
330 | 8ae2-m26m
331 | 4v56-4hak
332 | sgnx-e3u2
333 | 4ee7-dsf6
334 | gnse-cass
335 | mjk8-4ukd
336 | fjvg-9ez2
337 | 6x5f-8hj7
338 | cax4-b3pr
339 | htu3-4pz3
340 | c89k-cwcz
341 | isug-45sj
342 | cfh4-sh93
343 | ydv5-y4pu
344 | s993-cqfv
345 | afax-7r8v
346 | 7t29-dqaa
347 | 7yns-bcn4
348 | 2gjm-4p4u
349 | b6ng-fzk2
350 | cz52-j8kf
351 | bia2-hxpv
352 | ttdf-sm74
353 | p9vx-6egn
354 | rvwz-r2uq
355 | 7bc3-xv6c
356 | fib4-gufy
357 | 9xcd-grar
358 | 5h4y-jwsi
359 | bn8p-vmrc
360 | 6fc6-nb9b
361 | eua2-4g54
362 | i378-jjyk
363 | inh5-kwtp
364 | jtyk-xyvk
365 | iu2r-7x7v
366 | kq82-ivk4
367 | qhsy-xvcf
368 | 8z24-x5k8
369 | frxt-9vri
370 | dqag-kq5r
371 | q2gy-8hxm
372 | gqet-eavx
373 | fwq5-ux79
374 | prdj-dgnz
375 | y4rx-kdcn
376 | mhj4-e4bq
377 | vbxz-36ag
378 | smk2-dtnx
379 | mner-asqn
380 | qri7-6kh2
381 | tbpd-v3xm
382 | 2zm2-j9bx
383 | 3ang-jx8p
384 | 4adh-4bvw
385 | 5c4w-qa5z
386 | 8559-68u4
387 | 8dgt-s4f6
388 | 8ykz-eevg
389 | bbbg-3rfk
390 | bpwx-u8fh
391 | bvkc-z2zh
392 | c7fz-ay4i
393 | dxb6-mnqt
394 | e6xv-4pu8
395 | fvzv-dxut
396 | gzey-cwe5
397 | h9v5-2eis
398 | hedd-bhp6
399 | kga2-r2kk
400 | knbn-7s9q
401 | mqpu-hu3f
402 | nc9g-y3uy
403 | qa2a-xevx
404 | s32g-v2f9
405 | swue-sa5z
406 | um2u-tk2u
407 | vhdz-3ngk
408 | vkvi-vkp5
409 | x9fe-c4x6
410 | xyw3-igvf
411 | kgyy-nc79
412 | ref6-s87x
413 | iaa7-x8kk
414 | jy6g-t358
415 | 8ys2-jbnc
416 | tkwe-shaj
417 | c2gf-23xz
418 | heq4-cm4h
419 | jwmg-yacn
420 | mcgy-76ui
421 | gdcb-e7r7
422 | 5up9-65nx
423 | 4b36-t359
424 | ufaw-xtbh
425 | r52a-rz3g
426 | mg8q-us7m
427 | dvei-8sdh
428 | gv7r-t5a7
429 | 99wm-kyny
430 | 3pi9-b3rf
431 | enev-ph2s
432 | gv6h-cphf
433 | 7wwb-zx48
434 | g2nn-qd2k
435 | nqq8-ixbd
436 | s5d6-6x7q
437 | 7eaf-pe73
438 | 3qt5-kz7y
439 | vgvc-bpwz
440 | hhjj-czpq
441 | q4c4-5fu4
442 | wap3-zzbk
443 | s2r9-4htf
444 | tqgb-ivff
445 | m3rm-9ij3
446 |
--------------------------------------------------------------------------------
/type_detection/ijson/backends/python.py:
--------------------------------------------------------------------------------
1 | '''
2 | Pure-python parsing backend.
3 | '''
4 | from __future__ import unicode_literals
5 | from decimal import Decimal
6 | import re
7 | from codecs import unicode_escape_decode
8 |
9 | from ijson import common
10 | from ijson.compat import chr
11 |
12 |
13 | BUFSIZE = 16 * 1024
14 | NONWS = re.compile(r'\S')
15 | LEXTERM = re.compile(r'[^a-z0-9\.+-]')
16 |
17 |
class UnexpectedSymbol(common.JSONError):
    '''Raised when the lexer yields a token the parser cannot accept.'''
    def __init__(self, symbol, reader):
        # Report the first character of the offending token and the
        # stream offset at which the token started.
        position = reader.pos - len(symbol)
        message = 'Unexpected symbol "%s" at %d' % (symbol[0], position)
        super(UnexpectedSymbol, self).__init__(message)
21 |
class Lexer(object):
    '''
    JSON lexer. Supports iterator interface.

    Reads the underlying file in BUFSIZE-byte chunks, decodes them as
    UTF-8, and yields one token per next() call: a bare lexem
    (number/true/false/null), a complete quoted string (quotes kept,
    escapes undecoded), or a single structural character.
    '''
    def __init__(self, f):
        # f: readable binary file-like object containing the JSON text.
        self.f = f

    def __iter__(self):
        # Reset scanning state so the lexer can be iterated from scratch.
        self.buffer = ''
        self.pos = 0
        return self

    def __next__(self):
        # Return the next token, refilling the buffer when everything
        # buffered so far has been consumed.
        while True:
            match = NONWS.search(self.buffer, self.pos)
            if match:
                self.pos = match.start()
                char = self.buffer[self.pos]
                if 'a' <= char <= 'z' or '0' <= char <= '9' or char == '-':
                    # Start of a bare word (true/false/null) or a number.
                    return self.lexem()
                elif char == '"':
                    return self.stringlexem()
                else:
                    # Structural character ({, }, [, ], :, ,) - one token.
                    self.pos += 1
                    return char
            # Only whitespace (or nothing) remains buffered: replace the
            # buffer with a fresh chunk. Discarding the old tail is safe
            # because it contained no non-whitespace.
            # NOTE(review): read(BUFSIZE).decode('utf-8') raises
            # UnicodeDecodeError if a multi-byte UTF-8 sequence straddles
            # a chunk boundary - TODO confirm inputs are ASCII-safe.
            self.buffer = self.f.read(BUFSIZE).decode('utf-8')
            self.pos = 0
            if not len(self.buffer):
                raise StopIteration
    # Python 2 iterator protocol alias.
    next = __next__

    def lexem(self):
        # Scan a run of [a-z0-9.+-] starting at self.pos, refilling the
        # buffer until a terminator character (or EOF) is found.
        current = self.pos
        while True:
            match = LEXTERM.search(self.buffer, current)
            if match:
                current = match.start()
                break
            else:
                current = len(self.buffer)
                self.buffer += self.f.read(BUFSIZE).decode('utf-8')
                if len(self.buffer) == current:
                    # EOF: the lexem runs to the end of the input.
                    break
        result = self.buffer[self.pos:current]
        self.pos = current
        if self.pos > BUFSIZE:
            # Trim consumed text so the buffer does not grow unboundedly.
            self.buffer = self.buffer[self.pos:]
            self.pos = 0
        return result

    def stringlexem(self):
        # Scan a double-quoted string starting at self.pos; return it with
        # the surrounding quotes, leaving escape sequences undecoded.
        start = self.pos + 1
        while True:
            try:
                end = self.buffer.index('"', start)
                # Count backslashes immediately preceding the quote: an odd
                # count means the quote itself is escaped.
                escpos = end - 1
                while self.buffer[escpos] == '\\':
                    escpos -= 1
                if (end - escpos) % 2 == 0:
                    # Escaped quote - keep searching for the terminator.
                    start = end + 1
                else:
                    result = self.buffer[self.pos:end + 1]
                    self.pos = end + 1
                    return result
            except ValueError:
                # No closing quote buffered yet - pull in more input; if
                # EOF is reached first, the string is unterminated.
                old_len = len(self.buffer)
                self.buffer += self.f.read(BUFSIZE).decode('utf-8')
                if len(self.buffer) == old_len:
                    raise common.IncompleteJSONError()
91 |
def unescape(s):
    '''
    Generator yielding the decoded pieces of a JSON string body (the text
    between the quotes). Joining the pieces gives the unescaped string.
    '''
    # Single-character escapes and their decoded values.
    simple = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}
    i = 0
    while i < len(s):
        backslash = s.find('\\', i)
        if backslash == -1:
            # No escapes left - emit the remainder verbatim.
            yield s[i:]
            break
        # Emit the literal run before the escape, then decode it.
        yield s[i:backslash]
        marker = s[backslash + 1]
        if marker == 'u':
            # Four hex digits follow the \u marker.
            yield chr(int(s[backslash + 2:backslash + 6], 16))
            i = backslash + 6
        elif marker in simple:
            yield simple[marker]
            i = backslash + 2
        else:
            # Unknown escapes (including \" and \\) decode to the
            # escaped character itself.
            yield marker
            i = backslash + 2
118 |
def parse_value(lexer, symbol=None):
    '''
    Generator yielding (event, value) pairs for the single JSON value
    that starts with ``symbol``.

    Parameters:

    - lexer: token iterator (see Lexer)
    - symbol: already-read first token of the value; when None the next
      token is pulled from the lexer

    Raises UnexpectedSymbol for tokens that start no JSON value and
    IncompleteJSONError when the input ends mid-value.
    '''
    try:
        if symbol is None:
            symbol = next(lexer)
        if symbol == 'null':
            yield ('null', None)
        elif symbol == 'true':
            yield ('boolean', True)
        elif symbol == 'false':
            yield ('boolean', False)
        elif symbol == '[':
            for event in parse_array(lexer):
                yield event
        elif symbol == '{':
            for event in parse_object(lexer):
                yield event
        elif symbol[0] == '"':
            yield ('string', ''.join(unescape(symbol[1:-1])))
        else:
            try:
                # Use Decimal for any non-integral form. A plain int()
                # would wrongly reject exponent-only numbers such as
                # "1e5" that carry no decimal point.
                if '.' in symbol or 'e' in symbol or 'E' in symbol:
                    number = Decimal(symbol)
                else:
                    number = int(symbol)
                yield ('number', number)
            # Decimal signals malformed input with InvalidOperation (an
            # ArithmeticError subclass), not ValueError; catch both so
            # bad numbers surface as UnexpectedSymbol.
            except (ValueError, ArithmeticError):
                raise UnexpectedSymbol(symbol, lexer)
    except StopIteration:
        raise common.IncompleteJSONError()
145 |
def parse_array(lexer):
    '''
    Generator yielding events for one JSON array whose opening "[" has
    already been consumed by the caller.
    '''
    yield ('start_array', None)
    symbol = next(lexer)
    if symbol != ']':
        while True:
            # Parse one element; `symbol` holds its already-read first token.
            for event in parse_value(lexer, symbol):
                yield event
            symbol = next(lexer)
            if symbol == ']':
                break
            # Elements must be separated by commas; anything else is an error
            # (this also rejects trailing commas, since the token after a
            # comma is parsed as a value).
            if symbol != ',':
                raise UnexpectedSymbol(symbol, lexer)
            symbol = next(lexer)
    yield ('end_array', None)
160 |
def parse_object(lexer):
    '''
    Generator yielding events for one JSON object whose opening "{" has
    already been consumed by the caller.
    '''
    yield ('start_map', None)
    symbol = next(lexer)
    if symbol != '}':
        while True:
            # Keys must be quoted strings; the quotes are stripped here.
            # NOTE(review): keys are not passed through unescape(), unlike
            # string values in parse_value - escaped keys stay raw.
            if symbol[0] != '"':
                raise UnexpectedSymbol(symbol, lexer)
            yield ('map_key', symbol[1:-1])
            symbol = next(lexer)
            # A colon must separate each key from its value.
            if symbol != ':':
                raise UnexpectedSymbol(symbol, lexer)
            for event in parse_value(lexer):
                yield event
            symbol = next(lexer)
            if symbol == '}':
                break
            # Pairs must be separated by commas.
            if symbol != ',':
                raise UnexpectedSymbol(symbol, lexer)
            symbol = next(lexer)
    yield ('end_map', None)
181 |
def basic_parse(file):
    '''
    Iterator yielding unprefixed events.

    Parameters:

    - file: a readable file-like object with JSON input
    '''
    lexer = iter(Lexer(file))
    # Emit every event produced by the single top-level JSON value.
    for event in parse_value(lexer):
        yield event
    # The document must contain exactly one top-level value; any further
    # token is an error. Tokens are strings, so None marks exhaustion.
    leftover = next(lexer, None)
    if leftover is not None:
        raise common.JSONError('Additional data')
199 |
def parse(file):
    '''
    Backend-specific wrapper for ijson.common.parse.
    '''
    events = basic_parse(file)
    return common.parse(events)
205 |
def items(file, prefix):
    '''
    Backend-specific wrapper for ijson.common.items.
    '''
    events = parse(file)
    return common.items(events, prefix)
211 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/ijson/backends/python.py:
--------------------------------------------------------------------------------
1 | '''
2 | Pure-python parsing backend.
3 | '''
4 | from __future__ import unicode_literals
5 | from decimal import Decimal
6 | import re
7 | from codecs import unicode_escape_decode
8 |
9 | from ijson import common
10 | from ijson.compat import chr
11 |
12 |
13 | BUFSIZE = 16 * 1024
14 | NONWS = re.compile(r'\S')
15 | LEXTERM = re.compile(r'[^a-z0-9\.+-]')
16 |
17 |
class UnexpectedSymbol(common.JSONError):
    '''Raised when the lexer yields a token the parser cannot accept.'''
    def __init__(self, symbol, reader):
        # Report the first character of the offending token and the
        # stream offset at which the token started.
        position = reader.pos - len(symbol)
        message = 'Unexpected symbol "%s" at %d' % (symbol[0], position)
        super(UnexpectedSymbol, self).__init__(message)
21 |
class Lexer(object):
    '''
    JSON lexer. Supports iterator interface.

    Reads the underlying file in BUFSIZE-byte chunks, decodes them as
    UTF-8, and yields one token per next() call: a bare lexem
    (number/true/false/null), a complete quoted string (quotes kept,
    escapes undecoded), or a single structural character.
    '''
    def __init__(self, f):
        # f: readable binary file-like object containing the JSON text.
        self.f = f

    def __iter__(self):
        # Reset scanning state so the lexer can be iterated from scratch.
        self.buffer = ''
        self.pos = 0
        return self

    def __next__(self):
        # Return the next token, refilling the buffer when everything
        # buffered so far has been consumed.
        while True:
            match = NONWS.search(self.buffer, self.pos)
            if match:
                self.pos = match.start()
                char = self.buffer[self.pos]
                if 'a' <= char <= 'z' or '0' <= char <= '9' or char == '-':
                    # Start of a bare word (true/false/null) or a number.
                    return self.lexem()
                elif char == '"':
                    return self.stringlexem()
                else:
                    # Structural character ({, }, [, ], :, ,) - one token.
                    self.pos += 1
                    return char
            # Only whitespace (or nothing) remains buffered: replace the
            # buffer with a fresh chunk. Discarding the old tail is safe
            # because it contained no non-whitespace.
            # NOTE(review): read(BUFSIZE).decode('utf-8') raises
            # UnicodeDecodeError if a multi-byte UTF-8 sequence straddles
            # a chunk boundary - TODO confirm inputs are ASCII-safe.
            self.buffer = self.f.read(BUFSIZE).decode('utf-8')
            self.pos = 0
            if not len(self.buffer):
                raise StopIteration
    # Python 2 iterator protocol alias.
    next = __next__

    def lexem(self):
        # Scan a run of [a-z0-9.+-] starting at self.pos, refilling the
        # buffer until a terminator character (or EOF) is found.
        current = self.pos
        while True:
            match = LEXTERM.search(self.buffer, current)
            if match:
                current = match.start()
                break
            else:
                current = len(self.buffer)
                self.buffer += self.f.read(BUFSIZE).decode('utf-8')
                if len(self.buffer) == current:
                    # EOF: the lexem runs to the end of the input.
                    break
        result = self.buffer[self.pos:current]
        self.pos = current
        if self.pos > BUFSIZE:
            # Trim consumed text so the buffer does not grow unboundedly.
            self.buffer = self.buffer[self.pos:]
            self.pos = 0
        return result

    def stringlexem(self):
        # Scan a double-quoted string starting at self.pos; return it with
        # the surrounding quotes, leaving escape sequences undecoded.
        start = self.pos + 1
        while True:
            try:
                end = self.buffer.index('"', start)
                # Count backslashes immediately preceding the quote: an odd
                # count means the quote itself is escaped.
                escpos = end - 1
                while self.buffer[escpos] == '\\':
                    escpos -= 1
                if (end - escpos) % 2 == 0:
                    # Escaped quote - keep searching for the terminator.
                    start = end + 1
                else:
                    result = self.buffer[self.pos:end + 1]
                    self.pos = end + 1
                    return result
            except ValueError:
                # No closing quote buffered yet - pull in more input; if
                # EOF is reached first, the string is unterminated.
                old_len = len(self.buffer)
                self.buffer += self.f.read(BUFSIZE).decode('utf-8')
                if len(self.buffer) == old_len:
                    raise common.IncompleteJSONError()
91 |
def unescape(s):
    '''
    Generator yielding the decoded pieces of a JSON string body (the text
    between the quotes). Joining the pieces gives the unescaped string.
    '''
    # Single-character escapes and their decoded values.
    simple = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}
    i = 0
    while i < len(s):
        backslash = s.find('\\', i)
        if backslash == -1:
            # No escapes left - emit the remainder verbatim.
            yield s[i:]
            break
        # Emit the literal run before the escape, then decode it.
        yield s[i:backslash]
        marker = s[backslash + 1]
        if marker == 'u':
            # Four hex digits follow the \u marker.
            yield chr(int(s[backslash + 2:backslash + 6], 16))
            i = backslash + 6
        elif marker in simple:
            yield simple[marker]
            i = backslash + 2
        else:
            # Unknown escapes (including \" and \\) decode to the
            # escaped character itself.
            yield marker
            i = backslash + 2
118 |
def parse_value(lexer, symbol=None):
    '''
    Generator yielding (event, value) pairs for the single JSON value
    that starts with ``symbol``.

    Parameters:

    - lexer: token iterator (see Lexer)
    - symbol: already-read first token of the value; when None the next
      token is pulled from the lexer

    Raises UnexpectedSymbol for tokens that start no JSON value and
    IncompleteJSONError when the input ends mid-value.
    '''
    try:
        if symbol is None:
            symbol = next(lexer)
        if symbol == 'null':
            yield ('null', None)
        elif symbol == 'true':
            yield ('boolean', True)
        elif symbol == 'false':
            yield ('boolean', False)
        elif symbol == '[':
            for event in parse_array(lexer):
                yield event
        elif symbol == '{':
            for event in parse_object(lexer):
                yield event
        elif symbol[0] == '"':
            yield ('string', ''.join(unescape(symbol[1:-1])))
        else:
            try:
                # Use Decimal for any non-integral form. A plain int()
                # would wrongly reject exponent-only numbers such as
                # "1e5" that carry no decimal point.
                if '.' in symbol or 'e' in symbol or 'E' in symbol:
                    number = Decimal(symbol)
                else:
                    number = int(symbol)
                yield ('number', number)
            # Decimal signals malformed input with InvalidOperation (an
            # ArithmeticError subclass), not ValueError; catch both so
            # bad numbers surface as UnexpectedSymbol.
            except (ValueError, ArithmeticError):
                raise UnexpectedSymbol(symbol, lexer)
    except StopIteration:
        raise common.IncompleteJSONError()
145 |
def parse_array(lexer):
    '''
    Yield events for a JSON array whose opening '[' was just consumed.

    Emits ('start_array', None), every element's events, then
    ('end_array', None).  Raises UnexpectedSymbol on a malformed array.
    '''
    yield ('start_array', None)
    token = next(lexer)
    if token != ']':
        while True:
            # Emit the current element, then inspect what follows it.
            for event in parse_value(lexer, token):
                yield event
            token = next(lexer)
            if token == ']':
                break
            if token != ',':
                raise UnexpectedSymbol(token, lexer)
            # A value is mandatory after a comma (no trailing commas):
            # feeding a stray ']' into parse_value makes it fail, as before.
            token = next(lexer)
    yield ('end_array', None)
160 |
def parse_object(lexer):
    '''
    Yield events for a JSON object whose opening '{' was just consumed.

    Emits ('start_map', None), then for each member a ('map_key', key)
    event followed by the value's events, and finally ('end_map', None).
    Raises UnexpectedSymbol on malformed input.
    '''
    yield ('start_map', None)
    token = next(lexer)
    if token != '}':
        while True:
            # Keys must be string lexemes; strip the surrounding quotes.
            # (Keys are reported raw, without unescaping — unchanged.)
            if token[0] != '"':
                raise UnexpectedSymbol(token, lexer)
            yield ('map_key', token[1:-1])
            token = next(lexer)
            if token != ':':
                raise UnexpectedSymbol(token, lexer)
            for event in parse_value(lexer):
                yield event
            token = next(lexer)
            if token == '}':
                break
            if token != ',':
                raise UnexpectedSymbol(token, lexer)
            token = next(lexer)
    yield ('end_map', None)
181 |
def basic_parse(file):
    '''
    Iterator yielding unprefixed (event, value) pairs for the JSON
    document in ``file``.

    Parameters:

    - file: a readable file-like object with JSON input
    '''
    lexer = iter(Lexer(file))
    for event in parse_value(lexer):
        yield event
    # Exactly one top-level value is allowed; any leftover lexeme is an
    # error.  A sentinel distinguishes "exhausted" from any real lexeme.
    sentinel = object()
    if next(lexer, sentinel) is not sentinel:
        raise common.JSONError('Additional data')
199 |
def parse(file):
    '''
    Backend-specific wrapper for ijson.common.parse.

    Parameters:

    - file: a readable file-like object with JSON input

    Returns the iterator produced by ijson.common.parse over this
    backend's basic_parse event stream.
    '''
    return common.parse(basic_parse(file))
205 |
def items(file, prefix):
    '''
    Backend-specific wrapper for ijson.common.items.

    Parameters:

    - file: a readable file-like object with JSON input
    - prefix: path prefix selecting which values to materialize (passed
      straight through to ijson.common.items)

    Returns the iterator produced by ijson.common.items over this
    backend's parse stream.
    '''
    return common.items(parse(file), prefix)
211 |
--------------------------------------------------------------------------------
/extract_zipcode_latlon/collect_data.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | ##
3 | ## Copyright (C) 2014, New York University.
4 | ## All rights reserved.
5 | ## Contact: kien.pham@nyu.edu
6 | ##
7 | ## "Redistribution and use in source and binary forms, with or without
8 | ## modification, are permitted provided that the following conditions are met:
9 | ##
10 | ## - Redistributions of source code must retain the above copyright notice,
11 | ## this list of conditions and the following disclaimer.
12 | ## - Redistributions in binary form must reproduce the above copyright
13 | ## notice, this list of conditions and the following disclaimer in the
14 | ## documentation and/or other materials provided with the distribution.
15 | ## - Neither the name of New York University nor the names of its
16 | ## contributors may be used to endorse or promote products derived from
17 | ## this software without specific prior written permission.
18 | ##
19 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | ## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 | ## PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23 | ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 | ## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 | ## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 | ## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 | ## ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
30 | ##
31 | ###############################################################################
32 |
33 | # -*- coding:utf-8 -*-
34 | from __future__ import unicode_literals
35 | import json
36 | import sys
37 | import ijson
38 | import codecs
39 | import re
40 | import os.path
41 |
def get_zipcode(f, id2index):
    """Read a zipcode index file and record zipcode column indexes per id.

    Each line of the file *f* is tab-separated: a dataset id followed by
    alternating column-name / column-index tokens.  The integer tokens
    (every second field after the id) are collected into
    ``id2index[id][0]``; ids not seen before get a fresh
    ``[zipcode_indexes, latlon_indexes, time_indexes]`` triple.

    Fixes: ``dict.has_key`` (removed in Python 3) replaced with the
    ``in`` operator; the manual parity loop replaced with a stepped slice.
    """
    with open(f) as lines:
        for line in lines:
            fields = line.strip("\n").split("\t")
            dataset_id = fields[0]
            # Positions 2, 4, 6, ... hold the integer column indexes
            # (same fields the old "i % 2 == 0" loop selected).
            index_list = [int(tok) for tok in fields[2::2]]
            if dataset_id in id2index:
                id2index[dataset_id][0] = index_list
            else:
                # [zipcode_indexes, latlon_indexes, time_indexes]
                id2index[dataset_id] = [index_list, [], []]
55 |
def get_latlon(f, id2index):
    """Read a lat/lon index file and record coordinate column indexes per id.

    Each line of *f* is tab-separated: a dataset id followed by
    alternating column-name / column-index tokens.  When both a latitude
    and a longitude column name are recognized, ``id2index[id][1]`` is
    set to ``[lat_index, lon_index]``; ids not seen before get a fresh
    ``[zipcode_indexes, latlon_indexes, time_indexes]`` triple.

    Fixes: ``dict.has_key`` -> ``in``; bitwise ``&`` on comparison
    results -> logical ``and``; the longitude list contained
    "intptlat10" (a copy of the latitude name) where "intptlon10" was
    clearly intended — corrected so that TIGER-style lon columns match.
    """
    # Column names recognized as latitude / longitude respectively.
    lat = ["latitude", "x", "lat_dd_wgs84", "location_x", "centroidx",
           "coordinates", "lat", "location", "_lit_lat", "_south",
           "stop_lat", "building_latitude", "centroid_latitude",
           "intptlat", "intptlat10", "xpos", "_47_564727820", "x"]
    lon = ["longitude", "y", "lon_dd_wgs84", "location_y", "centroidy",
           "coordinates", "lon", "location", "_lit_lon", "_west",
           "stop_lon", "building_longitude", "centroid_longitude",
           "intptlon", "intptlon10", "ypos", "_122_363840492", "y"]
    with open(f) as lines:
        for line in lines:
            fields = line.strip("\n").split("\t")
            dataset_id = fields[0]
            index_list = [-1, -1]  # [lat_index, lon_index]
            pending = ""  # which coordinate the next numeric token belongs to
            for token in fields[1:]:
                if index_list[0] != -1 and index_list[1] != -1:
                    break  # both coordinates found; stop scanning
                if pending == "latitude":
                    index_list[0] = int(token)
                    pending = ""
                elif pending == "longitude":
                    index_list[1] = int(token)
                    pending = ""
                elif token in lat:
                    pending = "latitude"
                elif token in lon:
                    pending = "longitude"
            # Record the pair only when both coordinates were found.
            if index_list[0] != -1 and index_list[1] != -1:
                if dataset_id in id2index:
                    id2index[dataset_id][1] = index_list
                else:
                    # [zipcode_indexes, latlon_indexes, time_indexes]
                    id2index[dataset_id] = [[], index_list, []]
89 |
def get_data(output_path, data_path, city, id, id2index):
    """Extract zipcode and lat/lon values for one dataset and write them
    one per line to <output_path>/<city>_<id>-zipcode.txt and
    <output_path>/<city>_<id>-latlon.txt.

    id2index maps dataset id -> [zipcode_indexes, latlon_indexes,
    time_indexes]; only the first two entries are used here.
    """
    #Open files to write
    #OUTPUT: *zipcode.csv and *latlon.csv store list of zipcode and lat/long respectively
    zipcode_file = open(output_path + "/" + city + "_" + id + "-zipcode.txt", "w")
    latlon_file = open(output_path + "/" + city + "_" + id + "-latlon.txt", "w")

    #Initialize sets. Set contains distinct values
    #NOTE(review): despite the comment above, these are lists, so
    #duplicate values ARE kept and written out.
    zipcode_set = []
    latlon_set = []

    index = id2index[id]
    zipcode_index = index[0]   # column indexes that may hold a zipcode
    latlon_index = index[1]    # [lat_column, lon_column] when present

    # Stream the rows of the JSON export instead of loading it whole.
    content = open(data_path + "/" + id + ".json")
    data = ijson.items(content, 'data.item')
    Zipcode = re.compile('([\d]{5})')  # first run of exactly 5 digits
    try:
        for item in data:
            # NOTE(review): presumably drops 8 leading metadata columns so
            # data columns line up with the index files — TODO confirm
            # against the metadata collection step.
            item = item[8:]
            if len(zipcode_index) > 0: #If there is zipcode attribute
                for i in zipcode_index:#for each dataset, there could be more than row containing zipcode
                    if item[i]: #if value is not None
                        match_zipcode = Zipcode.search(str(item[i]))
                        if match_zipcode:
                            zipcode = match_zipcode.group(1)
                            zipcode_set.append(zipcode)
            elif len(latlon_index) == 2: #if there are lat/lon attributes
                lat = item[latlon_index[0]]
                lon = item[latlon_index[1]]
                if (lat != None) & (lon != None): # if values are not None
                    latlon = lat + "," + lon
                    latlon_set.append(latlon)
    except:
        #NOTE(review): bare except abandons the rest of the file on ANY
        #error (bad JSON, short row, non-string lat/lon) — deliberate
        #best-effort behavior, but it hides the actual failure.
        print id + "\tException"
    #Write to file
    if len(zipcode_set) > 0:
        for item in zipcode_set:
            try:
                zipcode_file.write(item + "\n")
            except:
                print id + "\tException"
                continue
    if len(latlon_set) > 0:
        for item in latlon_set:
            try:
                latlon_file.write(item + "\n")
            except:
                print id + "\tException"
                continue
    zipcode_file.close()
    latlon_file.close()
142 |
def main(argv):
    """Entry point.  argv = [city, data_path, output_path].

    Loads the city's zipcode and lat/lon index files from index/ and
    runs get_data for every dataset id found in either of them.
    """
    if len(argv) != 3:
        print "The program takes 3 arguments, " + str(len(argv)) + " is given."
        return

    city = argv[0] #City name
    data_path = argv[1] #Directory that store JSON files
    output_path = argv[2] #Directory that stores result

    zipcode_file = "index/" + city + "_zipcode_index.txt"
    latlon_file = "index/" + city + "_latlon_index.txt"
    print zipcode_file
    print latlon_file
    #NOTE(review): bitwise '&' works here only because os.path.isfile
    #returns bools; logical 'and' would be the idiomatic choice.
    if (os.path.isfile(zipcode_file)) & (os.path.isfile(latlon_file)):
        id2index = {} #Each id is mapped to 3 lists. first list contains zipcode, second list contains latlon, third list contains time
        get_zipcode(zipcode_file, id2index)
        get_latlon(latlon_file, id2index)
        for id in id2index:
            get_data(output_path, data_path, city, id, id2index)

# Run only when executed as a script, not on import.
if __name__=="__main__":
    main(sys.argv[1:])
165 |
--------------------------------------------------------------------------------
/latlon_to_zipcode/KdTreeBB.hpp:
--------------------------------------------------------------------------------
1 | //#############################################################################
2 | //
3 | // Copyright (C) 2014, New York University.
4 | // All rights reserved.
5 | // Contact: huy.vo@nyu.edu, kien.pham@nyu.edu
6 | //
7 | // "Redistribution and use in source and binary forms, with or without
8 | // modification, are permitted provided that the following conditions are met:
9 | //
10 | // - Redistributions of source code must retain the above copyright notice,
11 | // this list of conditions and the following disclaimer.
12 | // - Redistributions in binary form must reproduce the above copyright
13 | // notice, this list of conditions and the following disclaimer in the
14 | // documentation and/or other materials provided with the distribution.
15 | // - Neither the name of New York University nor the names of its
16 | // contributors may be used to endorse or promote products derived from
17 | // this software without specific prior written permission.
18 | //
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 | // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 | // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 | // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 | // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
30 | //
31 | //#############################################################################
32 |
33 |
34 | #ifndef KD_TREE_BB_HPP
35 | #define KD_TREE_BB_HPP
36 |
// Reconstructed: the original #include targets were stripped along with
// all other '<...>' text.  These headers cover every name the class uses
// (std::max/min/nth_element, FLT_MAX, uint32_t, fprintf, malloc/free,
// std::vector).
#include <algorithm>
#include <cfloat>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <vector>
45 |
// Kd-tree over 2-D bounding boxes.  Items are split alternately on the
// x and y extents; leaves store the item's user-data pointer.
//
// NOTE(review): this header had been corrupted by stripped '<...>' text
// (empty #includes, missing template arguments, truncated statements in
// buildKdTree).  The missing pieces are reconstructed below; the element
// type of QueryResult is grounded in searchKdTree, which pushes back the
// int pointed to by the stored data pointer.  The lost hand-written
// median partition is replaced by std::nth_element, which preserves the
// only invariant the tree needs (items[0..medianIndex] hold the smallest
// keys) while guaranteeing a balanced split.
class KdTreeBB
{
public:
#pragma pack(push, 1)
    // One input rectangle: bbox[dim][0] = min, bbox[dim][1] = max.
    struct Item {
        float bbox[2][2];
        void *data;   // user payload; searchKdTree reads it as int*
    };

    // Packed tree node.  child_node: (uint32_t)-1 = empty subtree,
    // 0 = leaf, otherwise index of the first of its two children.
    // In a leaf, leftBounds holds the item's interval on the split axis
    // and rightBounds is reused as raw storage for the data pointer
    // (2 floats = 8 bytes, enough for a 64-bit pointer when packed).
    struct KdNode {
        uint32_t child_node;
        float leftBounds[2];
        float rightBounds[2];
    };
#pragma pack(pop)

    // True when the 1-D intervals a and b overlap.
    static bool dimIntersect(const float a[2], const float b[2]) {
        return std::max(a[0], b[0]) <= std::min(a[1], b[1]);
    }

    // Axis-aligned query window; default-constructed it matches all space.
    struct Query
    {
        Query() {
            for (int i = 0; i < 2; i++) {
                this->bounds[i][0] = -FLT_MAX;
                this->bounds[i][1] = FLT_MAX;
            }
        }

        // NOTE(review): stores bounds[1] as {top, bottom}; callers are
        // expected to use screen-style coordinates where top < bottom —
        // TODO confirm against main.cpp.
        void setViewport(float left, float bottom, float right, float top)
        {
            this->bounds[0][0] = left;
            this->bounds[0][1] = right;
            this->bounds[1][0] = top;
            this->bounds[1][1] = bottom;
        }

        // Exact rectangle-vs-window test on both axes.
        bool isMatched(const Item *item) const
        {
            for (int i = 0; i < 2; i++)
                if (!dimIntersect(item->bbox[i], this->bounds[i]))
                    return false;
            return true;
        }

        float bounds[2][2];
    };

    // Results are the ints the stored data pointers point at.
    typedef std::vector<int> QueryResult;

public:

    KdTreeBB()
    {
    }

    // Append the payload of every stored item whose box may overlap q.
    void query(const Query &q, QueryResult &result) const {
        if (!this->nodes.empty())   // robust against query-before-build
            searchKdTree(this->nodes.data(), 0, 0, q, result);
    }

    // Build the tree over items[0..n-1].  The items array is reordered.
    void createKdTree(Item *items, int n) {
        this->maxDepth = 0;
        this->nodes.resize(std::max(n * 4, 1));
        // Scratch buffer kept only for buildKdTree's unchanged signature;
        // the nth_element-based partition no longer needs it.
        float *tmp = (float*)malloc(sizeof(float) * std::max(n, 1));
        int freeNode = 1;
        buildKdTree(this->nodes.data(), tmp, items, n, 0, 0, freeNode);
        fprintf(stderr, "Created a Kd tree for bounding boxes with %d nodes and depth %d.\n", freeNode, this->maxDepth);
        free(tmp);
    }

private:
    std::vector<KdNode> nodes;
    int maxDepth;   // deepest leaf seen while building (statistics only)

    // Orders items by the low edge of their box on one axis.
    struct KeyLess {
        int axis;
        explicit KeyLess(int a) : axis(a) {}
        bool operator()(const Item &a, const Item &b) const {
            return a.bbox[axis][0] < b.bbox[axis][0];
        }
    };

    void buildKdTree(KdNode *nodes, float *tmp, Item *items, int n, int depth, int thisNode, int &freeNode) {
        KdNode *node = nodes + thisNode;
        int keyIndex = depth % 2;   // alternate x / y per level
        if (n == 0) {
            node->child_node = (uint32_t)-1;
            return;
        }
        if (n < 2) {
            // Leaf: stash this axis' interval and the data pointer.
            node->child_node = 0;
            node->leftBounds[0] = items->bbox[keyIndex][0];
            node->leftBounds[1] = items->bbox[keyIndex][1];
            *((void**)(node->rightBounds)) = items->data;
            if (this->maxDepth < depth)
                this->maxDepth = depth;
            return;
        }
        // Partition so that items[0..medianIndex] carry the smallest keys
        // on this axis; the exact order inside each half is irrelevant
        // because both halves' bounds are recomputed below.
        int medianIndex = n / 2 - 1;
        std::nth_element(items, items + medianIndex, items + n, KeyLess(keyIndex));

        // Compute each half's aggregated interval on the split axis.
        node->leftBounds[0] = node->rightBounds[0] = FLT_MAX;
        node->leftBounds[1] = node->rightBounds[1] = -FLT_MAX;

        for (int i = 0; i <= medianIndex; i++) {
            if (items[i].bbox[keyIndex][0] < node->leftBounds[0])
                node->leftBounds[0] = items[i].bbox[keyIndex][0];
            if (items[i].bbox[keyIndex][1] > node->leftBounds[1])
                node->leftBounds[1] = items[i].bbox[keyIndex][1];
        }

        for (int i = medianIndex + 1; i < n; i++) {
            if (items[i].bbox[keyIndex][0] < node->rightBounds[0])
                node->rightBounds[0] = items[i].bbox[keyIndex][0];
            if (items[i].bbox[keyIndex][1] > node->rightBounds[1])
                node->rightBounds[1] = items[i].bbox[keyIndex][1];
        }

        node->child_node = freeNode;
        freeNode += 2;
        buildKdTree(nodes, tmp, items, medianIndex + 1, depth + 1, node->child_node, freeNode);
        if (medianIndex + 1 < n)
            buildKdTree(nodes, tmp, items + medianIndex + 1, n - medianIndex - 1, depth + 1, node->child_node + 1, freeNode);
        else
            nodes[node->child_node + 1].child_node = (uint32_t)-1;
    }

    void searchKdTree(const KdNode *nodes, uint32_t root, int depth, const Query &query, QueryResult &result) const {
        const KdNode *node = nodes + root;
        int rangeIndex = depth % 2;

        if (node->child_node == (uint32_t)-1) {
            return;   // empty subtree
        }
        if (node->child_node == 0) {
            // Leaf: test this axis' interval, then report the payload.
            if (dimIntersect(node->leftBounds, query.bounds[rangeIndex]))
            {
                result.push_back(*(*(int**)(node->rightBounds)));
            }
            return;
        }
        // Inner node: descend into whichever halves overlap the window.
        if (dimIntersect(node->leftBounds, query.bounds[rangeIndex]))
            searchKdTree(nodes, node->child_node, depth + 1, query, result);
        if (dimIntersect(node->rightBounds, query.bounds[rangeIndex]))
            searchKdTree(nodes, node->child_node + 1, depth + 1, query, result);
    }
};
201 |
202 | #endif
203 |
--------------------------------------------------------------------------------