├── .flake8
├── .gitignore
├── .pylintrc
├── .travis.yml
├── README.md
├── config.py
├── datausa
│   ├── __init__.py
│   ├── acs
│   │   ├── __init__.py
│   │   ├── abstract_models.py
│   │   ├── automap_models.py
│   │   ├── models.py
│   │   └── stats_models.py
│   ├── attrs
│   │   ├── __init__.py
│   │   ├── consts.py
│   │   ├── models.py
│   │   ├── search.py
│   │   └── views.py
│   ├── bea
│   │   ├── __init__.py
│   │   └── models.py
│   ├── bls
│   │   ├── __init__.py
│   │   └── models.py
│   ├── cbp
│   │   ├── __init__.py
│   │   ├── abstract_models.py
│   │   └── models.py
│   ├── chr
│   │   ├── __init__.py
│   │   └── models.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── api.py
│   │   ├── attr_crosswalking.py
│   │   ├── crosswalker.py
│   │   ├── exceptions.py
│   │   ├── join_api.py
│   │   ├── models.py
│   │   ├── registrar.py
│   │   ├── streaming.py
│   │   ├── table_manager.py
│   │   └── views.py
│   ├── dartmouth
│   │   ├── __init__.py
│   │   └── models.py
│   ├── database.py
│   ├── ed
│   │   ├── __init__.py
│   │   └── models.py
│   ├── freight
│   │   ├── __init__.py
│   │   └── models.py
│   ├── ipeds
│   │   ├── __init__.py
│   │   ├── abstract_models.py
│   │   └── models.py
│   ├── onet
│   │   ├── __init__.py
│   │   └── models.py
│   ├── opiods
│   │   ├── __init__.py
│   │   └── models.py
│   ├── pums
│   │   ├── __init__.py
│   │   ├── abstract_models.py
│   │   ├── models.py
│   │   └── models_5.py
│   └── util
│       ├── __init__.py
│       ├── big_places.py
│       └── inmem.py
├── requirements.txt
├── run.py
├── scripts
│   ├── __init__.py
│   ├── alt_fill_cache.py
│   ├── build_search.py
│   ├── cache_helper.applescript
│   ├── fill_cache.py
│   ├── fix_urlnames.py
│   ├── flickr
│   │   ├── __init__.py
│   │   ├── analyze.py
│   │   ├── flickr.py
│   │   ├── grab.py
│   │   └── short.py
│   ├── gen_indicies.py
│   ├── get_vnames.py
│   ├── search
│   │   ├── build_index.py
│   │   ├── build_var_index.py
│   │   ├── geo_aliases.csv
│   │   ├── rebuild_index.py
│   │   └── zip_lookup.sql
│   ├── university_abbrev_gen.py
│   ├── update_university_keywords.py
│   └── url_names.py
├── search_index
│   ├── MAIN_WRITELOCK
│   ├── MAIN_hzur5fe2wkrq53me.seg
│   └── _MAIN_1.toc
├── tests
│   ├── __init__.py
│   ├── test_joins.py
│   └── test_search.py
└── var_index
    ├── MAIN_WRITELOCK
    ├── MAIN_g1c93s1e37q8coxg.seg
    └── _MAIN_1.toc
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E501,E402
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 |
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 |
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 |
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *,cover
45 |
46 | # Translations
47 | *.mo
48 | *.pot
49 |
50 | # Django stuff:
51 | *.log
52 |
53 | # Sphinx documentation
54 | docs/_build/
55 |
56 | # PyBuilder
57 | target/
58 |
59 | # OSX stuff
60 | .DS_STORE
61 |
62 | # Ignore cache folder
63 | cache/
64 |
65 | .env
66 | .envrc
67 |
--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [MESSAGES CONTROL]
2 | disable=singleton-comparison
3 | generated-members=query
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - 2.7
4 |
5 | install:
6 | - pip install -r requirements.txt
7 |
8 | script: pytest tests/test_search.py
9 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DataUSA API (Python)
2 |
3 | **⚠️ Deprecated ⚠️: The API that this documentation refers to (api.datausa.io) went into legacy mode in November 2018 and was replaced by a newer version of the API with access to new & expanded data: https://datausa.io/about/api/**
4 |
5 | To learn more about the API, visit the [DataUSA API Wiki page](https://github.com/DataUSA/datausa-api/wiki) or the [DataUSA Quick Start Guide](http://beta.datausa.io/about/api/).
6 |
7 | ## Installation
8 |
9 | **DataUSA** is a web platform built using Flask, an open source Python web framework. This installation guide is written assuming a Linux or Linux-like environment. The following software must be installed locally in order to get DataUSA running:
10 |
11 | * Python
12 | * Postgres
13 |
14 | 1. Clone from GitHub
15 | git clone https://github.com/Datawheel/datausa-api.git
16 | 2. [optional] Create a virtual environment. We suggest installing [virtualenv](https://pypi.python.org/pypi/virtualenv) with [virtualenvwrapper](http://virtualenvwrapper.readthedocs.org/en/latest/), especially if the machine you are using hosts many other web projects. This allows Python libraries to be installed easily and specifically on a per-project basis.
17 |
18 | Once this is complete, run the following to initialize your datausa environment.
19 |
20 | mkvirtualenv datausa
21 |
22 | 3. Install Prerequisites
23 |
24 | sudo apt-get install python-dev
25 | sudo apt-get install libpq-dev
26 |
27 | 4. Install Python modules
28 |
29 | pip install -r requirements.txt
30 |
31 | 5. Set environment variables
32 |
33 | export DATAUSA_DB_NAME=db_name
34 | export DATAUSA_DB_USER=postgres_user
35 | export DATAUSA_DB_PW=postgres_pw
36 | export DATAUSA_DB_HOST=127.0.0.1
37 |
38 | 6. Run the API
39 | 
40 | python run.py
41 | 
--------------------------------------------------------------------------------
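A quick usage note: once the server is running, the API is plain HTTP returning JSON. A minimal sketch of hitting the attribute endpoints defined in datausa/attrs/views.py (assuming the Flask development server on its default port 5000; the requests library is an assumption here, not a project dependency):

    import requests

    # list every registered attribute type (served by /attrs/list/)
    print(requests.get("http://localhost:5000/attrs/list/").json())

    # full-text search for a place (served by /attrs/search/)
    print(requests.get("http://localhost:5000/attrs/search/",
                       params={"q": "new york", "kind": "geo"}).json())
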
/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 |
4 | ''' Base directory of where the site is held '''
5 | basedir = os.path.abspath(os.path.dirname(__file__))
6 |
7 | ''' CSRF (cross-site request forgery) protection for signing POST requests to the server '''
8 | CSRF_EN = True
9 |
10 | ''' Secret key should be set in environment var '''
11 | SECRET_KEY = os.environ.get("DATAVIVA_SECRET_KEY", "default-datausa-secret")
12 |
13 | ''' Default debugging to True '''
14 | DEBUG = True
15 | SQLALCHEMY_ECHO = True
16 | SQLALCHEMY_TRACK_MODIFICATIONS = False
17 | SQLALCHEMY_POOL_SIZE = 15
18 | SQLALCHEMY_POOL_TIMEOUT = 180
19 | SQLALCHEMY_POOL_RECYCLE = 150
20 | SQLALCHEMY_DATABASE_URI = "postgres://{0}:{1}@{2}:{3}/{4}".format(
21 | os.environ.get("DATAUSA_DB_USER", "postgres"),
22 | os.environ.get("DATAUSA_DB_PW", ""),
23 | os.environ.get("DATAUSA_DB_HOST", "localhost"),
24 | os.environ.get("DATAUSA_DB_PORT", 5432),
25 | os.environ.get("DATAUSA_DB_NAME", "postgres"))
26 |
27 | ''' If an env var for production is set turn off all debugging support '''
28 | if "DATAUSA_PRODUCTION" in os.environ:
29 | SQLALCHEMY_ECHO = False
30 | DEBUG = False
31 | ERROR_EMAIL = True
32 |
33 | JSONIFY_PRETTYPRINT_REGULAR = False
34 |
35 | CACHE_TYPE = 'filesystem'
36 | CACHE_DIR = os.path.join(basedir, 'cache/')
37 | CACHE_DEFAULT_TIMEOUT = int(os.environ.get("CACHE_DEFAULT_TIMEOUT", 60 * 60 * 24 * 7 * 4))  # 28 days; env override arrives as a string
38 | CACHE_THRESHOLD = 5000
39 |
40 | FLICKR_DIR = os.environ.get("DATAUSA_FLICKR_DIR", os.path.join(basedir, '../datausa-site/datausa/static/img/splash'))
41 | SEARCH_INDEX_DIR = os.path.join(basedir, 'search_index/')
42 | VAR_INDEX_DIR = os.path.join(basedir, 'var_index/')
43 |
--------------------------------------------------------------------------------
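Because config.py resolves its connection settings from the environment at import time, the assembled URI can be verified directly; a small sketch with placeholder credentials (illustrative values only):

    import os

    os.environ["DATAUSA_DB_USER"] = "postgres_user"  # hypothetical
    os.environ["DATAUSA_DB_NAME"] = "db_name"        # hypothetical

    import config  # must happen after the variables are set
    print(config.SQLALCHEMY_DATABASE_URI)
    # -> postgres://postgres_user:@localhost:5432/db_name
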
/datausa/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | from flask import Flask, jsonify
3 | from flask_compress import Compress
4 | # from flask.ext.cors import CORS
5 | from flask_cache import Cache
6 |
7 | app = Flask(__name__)
8 | app.config.from_object('config')
9 | Compress(app)
10 | cache = Cache(app)
11 |
12 | from datausa.attrs.views import mod as attrs_module
13 | from datausa.core.views import mod as core_module
14 |
15 | app.register_blueprint(attrs_module)
16 | app.register_blueprint(core_module)
17 |
18 | # CORS(app)
19 |
20 | @app.errorhandler(500)
21 | def error_page(err):
22 | return jsonify(error=str(err)), 500
23 |
--------------------------------------------------------------------------------
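Since the blueprints are registered at import time, the app can be exercised without a running server; a sketch using Flask's built-in test client (this assumes a reachable database, because several modules reflect tables when imported):

    from datausa import app

    client = app.test_client()
    resp = client.get("/attrs/list/")
    print(resp.status_code)
    print(resp.data)  # raw JSON bytes
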
/datausa/acs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/acs/__init__.py
--------------------------------------------------------------------------------
/datausa/acs/abstract_models.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy.ext.declarative import declared_attr
2 |
3 | from datausa.database import db
4 | from datausa.attrs.models import Geo, AcsOcc, AcsInd, GeoContainment
5 | from datausa.attrs.models import AcsLanguage, Insurance, AgeBucket
6 | from datausa.core.models import BaseModel
7 | from datausa.attrs.consts import NATION, STATE, COUNTY
8 | from datausa.attrs.consts import PUMA, MSA, ALL, GEO
9 | from datausa.attrs.consts import PLACE, TRACT
10 | from sqlalchemy.sql import func
11 |
12 | class BaseHealth(object):
13 | __virtual_schema__ = "acs_health"
14 | hc_pop = db.Column(db.Float)
15 | hc_pop_moe = db.Column(db.Float)
16 | hc_pop_rca = db.Column(db.Float)
17 |
18 | class AcsIndId(object):
19 | LEVELS = ["0", "1", "2", ALL]
20 |
21 | @classmethod
22 | def acs_ind_filter(cls, level):
23 | if level == ALL:
24 | return True
25 | else:
26 | target = (int(level) * 2) + 2
27 | return func.length(cls.acs_ind) == target
28 |
29 | @classmethod
30 | def get_supported_levels(cls):
31 | return {"acs_ind": AcsIndId.LEVELS}
32 |
33 | @declared_attr
34 | def acs_ind(cls):
35 | return db.Column(db.String(), db.ForeignKey(AcsInd.id),
36 | primary_key=True)
37 |
38 | class AcsOccId(object):
39 | LEVELS = ["0", "1", "2", "3", "4", ALL]
40 |
41 | @classmethod
42 | def get_supported_levels(cls):
43 | return {"acs_occ": AcsOccId.LEVELS}
44 |
45 | @declared_attr
46 | def acs_occ(cls):
47 | return db.Column(db.String(), db.ForeignKey(AcsOcc.id),
48 | primary_key=True)
49 |
50 | @classmethod
51 | def acs_occ_filter(cls, level):
52 | if level == ALL:
53 | return True
54 | else:
55 | target = (int(level) * 2) + 2
56 | return func.length(cls.acs_occ) == target
57 |
58 |
59 | class GeoId(object):
60 | LEVELS = [NATION, STATE, COUNTY, MSA, PUMA, PLACE, TRACT, ALL]
61 | LEVELS_1YR = [NATION, STATE, COUNTY, MSA, PLACE, ALL]
62 | LEVELS_5YR = LEVELS
63 |
64 | JOINED_FILTER = {"geo": {
65 | "triggers": [("tract", "160"), ("tract", "310"),
66 | ("tract", "050"), ("county", "310")],
67 | "table": GeoContainment.parent,
68 | "column": GeoContainment.parent_geoid,
69 | "id": GeoContainment.child_geoid,
70 | }}
71 | @classmethod
72 | def get_supported_levels(cls):
73 | return {GEO: GeoId.LEVELS}
74 |
75 | @classmethod
76 | def geo_filter(cls, level):
77 | if level == ALL:
78 | return True
79 | level_map = {NATION: "010", STATE: "040",
80 | PUMA: "795", MSA: "310",
81 | COUNTY: "050", PLACE: "160", TRACT: "140"}
82 | level_code = level_map[level]
83 | return cls.geo.startswith(level_code)
84 |
85 | @declared_attr
86 | def geo(cls):
87 | return db.Column(db.String(), db.ForeignKey(Geo.id), primary_key=True)
88 |
89 | class GeoId1(GeoId):
90 | @classmethod
91 | def get_supported_levels(cls):
92 | return {GEO: GeoId.LEVELS_1YR}
93 |
94 | class GeoId5(GeoId):
95 | @classmethod
96 | def get_supported_levels(cls):
97 | return {GEO: GeoId.LEVELS_5YR}
98 |
99 | class BaseAcs5(db.Model, BaseModel):
100 | __abstract__ = True
101 | schema_name = 'acs_5yr'
102 | __table_args__ = {"schema": schema_name, "extend_existing": True}
103 | supported_levels = {}
104 | source_title = 'ACS 5-year Estimate'
105 | source_link = 'http://www.census.gov/programs-surveys/acs/'
106 | source_org = 'Census Bureau'
107 | CACHED_YEARS = [2013, 2014, 2015, 2016]
108 |
109 | @declared_attr
110 | def year(cls):
111 | return db.Column(db.Integer, primary_key=True)
112 |
113 |
114 | class BaseAcs3(db.Model, BaseModel):
115 | __abstract__ = True
116 | schema_name = 'acs_3year'
117 | __table_args__ = {"schema": schema_name}
118 | supported_levels = {}
119 | source_title = 'ACS 3-year Estimate'
120 | source_link = 'http://www.census.gov/programs-surveys/acs/'
121 | source_org = 'Census Bureau'
122 |
123 | @declared_attr
124 | def year(cls):
125 | return db.Column(db.Integer, primary_key=True)
126 |
127 |
128 | class BaseAcs1(db.Model, BaseModel):
129 | __abstract__ = True
130 | schema_name = 'acs_1yr'
131 | __table_args__ = {"schema": schema_name, "extend_existing": True}
132 | supported_levels = {}
133 | source_title = 'ACS 1-year Estimate'
134 | source_link = 'http://www.census.gov/programs-surveys/acs/'
135 | source_org = 'Census Bureau'
136 | CACHED_YEARS = [2013, 2014, 2015, 2016]
137 |
138 | @declared_attr
139 | def year(cls):
140 | return db.Column(db.Integer, primary_key=True)
141 |
142 |
143 | class Ygl_Speakers(object):
144 | median_moe = 2
145 |
146 | num_speakers = db.Column(db.Float)
147 | num_speakers_moe = db.Column(db.Float)
148 | num_speakers_rca = db.Column(db.Float)
149 |
150 | @declared_attr
151 | def language(cls):
152 | return db.Column(db.String(), db.ForeignKey(AcsLanguage.id), primary_key=True)
153 |
--------------------------------------------------------------------------------
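Two conventions drive the filters above: geo IDs start with a three-digit summary-level prefix, and ACS industry/occupation IDs grow two characters per depth level. A standalone sketch of the same arithmetic, outside SQLAlchemy:

    # prefix convention used by GeoId.geo_filter
    level_map = {"nation": "010", "state": "040", "puma": "795", "msa": "310",
                 "county": "050", "place": "160", "tract": "140"}
    assert "04000US25".startswith(level_map["state"])  # illustrative geo ID

    # length convention used by AcsIndId.acs_ind_filter: level n -> 2n + 2 chars
    for level in ["0", "1", "2"]:
        print(level, (int(level) * 2) + 2)  # 0 -> 2, 1 -> 4, 2 -> 6
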
/datausa/acs/automap_models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa import cache
3 | from datausa.acs.abstract_models import BaseAcs1, BaseAcs5, GeoId, GeoId5, GeoId1
4 | from sqlalchemy.ext.automap import automap_base
5 | from sqlalchemy import MetaData
6 |
7 | metadata = cache.get("acs5_metadata")
8 | if not metadata:
9 | metadata = MetaData(schema=BaseAcs5.schema_name, bind=db.engine)
10 | metadata.reflect()
11 | cache.set("acs5_metadata", metadata)
12 |
13 | AutomapBase = automap_base(bind=db.engine, metadata=metadata)
14 |
15 | metadata_1yr = cache.get("acs1_metadata")
16 | if not metadata_1yr:
17 | metadata_1yr = MetaData(schema=BaseAcs1.schema_name, bind=db.engine)
18 | metadata_1yr.reflect()
19 | cache.set("acs1_metadata", metadata_1yr)
20 |
21 | AutomapBase_1yr = automap_base(bind=db.engine, metadata=metadata_1yr)
22 |
23 | # 1 year
24 | class Acs1_Yg_Income(AutomapBase_1yr, BaseAcs1, GeoId1):
25 | __tablename__ = "yg_income"
26 | median_moe = 1.2
27 |
28 | class Acs1_Yg_Poverty(AutomapBase_1yr, BaseAcs1, GeoId1):
29 | __tablename__ = 'yg_poverty'
30 | median_moe = 1.2
31 |
32 | class Acs1_Yg_Tenure(AutomapBase_1yr, BaseAcs1, GeoId1):
33 | __tablename__ = 'yg_tenure'
34 | median_moe = 1.2
35 |
36 | class Acs1_Yg(AutomapBase_1yr, BaseAcs1, GeoId1):
37 | __tablename__ = "yg"
38 | median_moe = 1.2
39 |
40 | class Acs1_Yg_IncDist(AutomapBase_1yr, BaseAcs1, GeoId1):
41 | __tablename__ = "yg_income_distribution"
42 | median_moe = 2.2
43 |
44 | class Acs1_Yg_PovertyRace(AutomapBase_1yr, BaseAcs1, GeoId1):
45 | __tablename__ = 'yg_poverty_race'
46 | median_moe = 2.2
47 |
48 | class Acs1_Yg_NatAge(AutomapBase_1yr, BaseAcs1, GeoId1):
49 | __tablename__ = 'yg_nativity_age'
50 | median_moe = 1.2
51 |
52 | class Acs1_Yg_Race(AutomapBase_1yr, BaseAcs1, GeoId1):
53 | __tablename__ = 'yg_race'
54 | median_moe = 1.2
55 |
56 | class Acs1_Yg_Conflict(AutomapBase_1yr, BaseAcs1, GeoId1):
57 | __tablename__ = "yg_conflict"
58 | median_moe = 2.2
59 |
60 | class Acs1_Yg_PropertyValue(AutomapBase_1yr, BaseAcs1, GeoId1):
61 | __tablename__ = 'yg_property_value'
62 | median_moe = 1.2
63 |
64 | class Acs1_Yg_PropertyTax(AutomapBase_1yr, BaseAcs1, GeoId1):
65 | __tablename__ = 'yg_property_tax'
66 | median_moe = 1.2
67 |
68 | class Acs1_Yg_Vehicles(AutomapBase_1yr, BaseAcs1, GeoId1):
69 | __tablename__ = 'yg_vehicles'
70 | median_moe = 1.2
71 |
72 | class Acs1_Yg_TravelTime(AutomapBase_1yr, BaseAcs1, GeoId1):
73 | __tablename__ = 'yg_travel_time'
74 | median_moe = 1.2
75 |
76 | class Acs1_Yg_Transport(AutomapBase_1yr, BaseAcs1, GeoId1):
77 | __tablename__ = 'yg_transport'
78 | median_moe = 1.2
79 |
80 | # 5 year
81 |
82 | class Acs5_Yg(AutomapBase, BaseAcs5, GeoId5):
83 | __tablename__ = "yg"
84 | median_moe = 1
85 |
86 | class Acs5_Yg_Conflict(AutomapBase, BaseAcs5, GeoId5):
87 | __tablename__ = "yg_conflict"
88 | median_moe = 2
89 |
90 | class Acs5_Yg_Income(AutomapBase, BaseAcs5, GeoId5):
91 | __tablename__ = "yg_income"
92 | median_moe = 1
93 |
94 | class Acs5_Yg_IncDist(AutomapBase, BaseAcs5, GeoId5):
95 | __tablename__ = "yg_income_distribution"
96 | median_moe = 2
97 |
98 | class Acs5_Yg_NatAge(AutomapBase, BaseAcs5, GeoId5):
99 | __tablename__ = 'yg_nativity_age'
100 | median_moe = 1
101 |
102 |
103 | class Acs5_Yg_Poverty(AutomapBase, BaseAcs5, GeoId5):
104 | __tablename__ = 'yg_poverty'
105 | median_moe = 1
106 |
107 |
108 | class Acs5_Yg_PropertyTax(AutomapBase, BaseAcs5, GeoId5):
109 | __tablename__ = 'yg_property_tax'
110 | median_moe = 1
111 |
112 |
113 | class Acs5_Yg_PropertyValue(AutomapBase, BaseAcs5, GeoId5):
114 | __tablename__ = 'yg_property_value'
115 | median_moe = 1
116 |
117 |
118 | class Acs5_Yg_Race(AutomapBase, BaseAcs5, GeoId5):
119 | __tablename__ = 'yg_race'
120 | median_moe = 1
121 |
122 |
123 | class Acs5_Yg_PovertyRace(AutomapBase, BaseAcs5, GeoId5):
124 | __tablename__ = 'yg_poverty_race'
125 | median_moe = 2
126 |
127 |
128 | class Acs5_Yg_Tenure(AutomapBase, BaseAcs5, GeoId5):
129 | __tablename__ = 'yg_tenure'
130 | median_moe = 1
131 |
132 |
133 | class Acs5_Yg_Transport(AutomapBase, BaseAcs5, GeoId5):
134 | __tablename__ = 'yg_transport'
135 | median_moe = 1
136 |
137 |
138 | class Acs5_Yg_TravelTime(AutomapBase, BaseAcs5, GeoId5):
139 | __tablename__ = 'yg_travel_time'
140 | median_moe = 1
141 |
142 |
143 | class Acs5_Yg_Vehicles(AutomapBase, BaseAcs5, GeoId5):
144 | __tablename__ = 'yg_vehicles'
145 | median_moe = 1
146 |
147 | AutomapBase_1yr.prepare(db.engine, reflect=False)
148 | AutomapBase.prepare(db.engine, reflect=False)
149 |
--------------------------------------------------------------------------------
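Once prepare() has run, the automapped classes behave like ordinary Flask-SQLAlchemy models; a hedged usage sketch (assuming an application context and a populated acs_5yr.yg table; the geo ID is illustrative):

    from datausa.acs.automap_models import Acs5_Yg

    rows = (Acs5_Yg.query.filter_by(geo="04000US25")
            .order_by(Acs5_Yg.year.desc()).limit(3).all())
    for row in rows:
        print(row.year, row.geo)
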
/datausa/acs/models.py:
--------------------------------------------------------------------------------
1 | from datausa.acs.abstract_models import GeoId, AcsOccId, db, AcsIndId
2 | from datausa.acs.abstract_models import BaseAcs1, BaseAcs3, BaseAcs5
3 | from datausa.acs.abstract_models import Ygl_Speakers, GeoId5, GeoId1, BaseHealth
4 | from datausa.attrs import consts
5 | from datausa.attrs.consts import NATION, STATE, MSA, PLACE, PUMA, COUNTY, ALL
6 |
7 | class Acs1_Ygi_Health(BaseAcs1, GeoId1, BaseHealth):
8 | __tablename__ = "ygi_health"
9 | median_moe = 2
10 |
11 | insurance = db.Column(db.String(), primary_key=True)
12 |
13 | @classmethod
14 | def get_supported_levels(cls):
15 | return {
16 | "geo": [NATION, STATE, COUNTY, PLACE, MSA, PUMA, ALL],
17 | "insurance": [ALL]
18 | }
19 |
20 | class Acs1_Yga_Health(BaseAcs1, GeoId1, BaseHealth):
21 | __tablename__ = "yga_health"
22 | median_moe = 2
23 |
24 | age_bucket = db.Column(db.String(), primary_key=True)
25 |
26 | @classmethod
27 | def get_supported_levels(cls):
28 | return {
29 | "geo": [NATION, STATE, COUNTY, PLACE, MSA, PUMA, ALL],
30 | "age_bucket": [ALL]
31 | }
32 |
33 | class Acs1_Ygai_Health(BaseAcs1, GeoId1, BaseHealth):
34 | __tablename__ = "ygai_health"
35 | median_moe = 3
36 |
37 | age_bucket = db.Column(db.String(), primary_key=True)
38 | insurance = db.Column(db.String(), primary_key=True)
39 |
40 | @classmethod
41 | def get_supported_levels(cls):
42 | return {
43 | "geo": [NATION, STATE, COUNTY, PLACE, MSA, PUMA, ALL],
44 | "insurance": [ALL],
45 | "age_bucket": [ALL]
46 | }
47 |
48 |
49 | class Acs1_Ygis_Health(BaseAcs1, GeoId1, BaseHealth):
50 | __tablename__ = "ygis_health"
51 | median_moe = 3
52 |
53 | sex = db.Column(db.String(), primary_key=True)
54 | insurance = db.Column(db.String(), primary_key=True)
55 |
56 | @classmethod
57 | def get_supported_levels(cls):
58 | return {
59 | "geo": [NATION, STATE, COUNTY, PLACE, MSA, PUMA, ALL],
60 | "insurance": [ALL],
61 | "sex": [ALL]
62 | }
63 |
64 |
65 |
66 | class Acs1_Ygas_Health(BaseAcs1, GeoId1, BaseHealth):
67 | __tablename__ = "ygas_health"
68 | median_moe = 3
69 |
70 | sex = db.Column(db.String(), primary_key=True)
71 | age_bucket = db.Column(db.String(), primary_key=True)
72 |
73 | @classmethod
74 | def get_supported_levels(cls):
75 | return {
76 | "geo": [NATION, STATE, COUNTY, PLACE, MSA, PUMA, ALL],
77 | "sex": [ALL],
78 | "age_bucket": [ALL]
79 | }
80 |
81 | class Acs1_Ygs_Health(BaseAcs1, GeoId1, BaseHealth):
82 | __tablename__ = "ygs_health"
83 | median_moe = 2
84 |
85 | sex = db.Column(db.String(), primary_key=True)
86 |
87 | @classmethod
88 | def get_supported_levels(cls):
89 | return {
90 | "geo": [NATION, STATE, COUNTY, PLACE, MSA, PUMA, ALL],
91 | "sex": [ALL]
92 | }
93 |
94 |
95 | class Acs1_Ygais_Health(BaseAcs1, GeoId1, BaseHealth):
96 | __tablename__ = "ygais_health"
97 | median_moe = 4
98 |
99 | sex = db.Column(db.String(), primary_key=True)
100 | age_bucket = db.Column(db.String(), primary_key=True)
101 | insurance = db.Column(db.String(), primary_key=True)
102 |
103 | @classmethod
104 | def get_supported_levels(cls):
105 | return {
106 | "geo": [NATION, STATE, COUNTY, PLACE, MSA, PUMA, ALL],
107 | "sex": [ALL],
108 | "insurance": [ALL],
109 | "age_bucket": [ALL]
110 | }
111 |
112 |
113 | class Acs1_Ygl_Speakers(BaseAcs1, GeoId1, Ygl_Speakers):
114 | __tablename__ = "ygl_speakers"
115 | median_moe = 2.2
116 | CACHED_YEARS = [2013, 2014, 2015]
117 |
118 | @classmethod
119 | def get_supported_levels(cls):
120 | return {"geo": GeoId.LEVELS_1YR, "language": [consts.ALL]}
121 |
122 |
123 | class Acs5_Ygl_Speakers(BaseAcs5, GeoId5, Ygl_Speakers):
124 | __tablename__ = "ygl_speakers"
125 | median_moe = 2
126 | CACHED_YEARS = [2013, 2014, 2015]
127 |
128 | @classmethod
129 | def get_supported_levels(cls):
130 | return {"geo": GeoId.LEVELS_5YR, "language": [consts.ALL]}
131 |
132 |
133 | class Acs3_Ygo_Num_Emp(BaseAcs3, GeoId, AcsOccId):
134 | __tablename__ = "ygo_num_emp"
135 | median_moe = 2
136 |
137 | num_emp = db.Column(db.Float)
138 | num_emp_moe = db.Column(db.Float)
139 | num_emp_rca = db.Column(db.Float)
140 | num_emp_male = db.Column(db.Float)
141 | num_emp_moe_male = db.Column(db.Float)
142 | num_emp_female = db.Column(db.Float)
143 | num_emp_moe_female = db.Column(db.Float)
144 |
145 | @classmethod
146 | def get_supported_levels(cls):
147 | return {"geo": [NATION, STATE, MSA, ALL], "acs_occ": AcsOccId.LEVELS}
148 |
149 |
150 | class Acs1_Ygo_Num_Emp(BaseAcs1, GeoId, AcsOccId):
151 | __tablename__ = "ygo_num_emp"
152 | median_moe = 2.5
153 |
154 | num_emp = db.Column(db.Float)
155 | num_emp_moe = db.Column(db.Float)
156 | num_emp_rca = db.Column(db.Float)
157 | num_emp_male = db.Column(db.Float)
158 | num_emp_moe_male = db.Column(db.Float)
159 | num_emp_female = db.Column(db.Float)
160 | num_emp_moe_female = db.Column(db.Float)
161 |
162 | @classmethod
163 | def get_supported_levels(cls):
164 | return {"geo": [NATION, COUNTY, MSA, PLACE, PUMA, ALL], "acs_occ": AcsOccId.LEVELS}
165 |
166 |
167 | class Acs1_Ygo_Earnings(BaseAcs1, GeoId, AcsOccId):
168 | __tablename__ = "ygo_med_earnings"
169 | median_moe = 2.5
170 |
171 | med_earnings = db.Column(db.Float)
172 | med_earnings_male = db.Column(db.Float)
173 | med_earnings_female = db.Column(db.Float)
174 | med_earnings_moe = db.Column(db.Float)
175 | med_earnings_moe_female = db.Column(db.Float)
176 | med_earnings_moe_male = db.Column(db.Float)
177 | med_earnings_rca = db.Column(db.Float)
178 |
179 | @classmethod
180 | def get_supported_levels(cls):
181 | return {"geo": [NATION, STATE, COUNTY, MSA, PLACE, PUMA, ALL], "acs_occ": AcsOccId.LEVELS}
182 |
183 |
184 | class Acs1_Ygi_Num_Emp(BaseAcs1, AcsIndId, GeoId):
185 | __tablename__ = "ygi_num_emp"
186 | median_moe = 2.5
187 |
188 | num_emp = db.Column(db.Float)
189 | num_emp_moe = db.Column(db.Float)
190 | num_emp_rca = db.Column(db.Float)
191 |
192 | @classmethod
193 | def get_supported_levels(cls):
194 | return {"geo": [NATION, STATE, COUNTY, PLACE, MSA, PUMA, ALL],
195 | "acs_ind": ["0", "1", ALL]}
196 |
197 |
198 | class Acs5_Ygo_Num_Emp(BaseAcs5, GeoId, AcsOccId):
199 | __tablename__ = "ygo_num_emp"
200 | median_moe = 2
201 |
202 | num_emp = db.Column(db.Float)
203 | num_emp_moe = db.Column(db.Float)
204 | num_emp_rca = db.Column(db.Float)
205 | num_emp_male = db.Column(db.Float)
206 | num_emp_moe_male = db.Column(db.Float)
207 | num_emp_female = db.Column(db.Float)
208 | num_emp_moe_female = db.Column(db.Float)
209 |
210 | @classmethod
211 | def get_supported_levels(cls):
212 | return {"geo": [NATION, COUNTY, MSA, PLACE, PUMA, ALL], "acs_occ": AcsOccId.LEVELS}
213 |
214 |
215 | class Acs5_Ygo_Earnings(BaseAcs5, GeoId, AcsOccId):
216 | __tablename__ = "ygo_med_earnings"
217 | median_moe = 2
218 |
219 | med_earnings = db.Column(db.Float)
220 | med_earnings_male = db.Column(db.Float)
221 | med_earnings_female = db.Column(db.Float)
222 | med_earnings_moe = db.Column(db.Float)
223 | med_earnings_moe_female = db.Column(db.Float)
224 | med_earnings_moe_male = db.Column(db.Float)
225 | med_earnings_rca = db.Column(db.Float)
226 |
227 | @classmethod
228 | def get_supported_levels(cls):
229 | return {"geo": [NATION, STATE, COUNTY, MSA, PLACE, PUMA, ALL], "acs_occ": AcsOccId.LEVELS}
230 |
231 |
232 | class Acs3_Ygi_Num_Emp(BaseAcs3, AcsIndId, GeoId):
233 | __tablename__ = "ygi_num_emp"
234 | median_moe = 2
235 |
236 | num_emp = db.Column(db.Float)
237 | num_emp_moe = db.Column(db.Float)
238 | num_emp_rca = db.Column(db.Float)
239 |
240 | @classmethod
241 | def get_supported_levels(cls):
242 | return {"geo": [NATION, STATE, MSA, ALL], "acs_ind": AcsIndId.LEVELS}
243 |
244 |
245 | class Acs5_Ygi_Num_Emp(BaseAcs5, AcsIndId, GeoId):
246 | __tablename__ = "ygi_num_emp"
247 | median_moe = 1.9
248 |
249 | num_emp = db.Column(db.Float)
250 | num_emp_moe = db.Column(db.Float)
251 | num_emp_rca = db.Column(db.Float)
252 |
253 | @classmethod
254 | def get_supported_levels(cls):
255 | return {"geo": [NATION, STATE, COUNTY, PLACE, MSA, PUMA, ALL],
256 | "acs_ind": ["0", "1", ALL]}
257 |
258 |
259 | class Acs3_Ygi_MedEarnings(BaseAcs3, AcsIndId, GeoId):
260 | __tablename__ = "ygi_med_earnings"
261 | median_moe = 2
262 |
263 | med_earnings = db.Column(db.Float)
264 | med_earnings_moe = db.Column(db.Float)
265 |
266 | @classmethod
267 | def get_supported_levels(cls):
268 | return {"geo": [NATION, STATE, COUNTY, MSA, PLACE, PUMA, ALL], "acs_ind": ["0", "1", "all"]}
269 |
270 |
271 | class Acs1_Yg_Num_Emp(BaseAcs1, GeoId):
272 | __tablename__ = "yg_num_emp"
273 | median_moe = 1.2
274 |
275 | civ_labor_force = db.Column(db.Float)
276 | civ_labor_force_moe = db.Column(db.Float)
277 | emp_survey_total = db.Column(db.Float)
278 | emp_survey_total_moe = db.Column(db.Float)
279 | labor_force = db.Column(db.Float)
280 | labor_force_moe = db.Column(db.Float)
281 | not_in_labor_force = db.Column(db.Float)
282 | not_in_labor_force_moe = db.Column(db.Float)
283 | num_armed_forces = db.Column(db.Float)
284 | num_armed_forces_moe = db.Column(db.Float)
285 | num_emp = db.Column(db.Float)
286 | num_emp_moe = db.Column(db.Float)
287 | num_unemp = db.Column(db.Float)
288 | num_unemp_moe = db.Column(db.Float)
289 |
290 | @classmethod
291 | def get_supported_levels(cls):
292 | return {"geo": [NATION, COUNTY, MSA, PLACE, PUMA, ALL], "acs_occ": AcsOccId.LEVELS}
293 |
294 |
295 | class Acs5_Yg_Num_Emp(BaseAcs5, GeoId):
296 | __tablename__ = "yg_num_emp"
297 | median_moe = 1
298 |
299 | civ_labor_force = db.Column(db.Float)
300 | civ_labor_force_moe = db.Column(db.Float)
301 | emp_survey_total = db.Column(db.Float)
302 | emp_survey_total_moe = db.Column(db.Float)
303 | labor_force = db.Column(db.Float)
304 | labor_force_moe = db.Column(db.Float)
305 | not_in_labor_force = db.Column(db.Float)
306 | not_in_labor_force_moe = db.Column(db.Float)
307 | num_armed_forces = db.Column(db.Float)
308 | num_armed_forces_moe = db.Column(db.Float)
309 | num_emp = db.Column(db.Float)
310 | num_emp_moe = db.Column(db.Float)
311 | num_unemp = db.Column(db.Float)
312 | num_unemp_moe = db.Column(db.Float)
313 |
314 | @classmethod
315 | def get_supported_levels(cls):
316 | return {"geo": [NATION, COUNTY, MSA, PLACE, PUMA, ALL], "acs_occ": AcsOccId.LEVELS}
317 |
--------------------------------------------------------------------------------
/datausa/acs/stats_models.py:
--------------------------------------------------------------------------------
1 | from datausa.acs.abstract_models import GeoId, db
2 | from datausa.core.models import BaseModel
3 | from sqlalchemy.dialects import postgresql
4 | from datausa.attrs import consts
5 |
6 |
7 | class BaseStat(db.Model, BaseModel):
8 | __abstract__ = True
9 | __table_args__ = {"schema": "stats"}
10 | supported_levels = {}
11 | source_title = 'ACS 5-year Estimate'
12 | source_link = 'http://www.census.gov/programs-surveys/acs/'
13 | source_org = 'Census Bureau'
14 |
15 |
16 | class StateStats(BaseStat, GeoId):
17 | __tablename__ = "state"
18 | median_moe = 1.2
19 |
20 | year = db.Column(db.Integer, primary_key=True)
21 | state_rank = db.Column(db.Integer)
22 | top_places = db.Column(postgresql.ARRAY(db.String))
23 | top_counties = db.Column(postgresql.ARRAY(db.String))
24 | state_neighbors = db.Column(postgresql.ARRAY(db.String))
25 |
26 | @classmethod
27 | def get_supported_levels(cls):
28 | return {"geo": [consts.STATE]}
29 |
30 |
31 | class CountyStats(BaseStat, GeoId):
32 | __tablename__ = "counties"
33 | median_moe = 1.2
34 |
35 | year = db.Column(db.Integer, primary_key=True)
36 | county_state_rank = db.Column(db.Integer)
37 | places_in_county = db.Column(db.Integer)
38 | top_places = db.Column(postgresql.ARRAY(db.String))
39 | county_neighbors = db.Column(postgresql.ARRAY(db.String))
40 |
41 | @classmethod
42 | def get_supported_levels(cls):
43 | return {"geo": [consts.COUNTY]}
44 |
45 |
46 | class MSAStats(BaseStat, GeoId):
47 | __tablename__ = "msa"
48 | median_moe = 1.2
49 |
50 | top_counties = db.Column(postgresql.ARRAY(db.String))
51 | top_places = db.Column(postgresql.ARRAY(db.String))
52 |
53 | @classmethod
54 | def get_supported_levels(cls):
55 | return {"geo": [consts.MSA]}
56 |
57 |
58 | class PlaceStats(BaseStat, GeoId):
59 | __tablename__ = "place"
60 | median_moe = 1.2
61 |
62 | parent_counties = db.Column(postgresql.ARRAY(db.String))
63 | places_neighbors = db.Column(postgresql.ARRAY(db.String))
64 |
65 | @classmethod
66 | def get_supported_levels(cls):
67 | return {"geo": [consts.PLACE]}
68 |
69 |
70 | class PUMAStats(BaseStat, GeoId):
71 | __tablename__ = "puma"
72 | median_moe = 1.2
73 |
74 | puma_state_rank = db.Column(db.Integer)
75 | pumas_in_state = db.Column(db.Integer)
76 | puma_neighbors = db.Column(postgresql.ARRAY(db.String))
77 |
78 | @classmethod
79 | def get_supported_levels(cls):
80 | return {"geo": [consts.PUMA]}
81 |
--------------------------------------------------------------------------------
/datausa/attrs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/attrs/__init__.py
--------------------------------------------------------------------------------
/datausa/attrs/consts.py:
--------------------------------------------------------------------------------
1 | GEO = 'geo'
2 | PUMA = 'puma'
3 | MSA = 'msa'
4 | COUNTY = 'county'
5 | STATE = 'state'
6 | NATION = 'nation'
7 | TRACT = 'tract'
8 | PLACE = 'place'
9 | 
10 | ALL = 'all'
11 | OR = ","
12 | YEAR = 'year'
13 | LATEST = 'latest'
14 | OLDEST = 'oldest'
15 | GEO_LEVEL_MAP = {NATION: "010", STATE: "040", COUNTY: "050",
16 | PUMA: "795", MSA: "310", PLACE: "160", TRACT: "140"}
17 | LEVEL_TO_GEO = {v: k for k, v in GEO_LEVEL_MAP.items()}
18 | 
19 | POP_THRESHOLD = 250000
20 | NO_VALUE_ADDED = 'no_value_added'
21 | 
--------------------------------------------------------------------------------
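The two lookup tables invert each other, which is what lets callers move between level names and the three-digit prefixes found at the start of every geo ID; a quick sketch:

    from datausa.attrs.consts import GEO_LEVEL_MAP, LEVEL_TO_GEO

    assert GEO_LEVEL_MAP["county"] == "050"
    assert LEVEL_TO_GEO["050"] == "county"
    print(LEVEL_TO_GEO["16000US4260000"[:3]])  # "place" (illustrative ID)
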
/datausa/attrs/search.py:
--------------------------------------------------------------------------------
1 | import re
2 | import json
3 |
4 | from whoosh.qparser import QueryParser
5 | from whoosh import index, sorting, qparser, scoring, query
6 | from config import SEARCH_INDEX_DIR, VAR_INDEX_DIR
7 | from whoosh.lang.porter import stem
8 | from whoosh.analysis import RegexTokenizer
9 |
10 | class SimpleWeighter(scoring.BM25F):
11 | use_final = True
12 |
13 | def __init__(self, fullterm, *args, **kwargs):
14 | self.fullterm = fullterm.lower().strip()
15 | super(SimpleWeighter, self).__init__(*args, **kwargs)
16 |
17 | def final(self, searcher, docnum, score_me):
18 | name = searcher.stored_fields(docnum).get("name")
19 | zvalue = searcher.stored_fields(docnum).get("zvalue")
20 | zscore = zvalue * .15
21 |
22 | if name == self.fullterm:
23 | return score_me * 30 + (25 * abs(zscore))
24 | elif name.startswith(self.fullterm):
25 | if zvalue > 0:
26 | return (score_me * 5.75) + (25 * zscore)
27 | else:
28 | return score_me * 5.75 + (1 - abs(zscore) * 25)
29 | elif self.fullterm.startswith(name[:10]):
30 | return score_me * 3 + abs(zscore)
31 | elif self.fullterm.startswith(name[:5]):
32 | return score_me * 1.5 + abs(zscore)
33 | # return (score_me * 1.75) + (10 * zvalue)
34 | return (score_me * 0.75) + (zscore * 0.31)
35 |
36 |
37 | vars_ix = index.open_dir(VAR_INDEX_DIR)
38 | vars_qp = QueryParser("name", schema=vars_ix.schema, group=qparser.OrGroup)
39 |
40 |
41 | ix = index.open_dir(SEARCH_INDEX_DIR)
42 | qp = QueryParser("name", schema=ix.schema, group=qparser.OrGroup)
43 | facet = sorting.FieldFacet("zvalue", reverse=True)
44 | scores = sorting.ScoreFacet()
45 |
46 |
47 | def nationwide_results(data, my_vars, attr_score, var_score, usr_query):
48 | '''given attribute search results and variable search results, determine
49 | if we should inject the US page into the data'''
50 | attr_ids = [row[0] for row in data]
51 | usa = '01000US'
52 | var_names = [my_var["description"][0].title() for my_var in my_vars] if my_vars else []
53 | var_names = ", ".join(var_names[:-1]) + " and {}".format(var_names[-1]) if len(var_names) > 1 else "".join(var_names)
54 | name = "{} in United States".format(var_names) if my_vars else None
55 |
56 | put_us_first = False
57 |
58 | pos = 0
59 | for row in data[:3]:
60 | raw_name = row[1].lower() if data else ""
61 | first_name = raw_name.split(" ")[0]
62 | put_us_first = not (usr_query.startswith(first_name) or
63 | usr_query.endswith(first_name) or
64 | raw_name[:6] in usr_query or
65 | first_name.startswith(usr_query))
66 | if put_us_first:
67 | break
68 | else:
69 | pos += 1
70 | if my_vars and var_score and var_score * 20 > attr_score:
71 | data.insert(pos, [usa, name, 10, "geo", name, "010", "united-states"])
72 | elif my_vars and usa not in attr_ids and len(data) < 10:
73 | data.insert(pos, [usa, name, 10, "geo", name, "010", "united-states"])
74 |
75 | return data
76 |
77 |
78 |
79 | def do_search(txt, sumlevel=None, kind=None, tries=0, limit=10, is_stem=None, my_vars=None):
80 | txt = txt.replace(",", "")
81 |
82 | my_filter = None
83 |
84 | if kind and sumlevel:
85 | kf = query.Term("kind", kind)
86 | sf = query.Term("sumlevel", sumlevel)
87 | my_filter = query.And([kf, sf])
88 | elif kind:
89 | my_filter = query.Term("kind", kind)
90 | elif sumlevel:
91 | my_filter = query.Term("sumlevel", sumlevel)
92 | if is_stem and is_stem > 0 and my_filter is not None:
93 | my_filter = my_filter & query.NumericRange("is_stem", 1, is_stem)
94 | elif is_stem and is_stem > 0 and my_filter is None:
95 | my_filter = query.NumericRange("is_stem", 1, is_stem)
96 |
97 | if tries > 2:
98 | return [], [], [], []
99 | q = qp.parse(txt)
100 |
101 | rext = RegexTokenizer()
102 | var_txt = u" ".join([stem(token.text) if len(token.text) > 3 else token.text for token in rext(unicode(txt))])
103 |
104 | var_q = vars_qp.parse(var_txt)
105 | var_keywords = {}
106 | vars_max_score = None
107 | # search for variables in query
108 | if not my_vars:
109 | # my_vars can save original vars detected before autocorrecting for spelling,
110 | # so we'll only do var searches that haven't yet been autocorrected
111 | with vars_ix.searcher() as s:
112 | # s = vars_ix.searcher()
113 | results = s.search(var_q)
114 | # raise Exception(list(results)[0])
115 | vscores = [r.score for r in results]
116 | vars_max_score = max(vscores) if vscores else None
117 |
118 | my_vars = [{"matched_on": r.highlights("name"),
119 | "name": r["name"],
120 | "description": r["description"].split(","),
121 | "section": r["section"],
122 | "section_title": r["section_title"],
123 | "related_attrs": r["related_attrs"].split(","),
124 | "related_vars": r["related_vars"].split(","),
125 | "params": json.loads(r["params"]) if 'params' in r else None} for r in results]
126 | if my_vars:
127 | already_seen = []
128 | filtered_my_vars = []
129 | for my_var in my_vars:
130 | if my_var["related_vars"] not in already_seen:
131 | filtered_my_vars.append(my_var)
132 | already_seen.append(my_var["related_vars"])
133 | highlight_txt = my_var["matched_on"]
134 |
135 | if highlight_txt:
136 | matches = re.findall(r'<strong[^>]*>([^>]+)</strong>', highlight_txt)
137 | if matches:
138 | for matched_txt in matches:
139 | var_keywords[matched_txt] = True
140 | my_vars = filtered_my_vars
141 |
142 | try:
143 | for term in q:
144 | for keyword in var_keywords.keys():
145 | if term.text == 'in' and " in " in txt:
146 | term.boost = -1
147 | elif term.text in keyword or keyword in term.text:
148 | term.boost = -0.5
149 | except NotImplementedError:
150 | for keyword in var_keywords.keys():
151 | if q.text == 'in' and " in " in txt:
152 | q.boost = -1
153 | elif q.text in keyword or keyword in q.text:
154 | q.boost = -0.5
155 |
156 | weighter = SimpleWeighter(txt, B=.6, content_B=1.0, K1=2.75)
157 | with ix.searcher(weighting=weighter) as s:
158 | if len(txt) > 2:
159 | corrector = s.corrector("display")
160 | suggs = corrector.suggest(txt, limit=10, maxdist=2, prefix=3)
161 | else:
162 | suggs = []
163 | results = s.search_page(q, 1, sortedby=[scores], pagelen=20, filter=my_filter)
164 | data = [[r["id"], r["name"], r["zvalue"],
165 | r["kind"], r["display"],
166 | r["sumlevel"] if "sumlevel" in r else "",
167 | r["is_stem"] if "is_stem" in r else False,
168 | r["url_name"] if "url_name" in r else None]
169 | for r in results]
170 |
171 | if not data and suggs:
172 | return do_search(suggs[0], sumlevel, kind, tries=tries+1, limit=limit, is_stem=is_stem,
173 | my_vars=my_vars)
174 |
175 | ascores = [r.score for r in results]
176 | attr_max_score = max(ascores) if ascores else 0
177 | # raise Exception(attr_max_score, vars_max_score)
178 | # insert nationwide linkage
179 | data = nationwide_results(data, my_vars, attr_max_score, vars_max_score, txt)
180 |
181 | return data, suggs, tries, my_vars
182 |
--------------------------------------------------------------------------------
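do_search returns four values: result rows, spelling suggestions, the retry count, and any matched variables. A minimal sketch of calling it directly (assuming the Whoosh indexes referenced in config exist on disk; note the codebase targets Python 2, hence the unicode literal):

    from datausa.attrs.search import do_search

    data, suggs, tries, my_vars = do_search(u"boston", kind="geo")
    for row in data[:5]:
        print(row[0], row[1])  # id, name
    if tries > 0:
        print("autocorrected via:", suggs)
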
/datausa/attrs/views.py:
--------------------------------------------------------------------------------
1 | import re
2 | from flask import Blueprint, request, jsonify, abort
3 |
4 | mod = Blueprint('attrs', __name__, url_prefix='/attrs')
5 | from datausa.attrs.models import Cip, Naics, University, Soc, Degree
6 | from datausa.attrs.models import Race, Search, ZipLookup, GeoNeighbors
7 | from datausa.attrs.models import OccCrosswalk, IndCrosswalk, ProductCrosswalk
8 | from datausa.attrs.models import Skill, Sector, Geo, AcsInd, PumsIoCrosswalk
9 | from datausa.attrs.models import PumsDegree, PumsNaics, PumsRace, PumsSoc
10 | from datausa.attrs.models import PumsWage, PumsSex, PumsBirthplace
11 | from datausa.attrs.models import LStudy, EnrollmentStatus, LivingArrangement
12 | from datausa.attrs.models import IoCode, AcsOcc, AcsRace, AcsLanguage, Conflict
13 | from datausa.attrs.models import Insurance, Cohort, Sctg, Napcs, IPedsRace
14 | from datausa.attrs.models import IncomeRange, IPedsOcc, AcademicRank
15 | from datausa.attrs.models import IPedsToPumsCrosswalk, Carnegie, IPedsExpense
16 | from datausa.attrs.models import Opeid, SchoolType, EthnicCode, ProgramLength
17 | from datausa.attrs.models import SimilarUniversities, RateType
18 | from datausa.attrs.consts import GEO, GEO_LEVEL_MAP
19 | from datausa.attrs.search import do_search
20 | from datausa.database import db
21 |
22 |
23 | def to_bool(x):
24 | return x and x.lower() == "true"
25 |
26 |
27 | attr_map = {"soc": PumsSoc, "naics": PumsNaics, "cip": Cip,
28 | "geo": Geo, "university": University, "degree": Degree,
29 | "skill": Skill, "sector": Sector,
30 | "pums_degree": PumsDegree,
31 | "pums_race": PumsRace, "sex": PumsSex,
32 | "birthplace": PumsBirthplace,
33 | "wage_bin": PumsWage, "iocode": IoCode,
34 | "race": Race, "acs_race": AcsRace,
35 | "acs_occ": AcsOcc, "conflict": Conflict, "acs_ind": AcsInd,
36 | "language": AcsLanguage,
37 | "bls_soc": Soc, "bls_naics": Naics,
38 | "insurance": Insurance, "cohort": Cohort,
39 | "sctg": Sctg, "napcs": Napcs, "opeid": Opeid,
40 | "ethnic_code": EthnicCode, "program_length": ProgramLength,
41 | "school_type": SchoolType,
42 | "lstudy": LStudy, "enrollment_status": EnrollmentStatus,
43 | "ipeds_race": IPedsRace,
44 | "living_arrangement": LivingArrangement,
45 | "income_range": IncomeRange,
46 | "academic_rank": AcademicRank,
47 | "ipeds_occ": IPedsOcc,
48 | "ipeds_expense": IPedsExpense,
49 | "carnegie": Carnegie,
50 | "rate_type": RateType}
51 |
52 |
53 | def show_attrs(attr_obj, sumlevels=None):
54 | if sumlevels is not None:
55 | if attr_obj is Geo:
56 | sumlevels = [GEO_LEVEL_MAP[lvl] if lvl in GEO_LEVEL_MAP else lvl for lvl in sumlevels]
57 | attrs = attr_obj.query.filter(attr_obj.sumlevel.in_(sumlevels)).all()
58 | else:
59 | attrs = attr_obj.query.filter(attr_obj.level.in_(sumlevels)).all()
60 | elif attr_obj is Geo:
61 | # exclude census tracts and ZIPs
62 | attrs = attr_obj.query.filter(~Geo.id.startswith("140"), ~Geo.id.startswith("860")).all()
63 | else:
64 | attrs = attr_obj.query.all()
65 |
66 | data = []
67 | headers = []
68 | for a in attrs:
69 | obj = a.serialize()
70 | data.append(obj.values())
71 | if not headers:
72 | headers = obj.keys()
73 | return jsonify(data=data, headers=headers)
74 |
75 |
76 | @mod.route("/pums//")
77 | def pums_attrs(kind):
78 | return attrs("pums_{}".format(kind))
79 |
80 |
81 | @mod.route("/pums///")
82 | def pums_attr_id(kind, pums_attr_id):
83 | return attrs_by_id("pums_{}".format(kind), pums_attr_id)
84 |
85 |
86 | @mod.route("//")
87 | def attrs(kind):
88 |
89 | if kind in attr_map:
90 | attr_obj = attr_map[kind]
91 | sumlevel = request.args.get("sumlevel", None)
92 | sumlevels = sumlevel.split(",") if sumlevel else None
93 | return show_attrs(attr_obj, sumlevels=sumlevels)
94 | raise Exception("Invalid attribute type.")
95 |
96 |
97 | @mod.route("///")
98 | def attrs_by_id(kind, attr_id):
99 |
100 | if kind in attr_map:
101 | attr_obj = attr_map[kind]
102 | if kind in ["naics", "soc"]:
103 | aid_obj = attr_obj.query.filter_by(id=attr_id).order_by(attr_obj.level.asc()).first()
104 | else:
105 | aid_obj = attr_obj.query.get(attr_id)
106 | tmp = aid_obj.serialize()
107 | return jsonify(data=[tmp.values()], headers=tmp.keys())
108 | raise Exception("Invalid attribute type.")
109 |
110 |
111 | @mod.route("/list/")
112 | def attrs_list():
113 | return jsonify(data=attr_map.keys())
114 |
115 |
116 | @mod.route("///parents/")
117 | def get_parents(kind, attr_id):
118 | if kind in attr_map:
119 | attr_obj = attr_map[kind]
120 | data, headers = attr_obj.parents(attr_id)
121 | return jsonify(data=data, headers=headers)
122 | raise Exception("Invalid attribute type.")
123 |
124 |
125 | @mod.route("///children/")
126 | def get_children(kind, attr_id):
127 | if kind in attr_map:
128 | attr_obj = attr_map[kind]
129 | kwargs = request.args
130 | data, headers = attr_obj.children(attr_id, **kwargs)
131 | return jsonify(data=data, headers=headers)
132 | raise Exception("Invalid attribute type.")
133 |
134 |
135 | @mod.route("/search/")
136 | def search():
137 | offset = request.args.get("offset", None)
138 | limit = int(request.args.get("limit", 10))
139 | kind = request.args.get("kind", None)
140 | sumlevel = request.args.get("sumlevel", None)
141 | txt = request.args.get("q", '').lower()
142 | is_stem = int(request.args.get("is_stem", 0))
143 |
144 | if txt and re.match('^[0-9]{1,5}$', txt):
145 | return zip_search(txt, limit=limit)
146 | elif not txt or len(txt) <= 1:
147 | return search_old()
148 |
149 | data, suggs, tries, my_vars = do_search(txt, sumlevel, kind, limit=limit, is_stem=is_stem)
150 | headers = ["id", "name", "zvalue", "kind", "display", "sumlevel", "is_stem", "url_name"]
151 | autocorrected = tries > 0
152 | suggs = [x for x in suggs if x != txt]
153 | return jsonify(data=data, headers=headers, suggestions=suggs, autocorrected=autocorrected, related_vars=my_vars)
154 |
155 |
156 | @mod.route("/search_old/")
157 | def search_old():
158 | q = request.args.get("q", '')
159 | q = q.lower()
160 | offset = request.args.get("offset", None)
161 | limit = request.args.get("limit", 100)
162 | kind = request.args.get("kind", None)
163 | sumlevel = request.args.get("sumlevel", None)
164 | is_stem = int(request.args.get("is_stem", 0))
165 | filters = [Search.name.like("%{}%".format(q))]
166 | if kind:
167 | filters.append(Search.kind == kind)
168 | if sumlevel:
169 | filters.append(Search.sumlevel == sumlevel)
170 | if is_stem == 1:
171 | filters.append(Search.is_stem == is_stem)
172 | elif is_stem == 2:
173 | filters.append(Search.is_stem >= 1)
174 | qry = Search.query.filter(*filters).order_by(Search.zvalue.desc())
175 | if limit:
176 | qry = qry.limit(int(limit))
177 | if offset:
178 | qry = qry.offset(int(offset))
179 | qry = qry.all()
180 |
181 | data = [[a.id, a.name, a.zvalue, a.kind, a.display, a.sumlevel, a.is_stem, a.url_name] for a in qry]
182 |
183 | headers = ["id", "name", "zvalue", "kind", "display", "sumlevel", "is_stem", "url_name"]
184 | return jsonify(data=data, headers=headers)
185 |
186 |
187 | @mod.route("/ranks/")
188 | def ranks():
189 | attr_sumlvls = {
190 | "soc": {"0": 6, "1": 17, "2": 24, "3": 478},
191 | "naics": {"0": 14, "1": 21, "2": 266},
192 | "cip": {"2": 38, "4": 368, "6": 1416},
193 | "geo": {"nation": 1,
194 | "state": 52,
195 | "county": 3221,
196 | "msa": 929,
197 | "place": 29509,
198 | "puma": 2378}
199 | }
200 | return jsonify(data=attr_sumlvls)
201 |
202 |
203 | def zip_search(zc, limit=10):
204 | if len(zc) != 5:
205 | zc += "%"
206 | zc = "86000US" + zc
207 |
208 | filters = [
209 | ZipLookup.child_geoid.like(zc),
210 | ZipLookup.percent_covered >= 90,
211 | Search.id == ZipLookup.parent_geoid
212 | ]
213 |
214 | qry = Search.query.join(ZipLookup).filter(*filters)
215 | qry = qry.order_by(ZipLookup.parent_area.asc())
216 |
217 | qry = qry.with_entities(Search.id, Search.name, Search.zvalue, Search.kind,
218 | Search.display, Search.sumlevel, ZipLookup.child_geoid, Search.is_stem, Search.url_name)
219 | qry = qry.limit(limit)
220 | data = [list(row) for row in qry]
221 | headers = ["id", "name", "zvalue", "kind", "display", "sumlevel", "zipcode", "is_stem", "url_name"]
222 | return jsonify(data=data, headers=headers, zip_search=True)
223 |
224 |
225 | @mod.route("/geo//neighbors/")
226 | def neighbors(geo_id):
227 | results = GeoNeighbors.query.filter_by(geo=geo_id).all()
228 | headers = ["geo", "neighbor"]
229 | data = [[result.geo, result.neighbor] for result in results]
230 | return jsonify(data=data, headers=headers)
231 |
232 |
233 | @mod.route("/geo//ipeds/")
234 | def has_ipeds_data(attr_id):
235 | from datausa.util import inmem
236 | # first check, do I have any data
237 | data, headers = Geo.parents(attr_id)
238 | id_idx = headers.index("id")
239 | ipeds_places = inmem.ipeds_place_map()
240 | if attr_id in ipeds_places:
241 | return jsonify(data=[], headers=[])
242 | data.reverse()
243 | for row in data:
244 | geo_id = row[id_idx]
245 | if geo_id in ipeds_places:
246 | return jsonify(data=[geo_id], headers=[GEO])
247 | return jsonify(data=[], headers=[])
248 |
249 | @mod.route("/crosswalk///")
250 | def crosswalk_acs(attr_kind, attr_id):
251 | if attr_kind not in ["acs_occ", "acs_ind", "iocode", "sctg", "ipeds_occ"]:
252 | return abort(404)
253 | if attr_kind == "sctg":
254 | results = ProductCrosswalk.query.filter(ProductCrosswalk.sctg == attr_id)
255 | results = [[item.napcs, "napcs"] for item in results]
256 | elif attr_kind == "ipeds_occ":
257 | results = IPedsToPumsCrosswalk.query.filter(IPedsToPumsCrosswalk.ipeds_occ == attr_id).all()
258 | results = [[item.pums_soc, "soc"] for item in results]
259 | elif attr_kind == "iocode":
260 | results = PumsIoCrosswalk.query.filter(PumsIoCrosswalk.iocode == attr_id).all()
261 | results = [[item.pums_naics, "naics"] for item in results]
262 | else:
263 | attr_obj = {"acs_occ": OccCrosswalk, "acs_ind": IndCrosswalk}[attr_kind]
264 | header_name = {"acs_occ": "soc", "acs_ind": "naics"}[attr_kind]
265 | col_name = "pums_{}".format(header_name)
266 | results = attr_obj.query.filter(getattr(attr_obj, attr_kind) == attr_id).with_entities(col_name).all()
267 | results = [[getattr(item, col_name), header_name] for item in results]
268 | return jsonify(data=results, headers=["attr_id", "attr_kind"])
269 |
270 |
271 | @mod.route("/nearby/university/")
272 | def nearby_university(university_id):
273 | limit = int(request.args.get("limit", 5))
274 | univ = University.query.get(university_id)
275 | query_str = """SELECT id, name
276 | FROM attrs.university
277 | where carnegie = :carnegie AND status != 'D' and id != :uid
278 | ORDER BY ST_MakePoint(:lat, :lng) <-> st_makepoint(lat, lng)
279 | LIMIT :limit;
280 | """
281 | res = db.session.execute(query_str, {"lat": univ.lat, "lng": univ.lng, "carnegie": univ.carnegie, "limit": limit, "uid": university_id})
282 | data = [map(unicode, x) for x in res]
283 | headers = ["id", "name"]
284 | return jsonify(data=data, headers=headers)
285 |
286 |
287 | @mod.route("/similar/university/")
288 | def similar_universities(university_id):
289 | limit = int(request.args.get("limit", 5))
290 | univ = SimilarUniversities.query.get(university_id)
291 | query_str = """SELECT id, name
292 | FROM attrs.similar_universities
293 | where id != :uid
294 | AND carnegie_parent = :carnegie_parent
295 | ORDER BY ST_MakePoint(:x, :y) <-> st_makepoint(x, y)
296 | LIMIT :limit;
297 | """
298 | res = db.session.execute(query_str, {"x": univ.x, "y": univ.y, "carnegie_parent": univ.carnegie_parent, "limit": limit, "uid": university_id})
299 | data = [map(unicode, x) for x in res]
300 | headers = ["id", "name"]
301 | return jsonify(data=data, headers=headers)
302 |
--------------------------------------------------------------------------------
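The attribute routes compose into small lookup chains; a sketch of walking a geo's parents over HTTP (host and geo ID are illustrative; requests is an assumption, not a project dependency):

    import requests

    base = "http://localhost:5000/attrs"
    # per the /<kind>/<attr_id>/parents/ route above
    resp = requests.get("{}/geo/05000US25025/parents/".format(base)).json()
    print(resp["headers"])
    print(resp["data"][:2])
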
/datausa/bea/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/bea/__init__.py
--------------------------------------------------------------------------------
/datausa/bea/models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa.attrs.models import IoCode
3 | from datausa.core.models import BaseModel
4 | from datausa.attrs.consts import ALL
5 | from datausa.attrs.consts import NO_VALUE_ADDED
6 | # from sqlalchemy import and_
7 |
8 | class BeaUse(db.Model, BaseModel):
9 | __table_args__ = {"schema": "bea"}
10 | __tablename__ = 'use'
11 | source_title = 'Use Tables'
12 | source_link = 'http://bea.gov'
13 | source_org = 'Bureau of Economic Analysis'
14 |
15 | median_moe = 2
16 | to_filter = ["TOTCOMOUT", "HS", "ORE", "GFGD", "G", "TOTII", "GFGN", "GSLE",
17 | "GFE", "GSLG", "Other", "Used", "TOTFU", "TOTVA", "TOTINDOUT"]
18 | no_value_added = to_filter + ["V001", "V002", "V003", "F010", "F020", "F030",
19 | "F040", "F050", "F100"]
20 | year = db.Column(db.Integer, primary_key=True)
21 | industry_iocode = db.Column(db.String, db.ForeignKey(IoCode.id), primary_key=True)
22 | commodity_iocode = db.Column(db.String, db.ForeignKey(IoCode.id), primary_key=True)
23 |
24 | value_millions = db.Column(db.Integer)
25 | industry_level = db.Column(db.Integer)
26 |
27 | @classmethod
28 | def get_supported_levels(cls):
29 | return {
30 | "industry_iocode": [ALL, "0", "1", "naics", NO_VALUE_ADDED],
31 | "commodity_iocode": [ALL, "naics", NO_VALUE_ADDED],
32 | }
33 |
34 | @classmethod
35 | def industry_iocode_filter(cls, level):
36 | if level == ALL:
37 | return True
38 | elif level == "naics":
39 | return ~cls.industry_iocode.in_(cls.to_filter)
40 | elif level == NO_VALUE_ADDED:
41 | return ~cls.industry_iocode.in_(cls.no_value_added)
42 | target_len = int(level)
43 | return cls.industry_level == target_len
44 |
45 | @classmethod
46 | def commodity_iocode_filter(cls, level):
47 | if level == ALL:
48 | return True
49 | elif level == NO_VALUE_ADDED:
50 | return ~cls.commodity_iocode.in_(cls.no_value_added)
51 | return ~cls.commodity_iocode.in_(cls.to_filter)
52 |
--------------------------------------------------------------------------------
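Because the iocode filters return SQLAlchemy expressions (or True for the "all" level), they can be dropped straight into a query; a hedged sketch (assuming an application context and a populated bea.use table):

    from datausa.bea.models import BeaUse

    # NAICS-like industry codes only, with value-added rows excluded
    qry = BeaUse.query.filter(
        BeaUse.industry_iocode_filter("naics"),
        BeaUse.commodity_iocode_filter("no_value_added"))
    print(qry.count())
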
/datausa/bls/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/bls/__init__.py
--------------------------------------------------------------------------------
/datausa/bls/models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa.attrs.models import Geo, Soc, Naics
3 | from datausa.attrs.models import PumsSoc, PumsNaics
4 | from datausa.core.models import BaseModel
5 | from datausa.attrs.consts import NATION, STATE, MSA, ALL
6 | from sqlalchemy.orm import column_property
7 | from sqlalchemy.ext.declarative import declared_attr
8 |
9 |
10 | class Bls(BaseModel):
11 | source_title = 'Growth'
12 | source_org = 'Bureau of Labor Statistics'
13 |
14 | __table_args__ = {"schema": "bls"}
15 | source_link = 'http://bls.gov'
16 |
17 |
18 | class SocCrosswalk(db.Model, Bls):
19 | __tablename__ = 'soc_crosswalk'
20 | soc = db.Column("pums_soc", db.String(), db.ForeignKey(PumsSoc.id), primary_key=True)
21 | bls_soc = db.Column(db.String(), db.ForeignKey(Soc.id), primary_key=True)
22 |
23 |
24 | class BlsSoc(object):
25 | @declared_attr
26 | def bls_soc(cls):
27 | return db.Column(db.String(), primary_key=True)
28 |
29 | @declared_attr
30 | def soc(cls):
31 | return column_property(SocCrosswalk.soc)
32 |
33 | @classmethod
34 | def crosswalk_join(cls, qry):
35 | cond = SocCrosswalk.bls_soc == cls.bls_soc
36 | return qry.join(SocCrosswalk, cond, full=True)
37 |
38 |
39 | class GrowthO(db.Model, Bls, BlsSoc):
40 | source_title = 'Employment Projections'
41 | __tablename__ = 'growth_o'
42 | median_moe = 1
43 |
44 | emp_2014_thousands = db.Column(db.Float)
45 | emp_2024_thousands = db.Column(db.Float)
46 | emp_pct_2014 = db.Column(db.Float)
47 | emp_pct_2024 = db.Column(db.Float)
48 | change_thousands = db.Column(db.Float)
49 | pct_change = db.Column(db.Float)
50 | openings_thousands = db.Column(db.Float)
51 |
52 | @classmethod
53 | def get_supported_levels(cls):
54 | return {
55 | "soc": [ALL, "0", "1", "2", "3"],
56 | "bls_soc": [ALL, "0", "1", "2", "3"]
57 | }
58 |
59 |
60 | class GrowthO16(db.Model, Bls, BlsSoc):
61 | source_title = 'Employment Projections'
62 | __tablename__ = 'growth_o_2016'
63 | median_moe = 1
64 |
65 | emp_2016_thousands = db.Column(db.Float)
66 | emp_2026_thousands = db.Column(db.Float)
67 | emp_pct_2016 = db.Column(db.Float)
68 | emp_pct_2026 = db.Column(db.Float)
69 | change_thousands = db.Column(db.Float)
70 | pct_change = db.Column(db.Float)
71 | openings_thousands = db.Column(db.Float)
72 |
73 | @classmethod
74 | def get_supported_levels(cls):
75 | return {
76 | "soc": [ALL, "0", "1", "2", "3"],
77 | "bls_soc": [ALL, "0", "1", "2", "3"]
78 | }
79 |
80 |
81 | class GrowthI(db.Model, Bls):
82 | source_title = 'Industry Projections'
83 | __tablename__ = 'growth_i'
84 | median_moe = 2
85 |
86 | naics = db.Column(db.String, primary_key=True)
87 | title = db.Column(db.String)
88 | emp_2004_thousands = db.Column(db.Float)
89 | emp_2014_thousands = db.Column(db.Float)
90 | emp_2024_thousands = db.Column(db.Float)
91 | emp_change_2004_2014 = db.Column(db.Float)
92 | emp_change_2014_2024 = db.Column(db.Float)
93 | output_2004 = db.Column(db.Float)
94 | output_2014 = db.Column(db.Float)
95 | output_2024 = db.Column(db.Float)
96 | output_carc_2004_2014 = db.Column(db.Float)
97 | output_carc_2014_2024 = db.Column(db.Float)
98 | emp_carc_2004_2014 = db.Column(db.Float)
99 | emp_carc_2014_2024 = db.Column(db.Float)
100 |
101 | @classmethod
102 | def get_supported_levels(cls):
103 | return {
104 | "naics": [ALL, "0", "1", "2", "3", "4"]
105 | }
106 |
107 |
108 | class GrowthI16(db.Model, Bls):
109 | source_title = 'Industry Projections'
110 | __tablename__ = 'growth_i_2016'
111 | median_moe = 2
112 |
113 | naics = db.Column(db.String, primary_key=True)
114 | title = db.Column(db.String)
115 | emp_2006_thousands = db.Column(db.Float)
116 | emp_2016_thousands = db.Column(db.Float)
117 | emp_2026_thousands = db.Column(db.Float)
118 | emp_change_2006_2016 = db.Column(db.Float)
119 | emp_change_2016_2026 = db.Column(db.Float)
120 | output_2006 = db.Column(db.Float)
121 | output_2016 = db.Column(db.Float)
122 | output_2026 = db.Column(db.Float)
123 | output_carc_2006_2016 = db.Column(db.Float)
124 | output_carc_2016_2026 = db.Column(db.Float)
125 | emp_carc_2006_2016 = db.Column(db.Float)
126 | emp_carc_2016_2026 = db.Column(db.Float)
127 |
128 | @classmethod
129 | def get_supported_levels(cls):
130 | return {
131 | "naics": [ALL, "0", "1", "2", "3", "4"]
132 | }
133 |
134 |
135 | class BlsCrosswalk(db.Model, Bls):
136 | __tablename__ = 'bls_crosswalk'
137 | pums_naics = db.Column(db.String, db.ForeignKey(PumsNaics.id),
138 | primary_key=True)
139 | bls_naics = db.Column(db.String, primary_key=True)
140 |
141 |
142 | class GrowthILookup(db.Model, Bls):
143 | __tablename__ = 'growth_i_lookup'
144 | pums_naics = db.Column(db.String, db.ForeignKey(PumsNaics.id), primary_key=True)
145 | bls_naics = db.Column(db.String, primary_key=True)
146 |
147 |
148 | class OesYgo(db.Model, Bls, BlsSoc):
149 | __tablename__ = 'oes_ygo'
150 |
151 | median_moe = 2
152 |
153 | year = db.Column(db.Integer, primary_key=True)
154 | geo = db.Column(db.String, db.ForeignKey(Geo.id), primary_key=True)
155 | # soc = db.Column(db.String, db.ForeignKey(Soc.id), primary_key=True)
156 |
157 | tot_emp = db.Column(db.Integer)
158 | tot_emp_prse = db.Column(db.Float)
159 | avg_wage = db.Column(db.Float)
160 | avg_wage_prse = db.Column(db.Float)
161 | tot_emp_rca = db.Column(db.Float)
162 |
163 | @classmethod
164 | def get_supported_levels(cls):
165 | return {
166 | "geo": [ALL, NATION, STATE, MSA],
167 | "bls_soc": [ALL, "0", "1", "2", "3"],
168 | "soc": [ALL, "0", "1", "2", "3"]
169 | }
170 |
171 | @classmethod
172 | def geo_filter(cls, level):
173 | if level == ALL:
174 | return True
175 |         level_map = {NATION: "010", STATE: "040", MSA: "310"}
176 | level_code = level_map[level]
177 | return cls.geo.startswith(level_code)
178 |
179 |
180 | class CesYi(db.Model, Bls):
181 | source_title = 'Current Employment Statistics'
182 | __tablename__ = 'ces_yi'
183 | median_moe = 1.5
184 |
185 | JOINED_FILTER = {"naics": {
186 | "table": Naics,
187 | "column": Naics.level,
188 | "id": Naics.id}}
189 |
190 | year = db.Column(db.Integer, primary_key=True)
191 | naics = db.Column(db.String, db.ForeignKey(Naics.id), primary_key=True)
192 |
193 | avg_hrly_earnings = db.Column(db.Float)
194 | avg_wkly_hrs = db.Column(db.Float)
195 | employees_thousands = db.Column(db.Float)
196 |
197 | @classmethod
198 | def get_supported_levels(cls):
199 | return {
200 | "naics": [ALL, "0", "1", "2", "3", "4"]
201 | }
202 |
203 |
204 | class QcewYgi(db.Model, Bls):
205 | __tablename__ = 'qcew_ygi'
206 | median_moe = 2
207 |
208 | year = db.Column(db.Integer, primary_key=True)
209 | geo = db.Column(db.String, db.ForeignKey(Geo.id), primary_key=True)
210 | naics = db.Column(db.String, db.ForeignKey(Naics.id), primary_key=True)
211 |
212 | naics_level = db.Column(db.Integer)
213 | avg_annual_pay = db.Column(db.Float)
214 | total_annual_wages = db.Column(db.Float)
215 | annual_contributions = db.Column(db.Float)
216 | annual_avg_emplvl = db.Column(db.Float)
217 | total_annual_wages_rca = db.Column(db.Float)
218 | annual_avg_estabs = db.Column(db.Float)
219 | taxable_annual_wages = db.Column(db.Float)
220 | annual_avg_wkly_wage = db.Column(db.Float)
221 |
222 | @classmethod
223 | def get_supported_levels(cls):
224 | return {
225 | "geo": [ALL, NATION, STATE, MSA],
226 | "naics": [ALL, "0", "1", "2", "3", "4"]
227 | }
228 |
229 | @classmethod
230 | def geo_filter(cls, level):
231 | if level == ALL:
232 | return True
233 |         level_map = {NATION: "010", STATE: "040", MSA: "310"}
234 | level_code = level_map[level]
235 | return cls.geo.startswith(level_code)
236 |
237 | @classmethod
238 | def naics_filter(cls, level):
239 | if level == ALL:
240 | return True
241 | return cls.naics_level == level
242 |
--------------------------------------------------------------------------------
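
A note on the `geo_filter` pattern above (repeated in several models below): DataUSA geo identifiers carry their summary level in the first three characters ("010" nation, "040" state, "310" MSA, "050" county), so restricting a query to one level compiles to a simple prefix match rather than a join against the attribute table. A minimal sketch, assuming a configured Flask app context; the SQL shown in the comment is approximate:

    from datausa.attrs.consts import STATE
    from datausa.bls.models import OesYgo

    # Column.startswith() compiles to LIKE with a trailing wildcard:
    #   SELECT ... FROM bls.oes_ygo WHERE oes_ygo.geo LIKE '040%'
    qry = OesYgo.query.filter(OesYgo.geo_filter(STATE))
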
/datausa/cbp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/cbp/__init__.py
--------------------------------------------------------------------------------
/datausa/cbp/abstract_models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa.attrs import consts
3 | from datausa.attrs.models import Naics, Geo
4 | from sqlalchemy.orm import column_property
5 | from datausa.core.models import BaseModel
6 | from sqlalchemy.ext.declarative import declared_attr
7 | from sqlalchemy.sql import func
8 | from datausa.attrs.consts import NATION, STATE, COUNTY, MSA, ALL
9 |
10 | class BaseCbp(db.Model, BaseModel):
11 | __abstract__ = True
12 | __table_args__ = {"schema": "cbp"}
13 | source_title = 'County Business Patterns'
14 | source_link = 'http://www.census.gov/econ/cbp/'
15 | source_org = 'Census Bureau'
16 |
17 | est = db.Column(db.Integer())
18 |
19 | emp = db.Column(db.Integer())
20 | emp_nf = db.Column(db.String())
21 | empflag = db.Column(db.String())
22 |
23 | ap = db.Column(db.Float())
24 | ap_nf = db.Column(db.String())
25 |
26 | n1_4 = db.Column(db.Integer())
27 | n5_9 = db.Column(db.Integer())
28 | n20_49 = db.Column(db.Integer())
29 | n50_99 = db.Column(db.Integer())
30 | n100_249 = db.Column(db.Integer())
31 | n250_499 = db.Column(db.Integer())
32 | n500_999 = db.Column(db.Integer())
33 | n1000 = db.Column(db.Integer())
34 | n1000_1 = db.Column(db.Integer())
35 | n1000_2 = db.Column(db.Integer())
36 | n1000_3 = db.Column(db.Integer())
37 | n1000_4 = db.Column(db.Integer())
38 |
39 | @classmethod
40 | def geo_filter(cls, level):
41 | if level == ALL:
42 | return True
43 | level_map = {NATION: "010", STATE: "040", MSA: "310", COUNTY: "050"}
44 | level_code = level_map[level]
45 | return cls.geo.startswith(level_code)
46 |
--------------------------------------------------------------------------------
/datausa/cbp/models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa.attrs.models import Geo, Soc
3 | from datausa.core.models import BaseModel
4 | from datausa.attrs.consts import NATION, STATE, MSA, ALL, GEO
5 |
6 | from datausa.cbp.abstract_models import BaseCbp
7 | from datausa.attrs.consts import COUNTY
8 | from sqlalchemy.sql import func
9 |
10 | class CbpYgi(BaseCbp):
11 | __tablename__ = "ygi"
12 | median_moe = 2
13 |
14 | year = db.Column(db.Integer(), primary_key=True)
15 | geo = db.Column(db.String(), primary_key=True)
16 | naics = db.Column(db.String(), primary_key=True)
17 |
18 | @classmethod
19 | def get_supported_levels(cls):
20 | return {
21 | GEO: [ALL, NATION, STATE, MSA, COUNTY],
22 | "naics": [ALL, "0", "1", "2", "3", "4"]
23 | }
24 |
25 | @classmethod
26 | def naics_filter(cls, level):
27 | if level == ALL:
28 | return True
29 | target_len = int(level) + 2
30 | return func.length(cls.naics) == target_len
31 |
32 | class CbpYg(BaseCbp):
33 | __tablename__ = "yg"
34 | median_moe = 1
35 |
36 | year = db.Column(db.Integer(), primary_key=True)
37 | geo = db.Column(db.String(), primary_key=True)
38 |
39 | @classmethod
40 | def get_supported_levels(cls):
41 | return {
42 | GEO: [ALL, NATION, STATE, MSA, COUNTY],
43 | }
44 |
--------------------------------------------------------------------------------
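
The `naics_filter` above encodes levels by code length: a level-n NAICS code carries n digits beyond the two-digit sector, hence `int(level) + 2`. A small sketch (illustrative only; the example codes are assumptions):

    from datausa.cbp.models import CbpYgi

    # level "0" matches two-digit sectors (e.g. "31"); level "2" matches
    # four-digit industry groups (e.g. "3254"):
    filt = CbpYgi.naics_filter("2")   # compiles to length(ygi.naics) = 4
    qry = CbpYgi.query.filter(filt)
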
/datausa/chr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/chr/__init__.py
--------------------------------------------------------------------------------
/datausa/chr/models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa.attrs import consts
3 | from datausa.attrs.models import University
4 | from datausa import cache
5 |
6 | from datausa.core.models import BaseModel
7 | from datausa.attrs.consts import NATION, STATE, COUNTY, MSA, ALL
8 | from sqlalchemy.ext.automap import automap_base
9 | from sqlalchemy import MetaData
10 |
11 | metadata = cache.get("chr_metadata")
12 | if not metadata:
13 | metadata = MetaData(schema='chr', bind=db.engine)
14 | metadata.reflect()
15 | cache.set("chr_metadata", metadata)
16 |
17 | AutomapBase = automap_base(bind=db.engine, metadata=metadata)
18 |
19 |
20 | class HealthYg(AutomapBase, db.Model, BaseModel):
21 | __table_args__ = {"schema": "chr", "extend_existing": True}
22 | source_title = 'County Health Rankings'
23 | source_link = 'http://www.countyhealthrankings.org/'
24 | source_org = 'University of Wisconsin'
25 | __tablename__ = 'yg'
26 | median_moe = 1
27 |
28 | year = db.Column(db.Integer, primary_key=True)
29 | geo = db.Column(db.String(), primary_key=True)
30 |
31 | @classmethod
32 | def get_supported_levels(cls):
33 | return {"geo": [ALL, STATE, COUNTY]}
34 |
35 | @classmethod
36 | def geo_filter(cls, level):
37 | if level == ALL:
38 | return True
39 | level_map = {STATE: "040", COUNTY: "050"}
40 | level_code = level_map[level]
41 | return cls.geo.startswith(level_code)
42 |
43 | AutomapBase.prepare(db.engine, reflect=False)
44 |
--------------------------------------------------------------------------------
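
The reflect-and-cache dance above (also used by the dartmouth module below) avoids re-introspecting the schema on every application start. A generalized sketch; `reflected_base` is a hypothetical helper, and the cache is assumed to expose `get`/`set` as `datausa.cache` does:

    from sqlalchemy import MetaData
    from sqlalchemy.ext.automap import automap_base

    def reflected_base(schema, engine, cache):
        key = "{}_metadata".format(schema)
        metadata = cache.get(key)
        if not metadata:
            # reflect() runs one round of introspection queries; caching
            # the resulting MetaData lets later startups skip them
            metadata = MetaData(schema=schema, bind=engine)
            metadata.reflect()
            cache.set(key, metadata)
        return automap_base(bind=engine, metadata=metadata)

Models then subclass the returned base and, once all classes are declared, call `AutomapBase.prepare(db.engine, reflect=False)` so the mapper wires up the already-reflected tables without hitting the database again.
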
/datausa/core/__init__.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy.orm.attributes import InstrumentedAttribute
2 |
3 | def get_columns(tbl):
4 | cols = tbl.__mapper__.attrs
5 | return [getattr(tbl, col.key) for col in cols]
6 |
7 | # possible_variables = [col.key for t in registered_models for col in t.__table__.columns]
8 | # def attribute_names(cls):
9 | # return [prop.key for prop in class_mapper(cls).iterate_properties
10 | # if isinstance(prop, ColumnProperty)]
11 |
12 | # def get_columns(tbl):
13 | # cols = []
14 | # for item,val in tbl.__dict__.items():
15 | # if isinstance(val, InstrumentedAttribute) and not item.startswith("_"):
16 | # cols.append(val)
17 | # # print tbl.__table__.columns
18 | # return cols
19 |
--------------------------------------------------------------------------------
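
Note that `get_columns` walks mapper attributes rather than `__table__.columns`, so crosswalk attributes declared via `column_property` (such as `BlsSoc.soc` above) come back alongside the physical columns. A quick sketch, assuming an app context:

    from datausa.core import get_columns
    from datausa.bls.models import GrowthO

    cols = get_columns(GrowthO)     # InstrumentedAttribute objects
    print([c.key for c in cols])    # includes the mapped 'soc' property
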
/datausa/core/api.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import and_
2 | from flask import Response
3 | import simplejson
4 |
5 | from datausa.core import get_columns
6 | from datausa.core.table_manager import TableManager, table_name
7 | from datausa.attrs import consts
8 | from datausa.attrs.views import attr_map
9 | from sqlalchemy.orm import aliased
10 | from datausa.util.inmem import splitter
11 | from datausa.core.exceptions import DataUSAException
12 | from sqlalchemy import desc, asc, func
13 |
14 | MAX_LIMIT = 4
15 |
16 |
17 | def use_attr_names(table, qry, cols):
18 | new_cols = []
19 | joins = {}
20 | for col in cols:
21 | col_str = col if isinstance(col, basestring) else col.key
22 | orig_str = col_str
23 | col_str = "iocode" if "_iocode" in col_str else col_str
24 | col_str = "geo" if col_str.endswith("_geo") else col_str
25 | col_str = "pums_degree" if "pums" in table.__table_args__["schema"] and col_str == "degree" else col_str
26 | if table.__table_args__["schema"] == 'bls' and col_str in ['naics', 'soc']:
27 | col_str = "bls_{}".format(col_str)
28 | if col_str in attr_map:
29 | attr_obj = attr_map[col_str]
30 | attr_alias = aliased(attr_obj)
31 | joins[orig_str] = [attr_alias, getattr(table, orig_str) == attr_alias.id]
32 | new_cols.append(attr_alias.name.label(orig_str + "_name"))
33 |
34 | new_cols.append(col)
35 | for col_str, j in joins.items():
36 | qry = qry.join(*j, isouter=True)
37 | return qry, new_cols
38 |
39 |
40 | def stream_format(table, cols, qry, api_obj):
41 | def generate():
42 | yield ','.join([col if isinstance(col, basestring) else col.key for col in cols]) + '\n'
43 | for row in qry:
44 | row = [u'"{}"'.format(x) if isinstance(x, basestring) else str(x) for x in list(row)]
45 | yield u','.join(row) + u'\n'
46 | return Response(generate(), mimetype='text/csv')
47 |
48 | def simple_format(table, cols, data, api_obj):
49 | ''' Based on https://github.com/al4/orlo/blob/1b3930bae4aa37eb51aed33a97c088e576cb5a99/orlo/route_api.py#L285-L311'''
50 | def generate(table):
51 | headers = [col if isinstance(col, basestring) else col.key for col in cols]
52 | inf = float('inf')
53 |
54 | """
55 | A lagging generator to stream JSON so we don't have to hold everything in memory
56 | This is a little tricky, as we need to omit the last comma to make valid JSON,
57 | thus we use a lagging generator, similar to http://stackoverflow.com/questions/1630320/
58 | """
59 | yield u'{'
60 |
61 | rows = data.__iter__()
62 | try:
63 | prev_row = next(rows) # get first result
64 | except StopIteration:
65 |             # StopIteration means there were zero rows, so yield a valid empty doc and stop
66 | yield u'''"data": [],
67 | "headers": {},
68 | "source": {},
69 | "subs": {},
70 | "logic": {}
71 | '''.format(simplejson.dumps(list(headers)), simplejson.dumps(table.info(api_obj)), simplejson.dumps(api_obj.subs),
72 | simplejson.dumps([table.info(api_obj) for table in api_obj.table_list])) + u'}'
73 | raise StopIteration
74 |
75 |         # We have at least one row; yield the opening json
76 | yield u'"data": ['
77 |
78 |         # Iterate over the rows, staying one behind
79 | for row in rows:
80 | yield simplejson.dumps([x if x != inf else None for x in prev_row]) + u', '
81 | prev_row = row
82 |
83 | # Now yield the last iteration without comma
84 | yield simplejson.dumps([x if x != inf else None for x in prev_row])
85 |
86 | yield u'''], "headers": {},
87 | "source": {},
88 | "subs": {},
89 | "logic": {}
90 | '''.format(simplejson.dumps(list(headers)), simplejson.dumps(table.info(api_obj)), simplejson.dumps(api_obj.subs),
91 | simplejson.dumps([table.info(api_obj) for table in api_obj.table_list])) + u'}'
92 |
93 | return Response(generate(table), content_type='application/json')
94 |
95 | # def simple_format(table, cols, data, api_obj):
96 | # headers = [col if isinstance(col, basestring) else col.key for col in cols]
97 | # inf = float('inf')
98 | # data = {
99 | # "headers": list(headers),
100 | # "source": table.info(api_obj),
101 | # "subs": api_obj.subs,
102 | # "logic": [table.info(api_obj) for table in api_obj.table_list],
103 | # "data": [ [x if x != inf else None for x in row] for row in data],
104 | # }
105 | #
106 | # return flask.jsonify(data)
107 |
108 | def parse_method_and_val(cond):
109 | if cond.startswith("^"):
110 | return "startswith", cond[1:], False
111 | elif cond.startswith("~^"):
112 | return "startswith", cond[2:], True
113 |     elif cond.endswith("~$"):
114 |         return "endswith", cond[:-2], True
115 |     elif cond.endswith("$"):
116 |         return "endswith", cond[:-1], False
117 | elif cond.startswith("str!"):
118 | return "ne", str(cond[4:]), False
119 | elif cond.startswith("!"):
120 | return "ne", int(cond[1:]), False
121 | elif cond.startswith(">"):
122 | return "gt", int(cond[1:]), False
123 | elif cond.startswith("<"):
124 | return "lt", int(cond[1:]), False
125 | elif cond.startswith("R<"):
126 | return "rt", float(cond[2:]), False
127 | elif cond.startswith("R>"):
128 | return "rg", float(cond[2:]), False
129 | else:
130 | return "like", cond, False
131 |
132 | def where_filters(table, where_str):
133 | if not where_str:
134 | return []
135 | filts = []
136 |
137 | wheres = splitter(where_str)
138 | for where in wheres:
139 | colname, cond = where.split(":")
140 | cols = None
141 | if "/" in colname:
142 | cols = [getattr(table, c) for c in colname.split("/")]
143 | else:
144 | col = getattr(table, colname)
145 | method, value, negate = parse_method_and_val(cond)
146 | if method == "ne":
147 | expr = col != value
148 | elif method == "gt":
149 | expr = col > value
150 | elif method == "lt":
151 | expr = col < value
152 | elif method == "rt":
153 | expr = and_(cols[1] != 0, cols[0] / cols[1] < value)
154 | elif method == "rg":
155 | expr = and_(cols[1] != 0, cols[0] / cols[1] > value)
156 | else:
157 | expr = getattr(col, method)(value)
158 | if negate:
159 | expr = ~expr
160 | filts.append(expr)
161 | return filts
162 |
163 | def sumlevel_filtering(table, api_obj):
164 | shows_and_levels = api_obj.shows_and_levels
165 | filters = []
166 | for col, level in shows_and_levels.items():
167 | args = (table, "{}_filter".format(col))
168 | if hasattr(*args):
169 | func = getattr(*args)
170 | filters.append(func(level))
171 |
172 | # raise Exception(filters)
173 | return filters
174 |
175 | def process_value_filters(table, vars_and_vals, api_obj):
176 | filts = []
177 | for var, val in vars_and_vals.items():
178 | if var == consts.YEAR and val in [consts.LATEST, consts.OLDEST]:
179 | years = TableManager.table_years[table_name(table)]
180 | my_year = years[val]
181 | filt = table.year == my_year
182 | api_obj.set_year(my_year)
183 | elif consts.OR in val:
184 | filt = getattr(table, var).in_(splitter(val))
185 | else:
186 | filt = getattr(table, var) == val
187 | if var == consts.YEAR and val == consts.ALL:
188 | pass # do nothing, show all years
189 | else:
190 | filts.append(filt)
191 | return filts
192 |
193 | def remove_filters(filters, table, col, api_obj):
194 | new_filts = []
195 | for filt in filters:
196 | if hasattr(filt, "left") and hasattr(filt, "right"):
197 | if filt.left.key == col and isinstance(filt.right.value, basestring):
198 | if api_obj.vars_and_vals[col] == filt.right.value:
199 | continue
200 | new_filts.append(filt)
201 | return new_filts
202 |
203 |
204 | def copy_where_literals(api_obj):
205 | if hasattr(api_obj, "where") and api_obj.where:
206 | wheres = splitter(api_obj.where)
207 | for where in wheres:
208 | colname, cond = where.split(":")
209 | if colname not in api_obj.vars_and_vals:
210 | api_obj.vars_and_vals[colname] = cond
211 | return api_obj
212 |
213 |
214 | def handle_join(qry, filters, table, api_obj):
215 | joins = []
216 | joined_filt = table.JOINED_FILTER
217 | # see if we need to copy over which variables are involved
218 | api_obj = copy_where_literals(api_obj)
219 | for col, level in api_obj.shows_and_levels.items():
220 | if level != consts.ALL:
221 | if col in joined_filt:
222 |                 if "triggers" not in joined_filt[col]:
223 | joins.append(joined_filt[col]["table"])
224 | filters.append(joined_filt[col]["column"] == level)
225 | filters.append(joined_filt[col]["id"] == getattr(table, col))
226 | else:
227 | triggers = joined_filt[col]["triggers"]
228 | for target_lvl, starting in triggers:
229 | if col in api_obj.vars_and_vals:
230 | if api_obj.vars_and_vals[col].startswith(starting) and level == target_lvl:
231 | joins.append(joined_filt[col]["table"])
232 | filters = remove_filters(filters, table, col, api_obj)
233 | filters.append(joined_filt[col]["id"] == getattr(table, col))
234 | filters.append(joined_filt[col]["column"] == api_obj.vars_and_vals[col])
235 | qry = qry.join(*joins)
236 | return qry, filters
237 |
238 |
239 | def query(table, api_obj, stream=False):
240 | vars_and_vals = api_obj.vars_and_vals
241 | values = api_obj.values
242 | exclude = api_obj.exclude
243 |
244 | filters = process_value_filters(table, vars_and_vals, api_obj)
245 | filters += where_filters(table, api_obj.where)
246 | filters += sumlevel_filtering(table, api_obj)
247 |
248 | if values:
249 | pk = [col for col in table.__table__.columns if col.primary_key and col.key not in values]
250 | cols = pk + [getattr(table, col_name) for col_name in values]
251 | else:
252 | cols = get_columns(table)
253 |
254 | if exclude:
255 |         cols = [col for col in cols
256 |                 if (col if isinstance(col, basestring) else col.key) not in exclude]
257 |
258 | # qry = table.query.with_entities(*cols)
259 | qry = table.query
260 |
261 | if hasattr(table, "crosswalk_join"):
262 | qry = table.crosswalk_join(qry)
263 |
264 | if stream or api_obj.display_names:
265 | qry, cols = use_attr_names(table, qry, cols)
266 | qry = qry.with_entities(*cols)
267 |
268 | if hasattr(table, "JOINED_FILTER"):
269 | qry, filters = handle_join(qry, filters, table, api_obj)
270 |
271 | qry = qry.filter(*filters)
272 |
273 | if api_obj.order:
274 | sort = desc if api_obj.sort == "desc" else asc
275 | if api_obj.order not in TableManager.possible_variables:
276 | if api_obj.order == 'abs(pct_change)':
277 | pass # allow this
278 | else:
279 | raise DataUSAException("Bad order parameter", api_obj.order)
280 | # sort_stmt = text("{} {} NULLS LAST".format(api_obj.order, sort))
281 | if api_obj.order == 'abs(pct_change)':
282 | target_col = func.abs(table.pct_change)
283 | else:
284 | target_col = getattr(table, api_obj.order)
285 |
286 | qry = qry.order_by(sort(target_col).nullslast())
287 | if api_obj.limit:
288 | qry = qry.limit(api_obj.limit)
289 |
290 | if stream:
291 | return stream_format(table, cols, qry, api_obj)
292 |
293 | return simple_format(table, cols, qry, api_obj)
294 |
--------------------------------------------------------------------------------
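
The `where` query-string parameter drives `parse_method_and_val` with a small condition grammar (`where=col:cond`, comma-separated; ratio conditions use a `num/den:R<x` column syntax handled in `where_filters`). Illustrative mappings of condition strings to `(method, value, negate)` tuples:

    parse_method_and_val("^2141")     # ("startswith", "2141", False)
    parse_method_and_val("~^2141")    # ("startswith", "2141", True)   negated
    parse_method_and_val("00$")       # ("endswith", "00", False)
    parse_method_and_val("00~$")      # ("endswith", "00", True)       negated
    parse_method_and_val("str!abc")   # ("ne", "abc", False)  string inequality
    parse_method_and_val("!2016")     # ("ne", 2016, False)   integer inequality
    parse_method_and_val(">1000")     # ("gt", 1000, False)
    parse_method_and_val("R<0.5")     # ("rt", 0.5, False)    ratio num/den < 0.5
    parse_method_and_val("United")    # ("like", "United", False)  fallback
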
/datausa/core/attr_crosswalking.py:
--------------------------------------------------------------------------------
1 | '''Attribute crosswalker for join API'''
2 | from sqlalchemy.orm import aliased
3 | from sqlalchemy import and_, or_, func
4 |
5 | from datausa.bls.models import BlsCrosswalk, GrowthILookup, SocCrosswalk
6 | from datausa.attrs.models import GeoCrosswalker
7 |
8 | def geo_crosswalk_join(tbl1, tbl2, col):
9 | my_joins = []
10 | gc_alias = aliased(GeoCrosswalker)
11 | j1 = [
12 | gc_alias, or_(gc_alias.geo_a == tbl1.geo,
13 | gc_alias.geo_b == tbl1.geo)
14 | ]
15 | j1 = [j1, {"full": False, "isouter": False}]
16 | my_joins.append(j1)
17 |
18 | j2_cond = or_(
19 | and_(gc_alias.geo_a == tbl1.geo, gc_alias.geo_b == tbl2.geo),
20 | and_(gc_alias.geo_b == tbl1.geo, gc_alias.geo_a == tbl2.geo)
21 | )
22 | j2 = [tbl2, j2_cond]
23 | j2 = [j2, {"full": False, "isouter": False}]
24 | my_joins.append(j2)
25 |
26 | return my_joins
27 |
28 | def naics_crosswalk_join(tbl1, tbl2, col, already_joined):
29 | my_joins = []
30 | bls_table = None
31 | pums_table = None
32 |
33 | if tbl1.get_schema_name() == "bls":
34 | bls_table = tbl1
35 | pums_table = tbl2
36 | if tbl2.get_schema_name() == "bls":
37 | bls_table = tbl2
38 | pums_table = tbl1
39 |
40 | cond1 = BlsCrosswalk.pums_naics == pums_table.naics if tbl1 is pums_table else BlsCrosswalk.bls_naics == bls_table.naics
41 |
42 | if not BlsCrosswalk.full_name() in already_joined:
43 | j1 = [[BlsCrosswalk, cond1], {"full": False, "isouter": False}]
44 | my_joins.append(j1)
45 | already_joined[BlsCrosswalk.full_name()] = True
46 | j2_cond = and_(BlsCrosswalk.pums_naics == pums_table.naics,
47 | BlsCrosswalk.bls_naics == bls_table.naics)
48 | j2 = [tbl2, j2_cond]
49 | j2 = [j2, {"full": False, "isouter": False}]
50 | my_joins.append(j2)
51 | return my_joins
52 |
53 | def soc_crosswalk_join(tbl1, tbl2, col):
54 | my_joins = []
55 | cond1 = True
56 | pums_table = None
57 | bls_table = None
58 |
59 | if tbl1.get_schema_name() == "bls":
60 | bls_table = tbl1
61 | elif tbl2.get_schema_name() == "bls":
62 | bls_table = tbl2
63 | if tbl1.get_schema_name().startswith("pums"):
64 | pums_table = tbl1
65 | elif tbl2.get_schema_name().startswith("pums"):
66 | pums_table = tbl2
67 |
68 | if pums_table and bls_table:
69 | AliasedSocCrosswalk = aliased(SocCrosswalk)
70 |         cond1 = AliasedSocCrosswalk.soc == pums_table.soc if tbl1 is pums_table else AliasedSocCrosswalk.bls_soc == bls_table.soc
71 | j1 = [[AliasedSocCrosswalk, cond1], {"full": False, "isouter": False}]
72 | my_joins.append(j1)
73 |         j2_cond = and_(AliasedSocCrosswalk.soc == pums_table.soc,
74 | AliasedSocCrosswalk.bls_soc == bls_table.soc)
75 | j2 = [[tbl2, j2_cond], {"full": False, "isouter": False}]
76 | my_joins.append(j2)
77 | else:
78 | onet_table = tbl1 if tbl1.get_schema_name() == 'onet' else tbl2
79 | other_table = pums_table or bls_table
80 | j2_cond = or_(onet_table.soc == other_table.soc,
81 | onet_table.soc == func.left(other_table.soc, 2) + '0000',
82 | onet_table.soc == func.left(other_table.soc, 3) + '000',
83 | onet_table.soc == func.left(other_table.soc, 3) + '100',
84 | onet_table.soc == func.left(other_table.soc, 5) + '0')
85 | my_joins.append([[tbl2, j2_cond], {}])
86 | return my_joins
87 |
88 | def cip_crosswalk_join(tbl1, tbl2, col):
89 | if tbl1.get_schema_name().startswith('pums'):
90 | pums_table = tbl1
91 | elif tbl2.get_schema_name().startswith('pums'):
92 | pums_table = tbl2
93 | if tbl1.get_schema_name() == 'ipeds':
94 | deeper_table = tbl1
95 | elif tbl2.get_schema_name() == 'ipeds':
96 | deeper_table = tbl2
97 | if tbl1.get_schema_name() == 'onet':
98 | deeper_table = tbl1
99 |     elif tbl2.get_schema_name() == 'onet':
100 | deeper_table = tbl2
101 | direct_join = getattr(pums_table, col) == func.left(getattr(deeper_table, col), 2)
102 |
103 | my_joins = [[[tbl2, direct_join], {"full": False, "isouter": False}]]
104 | return my_joins
105 |
--------------------------------------------------------------------------------
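
The zero-padding `or_` in `soc_crosswalk_join` lets a detailed SOC code match the coarser codes ONET reports against. Worked in plain Python (illustrative; "291141" is SOC 29-1141, registered nurses):

    soc = "291141"
    candidates = [
        soc,                 # exact match
        soc[:2] + "0000",    # "290000"  two-digit major group
        soc[:3] + "000",     # "291000"  three-digit minor group
        soc[:3] + "100",     # "291100"  broad-group variant
        soc[:5] + "0",       # "291140"  five-digit broad occupation
    ]
    # The SQL side builds the same five candidates with func.left() and
    # string concatenation, accepting the row if onet.soc equals any one.
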
/datausa/core/exceptions.py:
--------------------------------------------------------------------------------
1 | class DataUSAException(Exception): pass
--------------------------------------------------------------------------------
/datausa/core/models.py:
--------------------------------------------------------------------------------
1 | from datausa.core.exceptions import DataUSAException
2 | from datausa.attrs.consts import ALL, OR
3 |
4 |
5 | class BaseModel(object):
6 | median_moe = None
7 | size = None
8 | source_title = ''
9 | source_link = ''
10 | source_org = ''
11 |
12 | # def __init__(levels, moe, size):
13 | # self.supported_levels = levels
14 | # self.median_moe = moe
15 | # self.size = size
16 |
17 | @classmethod
18 |     def get_supported_levels(cls):
19 | return {}
20 |
21 | @classmethod
22 | def info(cls, api_obj=None):
23 | dataset = cls.source_title
24 | if api_obj and api_obj.get_year():
25 | dataset = "{} {}".format(api_obj.get_year(), dataset)
26 | return {
27 | "dataset": dataset,
28 | "org": cls.source_org,
29 | "table": cls.full_name(),
30 | "link": cls.source_link,
31 | "supported_levels": cls.get_supported_levels(),
32 | }
33 |
34 | @classmethod
35 | def full_name(cls):
36 | table_name = cls.__tablename__
37 | schema_name = cls.__table_args__["schema"]
38 | return "{}.{}".format(schema_name, table_name)
39 |
40 | @classmethod
41 | def get_schema_name(cls):
42 | return cls.__table_args__["schema"]
43 |
44 | @classmethod
45 | def col_strs(cls, short_name=False):
46 | results = [str(col) for col in cls.__table__.columns]
47 | if short_name:
48 | results = [col_name.split(".")[-1] for col_name in results]
49 | return results
50 |
51 | @classmethod
52 | def can_show(cls, attr, lvl):
53 | supported = cls.get_supported_levels()
54 | return attr in supported and lvl in supported[attr]
55 |
56 | class ApiObject(object):
57 | def __init__(self, **kwargs):
58 | allowed = ["vars_needed", "vars_and_vals", "values",
59 | "shows_and_levels", "force", "where", "order",
60 | "sort", "limit", "exclude", "auto_crosswalk",
61 | "display_names", "offset"]
62 | self._year = None
63 | self.auto_crosswalk = False
64 | self.display_names = False
65 | self.offset = None
66 | self.vars_and_vals = {}
67 | for keyword, value in kwargs.items():
68 | if keyword in allowed:
69 | setattr(self, keyword, value)
70 | else:
71 | raise DataUSAException("Invalid ApiObject attribute")
72 | if self.limit:
73 | self.limit = int(self.limit)
74 | if self.offset:
75 | self.offset = int(self.offset)
76 | self.subs = {}
77 | self.table_list = []
78 | self.warnings = []
79 | if self.exclude:
80 | self.exclude = self.exclude.split(",")
81 | if hasattr(self, "year") and self.year != ALL:
82 | self._year = self.year
83 | self.force_schema = None
84 | self.auto_crosswalk = self.auto_crosswalk in [True, 'true', '1']
85 |         self.display_names = self.display_names in [True, 'true', '1']
86 | # if not "geo" in self.shows_and_levels and "geo" in self.vars_and_vals:
87 | # if self.vars_and_vals["geo"]:
88 | # prefix = self.vars_and_vals["geo"][:3]
89 | # lookup = {"010": "nation", "040": "state", "050": "county", "310":"msa", "795":"puma", "160":"place"}
90 | # if prefix in lookup:
91 | # self.shows_and_levels["geo"] = lookup[prefix]
92 | def set_year(self, yr):
93 | self._year = str(int(yr))
94 |
95 | def get_year(self):
96 | return self._year
97 |
98 | def capture_logic(self, table_list):
99 | self.table_list = table_list
100 |
101 | def warn(self, msg):
102 | self.warnings.append(msg)
103 |
104 | def record_sub(self, tbl, col, orig_val, new_val):
105 | deltas = [{"original": ov, "replacement": nv} for ov, nv in zip(orig_val, new_val) if ov != nv]
106 |
107 | tbl_name = tbl.full_name()
108 | if tbl_name not in self.subs:
109 | self.subs[tbl_name] = {}
110 | if col not in self.subs[tbl_name]:
111 | self.subs[tbl_name][col] = {}
112 | self.subs[tbl_name][col] = deltas
113 |
114 | def where_vars(self):
115 | if not hasattr(self, "where") or not self.where:
116 | return []
117 | # split by commas
118 | wheres = self.where.split(",")
119 | # then split by colons, and take the last item after period e.g.
120 | var_names = [x.split(":")[0].split(".")[-1] for x in wheres]
121 | var_names = [x for x in var_names if x != 'sumlevel']
122 | # so where=year:2014,grads_total.degree:5 => ['year', 'degree']
123 | return var_names
124 |
--------------------------------------------------------------------------------
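
A minimal sketch of building an `ApiObject` by hand, mirroring what `build_api_obj()` in core/views.py assembles from request args. Every keyword in `allowed` must be supplied, since `__init__` reads attributes like `limit` unconditionally:

    api_obj = ApiObject(
        vars_needed=["year", "geo", "avg_wage"],
        vars_and_vals={"year": "latest"},
        shows_and_levels={"geo": "state"},
        values=["avg_wage"],
        where="", force="", order="avg_wage",
        sort="desc", limit="10", exclude=None,
        auto_crosswalk=False, display_names="true",
        offset=None)

    assert api_obj.limit == 10            # coerced to int
    assert api_obj.display_names is True  # 'true'/'1' accepted
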
/datausa/core/registrar.py:
--------------------------------------------------------------------------------
1 | from datausa.pums.models import *
2 | from datausa.pums.models_5 import *
3 | from datausa.ipeds.models import *
4 | from datausa.onet.models import *
5 | from datausa.chr.models import *
6 | from datausa.bls.models import *
7 | from datausa.cbp.models import *
8 | from datausa.bea.models import *
9 | from datausa.acs.models import *
10 | from datausa.acs.automap_models import *
11 | from datausa.acs.stats_models import *
12 | from datausa.dartmouth.models import *
13 | from datausa.freight.models import *
14 | from datausa.ed.models import DefaultsYg, DefaultsYu, DefaultsYur, DefaultsYure
15 | from datausa.attrs.models import UniversityCrosswalk
16 | from datausa.opiods.models import DrugOverdoseDeathRate, OpiodOverdoseDeathRate, NonMedUsePainMeds
17 |
18 | registered_models = [
19 | # PUMS
20 | Yg, Ygd, Ygr, Ygi, Ygio,
21 | Yo, Yow, Yos, Yod, Yor, Yoas,
22 | Ygo, Ygw, Ygor, Ygs, Ygb, Ygos,
23 |
24 | Yc, Ygc, Yca, Ycd, Ycb, Yoc, Yic, Ycs,
25 | Yi, Yio, Yior, Yios, Yocd, Yid, Yir, Yis,
26 | Yiw,
27 | Ya,
28 |
29 | # PUMS 5-year tables
30 | Ygo5, Ygi5, Yoas5, Ygor5, Ygos5, Ygb5,
31 |
32 | # IPEDS
33 | TuitionYu, TuitionYc, TuitionYcu, TuitionYcs, TuitionYgs,
34 | GradsYu, GradsYcu, GradsYc, GradsYcd, GradsYgd, GradsYud, GradsYucd,
35 | GradsYg, GradsYgc, GradsYgu, GradsYgs, GradsYgcd,
36 | GradsPctYcu,
37 | UnivGeo,
38 | AdmissionsY,
39 | AdmissionsYu,
40 |     EnrollmentEfaYusrle,
42 | EnrollmentEfaYus,
43 | EnrollmentEfaYue,
44 | EnrollmentEfaYul,
45 | EnrollmentEfaYur,
46 | LivingArrangementSfaYa, LivingArrangementSfaYu, LivingArrangementSfaYua,
47 | GradRateGrYu, GradRateGrYur, GradRateGrYus, GradRateGrYusr,
48 | FinancialsYu,
49 | AidSfaYui, UniversitySfaYu,
50 | FinancialsEndowmentQuintilesYu,
51 | RetentionEfdYu,
52 | NISSalariesYu,
53 | NISSalariesYuo,
54 | ISSalariesYu,
55 | ISSalariesYua,
56 | ISSalariesYus,
57 | ISSalariesYuas,
58 | ExpensesYu,
59 | ExpensesYue,
60 | ICLivingExpensesYua, ICMaxLivingExpensesYua,
61 | GradRateTimeframeYut,
62 | # ONET
63 | SkillByCip, SkillBySoc,
64 |
65 | # Dartmouth
66 | YgPrimaryCare, YgReimbursements, YgcPostDischarge,
67 |
68 | # County Health Rankings
69 | HealthYg,
70 |
71 | # Bureau of Labor Statistics
72 | OesYgo, QcewYgi, GrowthI16, GrowthI, GrowthO16, GrowthO, CesYi,
73 |
74 | # County Business Patterns
75 | CbpYgi, CbpYg,
76 |
77 | # BEA I/O Tables
78 | BeaUse,
79 |
80 | # ACS 1-year
81 | Acs1_Ygl_Speakers,
82 | Acs1_Ygo_Num_Emp, Acs1_Ygo_Earnings, Acs1_Ygi_Num_Emp,
83 | Acs1_Yg, Acs1_Yg_IncDist, Acs1_Yg_PovertyRace,
84 | Acs1_Yg_NatAge, Acs1_Yg_Race, Acs1_Yg_Conflict,
85 | Acs1_Yg_PropertyValue, Acs1_Yg_PropertyTax, Acs1_Yg_Vehicles,
86 | Acs1_Yg_TravelTime, Acs1_Yg_Transport,
87 | Acs1_Yg_Poverty, Acs1_Yg_Tenure, Acs1_Yg_Income,
88 | Acs1_Yg_Num_Emp,
89 | # ACS
90 | Acs5_Yg, Acs5_Yg_Income, Acs5_Yg_Conflict, Acs5_Yg_IncDist,
91 | Acs5_Ygo_Earnings,
92 | Acs5_Yg_NatAge, Acs5_Yg_Race, Acs5_Yg_Tenure, Acs5_Yg_Transport,
93 | Acs5_Yg_TravelTime, Acs5_Yg_Vehicles, Acs5_Yg_Poverty,
94 | Acs5_Yg_PropertyTax, Acs5_Yg_PropertyValue, Acs5_Ygl_Speakers,
95 | Acs5_Yg_PovertyRace,
96 | Acs5_Ygo_Num_Emp, Acs5_Ygi_Num_Emp,
97 | Acs5_Yg_Num_Emp,
98 | # ACS 3-year
99 | Acs3_Ygo_Num_Emp, Acs3_Ygi_Num_Emp, Acs3_Ygi_MedEarnings,
100 |
101 | # Stats
102 | StateStats, CountyStats, MSAStats, PlaceStats, PUMAStats,
103 |
104 | # ACS Health
105 | Acs1_Yga_Health, Acs1_Ygai_Health, Acs1_Ygais_Health,
106 | Acs1_Ygas_Health, Acs1_Ygi_Health, Acs1_Ygis_Health, Acs1_Ygs_Health,
107 |
108 | # Freight
109 | FAFYodmp, FAFYodp, FAFYomp, FAFYodm, FAFYop, FAFYdp, FAFYdm, FAFYom, FAFYod,
110 |
111 | # Loans
112 | DefaultsYu, DefaultsYg, DefaultsYur, DefaultsYure, UniversityCrosswalk,
113 |
114 |     # Opioids
115 | DrugOverdoseDeathRate, OpiodOverdoseDeathRate, NonMedUsePainMeds
116 | ]
117 |
118 |
119 | def register(cls):
120 | registered_models.append(cls)
121 |
--------------------------------------------------------------------------------
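
`register()` simply appends to the module-level list, so a new dataset becomes visible by importing its models and registering them, assuming registration happens before the TableManager builds its variable index. A hypothetical sketch (`MySchemaYg` and the `myschema.yg` table are made up for illustration):

    from datausa.database import db
    from datausa.core.models import BaseModel
    from datausa.core.registrar import register
    from datausa.attrs.consts import ALL

    class MySchemaYg(db.Model, BaseModel):
        __tablename__ = "yg"
        __table_args__ = {"schema": "myschema"}
        median_moe = 1
        year = db.Column(db.Integer(), primary_key=True)
        geo = db.Column(db.String(), primary_key=True)

        @classmethod
        def get_supported_levels(cls):
            return {"geo": [ALL]}

    register(MySchemaYg)   # now a candidate for table selection
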
/datausa/core/streaming.py:
--------------------------------------------------------------------------------
1 | '''Module to provide streaming of sqlalchemy queries back to client'''
2 | import simplejson
3 | from flask import Response
4 |
5 | def stream_qry_csv(cols, qry, api_obj):
6 | def generate():
7 | yield ','.join([col if isinstance(col, basestring) else col.key for col in cols]) + '\n'
8 | for row in qry:
9 | row = [u'"{}"'.format(x) if isinstance(x, basestring) else str(x) for x in list(row)]
10 | yield u','.join(row) + u'\n'
11 | return Response(generate(), mimetype='text/csv')
12 |
13 | def stream_qry(tables, cols, data, api_obj):
14 | ''' Based on https://github.com/al4/orlo/blob/1b3930bae4aa37eb51aed33a97c088e576cb5a99/orlo/route_api.py#L285-L311'''
15 | def generate(tables):
16 | headers = [col if isinstance(col, basestring) else col.key for col in cols]
17 | inf = float('inf')
18 |
19 | """
20 | A lagging generator to stream JSON so we don't have to hold everything in memory
21 | This is a little tricky, as we need to omit the last comma to make valid JSON,
22 | thus we use a lagging generator, similar to http://stackoverflow.com/questions/1630320/
23 | """
24 | yield u'{'
25 |
26 | rows = data.__iter__()
27 | try:
28 | prev_row = next(rows) # get first result
29 | except StopIteration:
30 |             # StopIteration means there were zero rows, so yield a valid empty doc and stop
31 | yield u'''"data": [],
32 | "headers": {},
33 | "source": {},
34 | "subs": {},
35 | "limit": {},
36 | "warnings": {}
37 | '''.format(simplejson.dumps(list(headers)), simplejson.dumps([table.info(api_obj) for table in tables]), simplejson.dumps(api_obj.subs),
38 | api_obj.limit,
39 | simplejson.dumps(api_obj.warnings)) + u'}'
40 | raise StopIteration
41 |
42 |         # We have at least one row; yield the opening json
43 | yield u'"data": ['
44 |
45 |         # Iterate over the rows, staying one behind
46 | for row in rows:
47 | yield simplejson.dumps([x if x != inf else None for x in prev_row]) + u', '
48 | prev_row = row
49 |
50 | # Now yield the last iteration without comma
51 | yield simplejson.dumps([x if x != inf else None for x in prev_row])
52 |
53 | yield u'''], "headers": {},
54 | "source": {},
55 | "subs": {},
56 | "limit": {},
57 | "warnings": {}
58 | '''.format(simplejson.dumps(list(headers)), simplejson.dumps([table.info(api_obj) for table in tables]), simplejson.dumps(api_obj.subs),
59 | api_obj.limit,
60 | simplejson.dumps(api_obj.warnings)) + u'}'
61 |
62 | return Response(generate(tables), content_type='application/json')
63 |
--------------------------------------------------------------------------------
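
The lagging-generator trick above, reduced to its essence: hold one row back so the final row can be emitted without a trailing comma, keeping the output valid JSON without buffering the whole result set. A standalone sketch, not the exact streaming code above:

    import simplejson

    def json_array(rows):
        rows = iter(rows)
        yield u'['
        try:
            prev = next(rows)
        except StopIteration:        # zero rows: close immediately
            yield u']'
            return
        for row in rows:             # always one row behind
            yield simplejson.dumps(prev) + u', '
            prev = row
        yield simplejson.dumps(prev) # last row, no trailing comma
        yield u']'

    # u''.join(json_array([[1, 2], [3, 4]])) == u'[[1, 2], [3, 4]]'
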
/datausa/core/views.py:
--------------------------------------------------------------------------------
1 | from flask import Blueprint, request, jsonify
2 | from datausa.attrs.models import Cip, Naics, University
3 | from datausa.core import table_manager
4 | from datausa.core import api, join_api
5 | from datausa.core.models import ApiObject
6 | from datausa.core.crosswalker import crosswalk
7 | from datausa.util.big_places import is_big_geo
8 | from datausa.core.exceptions import DataUSAException
9 |
10 |
11 | mod = Blueprint('core', __name__, url_prefix='/api')
12 |
13 | manager = table_manager.TableManager()
14 |
15 | def show_attrs(attr_obj):
16 | attrs = attr_obj.query.all()
17 | data = [a.serialize() for a in attrs]
18 | return jsonify(data=data)
19 |
20 | def build_api_obj(default_limit=None):
21 | show = request.args.get("show", "")
22 | sumlevel = request.args.get("sumlevel", "").lower()
23 | required = request.args.get("required", "")
24 | force = request.args.get("force", "")
25 | where = request.args.get("where", "")
26 | order = request.args.get("order", "")
27 | sort = request.args.get("sort", "")
28 | limit = request.args.get("limit", default_limit)
29 | offset = request.args.get("offset", None)
30 | exclude = request.args.get("exclude", None)
31 | auto_crosswalk = request.args.get("auto_crosswalk", False)
32 | display_names = request.args.get("display_names", False)
33 |
34 | shows = show.split(",")
35 | sumlevels = sumlevel.split(",")
36 | if shows and not sumlevel:
37 | sumlevels = ["all" for show in shows]
38 | values = required.split(",") if required else []
39 |
40 | shows_and_levels = {val:sumlevels[idx] for idx, val in enumerate(shows)}
41 |
42 | variables = manager.possible_variables
43 | vars_and_vals = {var:request.args.get(var, None) for var in variables}
44 | vars_and_vals = {k:v for k,v in vars_and_vals.items() if v}
45 |
46 |
47 | vars_needed = vars_and_vals.keys() + shows + values
48 | api_obj = ApiObject(vars_needed=vars_needed, vars_and_vals=vars_and_vals,
49 | shows_and_levels=shows_and_levels, values=values,
50 | where=where, force=force, order=order,
51 | sort=sort, limit=limit, exclude=exclude,
52 | auto_crosswalk=auto_crosswalk,
53 | display_names=display_names,
54 | offset=offset)
55 | return api_obj
56 |
57 | @mod.route("/")
58 | @mod.route("/v1/")
59 | @mod.route("/csv/", defaults={'csv': True})
60 | def api_view(csv=None):
61 | api_obj = build_api_obj()
62 | api_obj = manager.force_1yr_for_big_places(api_obj)
63 | api_obj = manager.schema_selector(api_obj)
64 | table_list = manager.all_tables(api_obj)
65 | table = manager.select_best(table_list, api_obj)
66 | api_obj.capture_logic(table_list)
67 | api_obj = manager.crosswalk(table, api_obj)
68 | data = api.query(table, api_obj, stream=csv)
69 | return data
70 |
71 | @mod.route("/join/")
72 | @mod.route("/join/csv/", defaults={'csv': True})
73 | def api_join_view(csv=None):
74 | api_obj = build_api_obj(default_limit=500)
75 | if api_obj.limit and api_obj.limit > 80000:
76 | raise DataUSAException("Limit parameter must be less than 80,000")
77 | tables = manager.required_tables(api_obj)
78 | data = join_api.joinable_query(tables, api_obj, manager.table_years, csv_format=csv)
79 | return data
80 |
81 |
82 | @mod.route("/logic/")
83 | def logic_view():
84 | api_obj = build_api_obj()
85 | table_list = manager.all_tables(api_obj)
86 | return jsonify(tables=[table.info(api_obj) for table in table_list])
87 |
88 | @mod.route("/variables/")
89 | def view_variables():
90 | '''show available data tables and contained variables'''
91 | shows = request.args.get("show", "").split(",")
92 | sumlevels = request.args.get("sumlevel", "").split(",")
93 | list_all = sumlevels == [""] and shows == [""]
94 | if sumlevels == [""]:
95 | sumlevels = ["all"] * len(shows)
96 | combos = zip(shows, sumlevels)
97 | results = {table.full_name(): table.col_strs(short_name=True) for table in table_manager.registered_models
98 | if list_all or all([table.can_show(show, sumlevel) for show,sumlevel in combos])}
99 | return jsonify(metadata=results)
100 |
101 |
102 | @mod.route('/table/variables/')
103 | def all_table_vars():
104 | '''show all available data tables and contained variables'''
105 | results = {table.full_name(): table.col_strs(short_name=True) for table in table_manager.registered_models}
106 | return jsonify(metadata=results)
107 |
108 | @mod.route("/years/")
109 | def years_view():
110 | years_data = manager.table_years_set
111 | return jsonify(data=years_data)
112 |
--------------------------------------------------------------------------------
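
Tracing a request through `build_api_obj()` makes the parameter model concrete. A hypothetical call, assuming `avg_wage` is a registered variable (it is a column of bls.oes_ygo above) and that consts.LATEST is the string "latest":

    GET /api/?show=geo&sumlevel=state&required=avg_wage&year=latest&order=avg_wage&sort=desc&limit=5

yields `shows_and_levels == {"geo": "state"}`, `values == ["avg_wage"]`, and `vars_and_vals == {"year": "latest"}`; the table manager then picks the best registered table that supports geo at the state level, and `api.query()` applies the latest-year substitution, ordering, and limit.
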
/datausa/dartmouth/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/dartmouth/__init__.py
--------------------------------------------------------------------------------
/datausa/dartmouth/models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa.attrs.models import Geo
3 | from datausa import cache
4 |
5 | from datausa.core.models import BaseModel
6 | from datausa.attrs.consts import NATION, STATE, COUNTY, ALL
7 | from sqlalchemy.ext.declarative import declared_attr
8 | from sqlalchemy.ext.automap import automap_base
9 | from sqlalchemy import MetaData
10 |
11 | SCHEMA_NAME = 'dartmouth'
12 | CACHE_KEY = '{}_metadata'.format(SCHEMA_NAME)
13 | metadata = cache.get(CACHE_KEY)
14 | if not metadata:
15 | metadata = MetaData(schema=SCHEMA_NAME, bind=db.engine)
16 | metadata.reflect()
17 | cache.set(CACHE_KEY, metadata)
18 |
19 | AutomapBase = automap_base(bind=db.engine, metadata=metadata)
20 |
21 |
22 | class DartmouthBase(db.Model, BaseModel):
23 | __abstract__ = True
24 | __table_args__ = {"schema": SCHEMA_NAME, "extend_existing": True}
25 | source_title = 'Dartmouth Atlas of Health Care'
26 | source_link = 'http://www.dartmouthatlas.org'
27 | source_org = 'Dartmouth College'
28 |
29 | @declared_attr
30 | def year(cls):
31 | return db.Column(db.Integer(), primary_key=True)
32 |
33 | @declared_attr
34 | def geo(cls):
35 | return db.Column(db.String(), db.ForeignKey(Geo.id), primary_key=True)
36 |
37 | @classmethod
38 | def get_supported_levels(cls):
39 | return {"geo": [ALL, NATION, STATE, COUNTY]}
40 |
41 | @classmethod
42 | def geo_filter(cls, level):
43 | if level == ALL:
44 | return True
45 | level_map = {STATE: "040", COUNTY: "050", NATION: "010"}
46 | level_code = level_map[level]
47 | return cls.geo.startswith(level_code)
48 |
49 |
50 | class YgcPostDischarge(AutomapBase, DartmouthBase):
51 | __tablename__ = 'ygc_post_discharge'
52 | median_moe = 2
53 |
54 | cohort = db.Column(db.String(), primary_key=True)
55 |
56 | @classmethod
57 | def get_supported_levels(cls):
58 | return {"geo": [ALL, NATION, STATE, COUNTY], "cohort": [ALL]}
59 |
60 | class YgPrimaryCare(AutomapBase, DartmouthBase):
61 | __tablename__ = 'yg_prim_care_access'
62 | median_moe = 1
63 |
64 |
65 | class YgReimbursements(AutomapBase, DartmouthBase):
66 | __tablename__ = 'yg_reimbursements'
67 | median_moe = 1
68 |
69 |
70 | AutomapBase.prepare(db.engine, reflect=False)
71 |
--------------------------------------------------------------------------------
/datausa/database.py:
--------------------------------------------------------------------------------
1 | from datausa import app
2 | from flask_sqlalchemy import SQLAlchemy
3 | db = SQLAlchemy(app)
4 |
--------------------------------------------------------------------------------
/datausa/ed/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/ed/__init__.py
--------------------------------------------------------------------------------
/datausa/ed/models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa.core.models import BaseModel
3 | from sqlalchemy.ext.declarative import declared_attr
4 | from datausa.attrs.consts import ALL
5 | from datausa.attrs.models import UniversityCrosswalk, Geo
6 | from sqlalchemy.orm import column_property
7 | from datausa.attrs import consts
8 |
9 |
10 | class GeoId(object):
11 | LEVELS = [consts.NATION, consts.STATE, consts.COUNTY, consts.ALL]
12 |
13 | @declared_attr
14 | def geo(cls):
15 | return db.Column(db.String(), db.ForeignKey(Geo.id), primary_key=True)
16 |
17 | @classmethod
18 | def get_supported_levels(cls):
19 | return {consts.GEO: GeoId.LEVELS}
20 |
21 | @classmethod
22 | def geo_filter(cls, level):
23 | if level == ALL:
24 | return True
25 | level_map = {
26 | consts.NATION: "010",
27 | consts.STATE: "040",
28 | consts.COUNTY: "050",
29 | consts.MSA: "310"
30 | }
31 | level_code = level_map[level]
32 | return cls.geo.startswith(level_code)
33 |
34 |
35 | class BaseEd(db.Model, BaseModel):
36 | __abstract__ = True
37 | __table_args__ = {"schema": "ed"}
38 | supported_levels = {"year": [ALL]}
39 | source_title = 'Official Cohort Default Rates for Schools'
40 | source_link = 'https://www2.ed.gov/offices/OSFAP/defaultmanagement/cdr.html'
41 | source_org = 'Department of Education'
42 |
43 | default_rate = db.Column(db.Float)
44 | num_defaults = db.Column(db.Integer)
45 | num_borrowers = db.Column(db.Integer)
46 |
47 |
48 | class UniversityCols(object):
49 | @declared_attr
50 | def opeid(cls):
51 | return db.Column(db.String(), primary_key=True)
52 |
53 | @declared_attr
54 | def university(cls):
55 | return column_property(UniversityCrosswalk.university)
56 |
57 | @classmethod
58 | def crosswalk_join(cls, qry):
59 | cond = UniversityCrosswalk.opeid6 == cls.opeid
60 | return qry.join(UniversityCrosswalk, cond)
61 |
62 |
63 | class DefaultsYu(BaseEd, UniversityCols):
64 | __tablename__ = "yu_defaults"
65 | median_moe = 1
66 |
67 | year = db.Column(db.Integer(), primary_key=True)
68 | rate_type = db.Column(db.String())
69 | default_rate = db.Column(db.Float)
70 | num_defaults = db.Column(db.Integer)
71 | num_borrowers = db.Column(db.Integer)
72 |
73 | @classmethod
74 | def get_supported_levels(cls):
75 | return {
76 | "year": [ALL],
77 | "university": [ALL],
78 | "opeid": [ALL],
79 | }
80 |
81 |
82 | class DefaultsYg(BaseEd, GeoId):
83 | __tablename__ = "yg_defaults"
84 | median_moe = 1.1
85 |
86 | year = db.Column(db.Integer(), primary_key=True)
87 | rate_type = db.Column(db.String())
88 | default_rate = db.Column(db.Float)
89 | num_defaults = db.Column(db.Integer)
90 | num_borrowers = db.Column(db.Integer)
91 |
92 | @classmethod
93 | def get_supported_levels(cls):
94 | return {
95 | "year": [ALL],
96 | "geo": GeoId.LEVELS
97 | }
98 |
99 |
100 | class DefaultsYur(BaseEd, UniversityCols):
101 | __tablename__ = "yur_defaults"
102 | median_moe = 2
103 |
104 | year = db.Column(db.Integer(), primary_key=True)
105 | rate_type = db.Column(db.String(), primary_key=True)
106 | default_rate = db.Column(db.Float)
107 | num_defaults = db.Column(db.Integer)
108 | num_borrowers = db.Column(db.Integer)
109 |
110 | @classmethod
111 | def get_supported_levels(cls):
112 | return {
113 | "year": [ALL],
114 | "university": [ALL],
115 | "opeid": [ALL],
116 | "rate_type": [ALL]
117 | }
118 |
119 |
120 | class DefaultsYure(BaseEd, UniversityCols):
121 | __tablename__ = "yure_defaults"
122 | median_moe = 3
123 |
124 | year = db.Column(db.Integer(), primary_key=True)
125 | rate_type = db.Column(db.String(), primary_key=True)
126 | ethnic_code = db.Column(db.Integer(), primary_key=True)
127 |
128 | @classmethod
129 | def get_supported_levels(cls):
130 | return {
131 | "year": [ALL],
132 | "university": [ALL],
133 | "opeid": [ALL],
134 | "rate_type": [ALL],
135 | "ethnic_code": [ALL],
136 | }
137 |
--------------------------------------------------------------------------------
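
The `UniversityCols` mixin is another instance of the crosswalk pattern: default-rate rows are keyed by six-digit OPE ID, and `crosswalk_join` grafts on the DataUSA `university` id through `UniversityCrosswalk`. A sketch of the resulting query, with approximate SQL in the comment:

    qry = DefaultsYu.query
    qry = DefaultsYu.crosswalk_join(qry)
    # roughly: SELECT yu_defaults.*, university_crosswalk.university
    #          FROM ed.yu_defaults
    #          JOIN university_crosswalk
    #            ON university_crosswalk.opeid6 = yu_defaults.opeid
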
/datausa/freight/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/freight/__init__.py
--------------------------------------------------------------------------------
/datausa/freight/models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa.core.models import BaseModel
3 | from sqlalchemy.ext.declarative import declared_attr
4 | from datausa.attrs.consts import STATE, COUNTY, ALL
5 | from datausa.attrs.models import Geo, Sctg, ProductCrosswalk
6 | from sqlalchemy.orm import column_property
7 |
8 | class BaseFreight(db.Model, BaseModel):
9 | __abstract__ = True
10 | __table_args__ = {"schema": "freight"}
11 | supported_levels = {}
12 | source_title = 'Freight Analysis Framework'
13 | source_link = 'https://www.rita.dot.gov/bts/sites/rita.dot.gov.bts/files/subject_areas/freight_transportation/faf'
14 | source_org = 'Bureau of Transportation Statistics'
15 | tons = db.Column(db.Float)
16 | millions_of_2012_dollars = db.Column(db.Float)
17 |
18 |
19 | class Product(object):
20 | @declared_attr
21 | def sctg(cls):
22 | return db.Column(db.String(), db.ForeignKey(Sctg.id), primary_key=True)
23 |
24 | @declared_attr
25 | def napcs(cls):
26 | return column_property(ProductCrosswalk.napcs)
27 |
28 | @classmethod
29 | def crosswalk_join(cls, qry):
30 | cond = ProductCrosswalk.sctg == cls.sctg
31 | return qry.join(ProductCrosswalk, cond)
32 |
33 | class OriginGeo(object):
34 | @declared_attr
35 | def origin_geo(cls):
36 | return db.Column(db.String(), db.ForeignKey(Geo.id), primary_key=True)
37 |
38 | @classmethod
39 | def origin_geo_filter(cls, level):
40 | if level == ALL:
41 | return True
42 | level_map = {STATE: "040", COUNTY: "050"}
43 | level_code = level_map[level]
44 | return cls.origin_geo.startswith(level_code)
45 |
46 | class DestGeo(object):
47 | @declared_attr
48 | def destination_geo(cls):
49 | return db.Column(db.String(), db.ForeignKey(Geo.id), primary_key=True)
50 |
51 | @classmethod
52 | def destination_geo_filter(cls, level):
53 | if level == ALL:
54 | return True
55 | level_map = {STATE: "040", COUNTY: "050"}
56 | level_code = level_map[level]
57 | return cls.destination_geo.startswith(level_code)
58 |
59 |
60 | class FAFYodmp(BaseFreight, OriginGeo, DestGeo, Product):
61 | __tablename__ = "yodmp_faf"
62 | median_moe = 4
63 | year = db.Column(db.Integer(), primary_key=True)
64 | transportation_mode = db.Column(db.String(), primary_key=True)
65 |
66 | @classmethod
67 | def get_supported_levels(cls):
68 | return {
69 | "origin_geo": [STATE, COUNTY, ALL],
70 | "destination_geo": [STATE, COUNTY, ALL],
71 | "transportation_mode": [ALL],
72 | "sctg": [ALL],
73 | "napcs": [ALL]
74 | }
75 |
76 | class FAFYodm(BaseFreight, OriginGeo, DestGeo):
77 | __tablename__ = "yodm_faf"
78 | median_moe = 3
79 | year = db.Column(db.Integer(), primary_key=True)
80 | transportation_mode = db.Column(db.String(), primary_key=True)
81 |
82 | @classmethod
83 | def get_supported_levels(cls):
84 | return {
85 | "origin_geo": [STATE, COUNTY, ALL],
86 | "destination_geo": [STATE, COUNTY, ALL],
87 | "transportation_mode": [ALL]
88 | }
89 |
90 | class FAFYod(BaseFreight, OriginGeo, DestGeo):
91 | __tablename__ = "yod_faf"
92 | median_moe = 2
93 | year = db.Column(db.Integer(), primary_key=True)
94 |
95 | @classmethod
96 | def get_supported_levels(cls):
97 | return {
98 | "origin_geo": [STATE, COUNTY, ALL],
99 | "destination_geo": [STATE, COUNTY, ALL]
100 | }
101 |
102 |
103 | class FAFYodp(BaseFreight, OriginGeo, DestGeo, Product):
104 | __tablename__ = "yodp_faf"
105 | median_moe = 3
106 | year = db.Column(db.Integer(), primary_key=True)
107 |
108 | @classmethod
109 | def get_supported_levels(cls):
110 | return {
111 | "origin_geo": [STATE, COUNTY, ALL],
112 | "destination_geo": [STATE, COUNTY, ALL],
113 | "sctg": [ALL],
114 | "napcs": [ALL]
115 | }
116 |
117 | class FAFYomp(BaseFreight, OriginGeo, Product):
118 | __tablename__ = "yomp_faf"
119 | median_moe = 3
120 | year = db.Column(db.Integer(), primary_key=True)
121 | transportation_mode = db.Column(db.String(), primary_key=True)
122 |
123 | @classmethod
124 | def get_supported_levels(cls):
125 | return {
126 | "origin_geo": [STATE, COUNTY, ALL],
127 | "transportation_mode": [ALL],
128 | "sctg": [ALL],
129 | "napcs": [ALL]
130 | }
131 |
132 | class FAFYop(BaseFreight, OriginGeo, Product):
133 | __tablename__ = "yop_faf"
134 | median_moe = 2
135 | year = db.Column(db.Integer(), primary_key=True)
136 |
137 | @classmethod
138 | def get_supported_levels(cls):
139 | return {
140 | "origin_geo": [STATE, COUNTY, ALL],
141 | "sctg": [ALL],
142 | "napcs": [ALL]
143 | }
144 |
145 | class FAFYdp(BaseFreight, DestGeo, Product):
146 | __tablename__ = "ydp_faf"
147 | median_moe = 2
148 | year = db.Column(db.Integer(), primary_key=True)
149 |
150 | @classmethod
151 | def get_supported_levels(cls):
152 | return {
153 | "destination_geo": [STATE, COUNTY, ALL],
154 | "sctg": [ALL],
155 | "napcs": [ALL]
156 | }
157 |
158 | class FAFYdm(BaseFreight, DestGeo):
159 | __tablename__ = "ydm_faf"
160 | median_moe = 2
161 | year = db.Column(db.Integer(), primary_key=True)
162 | transportation_mode = db.Column(db.String(), primary_key=True)
163 |
164 | @classmethod
165 | def get_supported_levels(cls):
166 | return {
167 | "destination_geo": [STATE, COUNTY, ALL],
168 | "sctg": [ALL],
169 | "transportation_mode": [ALL]
170 | }
171 |
172 | class FAFYom(BaseFreight, OriginGeo):
173 | __tablename__ = "yom_faf"
174 | median_moe = 2
175 | year = db.Column(db.Integer(), primary_key=True)
176 | transportation_mode = db.Column(db.String(), primary_key=True)
177 |
178 | @classmethod
179 | def get_supported_levels(cls):
180 | return {
181 | "origin_geo": [STATE, COUNTY, ALL],
182 | "sctg": [ALL],
183 | "transportation_mode": [ALL]
184 | }
185 |
--------------------------------------------------------------------------------
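
Each FAF table above is assembled by composing the small mixins (`OriginGeo`, `DestGeo`, `Product`) with `BaseFreight`, which keeps the per-table code down to a name, a `median_moe`, and its extra primary keys. A hypothetical origin-only table would need nothing more (class and table name are made up for illustration):

    class FAFYo(BaseFreight, OriginGeo):
        __tablename__ = "yo_faf"          # hypothetical
        median_moe = 1
        year = db.Column(db.Integer(), primary_key=True)

        @classmethod
        def get_supported_levels(cls):
            return {"origin_geo": [STATE, COUNTY, ALL]}
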
/datausa/ipeds/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/ipeds/__init__.py
--------------------------------------------------------------------------------
/datausa/ipeds/abstract_models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa.attrs.models import University, Cip, Geo, EnrollmentStatus
3 | from datausa.attrs.models import Degree, Sector, LStudy, IPedsRace, IPedsOcc
4 | from datausa.attrs.models import LivingArrangement, IncomeRange, AcademicRank
5 | from datausa.attrs.models import IPedsExpense
6 | from datausa.core.models import BaseModel
7 | from datausa.attrs.consts import NATION, STATE, COUNTY, MSA
8 | from datausa.attrs.consts import PUMA, PLACE, ALL, GEO
9 |
10 | from sqlalchemy.ext.declarative import declared_attr
11 | from sqlalchemy.sql import func
12 |
13 |
14 | class BaseIpeds(db.Model, BaseModel):
15 | __abstract__ = True
16 | __table_args__ = {"schema": "ipeds"}
17 | supported_levels = {}
18 | source_title = 'NCES IPEDS'
19 | source_link = 'http://nces.ed.gov/ipeds/'
20 | source_org = 'Department of Education'
21 |
22 |
23 | class Enrollment(BaseIpeds):
24 | __abstract__ = True
25 |
26 | enrolled_total = db.Column(db.Integer())
27 | enrolled_men = db.Column(db.Integer())
28 | enrolled_women = db.Column(db.Integer())
29 | enrolled_black = db.Column(db.Integer())
30 | enrolled_asian = db.Column(db.Integer())
31 | enrolled_native = db.Column(db.Integer())
32 | enrolled_unknown = db.Column(db.Integer())
33 |
34 |
35 | class Tuition(BaseIpeds):
36 | __abstract__ = True
37 |
38 | oos_tuition = db.Column(db.Integer())
39 | state_tuition = db.Column(db.Integer())
40 | district_tuition = db.Column(db.Integer())
41 |
42 | oos_fee = db.Column(db.Integer())
43 | state_fee = db.Column(db.Integer())
44 | district_fee = db.Column(db.Integer())
45 |
46 | oos_tuition_grads = db.Column(db.Integer())
47 | state_tuition_grads = db.Column(db.Integer())
48 | district_tuition_grads = db.Column(db.Integer())
49 |
50 | oos_fee_grads = db.Column(db.Integer())
51 | state_fee_grads = db.Column(db.Integer())
52 | district_fee_grads = db.Column(db.Integer())
53 |
54 | class GradsPct(BaseIpeds):
55 | __abstract__ = True
56 | pct_total = db.Column(db.Float())
57 | pct_men = db.Column(db.Float())
58 | pct_women = db.Column(db.Float())
59 |
60 |
61 | class Grads(BaseIpeds):
62 | __abstract__ = True
63 | grads_total = db.Column(db.Integer())
64 | grads_men = db.Column(db.Integer())
65 | grads_women = db.Column(db.Integer())
66 | grads_native = db.Column(db.Integer())
67 | grads_native_men = db.Column(db.Integer())
68 | grads_native_women = db.Column(db.Integer())
69 | grads_asian = db.Column(db.Integer())
70 | grads_asian_men = db.Column(db.Integer())
71 | grads_asian_women = db.Column(db.Integer())
72 | grads_black = db.Column(db.Integer())
73 | grads_black_men = db.Column(db.Integer())
74 | grads_black_women = db.Column(db.Integer())
75 | grads_hispanic = db.Column(db.Integer())
76 | grads_hispanic_men = db.Column(db.Integer())
77 | grads_hispanic_women = db.Column(db.Integer())
78 | grads_hawaiian = db.Column(db.Integer())
79 | grads_hawaiian_men = db.Column(db.Integer())
80 | grads_hawaiian_women = db.Column(db.Integer())
81 | grads_white = db.Column(db.Integer())
82 | grads_white_men = db.Column(db.Integer())
83 | grads_white_women = db.Column(db.Integer())
84 | grads_multi = db.Column(db.Integer())
85 | grads_multi_men = db.Column(db.Integer())
86 | grads_multi_women = db.Column(db.Integer())
87 | grads_unknown = db.Column(db.Integer())
88 | grads_unknown_men = db.Column(db.Integer())
89 | grads_unknown_women = db.Column(db.Integer())
90 | grads_nonresident = db.Column(db.Integer())
91 | grads_nonresident_men = db.Column(db.Integer())
92 | grads_nonresident_women = db.Column(db.Integer())
93 |
94 |
95 | class GeoId(object):
96 | LEVELS = [NATION, STATE, COUNTY, PLACE, MSA, PUMA, ALL]
97 |
98 | @declared_attr
99 | def geo(cls):
100 | return db.Column(db.String(), db.ForeignKey(Geo.id), primary_key=True)
101 |
102 | @classmethod
103 | def get_supported_levels(cls):
104 | return {GEO: GeoId.LEVELS}
105 |
106 | @classmethod
107 | def geo_filter(cls, level):
108 | if level == ALL:
109 | return True
110 | level_map = {NATION: "010", STATE: "040", PUMA: "795",
111 | COUNTY: "050", MSA: "310", PLACE: "160"}
112 | level_code = level_map[level]
113 | return cls.geo.startswith(level_code)
114 |
115 |
116 | class CipId(object):
117 | LEVELS = ["2", "4", "6", "all"]
118 |
119 | @declared_attr
120 | def cip(cls):
121 | return db.Column(db.String(), db.ForeignKey(Cip.id), primary_key=True)
122 |
123 | @classmethod
124 | def get_supported_levels(cls):
125 | return {"cip": ["all", "2", "4", "6"]}
126 |
127 | @classmethod
128 | def cip_filter(cls, level):
129 | if level == 'all':
130 | return True
131 | return func.length(cls.cip) == level
132 |
133 |
134 | class UniversityId(object):
135 | LEVELS = ["all", "0", "1", "2"]
136 |     # university_level filtering is implemented in university_filter below
137 |
138 | @declared_attr
139 | def university(cls):
140 | return db.Column(db.String(), db.ForeignKey(University.id), primary_key=True)
141 |
142 | @declared_attr
143 | def university_level(cls):
144 | return db.Column(db.Integer())
145 |
146 | @classmethod
147 | def get_supported_levels(cls):
148 | return {"university": UniversityId.LEVELS}
149 |
150 | @classmethod
151 | def university_filter(cls, level):
152 | if level == 'all':
153 | return True
154 | return cls.university_level == level
155 |
156 |
157 | class LStudyId(object):
158 | @declared_attr
159 | def lstudy(cls):
160 | return db.Column(db.String(), db.ForeignKey(LStudy.id), primary_key=True)
161 |
162 |
163 | class EnrollmentStatusId(object):
164 | @declared_attr
165 | def enrollment_status(cls):
166 | return db.Column(db.String(), db.ForeignKey(EnrollmentStatus.id), primary_key=True)
167 |
168 |
169 | class DegreeId(object):
170 | @declared_attr
171 | def degree(cls):
172 | return db.Column(db.String(), db.ForeignKey(Degree.id), primary_key=True)
173 |
174 |
175 | class SectorId(object):
176 | @declared_attr
177 | def sector(cls):
178 | return db.Column(db.String(), db.ForeignKey(Sector.id), primary_key=True)
179 |
180 | @classmethod
181 | def get_supported_levels(cls):
182 | return {"sector": ["all"]}
183 |
184 |
185 | class Admissions(BaseIpeds):
186 | __abstract__ = True
187 | applicants_total = db.Column(db.Float)
188 | applicants_men = db.Column(db.Float)
189 | applicants_women = db.Column(db.Float)
190 | admissions_total = db.Column(db.Float)
191 | admissions_men = db.Column(db.Float)
192 | admissions_women = db.Column(db.Float)
193 | admissions_enrolled_total = db.Column(db.Float)
194 | admissions_enrolled_men = db.Column(db.Float)
195 | admissions_enrolled_women = db.Column(db.Float)
196 | admissions_enrolled_ft_total = db.Column(db.Float)
197 | admissions_enrolled_ft_men = db.Column(db.Float)
198 | admissions_enrolled_ft_women = db.Column(db.Float)
199 | admissions_enrolled_pt_total = db.Column(db.Float)
200 | admissions_enrolled_pt_men = db.Column(db.Float)
201 | admissions_enrolled_pt_women = db.Column(db.Float)
202 | sub_sat_scores_num = db.Column(db.Float)
203 | sub_act_scores_num = db.Column(db.Float)
204 | sub_sat_scores_pct = db.Column(db.Float)
205 | sub_act_scores_pct = db.Column(db.Float)
206 | sat_cr_25 = db.Column(db.Float)
207 | sat_cr_75 = db.Column(db.Float)
208 | sat_math_25 = db.Column(db.Float)
209 | sat_math_75 = db.Column(db.Float)
210 | sat_writing_25 = db.Column(db.Float)
211 | sat_writing_75 = db.Column(db.Float)
212 | act_composite_25 = db.Column(db.Float)
213 | act_composite_75 = db.Column(db.Float)
214 | act_english_25 = db.Column(db.Float)
215 | act_english_75 = db.Column(db.Float)
216 | act_math_25 = db.Column(db.Float)
217 | act_math_75 = db.Column(db.Float)
218 | act_writing_25 = db.Column(db.Float)
219 | act_writing_75 = db.Column(db.Float)
220 | yield_total = db.Column(db.Float)
221 | yield_men = db.Column(db.Float)
222 | yield_women = db.Column(db.Float)
223 |
224 |
225 | class IPedsRaceId(object):
226 | @declared_attr
227 | def ipeds_race(cls):
228 | return db.Column(db.String(), db.ForeignKey(IPedsRace.id), primary_key=True)
229 |
230 |
231 | class EnrollmentEfa(BaseIpeds):
232 | __abstract__ = True
233 | num_enrolled = db.Column(db.Float)
234 |
235 |
236 | class LivingArrangementId(object):
237 | @declared_attr
238 | def living_arrangement(cls):
239 | return db.Column(db.String(), db.ForeignKey(LivingArrangement.id), primary_key=True)
240 |
241 |
242 | class IncomeRangeId(object):
243 | @declared_attr
244 | def income_range(cls):
245 | return db.Column(db.String(), db.ForeignKey(IncomeRange.id), primary_key=True)
246 |
247 |
248 | class SfaLivingBase(BaseIpeds):
249 | __abstract__ = True
250 | num_in_living_arrangement = db.Column(db.Float)
251 |
252 |
253 | class GradRateBase(BaseIpeds):
254 | __abstract__ = True
255 | grad_rate = db.Column(db.Float)
256 | cohort_size = db.Column(db.Float)
257 | num_finishers = db.Column(db.Float)
258 |
259 |
260 | class FinancialsBase(BaseIpeds):
261 | __abstract__ = True
262 | endowment_value_fiscal_year_end = db.Column(db.Float)
263 | federal_grants_and_contracts = db.Column(db.Float)
264 | investment_income = db.Column(db.Float)
265 | local_grants = db.Column(db.Float)
266 | local_grants_and_contracts = db.Column(db.Float)
267 | other_federal_grants = db.Column(db.Float)
268 | pell_grants = db.Column(db.Float)
269 | private_grants = db.Column(db.Float)
270 | research_rank = db.Column(db.Float)
271 | research_rank_carnegie = db.Column(db.Float)
272 | research_rank_carnegie_pct = db.Column(db.Float)
273 | research_rank_pct = db.Column(db.Float)
274 | research_total = db.Column(db.Float)
275 | state_grants = db.Column(db.Float)
276 | state_grants_and_contracts = db.Column(db.Float)
277 | total_expenses = db.Column(db.Float)
278 | tuition_and_fees = db.Column(db.Float)
279 | total_salaries = db.Column(db.Float)
280 |
281 |
282 | class ExpensesBase(BaseIpeds):
283 | __abstract__ = True
284 | benefits_expense = db.Column(db.Float)
285 | dep_expense = db.Column(db.Float)
286 | interest_expense = db.Column(db.Float)
287 | ops_expense = db.Column(db.Float)
288 | other_expense = db.Column(db.Float)
289 | salaries_expense = db.Column(db.Float)
290 |
291 |
292 | class NISSalariesBase(BaseIpeds):
293 | __abstract__ = True
294 | num_noninstructional_staff = db.Column(db.Float)
295 | outlays_noninstructional_staff = db.Column(db.Float)
296 |
297 |
298 | class ISSalariesBase(BaseIpeds):
299 | __abstract__ = True
300 | num_instructional_staff = db.Column(db.Float)
301 | outlays_instructional_staff = db.Column(db.Float)
302 | months_covered_instructional_staff = db.Column(db.Float)
303 |
304 |
305 | class IPedsOccId(object):
306 | @declared_attr
307 | def ipeds_occ(cls):
308 | return db.Column(db.String(), db.ForeignKey(IPedsOcc.id), primary_key=True)
309 |
310 |
311 | class AcademicRankId(object):
312 | @declared_attr
313 | def academic_rank(cls):
314 | return db.Column(db.String(), db.ForeignKey(AcademicRank.id), primary_key=True)
315 |
316 |
317 | class IPedsExpenseId(object):
318 | @declared_attr
319 | def ipeds_expense(cls):
320 | return db.Column(db.String(), db.ForeignKey(IPedsExpense.id), primary_key=True)
321 |
--------------------------------------------------------------------------------
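The `geo_filter` above works because Data USA geo ids embed their summary level as a three-digit prefix (for example an id such as "04000US25" starts with "040", the state code), so filtering to a level compiles down to a simple `LIKE 'prefix%'`. A hedged sketch against a concrete model (`GradsYgc` lives in `datausa.ipeds.models` and is assumed here to mix in `GeoId`):

from datausa.ipeds.models import GradsYgc
from datausa.attrs.consts import STATE, ALL

GradsYgc.geo_filter(ALL)    # literal True: no geo restriction is applied
GradsYgc.geo_filter(STATE)  # roughly equivalent to: geo LIKE '040%'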
/datausa/onet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/onet/__init__.py
--------------------------------------------------------------------------------
/datausa/onet/models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa.attrs.models import Skill
3 | from datausa.ipeds.abstract_models import CipId
4 | from datausa.core.models import BaseModel
5 | from sqlalchemy.ext.declarative import declared_attr
6 |
7 | class BaseOnet(db.Model, BaseModel):
8 | __abstract__ = True
9 | __table_args__ = {"schema": "onet"}
10 | supported_levels = {}
11 | source_title = 'O*NET'
12 | source_link = 'http://www.onetonline.org/'
13 | source_org = 'Department of Labor'
14 |
15 | class SkillId(object):
16 | @declared_attr
17 | def skill(cls):
18 | return db.Column(db.String(), db.ForeignKey(Skill.id), primary_key=True)
19 |
20 | @classmethod
21 | def get_supported_levels(cls):
22 | return {"cip": ["2", "4", "6"]}
23 |
24 | class SkillByCip(BaseOnet, SkillId, CipId):
25 | __tablename__ = "skills_by_cip"
26 | median_moe = 1
27 |
28 | value = db.Column(db.Float)
29 | value_rca = db.Column(db.Float)
30 |
31 | @classmethod
32 | def get_supported_levels(cls):
33 | return {"cip": ["2", "4", "6", "all"], "skill": ["all"]}
34 |
35 | class SkillBySoc(BaseOnet, SkillId):
36 | __tablename__ = "skills_by_soc"
37 | median_moe = 1
38 |
39 | soc = db.Column(db.String(), primary_key=True)
40 | value = db.Column(db.Float)
41 | value_rca = db.Column(db.Float)
42 |
43 | @classmethod
44 | def get_supported_levels(cls):
45 | return {"soc": ["all"], "skill": ["all"]}
46 |
--------------------------------------------------------------------------------
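A hedged usage sketch: ranking skills for 2-digit CIP codes with the inherited `cip_filter`. It assumes a Flask application context is active so the Flask-SQLAlchemy `query` property is bound to a session.

from datausa.onet.models import SkillByCip

top_skills = (SkillByCip.query
              .filter(SkillByCip.cip_filter("2"))
              .order_by(SkillByCip.value.desc())
              .limit(10)
              .all())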
/datausa/opiods/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/opiods/__init__.py
--------------------------------------------------------------------------------
/datausa/opiods/models.py:
--------------------------------------------------------------------------------
1 | from datausa.database import db
2 | from datausa.core.models import BaseModel
3 | from datausa.attrs.consts import NATION, STATE, ALL
4 | from datausa.attrs.models import Geo
5 | from sqlalchemy.ext.declarative import declared_attr
6 |
7 |
8 | class BaseOpiods(db.Model, BaseModel):
9 | __abstract__ = True
10 | __table_args__ = {"schema": "opiods"}
11 | supported_levels = {"year": [ALL]}
12 | source_title = 'Kaiser Family Foundation analysis of Centers for Disease Control and Prevention (CDC), National Center for Health Statistics'
13 | source_link = 'https://www.kff.org/other/state-indicator/opioid-overdose-death-rates/'
14 | source_org = 'Kaiser Family Foundation State Health Facts'
15 |
16 | @classmethod
17 | def get_supported_levels(cls):
18 | return {
19 | "year": [ALL],
20 | "geo": [ALL, NATION, STATE]
21 | }
22 |
23 | @classmethod
24 | def geo_filter(cls, level):
25 | if level == ALL:
26 | return True
27 | level_map = {STATE: "040", NATION: "010"}
28 | level_code = level_map[level]
29 | return cls.geo.startswith(level_code)
30 |
31 | @declared_attr
32 | def geo(cls):
33 | return db.Column(db.String(), db.ForeignKey(Geo.id), primary_key=True)
34 |
35 |
36 | class DrugOverdoseDeathRate(BaseOpiods):
37 | __tablename__ = "drug_overdose_deathrate"
38 | median_moe = 1
39 | source_link = 'https://www.kff.org/other/state-indicator/opioid-overdose-death-rates/?currentTimeframe=0&sortModel=%7B%22colId%22:%22Location%22,%22sort%22:%22asc%22%7D'
40 | year = db.Column(db.Integer(), primary_key=True)
41 | drug_overdose_ageadjusted = db.Column(db.String())
42 |
43 |
44 | class OpiodOverdoseDeathRate(BaseOpiods):
45 | __tablename__ = "opioid_overdose_deathrate"
46 | median_moe = 1
47 | source_link = 'https://www.kff.org/other/state-indicator/opioid-overdose-death-rates/?currentTimeframe=0&sortModel=%7B%22colId%22:%22Location%22,%22sort%22:%22asc%22%7D'
48 | year = db.Column(db.Integer(), primary_key=True)
49 | opioid_overdose_deathrate_ageadjusted = db.Column(db.String())
50 |
51 |
52 | class NonMedUsePainMeds(BaseOpiods):
53 | __tablename__ = "non_medical_use_of_pain_releivers"
54 | median_moe = 1
55 | source_title = 'National Survey on Drug Use and Health'
56 | source_org = 'SAMHSA, Center for Behavioral Health Statistics and Quality'
57 | source_link = 'https://nsduhweb.rti.org/respweb/homepage.cfm'
58 | start_year = db.Column(db.Integer(), primary_key=True)
59 | year = db.Column(db.Integer(), primary_key=True)
60 |
61 | non_medical_use_of_pain_relievers = db.Column(db.String())
62 | non_medical_use_of_pain_relievers_lci = db.Column(db.String())
63 | non_medical_use_of_pain_relievers_uci = db.Column(db.String())
64 |
65 | @classmethod
66 | def get_supported_levels(cls):
67 | return {
68 | "year": [ALL],
69 | "start_year": [ALL],
70 | "geo": [ALL, NATION, STATE]
71 | }
72 |
--------------------------------------------------------------------------------
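Note that the rate columns in these tables are declared as strings. A consumer that needs numeric ordering would have to cast in SQL, along these lines (a sketch that assumes every stored value parses as a float):

from sqlalchemy import cast, Float
from datausa.opiods.models import DrugOverdoseDeathRate

worst = (DrugOverdoseDeathRate.query
         .order_by(cast(DrugOverdoseDeathRate.drug_overdose_ageadjusted, Float).desc())
         .limit(5)
         .all())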
/datausa/pums/__init__.py:
--------------------------------------------------------------------------------
1 | # __init__.py
--------------------------------------------------------------------------------
/datausa/pums/abstract_models.py:
--------------------------------------------------------------------------------
1 | from sqlalchemy import MetaData
2 | from sqlalchemy.ext.declarative import declared_attr
3 | from datausa.core.exceptions import DataUSAException
4 |
5 | from datausa.database import db
6 | from datausa.attrs import consts
7 | from datausa.core.models import BaseModel
8 | from datausa.attrs.models import *
9 | from datausa.attrs.consts import NATION, STATE, PUMA, ALL, GEO, COUNTY
10 |
11 |
12 | class BasePums(db.Model, BaseModel):
13 | __abstract__ = True
14 | __table_args__ = {"schema": "pums_1yr"}
15 | source_title = 'ACS PUMS 1-year Estimate'
16 | source_link = 'http://census.gov/programs-surveys/acs/technical-documentation/pums.html'
17 | source_org = 'Census Bureau'
18 | num_records = db.Column(db.Integer)
19 | CACHED_YEARS = [2014, 2015, 2016]
20 |
21 |
22 | def __repr__(self):
23 | return '<{}>'.format(self.__class__)
24 |
25 |
26 | class BasePums5(BasePums):
27 | __abstract__ = True
28 | __table_args__ = {"schema": "pums_5yr"}
29 | source_title = 'ACS PUMS 5-year Estimate'
30 | source_link = 'http://census.gov/programs-surveys/acs/technical-documentation/pums.html'
31 | source_org = 'Census Bureau'
32 | CACHED_YEARS = [2014, 2015, 2016]
33 |
34 | class PersonalOver5(object):
35 | avg_age = db.Column(db.Float)
36 | avg_wage = db.Column(db.Float)
37 | avg_age_moe = db.Column(db.Float)
38 | avg_wage_moe = db.Column(db.Float)
39 | num_ppl = db.Column(db.Integer)
40 | num_ppl_moe = db.Column(db.Float)
41 |
42 | class Personal(object):
43 | avg_age = db.Column(db.Float)
44 | avg_wage = db.Column(db.Float)
45 | num_ppl = db.Column(db.Integer)
46 | avg_age_moe = db.Column(db.Float)
47 | avg_wage_moe = db.Column(db.Float)
48 | num_ppl_moe = db.Column(db.Float)
49 |
50 | class Employees(Personal):
51 | avg_age_ft = db.Column(db.Float)
52 | avg_age_pt = db.Column(db.Float)
53 | avg_wage_ft = db.Column(db.Float)
54 | avg_wage_pt = db.Column(db.Float)
55 | num_ppl_ft = db.Column(db.Integer)
56 | num_ppl_pt = db.Column(db.Integer)
57 |
58 | avg_age_ft_moe = db.Column(db.Float)
59 | avg_age_pt_moe = db.Column(db.Float)
60 | avg_wage_ft_moe = db.Column(db.Float)
61 | avg_wage_pt_moe = db.Column(db.Float)
62 | num_ppl_ft_moe = db.Column(db.Float)
63 | num_ppl_pt_moe = db.Column(db.Float)
64 |
65 | avg_hrs = db.Column(db.Float)
66 | avg_hrs_ft = db.Column(db.Float)
67 | avg_hrs_pt = db.Column(db.Float)
68 | avg_hrs_moe = db.Column(db.Float)
69 | avg_hrs_ft_moe = db.Column(db.Float)
70 | avg_hrs_pt_moe = db.Column(db.Float)
71 |
72 | class EmployeesGini(object):
73 | gini = db.Column(db.Float)
74 | gini_ft = db.Column(db.Float)
75 | gini_pt = db.Column(db.Float)
76 |
77 | class EmployeesRca(object):
78 | num_ppl_rca = db.Column(db.Float)
79 | num_ppl_pt_rca = db.Column(db.Float)
80 | num_ppl_ft_rca = db.Column(db.Float)
81 |
82 | class EmployeesWithAge(Personal):
83 | avg_wage_ft = db.Column(db.Float)
84 | avg_wage_pt = db.Column(db.Float)
85 | num_ppl_ft = db.Column(db.Integer)
86 | num_ppl_pt = db.Column(db.Integer)
87 |
88 | avg_wage_ft_moe = db.Column(db.Float)
89 | avg_wage_pt_moe = db.Column(db.Float)
90 | num_ppl_ft_moe = db.Column(db.Float)
91 | num_ppl_pt_moe = db.Column(db.Float)
92 |
93 | class PersonalWithAge(object):
94 | avg_wage = db.Column(db.Float)
95 | num_ppl = db.Column(db.Integer)
96 | avg_wage_moe = db.Column(db.Float)
97 | num_ppl_moe = db.Column(db.Float)
98 |
99 |
100 | class Year(object):
101 | @declared_attr
102 | def year(cls):
103 | return db.Column(db.Integer(), primary_key=True)
104 |
105 | class GeoId(object):
106 | LEVELS = [NATION, STATE, PUMA, ALL]
107 | @classmethod
108 | def get_supported_levels(cls):
109 | return {GEO: GeoId.LEVELS}
110 |
111 | @classmethod
112 | def geo_filter(cls, level):
113 | if level == ALL:
114 | return True
115 | level_map = {NATION: "010", STATE: "040", PUMA: "795"}
116 | level_code = level_map[level]
117 | return cls.geo.startswith(level_code)
118 |
119 | @declared_attr
120 | def geo(cls):
121 | return db.Column(db.String(), db.ForeignKey(Geo.id), primary_key=True)
122 |
123 | class CipId(object):
124 | @declared_attr
125 | def cip(cls):
126 | return db.Column(db.String(), db.ForeignKey(Cip.id), primary_key=True)
127 |
128 | class DegreeId(object):
129 | @declared_attr
130 | def degree(cls):
131 | return db.Column(db.String(), db.ForeignKey(PumsDegree.id), primary_key=True)
132 |
133 | class NaicsId(object):
134 | LEVELS = ["0", "1", "2", "all"]
135 | naics_level = db.Column(db.Integer())
136 |
137 | @declared_attr
138 | def naics(cls):
139 | return db.Column(db.String(), db.ForeignKey(PumsNaics.id), primary_key=True)
140 |
141 | @classmethod
142 | def naics_filter(cls, level):
143 | if level == consts.ALL:
144 | return True
145 | return cls.naics_level == level
146 |
147 | class SocId(object):
148 | LEVELS = ["0", "1", "2", "3", "all"]
149 | soc_level = db.Column(db.Integer())
150 |
151 | @declared_attr
152 | def soc(cls):
153 | return db.Column(db.String(), db.ForeignKey(PumsSoc.id), primary_key=True)
154 |
155 | @classmethod
156 | def soc_filter(cls, level):
157 | if level == consts.ALL:
158 | return True
159 | return cls.soc_level == level
160 |
161 | class WageId(object):
162 | @declared_attr
163 | def wage_bin(cls):
164 | return db.Column(db.String(), db.ForeignKey(PumsWage.id), primary_key=True)
165 |
166 | class RaceId(object):
167 | @declared_attr
168 | def race(cls):
169 | return db.Column(db.String(), db.ForeignKey(PumsRace.id), primary_key=True)
170 |
171 | class SexId(object):
172 | @declared_attr
173 | def sex(cls):
174 | return db.Column(db.String(), db.ForeignKey(PumsSex.id), primary_key=True)
175 |
176 | class BirthplaceId(object):
177 | @declared_attr
178 | def birthplace(cls):
179 | return db.Column(db.String(), db.ForeignKey(PumsBirthplace.id), primary_key=True)
180 |
--------------------------------------------------------------------------------
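Unlike CIP codes, whose depth is their string length, NAICS and SOC rows carry explicit `naics_level` / `soc_level` columns, so the level filters above reduce to equality checks. A small sketch using a concrete model from `datausa.pums.models` (defined in the next file):

from datausa.pums.models import Ygi
from datausa.attrs import consts

Ygi.naics_filter(consts.ALL)  # literal True: no restriction
Ygi.naics_filter("2")         # compiles to a naics_level = :param equality check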
/datausa/pums/models.py:
--------------------------------------------------------------------------------
1 | from datausa.pums.abstract_models import *
2 | from datausa.attrs.consts import ALL
3 |
4 | class Ya(BasePums, EmployeesWithAge, Year):
5 | __tablename__ = "ya"
6 | median_moe = 0.5
7 |
8 | age = db.Column(db.String(), primary_key=True)
9 |
10 | @classmethod
11 | def get_supported_levels(cls):
12 | return {"age": [ALL]}
13 |
14 | class Yc(BasePums, Employees, Year, CipId):
15 | __tablename__ = "yc"
16 | median_moe = 1
17 |
18 | avg_wage_rank = db.Column(db.Integer)
19 |
20 | @classmethod
21 | def get_supported_levels(cls):
22 | return {"cip": ["2", ALL]}
23 |
24 | class Ycs(BasePums, Employees, Year, CipId, SexId):
25 | __tablename__ = "ycs"
26 | median_moe = 2
27 |
28 | @classmethod
29 | def get_supported_levels(cls):
30 | return {"cip": ["2", ALL], "sex": [ALL]}
31 |
32 | class Yca(BasePums, EmployeesWithAge, Year, CipId):
33 | __tablename__ = "yca"
34 | median_moe = 2
35 |
36 | age = db.Column(db.String(), primary_key=True)
37 |
38 | @classmethod
39 | def get_supported_levels(cls):
40 | return {"cip": ["2", ALL], "age": [ALL]}
41 |
42 | class Ycb(BasePums, Employees, Year, CipId, BirthplaceId, EmployeesRca):
43 | __tablename__ = "ycb"
44 | median_moe = 2
45 |
46 | @classmethod
47 | def get_supported_levels(cls):
48 | return {"cip": ["2", ALL], "birthplace": [ALL]} # TODO support in/out of US?
49 |
50 | class Ycd(BasePums, Employees, Year, CipId, DegreeId):
51 | __tablename__ = "ycd"
52 | median_moe = 2
53 |
54 | @classmethod
55 | def get_supported_levels(cls):
56 | return {"cip": ["2", ALL], "degree": [ALL]}
57 |
58 | class Yg(BasePums, Employees, Year, GeoId, EmployeesGini):
59 | __tablename__ = "yg"
60 | median_moe = 1
61 |
62 |
63 | class Ygd(BasePums, Employees, Year, GeoId, DegreeId):
64 | __tablename__ = "ygd"
65 | median_moe = 2
66 |
67 | @classmethod
68 | def get_supported_levels(cls):
69 | return {"geo": GeoId.LEVELS, "degree": [ALL]}
70 |
71 |
72 | class Ygi(BasePums, Employees, Year, GeoId, NaicsId, EmployeesRca):
73 | __tablename__ = "ygi"
74 | median_moe = 2
75 |
76 | @classmethod
77 | def get_supported_levels(cls):
78 | return {"geo": GeoId.LEVELS, "naics": NaicsId.LEVELS}
79 |
80 | class Ygio(BasePums, Employees, Year, GeoId, NaicsId, SocId):
81 | __tablename__ = "ygio"
82 | median_moe = 5
83 | @classmethod
84 | def get_supported_levels(cls):
85 | return {"geo": GeoId.LEVELS,
86 | "soc": SocId.LEVELS,
87 | "naics": NaicsId.LEVELS}
88 |
89 | # class Ygmd(BasePums, Personal, Year, GeoId, MajorId, DegreeId):
90 | # __tablename__ = "ygmd"
91 | # median_moe = 3
92 |
93 | class Ygc(BasePums, Employees, Year, GeoId, CipId, EmployeesRca):
94 | __tablename__ = "ygc"
95 | median_moe = 2
96 |
97 | @classmethod
98 | def get_supported_levels(cls):
99 | return {"geo": GeoId.LEVELS, "cip": ["2", ALL]}
100 |
101 | class Yo(BasePums, Employees, Year, SocId, EmployeesGini):
102 | __tablename__ = "yo"
103 | median_moe = 1
104 |
105 | avg_wage_rank = db.Column(db.Integer)
106 | num_ppl_rank = db.Column(db.Integer)
107 |
108 | @classmethod
109 | def get_supported_levels(cls):
110 | return {"soc": SocId.LEVELS}
111 |
112 | class Yow(BasePums, Employees, Year, SocId, WageId):
113 | __tablename__ = "yow"
114 | median_moe = 1.5
115 |
116 | @classmethod
117 | def get_supported_levels(cls):
118 | return {"soc": SocId.LEVELS, "wage_bin": [ALL]}
119 |
120 |
121 | class Yiw(BasePums, Employees, Year, NaicsId, WageId):
122 | __tablename__ = "yiw"
123 | median_moe = 1.5
124 |
125 | @classmethod
126 | def get_supported_levels(cls):
127 | return {"naics": NaicsId.LEVELS, "wage_bin": [ALL]}
128 |
129 |
130 | class Ygo(BasePums, Employees, Year, GeoId, SocId, EmployeesRca):
131 | __tablename__ = "ygo"
132 | median_moe = 2
133 |
134 | @classmethod
135 | def get_supported_levels(cls):
136 | return {"geo": GeoId.LEVELS, "soc": SocId.LEVELS}
137 |
138 | class Ygw(BasePums, Employees, Year, GeoId, WageId):
139 | __tablename__ = "ygw"
140 | median_moe = 2
141 |
142 | @classmethod
143 | def get_supported_levels(cls):
144 | return {"geo": GeoId.LEVELS, "wage_bin": [ALL]}
145 |
146 |
147 | class Yor(BasePums, Employees, Year, SocId, RaceId):
148 | __tablename__ = "yor"
149 | median_moe = 2
150 |
151 | @classmethod
152 | def get_supported_levels(cls):
153 | return {"soc": SocId.LEVELS,
154 | "race": [ALL]}
155 |
156 |
157 | class Ygor(BasePums, Employees, Year, GeoId, SocId, RaceId):
158 | __tablename__ = "ygor"
159 | median_moe = 3
160 |
161 | @classmethod
162 | def get_supported_levels(cls):
163 | return {"geo": GeoId.LEVELS, "soc": SocId.LEVELS,
164 | "race": [ALL]}
165 |
166 | class Ygs(BasePums, Employees, Year, GeoId, SexId):
167 | __tablename__ = "ygs"
168 | median_moe = 2
169 |
170 | @classmethod
171 | def get_supported_levels(cls):
172 | return {"geo": GeoId.LEVELS, "sex": [ALL]}
173 |
174 | class Ygr(BasePums, Employees, Year, GeoId, RaceId):
175 | __tablename__ = "ygr"
176 | median_moe = 2
177 |
178 | @classmethod
179 | def get_supported_levels(cls):
180 | return {"geo": GeoId.LEVELS, "race": [ALL]}
181 |
182 | class Ygos(BasePums, Employees, Year, GeoId, SocId, SexId):
183 | __tablename__ = "ygos"
184 | median_moe = 3
185 |
186 | @classmethod
187 | def get_supported_levels(cls):
188 | return {"geo": GeoId.LEVELS, "soc": SocId.LEVELS,
189 | "sex": [ALL]}
190 |
191 | class Yoc(BasePums, Employees, Year, SocId, CipId, EmployeesRca):
192 | __tablename__ = "yoc"
193 | median_moe = 2
194 |
195 | @classmethod
196 | def get_supported_levels(cls):
197 | return {"cip": ["2", ALL], "soc": SocId.LEVELS}
198 |
199 | class Yic(BasePums, Employees, Year, NaicsId, CipId):
200 | __tablename__ = "yic"
201 | median_moe = 2
202 | @classmethod
203 | def get_supported_levels(cls):
204 | return {"cip": ["2", ALL], "naics": NaicsId.LEVELS}
205 |
206 | class Yio(BasePums, Employees, Year, NaicsId, SocId, EmployeesRca):
207 | __tablename__ = "yio"
208 | median_moe = 2
209 |
210 | @classmethod
211 | def get_supported_levels(cls):
212 | return {"soc": SocId.LEVELS, "naics": NaicsId.LEVELS}
213 |
214 |
215 | class Yir(BasePums, Employees, Year, NaicsId, RaceId, EmployeesRca):
216 | __tablename__ = "yir"
217 | median_moe = 2
218 |
219 | @classmethod
220 | def get_supported_levels(cls):
221 | return {"naics": NaicsId.LEVELS, "race": [ALL]}
222 |
223 |
224 | class Yior(BasePums, Employees, Year, NaicsId, SocId, RaceId):
225 | __tablename__ = "yior"
226 | median_moe = 3
227 |
228 | @classmethod
229 | def get_supported_levels(cls):
230 | return {"soc": SocId.LEVELS, "naics": NaicsId.LEVELS, "race": [ALL]}
231 |
232 |
233 | class Yos(BasePums, Employees, Year, SocId, SexId):
234 | __tablename__ = "yos"
235 | median_moe = 2
236 |
237 | @classmethod
238 | def get_supported_levels(cls):
239 | return {"soc": SocId.LEVELS, "sex": [ALL]}
240 |
241 |
242 | class Yoas(BasePums, EmployeesWithAge, Year, SocId, SexId):
243 | __tablename__ = "yoas"
244 | median_moe = 3
245 | age = db.Column(db.String(), primary_key=True)
246 |
247 | @classmethod
248 | def get_supported_levels(cls):
249 | return {"soc": SocId.LEVELS, "sex": [ALL], "age": [ALL]}
250 |
251 |
252 | class Yod(BasePums, Employees, Year, SocId, DegreeId):
253 | __tablename__ = "yod"
254 | median_moe = 2
255 |
256 | @classmethod
257 | def get_supported_levels(cls):
258 | return {"soc": SocId.LEVELS, "degree": [ALL]}
259 |
260 |
261 | class Yid(BasePums, Employees, Year, NaicsId, DegreeId):
262 | __tablename__ = "yid"
263 | median_moe = 2
264 |
265 | @classmethod
266 | def get_supported_levels(cls):
267 | return {"naics": NaicsId.LEVELS, "degree": [ALL]}
268 |
269 |
270 | class Yi(BasePums, Employees, Year, NaicsId, EmployeesGini):
271 | __tablename__ = "yi"
272 | median_moe = 1
273 |
274 | avg_wage_rank = db.Column(db.Integer)
275 | num_ppl_rank = db.Column(db.Integer)
276 |
277 | @classmethod
278 | def get_supported_levels(cls):
279 | return {"naics": NaicsId.LEVELS}
280 |
281 |
282 | class Yis(BasePums, Employees, Year, NaicsId, SexId, EmployeesRca):
283 | __tablename__ = "yis"
284 | median_moe = 2
285 |
286 | @classmethod
287 | def get_supported_levels(cls):
288 | return {"naics": NaicsId.LEVELS, "sex": [ALL]}
289 |
290 |
291 | class Yios(BasePums, Employees, Year, NaicsId, SocId, SexId):
292 | __tablename__ = "yios"
293 | median_moe = 3
294 |
295 | @classmethod
296 | def get_supported_levels(cls):
297 | return {"soc": SocId.LEVELS, "naics": NaicsId.LEVELS, "sex": [ALL]}
298 |
299 | class Yocd(BasePums, Employees, Year, SocId, CipId, DegreeId, EmployeesRca):
300 | __tablename__ = "yocd"
301 | median_moe = 3
302 |
303 | @classmethod
304 | def get_supported_levels(cls):
305 | return {"cip": ["2", ALL], "soc": SocId.LEVELS, "degree": [ALL]}
306 |
307 |
308 | class Ygb(BasePums, PersonalOver5, Year, GeoId, BirthplaceId):
309 | __tablename__ = "ygb_v2"
310 | median_moe = 2.1
311 | num_over5 = db.Column(db.Float)
312 | num_over5_moe = db.Column(db.Float)
313 | num_over5_rca = db.Column(db.Float)
314 |
315 | @classmethod
316 | def get_supported_levels(cls):
317 | return {"geo": GeoId.LEVELS, "birthplace": [ALL]}
318 |
--------------------------------------------------------------------------------
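The table names above follow a naming convention, one letter per dimension (y=year, g=geo, o=soc, i=naics, c=cip, and so on); `scripts/gen_indicies.py` later in this repo relies on the same encoding. A hedged decoding sketch:

LETTERS = {"y": "year", "g": "geo", "o": "soc", "i": "naics", "c": "cip",
           "a": "age", "b": "birthplace", "d": "degree", "r": "race",
           "s": "sex", "w": "wage_bin"}

def decode_tablename(name):
    # e.g. "ygor" -> ['year', 'geo', 'soc', 'race']
    return [LETTERS[ch] for ch in name]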
/datausa/pums/models_5.py:
--------------------------------------------------------------------------------
1 | from datausa.pums.abstract_models import *
2 | from datausa.attrs.consts import ALL
3 |
4 |
5 | class Ygi5(BasePums5, Employees, Year, GeoId, NaicsId, EmployeesRca):
6 | __tablename__ = "ygi"
7 | median_moe = 1.9
8 |
9 | @classmethod
10 | def get_supported_levels(cls):
11 | return {"geo": GeoId.LEVELS, "naics": NaicsId.LEVELS}
12 |
13 |
14 | class Ygo5(BasePums5, Employees, Year, GeoId, SocId, EmployeesRca):
15 | __tablename__ = "ygo"
16 | median_moe = 1.9
17 |
18 | @classmethod
19 | def get_supported_levels(cls):
20 | return {"geo": GeoId.LEVELS, "soc": SocId.LEVELS}
21 |
22 |
23 | class Yoas5(BasePums5, EmployeesWithAge, Year, SocId, SexId):
24 | __tablename__ = "yoas"
25 | median_moe = 2.9
26 | age = db.Column(db.String(), primary_key=True)
27 |
28 | @classmethod
29 | def get_supported_levels(cls):
30 | return {"soc": SocId.LEVELS, "sex": [ALL], "age": [ALL]}
31 |
32 |
33 | class Ygor5(BasePums5, Employees, Year, GeoId, SocId, RaceId):
34 | __tablename__ = "ygor"
35 | median_moe = 2.9
36 |
37 | @classmethod
38 | def get_supported_levels(cls):
39 | return {"geo": GeoId.LEVELS, "soc": SocId.LEVELS,
40 | "race": [ALL]}
41 |
42 | class Ygos5(BasePums5, Employees, Year, GeoId, SocId, SexId):
43 | __tablename__ = "ygos"
44 | median_moe = 2.9
45 |
46 | @classmethod
47 | def get_supported_levels(cls):
48 | return {"geo": GeoId.LEVELS, "soc": SocId.LEVELS,
49 | "sex": [ALL]}
50 |
51 | class Ygb5(BasePums5, PersonalOver5, Year, GeoId, BirthplaceId):
52 | __tablename__ = "ygb_v2"
53 | median_moe = 2
54 | num_over5 = db.Column(db.Float)
55 | num_over5_moe = db.Column(db.Float)
56 | num_over5_rca = db.Column(db.Float)
57 |
58 | @classmethod
59 | def get_supported_levels(cls):
60 | return {"geo": GeoId.LEVELS, "birthplace": [ALL]}
61 |
--------------------------------------------------------------------------------
/datausa/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/datausa/util/__init__.py
--------------------------------------------------------------------------------
/datausa/util/big_places.py:
--------------------------------------------------------------------------------
1 | from datausa import cache
2 | from sqlalchemy import or_, and_
3 | from sqlalchemy import distinct
4 | from datausa.attrs.consts import POP_THRESHOLD
5 | from datausa.acs.automap_models import Acs1_Yg
6 |
7 | @cache.memoize()
8 | def get_big_geos():
9 | conds = [
10 | Acs1_Yg.geo.startswith("010"),
11 | Acs1_Yg.geo.startswith("040"),
12 | Acs1_Yg.geo.startswith("050"),
13 | Acs1_Yg.geo.startswith("160"),
14 | Acs1_Yg.geo.startswith("310"),
15 | ]
16 | cond = and_(or_(*conds), Acs1_Yg.pop > POP_THRESHOLD)
17 | geos = Acs1_Yg.query.with_entities(distinct(Acs1_Yg.geo)).filter(cond).all()
18 | return set([g for g, in geos]) # faster lookup with set
19 |
20 | def is_big_geo(geo_id):
21 |     # for sufficiently large places, we can rely on the 1-year ACS estimate
22 | return geo_id in big_geos
23 |
24 | big_geos = get_big_geos()
25 |
--------------------------------------------------------------------------------
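Because `big_geos` is computed at import time, importing this module requires a configured database connection (and the `cache.memoize` decorator assumes the Flask-Cache extension has been initialized). Callers then use it as a cheap membership test (the geo id below is illustrative):

from datausa.util.big_places import is_big_geo

if is_big_geo("04000US25"):  # a state-level geo id, shown for illustration
    pass  # population is large enough to trust the ACS 1-year estimate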
/datausa/util/inmem.py:
--------------------------------------------------------------------------------
1 | from datausa import cache
2 | from datausa.ipeds.models import GradsYgc
3 | from datausa.onet.models import SkillBySoc, SkillByCip
4 | import re
5 |
6 | def splitter(x):  # split on commas not followed by a space
7 | return re.split(",(?! )", x)
8 |
9 | @cache.memoize()
10 | def ipeds_place_map():
11 | qry = GradsYgc.query.with_entities(GradsYgc.geo.distinct()).all()
12 | return {item: True for item, in qry}
13 |
14 |
15 | @cache.memoize()
16 | def onet_socs():
17 | qry = SkillBySoc.query.with_entities(SkillBySoc.soc.distinct()).all()
18 | return {item: True for item, in qry}
19 |
20 |
21 | @cache.memoize()
22 | def onet_cips():
23 | qry = SkillByCip.query.with_entities(SkillByCip.cip.distinct()).all()
24 | return {item: True for item, in qry}
25 |
--------------------------------------------------------------------------------
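The negative lookahead in `splitter` means only commas *not* followed by a space split the string, which keeps embedded "City, ST"-style names intact:

from datausa.util.inmem import splitter

splitter("a,b,c")                     # ['a', 'b', 'c']
splitter("Boston, MA,Cambridge, MA")  # ['Boston, MA', 'Cambridge, MA']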
/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask==0.10.1
2 | Flask-SQLAlchemy==2.1
3 | SQLAlchemy==1.1.14
4 | psycopg2==2.7.3.2
5 | Flask-Compress==1.3.0
6 | simplejson==3.8.2
7 | Flask-Cors==2.1.2
8 | Flask-Cache==0.13.1
9 | Pillow==3.1.1
10 | Whoosh==2.7.2
11 | Unidecode==0.4.19
12 | gunicorn==19.4.5
13 | click==6.3
14 | pytest==3.0.3
15 |
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | from datausa import app
2 |
3 | if __name__ == '__main__':
4 | app.debug = True
5 | app.run('0.0.0.0')
6 |
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/scripts/__init__.py
--------------------------------------------------------------------------------
/scripts/alt_fill_cache.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import os
3 | from requests.auth import HTTPBasicAuth
4 | import click
5 | import signal
6 | import sys
7 |
8 | errors = []
9 |
10 |
11 | def url_to_json(url):
12 | print url
13 | result = requests.get(url).json()
14 | if 'data' in result:
15 | return result['data'], result['headers']
16 | raise Exception("No data!")
17 |
18 |
19 | def crawl_page(site_url, moi):
20 | display_id, attr_kind = moi
21 |     if not display_id:
22 |         print "skipping", display_id, attr_kind; return  # nothing to crawl without an id
23 | page = u'{}/profile/{}/{}/'.format(site_url, attr_kind, display_id)
24 | print page, "getting..."
25 | r = requests.get(page, auth=HTTPBasicAuth('sunbird', os.environ.get('DATAUSA_WEB_PW', '')))
26 | if r.status_code != 200:
27 | if r.status_code == 401:
28 | raise Exception("You may have forgotten to set DATAUSA_WEB_PW \
29 | environment variable (or provided a bad PW).\nWe need this because \
30 | the site is password protected")
31 | print "PAGE ERROR", page, r.status_code
32 | errors.append(page)
33 |
34 |
35 | def crawl_attr(api_url, site_url, attr_kind, offset, sumlevel):
36 | sumlevel = "" if not sumlevel else "sumlevel={}".format(sumlevel)
37 | url_str = '{}/attrs/search?q=&kind={}&limit=110000&offset={}&{}'
38 | data, headers = url_to_json(url_str.format(api_url, attr_kind, offset, sumlevel))
39 | data = sorted(data, key=lambda obj: obj[headers.index('zvalue')], reverse=True)
40 | if attr_kind != 'geo':
41 | mydata = [[country[headers.index('id')], attr_kind] for country in data]
42 | else:
43 | mydata = [[country[headers.index('url_name')], attr_kind] for country in data]
44 |
45 | for x in mydata:
46 | crawl_page(site_url, x)
47 |
48 |
49 | def fix_url(my_url):
50 | if not my_url.startswith('http://'):
51 | my_url = 'http://' + my_url
52 | if my_url.endswith('/'):
53 | my_url = my_url[:-1]
54 | return my_url
55 |
56 |
57 | def signal_handler(signal, frame):
58 | print "Pages with Errors"
59 | print errors
60 | print "Number of errors:", len(errors)
61 | sys.exit(0)
62 |
63 |
64 | @click.command()
65 | @click.option('--api_url', default="http://db.datausa.io", help='API Url')
66 | @click.option('--site_url', default="http://beta.datausa.io", help='Site Url')
67 | @click.option('--attr', default="geo", help="attr kind")
68 | @click.option('--offset', default=0, help="offset in list")
69 | @click.option('--sumlevel', default=None, help="attr sumlevel")
70 | def main(api_url, site_url, attr, offset, sumlevel):
71 | api_url = fix_url(api_url)
72 | site_url = fix_url(site_url)
73 | attrs = attr.split(",")
74 | print "Waiting for crawl to complete..."
75 | for attr in attrs:
76 | crawl_attr(api_url, site_url, attr, offset, sumlevel)
77 | print "Crawl complete!"
78 | print "Pages with Errors"
79 | print errors
80 | print "Number of errors:", len(errors)
81 |
82 |
83 | if __name__ == "__main__":
84 | signal.signal(signal.SIGINT, signal_handler)
85 | main()
86 |
--------------------------------------------------------------------------------
/scripts/build_search.py:
--------------------------------------------------------------------------------
1 | '''
2 | Script used to generate the query that makes up the search table
3 | '''
4 | from datausa.pums.abstract_models import BasePums
5 |
6 | pums_schema_name = BasePums.get_schema_name()
7 |
8 | # Industry and Occupation Z-scoring
9 | attrs = [("soc", "{}.yo".format(pums_schema_name), "avg_wage", [0, 1, 2, 3]),
10 | ("naics", "{}.yi".format(pums_schema_name), "num_ppl", [0, 1, 2])]
11 |
12 | qry = '''SELECT g.{0} as id, (g.{2} - stats.average) / stats.st AS zvalue, '{0}' as kind , lower(a.name) as name, a.name as display, a.level::text as sumlevel, -1 as is_stem, a.url_name as url_name, a.keywords as keywords
13 | FROM {1} g
14 | LEFT JOIN pums_attrs.pums_{0} a ON (a.id = g.{0} and a.level = g.{0}_level)
15 | CROSS JOIN
16 | (select STDDEV({2}) as st, AVG({2}) as average FROM {1} WHERE {0}_level={3} AND year=2015) stats
17 | WHERE g.{0}_level = {3}
18 | AND g.year = 2015'''
19 |
20 | queries = []
21 | for attr, table, metric, levels in attrs:
22 | for level in levels:
23 | queries.append(qry.format(attr, table, metric, level))
24 | #print queries[0]
25 |
26 |
27 |
28 | # CIP codes
29 | cip_qry = '''SELECT g.{0}, (g.{2} - stats.average) / stats.st AS zvalue, '{0}' as kind , lower(a.name) as name, a.name as display, a.level::text as sumlevel, a.is_stem as is_stem, a.url_name as url_name, a.keywords as keywords
30 | FROM {1} g
31 | LEFT JOIN attrs.course a ON (a.id = g.{0})
32 | CROSS JOIN
33 | (select STDDEV({2}) as st, AVG({2}) as average FROM {1} WHERE char_length({0}) = {3} AND year=2015) stats
34 | WHERE char_length({0}) = {3}
35 | AND g.year = 2015'''
36 |
37 | for level in [2, 4, 6]:
38 | queries.append(cip_qry.format("cip", "ipeds.grads_yc", "grads_total", level))
39 |
40 | # GEO codes
41 | geo_qry = '''SELECT g.{0}, (g.{2} - stats.average) / stats.st AS zvalue, '{0}' as kind , lower(a.name) as name, a.display_name as display, a.sumlevel::text as sumlevel, -1 as is_stem, a.url_name as url_name, a.keywords as keywords
42 | FROM {1} g
43 | LEFT JOIN attrs.geo_names a ON (a.id = g.{0})
44 | CROSS JOIN
45 | (select STDDEV({2}) as st, AVG({2}) as average FROM {1} WHERE {0} LIKE '{3}%' AND year=2015) stats
46 | WHERE g.{0} LIKE '{3}%'
47 | AND g.year = 2015'''
48 |
49 | for level in ['040', '050', '160', '310', '795']:
50 | queries.append(geo_qry.format("geo", "acs_5yr.yg", "pop", level))
51 |
52 | queries.append("SELECT '01000US', 150, 'geo', 'united states', 'United States', '010', -1, 'united-states', '{usa, us, america}'")
53 |
54 |
55 | # UNIVERSITIES
56 | university_qry = '''SELECT g.{0}, (g.{2} - stats.average) / stats.st AS zvalue, '{0}' as kind , lower(a.name) as name, a.display_name as display, a.university_level::text as sumlevel, a.is_stem as is_stem, a.url_name as url_name, a.keywords as keywords
57 | FROM {1} g
58 | LEFT JOIN attrs.university a ON (a.id = g.{0})
59 | CROSS JOIN
60 | (select STDDEV({2}) as st, AVG({2}) as average FROM {1} WHERE year=2015) stats
61 | WHERE g.year = 2015 and a.status != 'D' '''
62 |
63 | queries.append(university_qry.format("university", "ipeds.grads_yu", "grads_total"))
64 |
65 | tail_qrys = ["({})".format(q) if i != 0 else q for i, q in enumerate(queries)]
66 | final_q = "\n UNION \n".join(tail_qrys)
67 | print(final_q)
68 |
--------------------------------------------------------------------------------
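The script only prints the final UNION of all the per-level SELECTs; the intended workflow appears to be capturing stdout and running it against the database by hand, e.g. `python scripts/build_search.py > search_table.sql` (the output file name is illustrative, and the target table for the search data is not shown in this repo).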
/scripts/cache_helper.applescript:
--------------------------------------------------------------------------------
1 | tell application "Terminal"
2 | do script ". ~/Virtualenvs/datausa-api/bin/activate; python ~/Projects/datausa-api/scripts/fill_cache.py http://db.datausa.io geo"
3 | activate
4 | end tell
5 |
6 | tell application "Terminal"
7 | do script ". ~/Virtualenvs/datausa-api/bin/activate; python ~/Projects/datausa-api/scripts/fill_cache.py http://db.datausa.io cip"
8 | activate
9 | end tell
10 |
11 | tell application "Terminal"
12 | do script ". ~/Virtualenvs/datausa-api/bin/activate; python ~/Projects/datausa-api/scripts/fill_cache.py http://db.datausa.io soc"
13 | activate
14 | end tell
15 |
16 | tell application "Terminal"
17 | do script ". ~/Virtualenvs/datausa-api/bin/activate; python ~/Projects/datausa-api/scripts/fill_cache.py http://db.datausa.io naics"
18 | activate
19 | end tell
20 |
--------------------------------------------------------------------------------
/scripts/fill_cache.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import time
4 | import threading
5 | import os
6 | from multiprocessing import Pool
7 | from requests.auth import HTTPBasicAuth
8 |
9 |
10 | def url_to_json(url):
11 | print url
12 | result = requests.get(url).json()
13 | if 'data' in result:
14 | return result['data'], result['headers']
15 | raise Exception("No data!")
16 |
17 | def crawl_page(moi):
18 |     display_id, attr_kind = moi
19 |     if not display_id:
20 |         print "skipping", display_id, attr_kind; return  # nothing to crawl without an id
21 |     page = u'http://beta.datausa.io/profile/{}/{}/'.format(attr_kind, display_id)
22 | print page, "getting..."
23 | r = requests.get(page, auth=HTTPBasicAuth('datausa', os.environ.get('DATAUSA_WEB_PW', '')))
24 | if r.status_code != 200:
25 | if r.status_code == 401:
26 | raise Exception("You may have forgotten to set DATAUSA_WEB_PW env var (or provided a bad PW).\nWe need this because the site is password protected")
27 | print "PAGE ERROR", page, r.status_code
28 |
29 | def crawl_attr(base_url, attr_kind='country'):
30 | data, headers = url_to_json('{}/attrs/search?q=&kind={}&limit=100000'.format(base_url, attr_kind))
31 | data = sorted(data, key=lambda obj: obj[headers.index('zvalue')], reverse=True)
32 | if attr_kind != 'geo':
33 | mydata = [[country[headers.index('id')], attr_kind] for country in data]
34 | else:
35 | mydata = [[country[headers.index('url_name')], attr_kind] for country in data]
36 | #pool = Pool(5)
37 | #pool.map(crawl_page, mydata)
38 | for x in mydata:
39 | crawl_page(x)
40 |
41 |
42 |
43 | def main(base_url="http://db.datausa.io", attr="geo"):
44 | if not base_url.startswith('http://'):
45 | base_url = 'http://' + base_url
46 | if base_url.endswith('/'):
47 | base_url = base_url[:-1]
48 | attrs = attr.split(",")
49 | print "Waiting for crawl to complete..."
50 | for attr in attrs:
51 | crawl_attr(base_url, attr)
52 | print "Crawl complete!"
53 |
54 | if __name__ == "__main__":
55 | import sys
56 | if len(sys.argv) < 2:
57 | main()
58 | else:
59 | attr = sys.argv[2] if len(sys.argv) >= 3 else "geo,naics,soc,cip"
60 | main(sys.argv[1], attr)
61 |
62 | # EXAMPLE: python fill_cache.py db.datausa.io naics
63 | # python fill_cache.py db.datausa.io naics,soc
64 | # python fill_cache.py db.datausa.io geo
65 |
--------------------------------------------------------------------------------
/scripts/fix_urlnames.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path
3 | from whoosh import index
4 | from whoosh.fields import Schema, ID, TEXT, NUMERIC, KEYWORD, NGRAM, NGRAMWORDS
5 | from whoosh.fields import BOOLEAN
6 | from config import SEARCH_INDEX_DIR
7 | from datausa.database import db
8 |
9 | from datausa.attrs.models import Geo
10 | from datausa.acs.automap_models import Acs5_Yg
11 | geos = Geo.query.filter(Geo.id.like('160%')).all()
12 |
13 | url_map = {}
14 |
15 | for g in geos:
16 | if g.url_name and g.url_name not in url_map:
17 | url_map[g.url_name] = []
18 | if g.url_name:
19 | url_map[g.url_name].append(g)
20 |
21 | # now we have a list of all g's
22 | url_map = {k:v for k,v in url_map.items() if v and len(v) > 1}
23 |
24 | # get first ...
25 | for url_name, glist in url_map.items():
26 | if url_name.endswith("-pr") or url_name == 'chevy-chase-md':
27 | print "skipping pr for now..."
28 | continue
29 | print "working on", url_name
30 | if len(glist) == 2:
31 | data = []
32 | has_ran = False
33 | for g in glist:
34 | moi = Acs5_Yg.query.filter(Acs5_Yg.year == 2014, Acs5_Yg.geo == g.id).first()
35 | parents, headers = Geo.parents(g.id)
36 | county = None
37 | for p in parents:
38 | print p, "TEST"
39 | if p[0][:3] == '050':
40 | county = p[2].split("-county-")[0].lower()
41 | if not moi:
42 | continue
43 | pop = moi.pop
44 | data.append([g.url_name, g.id, pop, county, g])
45 | has_ran = True
46 | if not has_ran:
47 | print "skipping", url_name
48 | continue
49 | # select the place with less pop
50 |
51 | min_pl = min(data, key=lambda x: x[2])
52 | print data
53 | print min_pl
54 | print "RENAMING!!!!"
55 | geo_obj = min_pl[-1]
56 | print geo_obj.name, "|", geo_obj.display_name , "|", geo_obj.url_name
57 | newc = u", {} County".format(min_pl[-2].title())
58 | new_name = geo_obj.name.strip() + newc
59 | new_disp = geo_obj.display_name.replace(geo_obj.name, new_name)
60 | print "min_pl-2=",min_pl[-2]
61 | new_url = geo_obj.url_name[:-3] + u'-{}-county'.format(min_pl[-2]) + geo_obj.url_name[-3:]
62 | print "========="
63 | print "GEOid", geo_obj.id
64 | print "original", geo_obj.name
65 | print "original", geo_obj.display_name
66 | print "original", geo_obj.url_name
67 | print "The new name", new_name
68 | print "The new disp", new_disp
69 | print "The new url", new_url
70 | geo_obj.name = new_name
71 | geo_obj.display_name = new_disp
72 | geo_obj.url_name = new_url
73 | user_ok = raw_input("DO I HAVE THE OK? ")
74 | if user_ok == "AOK":
75 | db.session.add(geo_obj)
76 | db.session.commit()
77 | else:
78 | print "url_name has more than 2!!!!!!", url_name
79 |
--------------------------------------------------------------------------------
/scripts/flickr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/scripts/flickr/__init__.py
--------------------------------------------------------------------------------
/scripts/flickr/analyze.py:
--------------------------------------------------------------------------------
1 | import csv, flickr, short, sys
2 |
3 | MAX_SIDE = 1400
4 | LICENSES = ["4", "5", "7", "8", "9", "10"]
5 |
6 | def read_csv():
7 |
8 | if len(sys.argv) < 3:
9 | print "------------------------------------------"
10 | print "ERROR: Script requires 2 variables, an attribute type and a filename."
11 | print "Example: python grab.py cip file.csv"
12 | print "------------------------------------------"
13 | return
14 |
15 | attr_type = sys.argv[1]
16 | csvReader = csv.DictReader(open(sys.argv[2]))
17 | input_file = list(csvReader)
18 | images = []
19 |
20 | print "Analyzing {} images".format(attr_type.upper())
21 | for index, row in enumerate(input_file):
22 | sys.stdout.write("\r{} of {}".format(index, len(input_file)))
23 | sys.stdout.flush()
24 | uid = row["id"]
25 |
26 | if "image_link" in row and row["image_link"] != "":
27 |
28 | image = row["image_link"]
29 | if "photolist" in image:
30 | image = image.split("/in/photolist")[0]
31 |
32 | pid = image.split("/")[-1]
33 | if "flic.kr" not in image:
34 | image = "http://flic.kr/p/{}".format(short.encode(pid))
35 |
36 |
37 | image = {"id": uid, "url": image, "small": False, "removed": False}
38 | row["error"] = ""
39 |
40 | photo = flickr.Photo(pid)
41 | try:
42 | photo._load_properties()
43 | except:
44 | image["removed"] = True
45 | row["error"] = "removed"
46 |
47 | if photo._Photo__license:
48 | image["license"] = photo._Photo__license
49 | if image["license"] in LICENSES:
50 | if len([p for p in photo.getSizes() if p["width"] >= MAX_SIDE]) == 0:
51 | image["small"] = True
52 | row["error"] = "resolution"
53 | else:
54 | row["error"] = "license-{}".format(image["license"])
55 |
56 | images.append(image)
57 |
58 | print "\n"
59 | print "Outputing to CSV..."
60 | with open(sys.argv[2].replace(".csv", "-update.csv"), 'wb') as f:
61 | w = csv.DictWriter(f, None)
62 |
63 | w.fieldnames = csvReader.fieldnames
64 | w.writerow(dict((h, h) for h in csvReader.fieldnames))
65 |
66 | for row in input_file:
67 | w.writerow(row)
68 |
69 | print "\n"
70 | num_images = float(len(images))
71 | print "{} images have been analyzed".format(int(num_images))
72 | bads = sum(1 for image in images if "license" in image and image["license"] not in LICENSES)
73 | print "{} ({}%) have bad licenses".format(bads, round((bads / num_images) * 100))
74 | smalls = sum(1 for image in images if image["small"])
75 | print "{} ({}%) are too small".format(smalls, round((smalls / num_images) * 100))
76 | missing = sum(1 for image in images if image["removed"])
77 | print "{} ({}%) have been removed from Flickr".format(missing, round((missing / num_images) * 100))
78 |
79 | if __name__ == '__main__':
80 | read_csv()
81 |
--------------------------------------------------------------------------------
/scripts/flickr/grab.py:
--------------------------------------------------------------------------------
1 | import csv, flickr, os, short, sys, urllib
2 | from config import FLICKR_DIR
3 | from datausa.database import db
4 | from datausa.attrs.views import attr_map
5 | from PIL import Image as pillow
6 |
7 | MAX_SIDE = 1400
8 | LICENSES = ["4", "5", "7", "8", "9", "10"]
9 |
10 | def read_csv():
11 |
12 | thumb_side = 425
13 | quality = 90
14 |
15 | if len(sys.argv) < 3:
16 | print "------------------------------------------"
17 | print "ERROR: Script requires 2 variables, an attribute type and a filename."
18 | print "Example: python grab.py cip file.csv"
19 | print "------------------------------------------"
20 | return
21 |
22 | attr_type = sys.argv[1]
23 | if attr_type not in attr_map:
24 | print "------------------------------------------"
25 | print "ERROR: Invalid attribute type."
26 | print "Allowed keys: {}".format(", ".join(attr_map.keys()))
27 | print "------------------------------------------"
28 | return
29 | else:
30 | table = attr_map[attr_type]
31 |
32 | csvFilename = sys.argv[2]
33 | csvReader = csv.DictReader(open(csvFilename))
34 | input_file = list(csvReader)
35 | imgdir = os.path.join(FLICKR_DIR, attr_type)
36 | thumbdir = imgdir.replace("splash", "thumb")
37 | badImages = []
38 | smallImages = []
39 | goodImages = []
40 | removedImages = []
41 | deletedImages = []
42 |
43 | # skip = True
44 |
45 | if not os.path.exists(imgdir):
46 | os.makedirs(imgdir)
47 |
48 | if not os.path.exists(thumbdir):
49 | os.makedirs(thumbdir)
50 |
51 | for row in input_file:
52 |
53 | update = False
54 |
55 | uid = row["id"]
56 | imgpath = os.path.join(imgdir, "{}.jpg".format(uid))
57 | thumbpath = os.path.join(thumbdir, "{}.jpg".format(uid))
58 |
59 | image_only = attr_type == "geo"
60 |
61 | if "level" in row:
62 | attr = table.query.filter_by(id=uid,level=row["level"]).first()
63 | else:
64 | attr = table.query.get(uid)
65 |
66 | if attr and "image_link" in row:
67 | image = row["image_link"]
68 |
69 | if not image and attr.image_link:
70 |
71 | attr.image_meta = None
72 | attr.image_link = None
73 | attr.image_author = None
74 | update = True
75 | deletedImages.append(uid)
76 | row["error"] = ""
77 | os.remove(imgpath)
78 | os.remove(thumbpath)
79 |
80 | elif image and attr.image_link != image:
81 |
82 | if "photolist" in image:
83 | image = image.split("/in/photolist")[0]
84 |
85 | pid = image.split("/")[-1]
86 | if "flic.kr" not in image:
87 | image = "http://flic.kr/p/{}".format(short.encode(pid))
88 |
89 | photo = flickr.Photo(pid)
90 | try:
91 | photo._load_properties()
92 | except:
93 | row["error"] = "removed"
94 | removedImages.append(uid)
95 | continue
96 |
97 | image = {"id": uid, "url": image, "license": photo._Photo__license}
98 |
99 | if image["license"] not in LICENSES:
100 | badImages.append(image)
101 | row["error"] = "license-{}".format(image["license"])
102 | else:
103 | sizes = [p for p in photo.getSizes() if p["width"] >= MAX_SIDE]
104 | if len(sizes) == 0:
105 | smallImages.append(image)
106 | row["error"] = "resolution"
107 | else:
108 | download_url = min(sizes, key=lambda item: item["width"])["source"]
109 |
110 | urllib.urlretrieve(download_url, imgpath)
111 |
112 | img = pillow.open(imgpath).convert("RGB")
113 |
114 | img.thumbnail((MAX_SIDE, MAX_SIDE), pillow.ANTIALIAS)
115 | img.save(imgpath, "JPEG", quality=quality)
116 |
117 | img.thumbnail((thumb_side, thumb_side), pillow.ANTIALIAS)
118 | img.save(thumbpath, "JPEG", quality=quality)
119 |
120 | author = photo._Photo__owner
121 | author = author.realname if author.realname else author.username
122 | image["author"] = author.replace("'", "\\'")
123 | goodImages.append(image)
124 |
125 | attr.image_link = image["url"]
126 | attr.image_author = image["author"]
127 | update = True
128 |
129 | # set False to True to force thumbnails
130 | elif False and image:
131 |
132 | imgpath = os.path.join(imgdir, "{}.jpg".format(uid))
133 | thumbpath = os.path.join(thumbdir, "{}.jpg".format(uid))
134 |
135 | img = pillow.open(imgpath).convert("RGB")
136 |
137 | img.thumbnail((thumb_side, thumb_side), pillow.ANTIALIAS)
138 | img.save(thumbpath, "JPEG", quality=quality)
139 |
140 | if not image_only:
141 | name = row["name"]
142 | if attr and name and attr.name != name:
143 | attr.name = name
144 | update = True
145 |
146 | if "image_meta" in row:
147 | meta = row["image_meta"]
148 | if attr and meta and attr.image_meta != meta:
149 | attr.image_meta = meta
150 | update = True
151 |
152 | if update:
153 | db.session.add(attr)
154 | db.session.commit()
155 |
156 | # break
157 |
158 |
159 |
160 | print "\n"
161 | print "Outputing to CSV..."
162 | with open(csvFilename.replace(".csv", "-update.csv"), 'wb') as f:
163 | w = csv.DictWriter(f, None)
164 |
165 | w.fieldnames = csvReader.fieldnames
166 | w.writerow(dict((h, h) for h in csvReader.fieldnames))
167 |
168 | for row in input_file:
169 | w.writerow(row)
170 |
171 | print "\n"
172 | print "{} new images have been processed.".format(len(goodImages))
173 | if len(badImages) > 0:
174 | print "The following images have bad licenses: {}".format(", ".join([i["id"] for i in badImages]))
175 | if len(smallImages) > 0:
176 | print "The following images are too small: {}".format(", ".join([i["id"] for i in smallImages]))
177 | if len(removedImages) > 0:
178 | print "The following images have been removed from Flickr: {}".format(", ".join([i for i in removedImages]))
179 | if len(deletedImages) > 0:
180 | print "The following images have been deleted: {}".format(", ".join([i for i in deletedImages]))
181 |
182 |
183 |
184 | if __name__ == '__main__':
185 | read_csv()
186 |
--------------------------------------------------------------------------------
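The block above funnels every new Flickr photo through the same Pillow resize pattern: open, convert to RGB, shrink in place, save twice. A minimal sketch of that pattern, with illustrative values for MAX_SIDE, thumb_side and quality (the real ones are defined earlier in grab.py) and a hypothetical file path:

from PIL import Image as pillow

MAX_SIDE, thumb_side, quality = 800, 425, 90  # illustrative values only

img = pillow.open("cache/image/example.jpg").convert("RGB")
# thumbnail() resizes in place, preserves aspect ratio and never
# upscales, so a single download yields both the full-size JPEG and
# its thumbnail.
img.thumbnail((MAX_SIDE, MAX_SIDE), pillow.ANTIALIAS)
img.save("cache/image/example.jpg", "JPEG", quality=quality)
img.thumbnail((thumb_side, thumb_side), pillow.ANTIALIAS)
img.save("cache/thumb/example.jpg", "JPEG", quality=quality)

--------------------------------------------------------------------------------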
/scripts/flickr/short.py:
--------------------------------------------------------------------------------
1 | '''
2 | This code is taken from the flickrapi project
3 | See https://github.com/rfaulkner/flickrapi/blob/master/flickrapi/shorturl.py
4 | and http://stuvel.eu/flickrapi
5 | '''
6 |
7 | ALPHABET = u'123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'
8 | ALPHALEN = len(ALPHABET)
9 | SHORT_URL = u'http://flic.kr/p/%s'
10 |
11 | def encode(photo_id):
12 | '''encode(photo_id) -> short id
13 |
14 | >>> encode(u'4325695128')
15 | '7Afjsu'
16 | >>> encode(u'2811466321')
17 | '5hruZg'
18 | '''
19 |
20 | photo_id = int(photo_id)
21 |
22 | encoded = u''
23 | while photo_id >= ALPHALEN:
24 | div, mod = divmod(photo_id, ALPHALEN)
25 | encoded = ALPHABET[mod] + encoded
26 | photo_id = int(div)
27 |
28 | encoded = ALPHABET[photo_id] + encoded
29 |
30 | return encoded
31 |
--------------------------------------------------------------------------------
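encode() above is a plain positional base-58 conversion over Flickr's URL-safe alphabet, so it inverts mechanically. A minimal sketch of the inverse, reusing the ALPHABET and ALPHALEN constants from the file (not part of short.py; flickrapi ships its own decoder):

def decode(short_id):
    '''decode(short id) -> photo id, inverting encode()

    >>> print(decode(u'7Afjsu'))
    4325695128
    '''
    photo_id = 0
    for char in short_id:
        # each character is one base-58 digit, most significant first
        photo_id = photo_id * ALPHALEN + ALPHABET.index(char)
    return photo_id

--------------------------------------------------------------------------------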
/scripts/gen_indicies.py:
--------------------------------------------------------------------------------
1 | '''
2 | Script used to add indexes for PUMS tables
3 | '''
4 | import itertools
5 |
6 | lookup = {
7 | "a": "age",
8 | "b": "birthplace",
9 | "c": "cip",
10 | "d": "degree",
11 | "s": "sector",
12 | "g": "geo",
13 | "i": "naics",
14 | "o": "soc",
15 | "r": "race",
16 | "s": "sex",
17 | "w": "wage_bin",
18 | "y": "year",
19 | }
20 |
21 | tables = [
22 | 'ya',
23 | 'yc',
24 | 'yca',
25 | 'ycb',
26 | 'ycd',
27 | 'ycs',
28 | 'yg',
29 | 'ygb',
30 | 'ygc',
31 | 'ygd',
32 | 'ygi',
33 | 'ygio',
34 | 'ygo',
35 | 'ygor',
36 | 'ygos',
37 | 'ygr',
38 | 'ygs',
39 | 'ygw',
40 | 'yi',
41 | 'yic',
42 | 'yid',
43 | 'yio',
44 | 'yior',
45 | 'yios',
46 | 'yir',
47 | 'yis',
48 | 'yiw',
49 | 'yo',
50 | 'yoas',
51 | 'yoc',
52 | 'yocd',
53 | 'yod',
54 | 'yor',
55 | 'yos',
56 | 'yow',
57 | ]
58 | schema = 'pums_1yr'
59 |
60 | def has_prefix(indexes, index):
61 | for ix in indexes:
62 | if ix.startswith(index):
63 | return True
64 | return False
65 |
66 | def gen_index(table, idx_id, is_pk=False):
67 | cols = [lookup[l] for l in idx_id]
68 | if is_pk:
69 | if "i" in table:
70 | cols.append("naics_level")
71 | if "o" in table:
72 | cols.append("soc_level")
73 | cols = ",".join(cols)
74 | unq = "" if not is_pk else "UNIQUE"
75 | qry = "CREATE {4} INDEX {1}_{2}_idx ON {0}.{1} ({3});".format(schema, table, idx_id, cols, unq)
76 | return qry
77 |
78 | for table in tables:
79 | indexes = []
80 | sizes = range(1, len(table) + 1)
81 | sizes.reverse()
82 | for size in sizes:
83 | tmp = list(itertools.combinations(table, size))
84 | indexes += [''.join(x) for x in tmp if not has_prefix(indexes, ''.join(x))]
85 |
86 | # indexes to create
87 | for index in indexes:
88 | print gen_index(table, index, len(index) == len(table))
89 |
--------------------------------------------------------------------------------
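The has_prefix() pruning above relies on Postgres being able to use the leading columns of a composite index on their own, so any combination that is a leading prefix of an index already emitted needs no index of its own. A worked example for the two-column table 'yg':

# size-2 pass: 'yg' is kept (nothing emitted yet)
# size-1 pass: 'y' is skipped because 'yg' starts with it,
#              'g' is kept because it is not a leading prefix of 'yg'
# gen_index() then prints (the full combination becomes the unique index):
#   CREATE UNIQUE INDEX yg_yg_idx ON pums_1yr.yg (year,geo);
#   CREATE INDEX yg_g_idx ON pums_1yr.yg (geo);

--------------------------------------------------------------------------------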
/scripts/get_vnames.py:
--------------------------------------------------------------------------------
1 | import pprint
2 | from datausa.core import registrar
3 | from datausa.database import db
4 |
5 | data = {}
6 |
7 | for tbl in registrar.registered_models:
8 | data[tbl.full_name()] = [c.key for c in tbl.__table__.columns]
9 |
10 | pp = pprint.PrettyPrinter(indent=4)
11 | pp.pprint(data)
12 |
--------------------------------------------------------------------------------
/scripts/search/build_index.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path
3 | from whoosh import index
4 | from whoosh.fields import Schema, ID, TEXT, NUMERIC, KEYWORD, NGRAMWORDS
5 | from config import SEARCH_INDEX_DIR
6 | from unidecode import unidecode
7 |
8 |
9 | def manual_add(writer, name, display, orig_id, is_stem=False, url_name=None, zoverride=None, kind=u"geo"):
10 | from datausa.attrs.models import Search
11 | kind = unicode(kind)
12 | doc_obj = Search.query.filter_by(id=orig_id, kind=kind).first()
13 | zval = doc_obj.zvalue * 1.5 if not zoverride else zoverride
14 | is_stem = is_stem or doc_obj.is_stem
15 | if not url_name:
16 | url_name = doc_obj.url_name
17 | writer.add_document(id=doc_obj.id, name=name,
18 | display=display, zvalue=zval,
19 | kind=kind, sumlevel=doc_obj.sumlevel,
20 | is_stem=is_stem, url_name=url_name)
21 |
22 |
23 | def get_schema():
24 | return Schema(id=ID(stored=True),
25 | name=NGRAMWORDS(stored=True, minsize=2, maxsize=12, at='start', queryor=True),
26 | display=TEXT(stored=True),
27 | zvalue=NUMERIC(stored=True),
28 | kind=KEYWORD(stored=True),
29 | sumlevel=KEYWORD(stored=True),
30 | is_stem=NUMERIC(stored=True),
31 | url_name=TEXT(stored=True))
32 |
33 |
34 | if __name__ == '__main__':
35 | print "got here!"
36 | print SEARCH_INDEX_DIR
37 | if not os.path.exists(SEARCH_INDEX_DIR):
38 | print "got here2"
39 | os.mkdir(SEARCH_INDEX_DIR)
40 | ix = index.create_in(SEARCH_INDEX_DIR, get_schema())
41 | print "Creating attr index..."
42 |
43 | ix = index.open_dir(SEARCH_INDEX_DIR)
44 | writer = ix.writer()
45 | from datausa.attrs.models import Search
46 | all_objs = Search.query.all()
47 | for obj in all_objs:
48 | dname = obj.display
49 | stem = False if not hasattr(obj, "is_stem") else obj.is_stem
50 | if dname:
51 | dname = unicode(dname)
52 | dname = unidecode(dname)
53 | dname = unicode(dname)
54 | dname = dname.lower().replace(",", "")
55 | dname = dname.replace(".", "")
56 | writer.add_document(id=obj.id, name=dname,
57 | display=obj.display, zvalue=obj.zvalue,
58 | kind=obj.kind, sumlevel=obj.sumlevel,
59 | is_stem=stem, url_name=obj.url_name)
60 |
61 | if obj.keywords:
62 | for keyword in obj.keywords:
63 | writer.add_document(id=obj.id, name=keyword,
64 | display=obj.display, zvalue=obj.zvalue,
65 | kind=obj.kind, sumlevel=obj.sumlevel,
66 | is_stem=stem, url_name=obj.url_name)
67 | # Custom synonyms to help with search
68 | import pandas as pd
69 | attrs_with_aliases = ["geo"]
70 | for kind in attrs_with_aliases:
71 | target_path = os.path.join(SEARCH_INDEX_DIR, "..", "scripts", "search", "{}_aliases.csv".format(kind))
72 | df = pd.read_csv(target_path)
73 | for geo, name, short, zval in df.values:
74 | for alias in short.split(","):
75 | alias = alias.strip()
76 | manual_add(writer, unicode(alias), unicode(name), unicode(geo), zoverride=zval, kind=kind)
77 |
78 | # --
79 | manual_add(writer, u'garbagemen', u'Garbagemen', '537081', kind=u'soc')
80 | manual_add(writer, u'doctors', u'Doctors', '291060', kind=u'soc')
81 | manual_add(writer, u'manhattan', u'Manhattan, NY', '05000US36061', kind=u'geo')
82 | manual_add(writer, u'meteorologists', u'Meteorologists', '192021', kind=u'soc')
83 | manual_add(writer, u'film', u'Motion Pictures & Video Industries', '5121', kind=u'naics')
84 | manual_add(writer, u'movies', u'Motion Pictures & Video Industries', '5121', kind=u'naics')
85 |
86 | writer.commit()
87 |
--------------------------------------------------------------------------------
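A minimal sketch of querying the finished index, mirroring the parser and facet setup used in tests/test_search.py; because the name field is NGRAMWORDS with minsize=2 and at='start', a two-letter prefix is already enough to match:

from whoosh import index, qparser, sorting
from config import SEARCH_INDEX_DIR

ix = index.open_dir(SEARCH_INDEX_DIR)
qp = qparser.QueryParser("name", schema=ix.schema, group=qparser.OrGroup)

with ix.searcher() as searcher:
    # "ma" matches "massachusetts" via the leading ngrams; the stored
    # zvalue ranks bigger attributes first.
    hits = searcher.search(qp.parse(u"ma"),
                           sortedby=sorting.FieldFacet("zvalue", reverse=True))
    for hit in hits:
        print("{} {}".format(hit["id"], hit["display"]))

--------------------------------------------------------------------------------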
/scripts/search/build_var_index.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path
3 | import json
4 |
5 | from whoosh import index
6 | from whoosh.fields import Schema, TEXT, NGRAMWORDS
7 | from config import VAR_INDEX_DIR
8 |
9 |
10 | def get_schema():
11 | return Schema(related_vars=TEXT(stored=True),
12 | name=NGRAMWORDS(stored=True, minsize=3, maxsize=12, at='start', queryor=True),
13 | description=TEXT(stored=True),
14 | section=TEXT(stored=True),
15 | section_title=TEXT(stored=True),
16 | related_attrs=TEXT(stored=True),
17 | params=TEXT(stored=True))
18 |
19 | if __name__ == '__main__':
20 | print("Building index...")
21 | if not os.path.exists(VAR_INDEX_DIR):
22 | os.mkdir(VAR_INDEX_DIR)
23 | ix = index.create_in(VAR_INDEX_DIR, get_schema())
24 | print("Creating variables index...")
25 |
26 | ix = index.open_dir(VAR_INDEX_DIR)
27 | writer = ix.writer()
28 |
29 | all_vars = [
30 | [u'adult_obesity,diabetes', u'obesity', u'Obesity Prevalence,Diabetes Prevalence', u'conditions_diseases', u'Healthcare', u'geo', None],
31 | [u'adult_obesity,diabetes', u'diabetes', u'Obesity Prevalence,Diabetes Prevalence', u'conditions_diseases', u'Healthcare', u'geo', None],
32 | [u'adult_obesity,diabetes', u'healthcare', u'Obesity Prevalence,Diabetes Prevalence', u'conditions_diseases', u'Healthcare', u'geo', None],
33 | [u'motor_vehicle_crash_deaths', u'car crashes', u'Motor Vehicle Crash Deaths', u'risky', u'Crime', u'geo', None],
34 | [u'motor_vehicle_crash_deaths', u'accidents', u'Motor Vehicle Crash Deaths', u'risky', u'Crime', u'geo', None],
35 |
36 | [u'adult_smoking', u'smokers', u'Adult Smoking Prevalence', u'risky', u'Healthcare', u'geo', None],
37 | [u'adult_smoking', u'cigarettes', u'Adult Smoking Prevalence', u'risky', u'Healthcare', u'geo', None],
38 |
39 | # [u'infant_mortality', u'infant mortality', u'Infant mortality', u'health', u'geo'],
40 | # [u'teen_births', u'teen births', u'Teen births', u'health', u'geo'],
41 | [u'mean_commute_minutes', u'commuters', u'Average Travel Time', u'commute_time', u'Transportation', u'geo', None],
42 | [u'mean_commute_minutes', u'transit', u'Average Travel Time', u'commute_time', u'Transportation', u'geo', None],
43 | [u'mean_commute_minutes', u'transportation', u'Average Travel Time', u'commute_time', u'Transportation', u'geo', None],
44 | [u'mean_commute_minutes', u'travel time', u'Average Travel Time', u'commute_time', u'Transportation', u'geo', None],
45 |
46 | [u'conflict_total', u'veterans', u'Number of Veterans', u'veterans', u'Military', u'geo', None],
47 | [u'conflict_total', u'war', u'Number of Veterans', u'veterans', u'Military', u'geo', None],
48 |
49 | [u'violent_crime', u'crime', u'Violent Crimes', u'crime', u'Crime', u'geo', None],
50 | [u'homicide_rate', u'murder', u'Homicide Deaths', u'crime', u'Crime', u'geo', None],
51 | [u'homicide_rate', u'homicide', u'Homicide Deaths', u'crime', u'Crime', u'geo', None],
52 |
53 | [u'pop,age', u'population', u'Population,Median Age', u'demographics', u'Demographics', u'geo', None],
54 | [u'pop,age', u'people', u'Population,Median Age', u'demographics', u'Demographics', u'geo', None],
55 | [u'age', u'age', u'Median Age', u'demographics', u'Demographics', u'geo', None],
56 | [u'income', u'income', u'Median Yearly Income', u'economy', u'Economy', u'geo', None],
57 | [u'avg_wage', u'salaries', u'Average Salary', u'economy', u'Economy', u'geo,soc,naics,cip', None],
58 | [u'avg_wage', u'wage', u'Average Salary', u'economy', u'Economy', u'geo,soc,naics,cip', None],
59 | [u'income,age,pop', u'economics', u'Median Yearly Income,Age,Population', u'economy', u'Economy', u'geo', None],
60 | # [u'avg_wage', u'wages', u'Wages', u'income_distro', u'geo', None],
61 | [u'median_property_value', u'property value', u'Median Property Value', u'economy', u'Economy', u'geo', None],
62 | [u'median_property_value', u'home value', u'Median Property Value', u'economy', u'Economy', u'geo', None],
63 | [u'median_property_value', u'housing cost', u'Median Property Value', u'economy', u'Economy', u'geo', None],
64 |
65 | [u'income_below_poverty', u'poverti', u'Population in Poverty', u'poverty_age_gender', u'Wages', u'geo', None],  # 'poverti' appears to be a stemmed spelling; its leading ngrams still match queries for "poverty"
66 | [u'income_below_poverty', u'poor', u'Population in Poverty', u'poverty_age_gender', u'Wages', u'geo', None],
67 |
68 | [u'households_renter_occupied,households_owner_occupied,households', u'renters', u'Renter occupied households', u'rent_own', u'Housing', u'geo', None],
69 | [u'grads_total', u'graduates', u'Degrees Awarded', u'education', u'Education', u'geo', None],
70 | [u'grads_total', u'grads', u'Degrees Awarded', u'education', u'Education', u'geo', None],
71 | [u'grads_total', u'students', u'Degrees Awarded', u'education', u'Education', u'geo', None],
72 |
73 | [u'nativity_foreign,nativity_us', u'foreign born', u'Foreign Born,Native Born', u'demographics', u'Demographics', u'geo', None],
74 | [u'nativity_foreign,nativity_us', u'native born', u'Foreign Born,Native Born', u'demographics', u'Demographics', u'geo', None],
75 |
76 | [u'pop_black,pop_latino,pop_white,pop_asian', u'race ethnicity', u'Black Population,Hispanic Population,White Population,Asian Population', u'ethnicity', u'Heritage', u'geo', None],
77 | [u'us_citizens', u'citizen', u'Citizenship', u'citizenship', u'Heritage', u'geo', None],
78 | [u'gini', u'gini', u'Gini', u'income_distro', u'Wages', u'geo', None],
79 | [u'gini', u'inequality', u'Gini', u'income_distro', u'Wages', u'geo', None],
80 | [u'pop_latino', u'hispanic', u'Hispanic Population', u'ethnicity', u'Heritage', u'geo', None],
81 | [u'pop_latino', u'latino', u'Hispanic Population', u'ethnicity', u'Heritage', u'geo', None],
82 | [u'pop_black', u'black', u'Black Population', u'ethnicity', u'Heritage', u'geo', None],
83 | [u'pop_white', u'white', u'White Population', u'ethnicity', u'Heritage', u'geo', None],
84 | [u'pop_asian', u'asian', u'Asian Population', u'ethnicity', u'Heritage', u'geo', None],
85 | [u'transport_bicycle', u'bicycle', u'Bicycle to Work', u'mode_transport', u'Transportation', u'geo', None],
86 | [u'transport_bicycle', u'bikers', u'Bicycle to Work', u'mode_transport', u'Transportation', u'geo', None],
87 | [u'transport_bicycle', u'cyclist', u'Bicycle to Work', u'mode_transport', u'Transportation', u'geo', None],
88 | [u'transport_carpooled', u'carpool', u'Carpool to Work', u'mode_transport', u'Transportation', u'geo', None],
89 | [u'transport_publictrans', u'public transit', u'Public Transit to Work', u'mode_transport', u'Transportation', u'geo', None],
90 | [u'transport_drove', u'drive', u'Drove Alone to Work', u'mode_transport', u'Transportation', u'geo', None],
91 | [u'transport_drove', u'cars', u'Drove Alone to Work', u'mode_transport', u'Transportation', u'geo', None],
92 | [u'transport_drove', u'drivers', u'Drove Alone to Work', u'mode_transport', u'Transportation', u'geo', None],
93 | [u'transport_taxi', u'taxi', u'Taxi to Work', u'mode_transport', u'Transportation', u'geo', None],
94 | [u'transport_motorcycle', u'motorcycle', u'Motorcycled to Work', u'mode_transport', u'Transportation', u'geo', None],
95 | [u'transport_walked', u'walk', u'Walked to Work', u'mode_transport', u'Transportation', u'geo', None],
96 |
97 | ]
98 |
99 | from datausa.attrs.models import AcsLanguage, PumsBirthplace
100 |
101 | for lang in AcsLanguage.query.all():
102 | my_params = {
103 | "year": "latest",
104 | "language": lang.id
105 | }
106 | my_var = [u'num_speakers', u'{}'.format(lang.name.lower()),
107 | u'{} Speakers'.format(lang.name), u'languages', u'Heritage', u'geo', unicode(json.dumps(my_params))]
108 | print my_var
109 | all_vars.append(my_var)
110 |
111 |
112 | for birthplace in PumsBirthplace.query.filter(~PumsBirthplace.id.startswith("XX"),
113 | ~PumsBirthplace.id.startswith("040")):
114 | if birthplace.id in ["161", "344"]: # skip georgia and car
115 | continue
116 | my_params = {
117 | "year": "latest",
118 | "birthplace": birthplace.id
119 | }
120 | b_keyword = birthplace.demonym or birthplace.name
121 | b_keyword = b_keyword.lower().strip()
122 | b_keyword = " ".join([k for k in b_keyword.split(" ") if len(k) > 3])
123 | my_var = [u'num_over5', u'{}'.format(b_keyword),
124 | u'People Born in {}'.format(birthplace.name.title()), u'heritage', u'Heritage', u'geo', unicode(json.dumps(my_params))]
125 | print my_var
126 | all_vars.append(my_var)
127 |
128 | for related_vars, name, description, section, section_title, related_attrs, params in all_vars:
129 | # print '|{}|{}|{}|'.format(name, description, related_vars)
130 | writer.add_document(related_vars=related_vars, name=name,
131 | description=description, section=section,
132 | section_title=section_title,
133 | related_attrs=related_attrs, params=params)
134 | writer.commit()
135 |
--------------------------------------------------------------------------------
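A minimal sketch of resolving a keyword against the variable index built above (field names come from get_schema(); "smokers" is one of the aliases seeded in all_vars):

from whoosh import index, qparser
from config import VAR_INDEX_DIR

ix = index.open_dir(VAR_INDEX_DIR)
qp = qparser.QueryParser("name", schema=ix.schema, group=qparser.OrGroup)

with ix.searcher() as searcher:
    for hit in searcher.search(qp.parse(u"smokers")):
        # e.g. u'smokers' -> related_vars u'adult_smoking', section u'risky'
        print("{} -> {}".format(hit["name"], hit["related_vars"]))

--------------------------------------------------------------------------------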
/scripts/search/geo_aliases.csv:
--------------------------------------------------------------------------------
1 | geo,name,short,zval
2 | 01000US,United States,"united states, us, usa",150
3 | 16000US3651000,"New York, NY",nyc,124.7253778
4 | 05000US36047,"Kings County (Brooklyn), NY",brooklyn,15
5 | 16000US0644000,"Los Angeles, CA",la,57.66425865
6 | 16000US4260000,"Philadelphia, PA",philly,23.08155342
7 | 05000US06059,"Orange County, CA",oc,19.442103262
8 | 16000US1150000,"Washington, DC",dc,19.231709584
9 | 16000US3240000,"Las Vegas, NV",vegas,18.810854347
10 | 16000US4055000,"Oklahoma City, OK",okc,18.803290277
11 | 16000US2255000,"New Orleans, LA",nola,15.270642743
12 | 04000US06,California,"Cali, CA",14.616604426
13 | 16000US3915000,"Cincinnati, OH",cinci,14.366834419
14 | 04000US48,Texas,tx,13.860417732
15 | 04000US36,New York,ny,12.961516308
16 | 04000US12,Florida,fl,12.903672701
17 | 04000US17,Illinois,il,10
18 | 04000US42,Pennsylvania,pa,10
19 | 79500US2000500,"Kansas City PUMA, KS",kc,1
20 | 04000US39,Ohio,oh,10
21 | 04000US26,Michigan,mi,10
22 | 04000US13,Georgia,ga,10
23 | 04000US37,North Carolina,nc,10
24 | 04000US34,New Jersey,nj,10
25 | 04000US51,Virginia,va,10
26 | 04000US53,Washington,wa,10
27 | 04000US25,Massachusetts,ma,10
28 | 04000US25,Massachusetts,mass,10
29 | 04000US18,Indiana,in,10
30 | 04000US04,Arizona,az,10
31 | 04000US47,Tennessee,tn,10
32 | 04000US29,Missouri,mo,10
33 | 16000US0931270,"Glastonbury Center, CT",gbury,0.5
34 | 04000US24,Maryland,md,10
35 | 04000US55,Wisconsin,wi,10
36 | 04000US27,Minnesota,mn,10
37 | 04000US08,Colorado,co,10
38 | 04000US01,Alabama,al,10
39 | 04000US45,South Carolina,sc,10
40 | 04000US22,Louisiana,la,10
41 | 04000US21,Kentucky,ky,10
42 | 04000US41,Oregon,or,10
43 | 04000US40,Oklahoma,ok,10
44 | 04000US72,Puerto Rico,pr,10
45 | 04000US09,Connecticut,ct,10
46 | 04000US19,Iowa,ia,10
47 | 04000US28,Mississippi,ms,10
48 | 04000US05,Arkansas,ar,10
49 | 04000US20,Kansas,ks,10
50 | 04000US49,Utah,ut,10
51 | 04000US32,Nevada,nv,10
52 | 04000US35,New Mexico,nm,10
53 | 04000US54,West Virginia,wv,10
54 | 04000US31,Nebraska,ne,10
55 | 04000US16,Idaho,id,10
56 | 04000US15,Hawaii,hi,10
57 | 04000US23,Maine,me,10
58 | 04000US33,New Hampshire,nh,10
59 | 04000US44,Rhode Island,ri,10
60 | 04000US30,Montana,mt,10
61 | 04000US10,Delaware,de,10
62 | 04000US46,South Dakota,sd,10
63 | 04000US02,Alaska,ak,10
64 | 04000US38,North Dakota,nd,10
65 | 04000US50,Vermont,vt,10
66 | 04000US11,District of Columbia,dc,10
67 | 04000US56,Wyoming,wy,10
68 |
--------------------------------------------------------------------------------
/scripts/search/rebuild_index.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | from config import SEARCH_INDEX_DIR, SQLALCHEMY_DATABASE_URI
4 | from sqlalchemy import create_engine
5 |
6 | print("Step 1. Delete old index")
7 | try:
8 | shutil.rmtree(SEARCH_INDEX_DIR)
9 | except OSError:
10 | print("No directory found...continuing...")
11 |
12 | print("Step 2. Refresh Materialized View")
13 | engine = create_engine(SQLALCHEMY_DATABASE_URI)
14 | with engine.begin() as connection:
15 | result = connection.execute("REFRESH MATERIALIZED VIEW attrs.search_v8")
16 | print("Result", result)
17 |
18 | print("Step 3. Rebuild Index")
19 | build_index = os.path.join(SEARCH_INDEX_DIR.replace("search_index/", ""), "scripts", "search", "build_index.py")
20 | result = os.system("python {}".format(build_index))
21 | print("Result", result)
22 |
--------------------------------------------------------------------------------
/scripts/search/zip_lookup.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | '86000US' || zcta5.geoid10 AS child_geoid,
3 | '31000US' || cbsa.geoid AS parent_geoid,
4 | ST_Area(ST_Intersection(zcta5.geom,cbsa.geom))/ST_Area(zcta5.geom)*100 as percent_covered,
5 | ST_Area(cbsa.geom) as parent_area
6 | FROM tiger2013.zcta5
7 | JOIN tiger2013.cbsa ON ST_Intersects(zcta5.geom, cbsa.geom)
8 | WHERE
9 | ST_Area(ST_Intersection(zcta5.geom,cbsa.geom))/ST_Area(zcta5.geom) > 0
10 | UNION
11 | (SELECT
12 | '86000US' || zcta5.geoid10 AS child_geoid,
13 | '16000US' || place.geoid AS parent_geoid,
14 | ST_Area(ST_Intersection(zcta5.geom,place.geom))/ST_Area(zcta5.geom)*100 as percent_covered,
15 | ST_Area(place.geom) as parent_area
16 | FROM tiger2013.zcta5
17 | JOIN tiger2013.place ON ST_Intersects(zcta5.geom, place.geom)
18 | WHERE
19 | ST_Area(ST_Intersection(zcta5.geom,place.geom))/ST_Area(zcta5.geom) > 0
20 | and ST_IsValid(zcta5.geom))
21 | UNION
22 | (SELECT
23 | '86000US' || zcta5.geoid10 AS child_geoid,
24 | '05000US' || county.geoid AS parent_geoid,
25 | ST_Area(ST_Intersection(zcta5.geom,county.geom))/ST_Area(zcta5.geom)*100 as percent_covered,
26 | ST_Area(county.geom) as parent_area
27 | FROM tiger2013.zcta5
28 | JOIN tiger2013.county ON ST_Intersects(zcta5.geom, county.geom)
29 | WHERE
30 | ST_Area(ST_Intersection(zcta5.geom, county.geom))/ST_Area(zcta5.geom) > 0
31 | and ST_IsValid(zcta5.geom))
32 | UNION
33 | (SELECT
34 | '86000US' || zcta5.geoid10 AS child_geoid,
35 | '79500US' || puma.geoid10 AS parent_geoid,
36 | ST_Area(ST_Intersection(zcta5.geom,puma.geom))/ST_Area(zcta5.geom)*100 as percent_covered,
37 | ST_Area(puma.geom) as parent_area
38 | FROM tiger2013.zcta5
39 | JOIN tiger2013.puma ON ST_Intersects(zcta5.geom, puma.geom)
40 | WHERE
41 | ST_Area(ST_Intersection(zcta5.geom, puma.geom))/ST_Area(zcta5.geom) > 0
42 | and ST_IsValid(zcta5.geom))
--------------------------------------------------------------------------------
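Each branch of the UNION scores how much of a ZCTA's area falls inside a candidate parent (CBSA, place, county, or PUMA). A minimal sketch for running the file ad hoc, reusing the engine setup from scripts/search/rebuild_index.py (the relative path assumes the repo root as working directory):

from sqlalchemy import create_engine
from config import SQLALCHEMY_DATABASE_URI

engine = create_engine(SQLALCHEMY_DATABASE_URI)
with open("scripts/search/zip_lookup.sql") as f:
    lookup_sql = f.read()

with engine.begin() as conn:
    # each row: child geoid, parent geoid, percent covered, parent area
    for child, parent, pct, _area in conn.execute(lookup_sql):
        print("{} in {}: {:.1f}%".format(child, parent, pct))

--------------------------------------------------------------------------------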
/scripts/university_abbrev_gen.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import re
3 | from nltk.tokenize import word_tokenize
4 | from nltk.corpus import stopwords
5 |
6 | df = pd.read_csv("schools.csv")
7 | stop_words = set(stopwords.words('english')).union(set(["&", "-"]))
8 |
9 |
10 | def abbrev(x):
11 | x = x.replace("-", " ")
12 | x = re.sub(r"('|:)", "", x)
13 | tokens = word_tokenize(x)
14 | filtered = [w[0] for w in tokens if w.lower() not in stop_words]  # lower() so capitalized stopwords like "The" are dropped too
15 | return "".join(filtered).lower()
16 |
17 |
18 | df['abbrev'] = df.name.apply(abbrev)
19 | df.to_csv("school_abrevs.csv")
20 |
--------------------------------------------------------------------------------
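A worked example of abbrev(), with an illustrative school name:

# "University of Maryland-College Park"
#   -> the hyphen becomes a space; tokens: University / of /
#      Maryland / College / Park
#   -> "of" is an NLTK stopword and is dropped
#   -> first letters of the remaining tokens, lowercased: "umcp"

--------------------------------------------------------------------------------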
/scripts/update_university_keywords.py:
--------------------------------------------------------------------------------
1 | import click
2 | import pandas as pd
3 | from datausa.attrs.models import University
4 | from datausa.database import db
5 |
6 |
7 | @click.command()
8 | @click.option('--sheet_url', prompt='Spreadsheet URL',
9 | help='The spreadsheet containing the university abbreviation mappings.')
10 | def update_keywords(sheet_url):
11 | abbr_df = pd.read_csv(sheet_url)
12 | univs_by_id = {u.id: u for u in University.query}
13 | for univ_id, name, abbrev in abbr_df.values:
14 | univ_obj = univs_by_id[univ_id]
15 | abbrevs = abbrev.split(",")
16 | if univ_obj.keywords and set(abbrevs) == set(univ_obj.keywords):
17 | # no update required!
18 | pass
19 | else:
20 | univ_obj.keywords = abbrevs
21 | db.session.add(univ_obj)
22 | db.session.commit()
23 |
24 |
25 | if __name__ == '__main__':
26 | update_keywords()
27 |
--------------------------------------------------------------------------------
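The sheet is expected to be CSV-readable with three columns per row (university id, name, comma-separated abbreviations). A minimal sketch of invoking the command without the interactive prompt, via click's test runner (the sheet URL is a placeholder, and the import assumes the repo root is on the path):

from click.testing import CliRunner
from scripts.update_university_keywords import update_keywords

runner = CliRunner()
result = runner.invoke(update_keywords,
                       ["--sheet_url", "https://example.com/university_abbrevs.csv"])
print(result.output)

--------------------------------------------------------------------------------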
/scripts/url_names.py:
--------------------------------------------------------------------------------
1 |
2 | from datausa.attrs.models import Geo
3 | from datausa.database import db
4 |
5 | def hyphenate(x):
6 | ctr = {ord(c):u'-' for c in [',', ' ', '-']}
7 | tmp = unicode(x).translate(ctr)
8 | return tmp.replace('--', '-')
9 |
10 | sumlevels = ['160']
11 |
12 | count = 1
13 | for sumlevel in sumlevels:
14 | filters = [Geo.id.startswith(sumlevel)]
15 | objs = Geo.query.filter(*filters).all()
16 | for o in objs:
17 | o.url_name = hyphenate(o.display_name)
18 | print o.url_name
19 | db.session.add(o)
20 | count += 1  # without this the batch commit below can never trigger
21 | if count > 10000:
22 | db.session.commit()
23 | count = 1
24 |
25 | db.session.commit()
26 |
27 |
28 |
--------------------------------------------------------------------------------
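A worked example of hyphenate(), with an illustrative display_name:

# hyphenate(u"Agawam Town, MA")
#   -> commas, spaces and hyphens all map to "-": u"Agawam-Town--MA"
#   -> the "--" run then collapses to "-": u"Agawam-Town-MA"

--------------------------------------------------------------------------------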
/search_index/MAIN_WRITELOCK:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/search_index/MAIN_WRITELOCK
--------------------------------------------------------------------------------
/search_index/MAIN_hzur5fe2wkrq53me.seg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/search_index/MAIN_hzur5fe2wkrq53me.seg
--------------------------------------------------------------------------------
/search_index/_MAIN_1.toc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/search_index/_MAIN_1.toc
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_joins.py:
--------------------------------------------------------------------------------
1 | import json
2 | import unittest
3 |
4 | import datausa
5 |
6 |
7 | class TestJoinAPI(unittest.TestCase):
8 |
9 | def setUp(self):
10 | self.app = datausa.app.test_client()
11 |
12 | def get_data(self, url):
13 | req = self.app.get(url)
14 | result = json.loads(req.data)
15 | assert 'data' in result
16 | data = result['data']
17 | headers = result['headers']
18 | return data, headers
19 |
20 | def test_geo_crosswalk(self):
21 | req = self.app.get('/api/join/?required=adult_obesity,income&sumlevel=all&show=geo&where=income.geo:16000US2507000,adult_obesity.sumlevel:county&year=latest&auto_crosswalk=1')
22 | result = json.loads(req.data)
23 | assert 'data' in result
24 | data = result['data']
25 | headers = result['headers']
26 | target_index = headers.index('chr.yg.adult_obesity')
27 | chr_geo_index = headers.index('chr.yg.geo')
28 | first_row = data[0]
29 | assert len(data) == 1
30 | assert first_row[target_index]
31 | assert first_row[chr_geo_index] == '05000US25025'
32 |
33 | def test_join_but_no_geo_crosswalk(self):
34 | req = self.app.get('/api/join/?required=pop_black,pop_white,income&sumlevel=all&show=geo&where=income.geo:16000US2511000&year=latest')
35 | result = json.loads(req.data)
36 | assert 'data' in result
37 | data = result['data']
38 | headers = result['headers']
39 | target_index = headers.index('acs_5yr.yg_race.pop_black')
40 | yg_race_geo_index = headers.index('acs_5yr.yg_race.geo')
41 | first_row = data[0]
42 | assert len(data) == 1
43 | assert first_row[target_index]
44 | assert first_row[yg_race_geo_index] == '16000US2511000'
45 |
46 | def test_display_names(self):
47 | req = self.app.get('/api/join/?required=adult_obesity,income&sumlevel=all&show=geo&where=adult_obesity.geo:04000US25&display_names=1')
48 | result = json.loads(req.data)
49 | assert 'data' in result
50 | data = result['data']
51 | headers = result['headers']
52 | target_index = headers.index('chr.yg.geo_name')
53 | assert target_index >= 0
54 | first_row = data[0]
55 | assert first_row[target_index] == 'Massachusetts'
56 |
57 | def test_limit(self):
58 | url = '/api/join/?required=grads_total&sumlevel=all&show=geo&limit=3'
59 | data, _ = self.get_data(url)
60 | assert len(data) == 3
61 |
62 | def test_geos_crosswalk_3vars(self):
63 | url = '/api/join/?required=adult_obesity,avg_wage,income&sumlevel=all&show=geo&where=income.geo:16000US2507000,adult_obesity.sumlevel:county,grads_total.sumlevel:county&year=latest&auto_crosswalk=1'
64 | data, _ = self.get_data(url)
65 | assert len(data) >= 1
66 |
67 | def test_cip_value_crosswalk(self):  # named uniquely; the second test_cip_crosswalk below would otherwise shadow this test
68 | url = '/api/join/?required=avg_wage,value&sumlevel=all&show=cip&where=value.cip:010000'
69 | data, _ = self.get_data(url)
70 | assert len(data) >= 1
71 |
72 | def test_geos_2vars_latest(self):
73 | url = '/api/join/?required=adult_obesity,income&sumlevel=all&show=geo&where=income.geo:04000US25,adult_obesity.geo:04000US25&year=latest'
74 | data, _ = self.get_data(url)
75 | assert len(data) == 1
76 |
77 | def test_ipeds_acs_geo_join(self):
78 | url = '/api/join/?required=grads_total,income&sumlevel=all&show=geo&where=income.geo:16000US2507000,grads_total.sumlevel:state&year=latest&auto_crosswalk=1'
79 | data, _ = self.get_data(url)
80 | assert len(data) == 1
81 |
82 | def test_puma_to_state(self):
83 | url = '/api/join/?required=avg_wage,grads_total,income&show=geo&where=avg_wage.sumlevel:puma,grads_total.geo:04000US25,avg_wage.geo:79500US2500100,income.sumlevel:state&year=latest&auto_crosswalk=1'
84 | data, _ = self.get_data(url)
85 | assert len(data) == 1
86 |
87 | def test_puma_to_state_and_county(self):
88 | url = '/api/join/?required=avg_wage,grads_total,income&show=geo&where=avg_wage.geo:79500US2500506,grads_total.sumlevel:state,income.sumlevel:county&year=latest&auto_crosswalk=1'
89 | data, _ = self.get_data(url)
90 | assert len(data) == 1
91 |
92 | def test_bug(self):
93 | url = '/api/join/?required=grads_total,adult_obesity&sumlevel=all&show=geo&where=grads_total.geo:16000US2511000,adult_obesity.sumlevel:state&year=latest&auto_crosswalk=1'
94 | data, _ = self.get_data(url)
95 | assert len(data) == 1
96 |
97 | def test_bug2(self):
98 | url = '/api/join/?required=avg_wage,income&show=geo&where=avg_wage.geo:79500US2500506,income.sumlevel:state&year=latest&auto_crosswalk=1'
99 | data, _ = self.get_data(url)
100 | assert len(data) == 1
101 |
102 | def test_national_containment(self):
103 | url = '/api/join/?required=grads_total,adult_obesity&sumlevel=all&show=geo&limit=5&where=grads_total.geo:01000US,adult_obesity.sumlevel:county&auto_crosswalk=1'
104 | data, _ = self.get_data(url)
105 | assert len(data) >= 1
106 |
107 | def test_geo_non_crosswalk(self):
108 | url = '/api/join/?required=grads_total,adult_obesity&show=geo&limit=1&where=grads_total.geo:16000US2511000&auto_crosswalk=0'
109 | data, headers = self.get_data(url)
110 | target_index = headers.index('chr.yg.geo')
111 | first_row = data[0]
112 | assert first_row[target_index] is None
113 |
114 | def test_cip_crosswalk(self):
115 | url = '/api/join/?required=avg_wage,grads_total&show=cip&limit=1&where=grads_total.cip:090401&auto_crosswalk=1'
116 | data, headers = self.get_data(url)
117 | target_index = headers.index('pums_1yr.yc.cip')
118 | first_row = data[0]
119 | assert first_row[target_index] == '09'
120 |
121 | def test_cip_no_crosswalk(self):
122 | url = '/api/join/?required=avg_wage,grads_total&show=cip&limit=1&where=grads_total.cip:090401&auto_crosswalk=0'
123 | data, headers = self.get_data(url)
124 | target_index = headers.index('pums_1yr.yc.cip')
125 | first_row = data[0]
126 | assert first_row[target_index] is None
127 |
128 | def test_onet_soc_crosswalk(self):
129 | url = '/api/join/?required=avg_wage,value&sumlevel=all&show=soc&limit=5&auto_crosswalk=1&where=avg_wage.soc:1110XX'
130 | data, headers = self.get_data(url)
131 | onet_index = headers.index('onet.skills_by_soc.soc')
132 | pums_index = headers.index('pums_1yr.yo.soc')
133 | first_row = data[0]
134 | assert first_row[onet_index] in ['111000', '110000']
135 | assert first_row[pums_index] == '1110XX'
136 |
137 | def test_onet_soc_no_crosswalk(self):
138 | url = '/api/join/?required=avg_wage,value&sumlevel=all&show=soc&limit=5&auto_crosswalk=0&where=avg_wage.soc:1110XX'
139 | data, headers = self.get_data(url)
140 | onet_index = headers.index('onet.skills_by_soc.soc')
141 | pums_index = headers.index('pums_1yr.yo.soc')
142 | first_row = data[0]
143 | assert first_row[onet_index] is None
144 | assert first_row[pums_index] == '1110XX'
145 |
146 | def where_bug(self):  # note: no "test_" prefix, so unittest never collects this case
147 | url = 'api/join/?required=income,grads_total&sumlevel=county&show=geo&where=grads_total.degree:5&limit=5'
148 | data, headers = self.get_data(url)
149 | assert len(data) == 5
150 |
151 | def test_naics_xwalk(self):
152 | url = '/api/join/?required=employees_thousands,num_ppl,avg_wage&sumlevel=0&show=naics&limit=5&naics=23&year=latest'
153 | data, headers = self.get_data(url)
154 | bls_index = headers.index('bls.ces_yi.naics')
155 | pums_index = headers.index('pums_1yr.yi.naics')
156 | first_row = data[0]
157 | assert len(data) == 1
158 | assert first_row[bls_index] is not None
159 | assert first_row[pums_index] is not None
160 |
161 | def test_naics_auto_xwalk(self):
162 | url = '/api/join/?required=employees_thousands,num_ppl&show=naics&auto_crosswalk=1&where=num_ppl.naics:71-72&year=latest'
163 | data, headers = self.get_data(url)
164 | bls_index = headers.index('bls.ces_yi.naics')
165 | pums_index = headers.index('pums_1yr.yi.naics')
166 | first_row = data[0]
167 | assert len(data) == 1
168 | assert first_row[bls_index] == '71'
169 | assert first_row[pums_index] == '71-72'
170 |
171 |
172 | def test_pums_names(self):
173 | url = '/api/join/?required=num_ppl&sumlevel=all&show=naics&naics=23&display_names=1'
174 | data, headers = self.get_data(url)
175 | pums_index = headers.index('pums_1yr.yi.naics_name')
176 | first_row = data[0]
177 | assert first_row[pums_index] == 'Construction'
178 |
179 | def test_pums_degree_name(self):
180 | url = '/api/join/?required=num_ppl&sumlevel=all&show=degree&naics=54&display_names=1&degree=21'
181 | data, headers = self.get_data(url)
182 | pums_index = headers.index('pums_1yr.yid.degree_name')
183 | first_row = data[0]
184 | assert first_row[pums_index] == "Bachelor's degree"
185 |
186 | def test_bls_names(self):
187 | url = '/api/join/?required=employees_thousands&sumlevel=all&show=naics&naics=54&display_names=1'
188 | data, headers = self.get_data(url)
189 | pums_index = headers.index('bls.ces_yi.naics_name')
190 | first_row = data[0]
191 | assert first_row[pums_index] == "Professional, Scientific, and Technical Services"
192 |
193 | def test_offset_sort(self):
194 | url = '/api/join/?required=pop&sumlevel=state&show=geo&limit=1&offset=1&display_names=1&year=2014&sort=desc&order=pop'
195 | data, headers = self.get_data(url)
196 | pums_index = headers.index('acs_5yr.yg.geo_name')
197 | first_row = data[0]
198 | assert first_row[pums_index] == "Texas"
199 |
200 | def test_ed_crosswalk_join(self):
201 | url = '/api/join/?show=university&sumlevel=all&required=grads_total,default_rate&limit=1&university=100654'
202 | data, headers = self.get_data(url)
203 | ed_id = headers.index('ed.yu_defaults.university')
204 | ipeds_id = headers.index('ipeds.grads_yu.university')
205 | opeid = headers.index('ed.yu_defaults.opeid')
206 |
207 | first_row = data[0]
208 | assert first_row[ed_id] == first_row[ipeds_id] and first_row[opeid] == "001002"
209 |
210 | if __name__ == '__main__':
211 | unittest.main()
212 |
--------------------------------------------------------------------------------
/tests/test_search.py:
--------------------------------------------------------------------------------
1 | from whoosh.qparser import QueryParser
2 | from whoosh import index, sorting, scoring
3 | from whoosh import qparser, query
4 | from config import SEARCH_INDEX_DIR
5 | import math
6 | import unittest
7 | from datausa.attrs.search import do_search
8 |
9 | ix = index.open_dir(SEARCH_INDEX_DIR)
10 | qp = QueryParser("name", schema=ix.schema, group=qparser.OrGroup)
11 |
12 | facet = sorting.FieldFacet("zvalue", reverse=True)
13 | scores = sorting.ScoreFacet()
14 |
15 | class TestSearch(unittest.TestCase):
16 | NY_IDS = ['31000US35620', '05000US36061', '04000US36', '16000US3651000']
17 |
18 | def test_extra_word(self):
19 | data,suggs,tries,my_vars = do_search("new york economy")
20 | self.assertTrue(data[0][0] in self.NY_IDS)
21 |
22 | def test_manhattan(self):
23 | data,suggs,tries,my_vars = do_search("manhattan")
24 | self.assertEqual(data[0][0], "05000US36061")
25 |
26 | def test_exact_match_begin(self):
27 | data,suggs,tries,my_vars = do_search("nome")
28 | self.assertEqual(data[0][0], '16000US0254920')
29 |
30 | def test_ny(self):
31 | data,suggs,tries,my_vars = do_search("new york")
32 | self.assertTrue(data[0][0] in self.NY_IDS)
33 |
34 | def test_doc(self):
35 | data,suggs,tries,my_vars = do_search("doctor")
36 | self.assertEqual(data[0][0], '291060')
37 |
38 | def test_stl(self):
39 | data,suggs,tries,my_vars = do_search("st louis")
40 | self.assertEqual(data[0][0], '16000US2965000')
41 |
42 | def test_fortla(self):
43 | data,suggs,tries,my_vars = do_search("fort lau")
44 | self.assertEqual(data[0][0], '16000US1224000')
45 |
46 | def test_bad_spelling(self):
47 | data,suggs,tries,my_vars = do_search("massachusitt")
48 | self.assertEqual(data[0][0], '04000US25')
49 |
50 | def test_econ(self):
51 | econs = ['193011', '450601', '01000US']
52 | data,suggs,tries,my_vars = do_search("econ")
53 | self.assertTrue(data[0][0] in econs)
54 |
55 | def test_milford(self):
56 | data,suggs,tries,my_vars = do_search("milford nh")
57 | self.assertEqual(data[0][0], '16000US3347940')
58 |
59 | def test_bevhills(self):
60 | data,suggs,tries,my_vars = do_search("beverly hills")
61 | self.assertEqual(data[0][0], '16000US0606308')
62 |
63 | def test_kind_naics(self):
64 | data,suggs,tries,my_vars = do_search("educat", kind="naics")
65 | self.assertTrue(data[0][0])
66 |
67 | def test_ma(self):
68 | data,suggs,tries,my_vars = do_search("ma")
69 | self.assertEqual(data[0][0], '04000US25')
70 |
71 | def test_ak(self):
72 | data,suggs,tries,my_vars = do_search("ak")
73 | self.assertEqual(data[0][0], '04000US02')
74 |
75 | def test_pa(self):
76 | data,suggs,tries,my_vars = do_search("pa")
77 | self.assertEqual(data[0][0], '04000US42')
78 |
79 | def test_al(self):
80 | data,suggs,tries,my_vars = do_search("al")
81 | self.assertEqual(data[0][0], '04000US01')
82 |
83 | def test_dc(self):
84 | data,suggs,tries,my_vars = do_search("dc")
85 | self.assertEqual(data[0][0], '16000US1150000')
86 |
87 | def test_rny(self):
88 | data,suggs,tries,my_vars = do_search("rochester, ny")
89 | self.assertEqual(data[0][0], '16000US3663000')
90 |
91 | def test_cpmd(self):
92 | data,suggs,tries,my_vars = do_search("college park, md")
93 | self.assertEqual(data[0][0], '16000US2418750')
94 |
95 | def test_moco(self):
96 | data,suggs,tries,my_vars = do_search("montgomery county")
97 | self.assertEqual(data[0][0], '05000US24031')
98 |
99 | def test_pgc(self):
100 | data,suggs,tries,my_vars = do_search("prince georges county")
101 | self.assertEqual(data[0][0], '05000US24033')
102 |
103 | def test_travel_time(self):
104 | data,suggs,tries,my_vars = do_search("travel time")
105 | self.assertEqual(data[0][0], '01000US')
106 |
107 | def test_commute_time(self):
108 | data,suggs,tries,my_vars = do_search("commute time")
109 | self.assertEqual(data[0][0], '01000US')
110 |
111 | def test_boston_travel_time(self):
112 | data,suggs,tries,my_vars = do_search("boston travel time")
113 | self.assertEqual(data[0][0], '16000US2507000')
114 |
115 | def test_nj_travel_time(self):
116 | data,suggs,tries,my_vars = do_search("economy in new jersey")
117 | ids = [row[0] for row in data[:3]]
118 | self.assertTrue('04000US34' in ids)
119 | self.assertEqual(ids[0], '16000US1820152')
120 |
121 | # def test_obesity(self):  # shadowed: redefined below with the same name and stronger assertions
122 | #     data,suggs,tries,my_vars = do_search("obesity")
123 | #     self.assertEqual(data[0][0], '01000US')
124 |
125 | def test_vietnamese_wyoming(self):
126 | data,suggs,tries,my_vars = do_search("vietnamese speakers in wyoming")
127 | ids = [row[0] for row in data]
128 | self.assertTrue('04000US56' in ids[:2])
129 |
130 | def test_polish_chicago(self):
131 | data,suggs,tries,my_vars = do_search("polish speakers in chicago")
132 | self.assertEqual(data[0][0], '16000US1714000')
133 |
134 | def test_native_cambr(self):
135 | data,suggs,tries,my_vars = do_search("native born in cambridge")
136 | self.assertEqual(data[0][0], '16000US2511000')
137 |
138 | def test_fr_cambr(self):
139 | data,suggs,tries,my_vars = do_search("french in cambridge")
140 | self.assertEqual(data[0][0], '16000US2511000')
141 |
142 | def test_chil_nm(self):
143 | data,suggs,tries,my_vars = do_search("chileans in new mexico")
144 | self.assertEqual(data[0][0], '04000US35')
145 |
146 | def test_swiss_nj(self):
147 | data,suggs,tries,my_vars = do_search("swiss in new jersey")
148 | self.assertEqual(data[0][0], '04000US34')
149 |
150 | def test_cuba_montana(self):
151 | data,suggs,tries,my_vars = do_search("cubans in montana")
152 | self.assertEqual(data[0][0], '04000US30')
153 |
154 | def test_il_fl(self):
155 | data,suggs,tries,my_vars = do_search("israelis in florida")
156 | self.assertEqual(data[0][0], '04000US12')
157 |
158 | def test_citizenship_fla(self):
159 | data,suggs,tries,my_vars = do_search("citizenship in florida")
160 | self.assertEqual(data[0][0], '04000US12')
161 |
162 | def test_ga(self):
163 | data,suggs,tries,my_vars = do_search("georgia")
164 | self.assertEqual(data[0][0], '04000US13')
165 |
166 | def test_age(self):
167 | data,suggs,tries,my_vars = do_search("age in chicago")
168 | self.assertEqual(data[0][0], '16000US1714000')
169 | self.assertTrue(len(my_vars) > 0)
170 | self.assertEqual(my_vars[0]['name'], 'age')
171 |
172 | def test_healthcare(self):
173 | data,suggs,tries,my_vars = do_search("healthcare")
174 | self.assertEqual(data[0][0], "01000US")
175 | self.assertEqual(my_vars[0]["name"], "healthcare")
176 | self.assertEqual(my_vars[0]["section"], "conditions_diseases")
177 |
178 | def test_obesity(self):
179 | data,suggs,tries,my_vars = do_search("obesity")
180 | self.assertEqual(data[0][0], "01000US")
181 | self.assertEqual(my_vars[0]["name"], "obesity")
182 | self.assertEqual(my_vars[0]["section"], "conditions_diseases")
183 |
184 | def test_umd(self):
185 | data, suggs, tries, my_vars = do_search("umd")
186 | self.assertEqual(data[0][0], "163286")
187 |
188 | def test_bu(self):  # the query and IPEDS id (164988) are Boston University's
189 | data, suggs, tries, my_vars = do_search("boston university")
190 | self.assertEqual(data[0][0], "164988")
191 |
192 | def test_neu(self):
193 | data, suggs, tries, my_vars = do_search("neu")
194 | self.assertEqual(data[0][0], "167358")
195 |
196 |
197 | if __name__ == '__main__':
198 | unittest.main()
199 |
--------------------------------------------------------------------------------
/var_index/MAIN_WRITELOCK:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/var_index/MAIN_WRITELOCK
--------------------------------------------------------------------------------
/var_index/MAIN_g1c93s1e37q8coxg.seg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/var_index/MAIN_g1c93s1e37q8coxg.seg
--------------------------------------------------------------------------------
/var_index/_MAIN_1.toc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataUSA/datausa-api/7288dede082eda07b61e11cf6dc801fe692f6334/var_index/_MAIN_1.toc
--------------------------------------------------------------------------------