├── .gitignore ├── ColumnDesc.txt ├── README.md ├── Tester.ipynb ├── airflow ├── dags │ ├── subdags │ │ └── subdag_for_dimensions.py │ ├── temp_input │ │ └── I94_SAS_Labels_Descriptions.SAS │ ├── temp_output │ │ ├── airport-codes_csv.csv │ │ ├── i94addr.csv │ │ ├── i94cit&i94res.csv │ │ ├── i94mode.csv │ │ ├── i94port.csv │ │ ├── i94visa.csv │ │ └── us-cities-demographics.csv │ └── udacity_capstone.py └── plugins │ ├── __init__.py │ ├── helpers │ ├── __init__.py │ └── sql_queries.py │ └── operators │ ├── __init__.py │ ├── data_quality.py │ ├── sas7bdat_to_parquet.py │ ├── sas_to_csv.py │ ├── stage_redshift.py │ └── transfer_to_s3.py ├── airflow_start.sh ├── config.py ├── cryptosetup.py ├── dwh.cfg ├── env.yml ├── img ├── city_intake.png ├── diff_airports.png ├── graph.png ├── marker.png ├── no_of_immigrants.png ├── pipeline-tree.png ├── pipeline.png ├── schema.PNG └── state_airports.png └── load ├── __init__.py ├── aws_load.py ├── aws_utils.py └── example_usage.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### JetBrains template 3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 5 | 6 | # User-specific stuff 7 | .idea/**/workspace.xml 8 | .idea/**/tasks.xml 9 | .idea/**/dictionaries 10 | .idea/**/shelf 11 | 12 | # Sensitive or high-churn files 13 | .idea/**/dataSources/ 14 | .idea/**/dataSources.ids 15 | .idea/**/dataSources.local.xml 16 | .idea/**/sqlDataSources.xml 17 | .idea/**/dynamic.xml 18 | .idea/**/uiDesigner.xml 19 | .idea/**/dbnavigator.xml 20 | 21 | # Gradle 22 | .idea/**/gradle.xml 23 | .idea/**/libraries 24 | 25 | # CMake 26 | cmake-build-debug/ 27 | cmake-build-release/ 28 | 29 | # Mongo Explorer plugin 30 | .idea/**/mongoSettings.xml 31 | 32 | # File-based project format 33 | *.iws 34 | 35 | # IntelliJ 
36 | out/ 37 | 38 | # mpeltonen/sbt-idea plugin 39 | .idea_modules/ 40 | 41 | # JIRA plugin 42 | atlassian-ide-plugin.xml 43 | 44 | # Cursive Clojure plugin 45 | .idea/replstate.xml 46 | 47 | # Crashlytics plugin (for Android Studio and IntelliJ) 48 | com_crashlytics_export_strings.xml 49 | crashlytics.properties 50 | crashlytics-build.properties 51 | fabric.properties 52 | 53 | # Editor-based Rest Client 54 | .idea/httpRequests 55 | ### Python template 56 | # Byte-compiled / optimized / DLL files 57 | __pycache__/ 58 | *.py[cod] 59 | *$py.class 60 | 61 | # C extensions 62 | *.so 63 | 64 | # Distribution / packaging 65 | .Python 66 | build/ 67 | develop-eggs/ 68 | dist/ 69 | downloads/ 70 | eggs/ 71 | .eggs/ 72 | lib/ 73 | lib64/ 74 | parts/ 75 | sdist/ 76 | var/ 77 | wheels/ 78 | *.egg-info/ 79 | .installed.cfg 80 | *.egg 81 | MANIFEST 82 | 83 | # PyInstaller 84 | # Usually these files are written by a python script from a template 85 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
86 | *.manifest 87 | *.spec 88 | 89 | # Installer logs 90 | pip-log.txt 91 | pip-delete-this-directory.txt 92 | 93 | # Unit test / coverage reports 94 | htmlcov/ 95 | .tox/ 96 | .coverage 97 | .coverage.* 98 | .cache 99 | nosetests.xml 100 | coverage.xml 101 | *.cover 102 | .hypothesis/ 103 | .pytest_cache/ 104 | 105 | # Translations 106 | *.mo 107 | *.pot 108 | 109 | # Django stuff: 110 | *.log 111 | local_settings.py 112 | db.sqlite3 113 | 114 | # Flask stuff: 115 | instance/ 116 | .webassets-cache 117 | 118 | # Scrapy stuff: 119 | .scrapy 120 | 121 | # Sphinx documentation 122 | docs/_build/ 123 | 124 | # PyBuilder 125 | target/ 126 | 127 | # Jupyter Notebook 128 | .ipynb_checkpoints 129 | 130 | # pyenv 131 | .python-version 132 | 133 | # celery beat schedule file 134 | celerybeat-schedule 135 | 136 | # SageMath parsed files 137 | *.sage.py 138 | 139 | # Environments 140 | .env 141 | .venv 142 | env/ 143 | venv/ 144 | ENV/ 145 | env.bak/ 146 | venv.bak/ 147 | 148 | # Spyder project settings 149 | .spyderproject 150 | .spyproject 151 | 152 | # Rope project settings 153 | .ropeproject 154 | 155 | # mkdocs documentation 156 | /site 157 | 158 | # mypy 159 | .mypy_cache/ 160 | 161 | ### JetBrains template 162 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 163 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 164 | 165 | # User-specific stuff 166 | .idea/**/workspace.xml 167 | .idea/**/tasks.xml 168 | .idea/**/dictionaries 169 | .idea/**/shelf 170 | 171 | # Sensitive or high-churn files 172 | .idea/**/dataSources/ 173 | .idea/**/dataSources.ids 174 | .idea/**/dataSources.local.xml 175 | .idea/**/sqlDataSources.xml 176 | .idea/**/dynamic.xml 177 | .idea/**/uiDesigner.xml 178 | .idea/**/dbnavigator.xml 179 | 180 | # Gradle 181 | .idea/**/gradle.xml 182 | .idea/**/libraries 183 | 184 | # CMake 185 | cmake-build-debug/ 186 | cmake-build-release/ 187 | 188 | # Mongo Explorer 
plugin 189 | .idea/**/mongoSettings.xml 190 | 191 | # File-based project format 192 | *.iws 193 | 194 | # IntelliJ 195 | out/ 196 | 197 | # mpeltonen/sbt-idea plugin 198 | .idea_modules/ 199 | 200 | # JIRA plugin 201 | atlassian-ide-plugin.xml 202 | 203 | # Cursive Clojure plugin 204 | .idea/replstate.xml 205 | 206 | # Crashlytics plugin (for Android Studio and IntelliJ) 207 | com_crashlytics_export_strings.xml 208 | crashlytics.properties 209 | crashlytics-build.properties 210 | fabric.properties 211 | 212 | # Editor-based Rest Client 213 | .idea/httpRequests 214 | ### Python template 215 | # Byte-compiled / optimized / DLL files 216 | __pycache__/ 217 | *.py[cod] 218 | *$py.class 219 | 220 | # C extensions 221 | *.so 222 | 223 | # Distribution / packaging 224 | .Python 225 | build/ 226 | develop-eggs/ 227 | dist/ 228 | downloads/ 229 | eggs/ 230 | .eggs/ 231 | lib/ 232 | lib64/ 233 | parts/ 234 | sdist/ 235 | var/ 236 | wheels/ 237 | *.egg-info/ 238 | .installed.cfg 239 | *.egg 240 | MANIFEST 241 | 242 | # PyInstaller 243 | # Usually these files are written by a python script from a template 244 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
245 | *.manifest 246 | *.spec 247 | 248 | # Installer logs 249 | pip-log.txt 250 | pip-delete-this-directory.txt 251 | 252 | # Unit test / coverage reports 253 | htmlcov/ 254 | .tox/ 255 | .coverage 256 | .coverage.* 257 | .cache 258 | nosetests.xml 259 | coverage.xml 260 | *.cover 261 | .hypothesis/ 262 | .pytest_cache/ 263 | 264 | # Translations 265 | *.mo 266 | *.pot 267 | 268 | # Django stuff: 269 | *.log 270 | local_settings.py 271 | db.sqlite3 272 | 273 | # Flask stuff: 274 | instance/ 275 | .webassets-cache 276 | 277 | # Scrapy stuff: 278 | .scrapy 279 | 280 | # Sphinx documentation 281 | docs/_build/ 282 | 283 | # PyBuilder 284 | target/ 285 | 286 | # Jupyter Notebook 287 | .ipynb_checkpoints 288 | 289 | # pyenv 290 | .python-version 291 | 292 | # celery beat schedule file 293 | celerybeat-schedule 294 | 295 | # SageMath parsed files 296 | *.sage.py 297 | 298 | # Environments 299 | .env 300 | .venv 301 | env/ 302 | venv/ 303 | ENV/ 304 | env.bak/ 305 | venv.bak/ 306 | 307 | # Spyder project settings 308 | .spyderproject 309 | .spyproject 310 | 311 | # Rope project settings 312 | .ropeproject 313 | 314 | # mkdocs documentation 315 | /site 316 | 317 | # mypy 318 | .mypy_cache/ 319 | 320 | /extraction/ 321 | /load/ 322 | /test.py 323 | -------------------------------------------------------------------------------- /ColumnDesc.txt: -------------------------------------------------------------------------------- 1 | ColumnName | ColumnDesc | ColumnUse 2 | ----------------|----------------------------------------------------------------------------------------|----------------- 3 | CICID - - 4 | I94YR - 4 digit year - 5 | I94MON - Numeric month - 6 | I94CIT - This format shows all the valid and invalid country codes for processing - 7 | I94RES - This format shows all the valid and invalid country codes for processing - 8 | I94PORT - This format shows all the valid and invalid port codes for processing - 9 | ARRDATE - This is the Arrival Date in the USA - 10 | 
I94MODE - This is the mode of transport (1 = 'Air';2 = 'Sea';3 = 'Land';9 = 'Not reported') - 11 | I94ADDR - The state codes; everything else goes into 'other' - 12 | DEPDATE - This is the Departure Date from the USA - 13 | I94BIR - Age of Respondent in Years - 14 | I94VISA - Visa codes collapsed into three categories:(1 = Business;2 = Pleasure;3 = Student)*/ - 15 | COUNT - Used for summary statistics - 16 | DTADFILE - Character Date Field - Date added to I-94 Files - CIC does not use 17 | VISAPOST - Department of State where Visa was issued - CIC does not use 18 | OCCUP - Occupation that will be performed in U.S. - CIC does not use 19 | ENTDEPA - Arrival Flag - admitted or paroled into the U.S. - CIC does not use 20 | ENTDEPD - Departure Flag - Departed, lost I-94 or is deceased - CIC does not use 21 | ENTDEPU - Update Flag - Either apprehended, overstayed, adjusted to perm residence - CIC does not use 22 | MATFLAG - Match flag - Match of arrival and departure records - 23 | BIRYEAR - 4 digit year of birth - 24 | DTADDTO - Character Date Field - Date to which admitted to U.S. (allowed to stay until) - CIC does not use 25 | GENDER - Non-immigrant sex - 26 | INSNUM - INS number - 27 | AIRLINE - Airline used to arrive in U.S. - 28 | ADMNUM - Admission Number - 29 | FLTNO - Flight number of Airline used to arrive in U.S. - 30 | VISATYPE - Class of admission legally admitting the non-immigrant to temporarily stay in U.S. - -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Data Engineering Capstone Project for Udacity 4 | 5 | ### Objective 6 | 7 | --- 8 | In this project we are going to work with US immigration data from the 9 | year 1994. We have facts such as visa types, transport modes, landing 10 | ports, us state codes, country codes.
Apart from the sas7bdat formatted 11 | immigration data we have us airport information and us demographics 12 | data. We are going to parse SAS descriptor files for all the dimensions 13 | and sas7bdat files for all the facts. The tools we are going to use here 14 | are Apache Spark, Apache Airflow, Amazon Redshift, Amazon S3. 15 | 16 | We will be reading, parsing and cleaning the data from local file 17 | systems, Amazon S3 and transferring data to redshift tables in AWS. We 18 | will be orchestrating the flow of data through Apache Airflow DAGs. 19 | 20 | Finally we will be using some SQL queries to extract some valuable stats 21 | and graphs from the data itself. 22 | 23 | ### Data Model 24 | 25 | --- 26 | ![alt text](img/schema.PNG) 27 | 28 | ### Data Pipeline 29 | 30 | ___ 31 | ![alt text](img/marker.png) 32 | ![alt text](img/pipeline.png) 33 | ![alt_text](img/pipeline-tree.png) 34 | 35 | ### Installing and starting 36 | 37 | --- 38 | 39 | #### Installing Python Dependencies 40 | You need to install this python dependencies 41 | In Terminal/CommandPrompt: 42 | 43 | without anaconda you can do this: 44 | ``` 45 | $ python3 -m venv virtual-env-name 46 | $ source virtual-env-name/bin/activate 47 | $ pip install -r requirements.txt 48 | ``` 49 | with anaconda you can do this (in Windows): 50 | ``` 51 | $ conda env create -f env.yml 52 | $ source activate 53 | ``` 54 | or (in Others) 55 | ``` 56 | conda create -y -n python==3.6 57 | conda install -f -y -q -n -c conda-forge --file requirements.txt 58 | [source activate/ conda activate] 59 | ``` 60 | #### Fixing/Configuring Airflow 61 | ``` 62 | $ pip install --upgrade Flask 63 | $ pip install zappa 64 | $ mkdir airflow_home 65 | $ export AIRFLOW_HOME=./airflow_home 66 | $ cd airflow_home 67 | $ airflow initdb 68 | $ airflow webserver 69 | $ airflow scheduler 70 | ``` 71 | 72 | #### More Airflow commands 73 | To list existing dags registered with airflow 74 | ``` 75 | $ airflow list_dags 76 | ``` 77 | 78 | #### 
Secure/Encrypt your connections and hooks 79 | **Run** 80 | ```bash 81 | $ python cryptosetup.py 82 | ``` 83 | copy this key to *airflow.cfg* to paste after 84 | fernet_key = ************ 85 | 86 | #### Setting up connections and variables in Airflow UI for AWS 87 | TODO: There is no code to modify in this exercise. We're going to 88 | create a connection and a variable. 89 | 90 | **S3** 91 | 1. Open your browser to localhost:8080 and open Admin->Variables 92 | 2. Click "Create" 93 | 3. Set "Key" equal to "s3_bucket" and set "Val" equal to "udacity-dend" 94 | 4. Set "Key" equal to "s3_prefix" and set "Val" equal to "data-pipelines" 95 | 5. Click save 96 | 97 | **AWS** 98 | 1. Open Admin->Connections 99 | 2. Click "Create" 100 | 3. Set "Conn Id" to "aws_credentials", "Conn Type" to "Amazon Web Services" 101 | 4. Set "Login" to your aws_access_key_id and "Password" to your aws_secret_key 102 | 5. Click save 103 | 6. If it doesn't work then in "Extra" field put: 104 | {"region_name": "your_aws_region", "aws_access_key_id":"your_aws_access_key_id", "aws_secret_access_key": "your_aws_secret_access_key", "aws_iam_user": "your_created_iam_user"} 105 | 7. These are all you can put: 106 | - aws_account_id: AWS account ID for the connection 107 | - aws_iam_role: AWS IAM role for the connection 108 | - external_id: AWS external ID for the connection 109 | - host: Endpoint URL for the connection 110 | - region_name: AWS region for the connection 111 | - role_arn: AWS role ARN for the connection 112 | 113 | **Redshift** 114 | 1. Open Admin->Connections 115 | 2. Click "Create" 116 | 3. Set "Conn Id" to "redshift", "Conn Type" to "postgres" 117 | 4. Set "Login" to your master_username for your cluster and "Password" 118 | to your master_password for your cluster 119 | 5. 
Click save 120 | 121 | #### Optional 122 | If you haven't set up your AWS Redshift Cluster yet 123 | (or don't want to create one manually), then use the files 124 | inside 'aws' folder 125 | - To create cluster and IAM role: Run the below code in terminal from 'aws' folder to create your Redshift database and a 126 | iam_role in aws having read access to Amazon S3 and permissions 127 | attached to the created cluster 128 | ```bash 129 | $ python aws_operate.py --action start 130 | ``` 131 | copy the DWH_ENDPOINT and DWH_ROLE_ARN 132 | values from the print statements 133 | - To create Tables: Run the below code in terminal from project dir to create tables in your Redshift database 134 | in aws 135 | ```bash 136 | $ python create_table.py --host <DWH_ENDPOINT> 137 | ``` 138 | - To Stop: Run the below code in terminal from 'aws' directory to destroy your Redshift database and 139 | detach iam_role from the cluster 140 | ```bash 141 | $ python aws_operate.py --action stop 142 | ``` 143 | 144 | ### About the data 145 | 146 | --- 147 | #### I94 Immigration Data: 148 | This data comes from the US National Tourism and Trade Office. 149 | [This](https://travel.trade.gov/research/reports/i94/historical/2016.html) 150 | is where the data comes from. There's a sample file so you can take a look 151 | at the data in csv format before reading it all in. The report contains 152 | international visitor arrival statistics by world regions and selected 153 | countries (including top 20), type of visa, mode of transportation, 154 | age groups, states visited (first intended address only), and the top 155 | ports of entry (for select countries) 156 | 157 | #### World Temperature Data: 158 | This dataset came from Kaggle. You can read more about it [here](https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data). 159 | 160 | #### U.S. City Demographic Data: 161 | This data comes from OpenDataSoft.
You can read more about it [here](https://public.opendatasoft.com/explore/dataset/us-cities-demographics/export/). 162 | 163 | #### Airport Code Table: 164 | This is a simple table of airport codes and corresponding cities. It comes from [here](https://datahub.io/core/airport-codes#data). 165 | 166 | ### Run the project 167 | 168 | --- 169 | 1. Follow all the setup mentioned above 170 | 2. Create a bucket in region 'us-west-2' in Amazon S3 171 | 3. You have to setup all the connections and variables in the Airflow 172 | admin 173 | i. Setup aws connection with user credentials (access_key and 174 | secret_key with login and password). Make sure the region is 'us-west-2' 175 | ii. Setup Redshift connection with user, password, host, port, 176 | schema, db 177 | iii. Setup iam_role for your aws account 178 | iv. Setup variables for 'temp_input', 'temp_output', 'spark_path' (spark 179 | manipulation path for parquet files), sas_file (sas7bdat descriptor 180 | files) 181 | v. Place all the csv inputs inside temp_output directory 182 | vi. Create a folder called 'spark_path' inside \airflow\dags\ 183 | vii. Create variable called 's3_bucket' (make sure the bucket in 184 | AWS is in region 'us-west-2') 185 | 186 | Example: 187 | 188 | | variable | example value | 189 | |:-------------|-------------:| 190 | | iam_role | #### | 191 | | s3_bucket | #### | 192 | | sas_file | /home/workspace/airflow/dags/temp_input/I94_SAS_Labels_Descriptions.SAS | 193 | | spark_path | /home/workspace/airflow/dags/spark_path | 194 | | temp_input | /home/workspace/airflow/dags/temp_input/ | 195 | | temp_output | /home/workspace/airflow/dags/temp_output/ | 196 | 197 | 4. Data Location for input files: 198 | i. Put all your sas7bdat formatted files in temp_input directory 199 | (whenever you want to process/insert them into the db, when you are 200 | done remove the .sas7bdat file/files and drop new files) 201 | ii. Put SAS descriptor file in temp_input directory 202 | iii. 
Put airport-codes_csv.csv file in temp_output directory 203 | 204 | 205 | ### Test it Yourself! 206 | 207 | --- 208 | 209 | Here are some example queries we test to see the uploaded results into 210 | the Redshift schema 211 | 212 | **Example Queries** 213 | #### City from where immigrants arrived 214 | ``` 215 | SELECT TOP 10 b.port_city, b.port_state_or_country, COUNT(cicid) AS count 216 | FROM project.immigration a INNER JOIN project.i94ports b ON a.i94port=b.port_code 217 | GROUP BY b.port_city, b.port_state_or_country 218 | ORDER BY COUNT(cicid) DESC 219 | ``` 220 | 221 | #### Different kinds of airports 222 | ``` 223 | SELECT top 10 distinct type, count(*) AS count_type 224 | FROM project.airport_codes 225 | WHERE iso_country = 'US' 226 | GROUP BY type 227 | ORDER BY count_type DESC 228 | ``` 229 | 230 | #### Immigrants from different countries 231 | ``` 232 | SELECT top 10 SUBSTRING(b.country_name, 0, 15) as country_name, COUNT(cicid) as count 233 | FROM project.immigration a INNER JOIN project.i94res b ON a.i94res=b.country_code 234 | GROUP BY b.country_name 235 | ORDER BY COUNT(cicid) DESC 236 | ``` 237 | 238 | #### Small airports from different states 239 | ``` 240 | SELECT a.state_name AS State, airports.count AS Count_of_Airports 241 | FROM 242 | (SELECT top 10 distinct substring(iso_region, 4, length(iso_region)) AS state, count(*) 243 | FROM project.airport_codes 244 | WHERE iso_country = 'US' AND type='small_airport' 245 | GROUP BY iso_region) airports INNER JOIN project.i94addr a ON airports.state=a.state_code 246 | ORDER BY airports.count DESC 247 | ``` 248 | 249 | #### Small airport locations 250 | ``` 251 | SELECT a.longitude_deg, a.latitude_deg 252 | FROM project.airport_codes a 253 | WHERE a.iso_country = 'US' AND a.type = 'small_airport' 254 | ``` 255 | ### Stats and Graphs 256 | 257 | --- 258 | #### City from where immigrants arrived 259 | ![alt text](img/city_intake.png) 260 | 261 | #### Different kinds of airports 262 | 
![alt_text](img/diff_airports.png) 263 | 264 | #### Immigrants from different countries 265 | ![alt text](img/no_of_immigrants.png) 266 | 267 | #### Small airports from different states 268 | ![alt_text](img/state_airports.png) 269 | 270 | #### Small airports locations in different states 271 | ![alt_text](img/graph.png) 272 | 273 | Scoping the Project 274 | --- 275 | 276 | The purpose is to produce interesting stats from the US immigration 277 | data, airports around the world, and different dimensions such as visa 278 | type, transport mode, nationality etc. 279 | 280 | ### Steps Taken: 281 | The steps taken are in the following order: 282 | **Gather the data**: 283 | This took a while as different kinds of formats were chosen, I 284 | needed to fix my mindset on which data I will actually use in 285 | future for my analysis and queries. I fixated on .sas7bdat 286 | formatted immigration data which fulfills the minimum number of 287 | rows requirements, the cleaned airport data for dimensions and 288 | SAS descriptor file for fulfilling the different kind of formats 289 | to be chosen for the project 290 | **Study the data**: 291 | This took a while as I needed to understand what kind of 292 | pre-processing I would use to clean the individual datasets 293 | mentioned above. Dropping rows on a condition, filtering rows 294 | according to other dimensions and facts etc. 295 | **Choice of infrastructure**: 296 | After studying the data I decided upon certain tools and 297 | technologies, to the point where I am comfortable; I made use of 298 | maximum number of skills that I think I learnt through out the 299 | process. 300 | **Implementation and Testing**: 301 | Once my pipeline started running, I did all kinds of quality 302 | checks to ensure that data is processed correctly and provided a 303 | Jupyter notebook to test the project. 
304 | 305 | ### Purpose of Final Data Model: 306 | Gather interesting insights like demographic population based on certain 307 | dimensions based upon some filter conditions. 308 | e.g. 309 | - Compare immigration of different nationalities 310 | - Compare number of airports by state 311 | - Different kinds of airport statistics 312 | - Aggregate flow of immigrants through different cities 313 | 314 | So I am using the airport codes, US immigration data of '94 and 315 | dimensions such as visa type, mode of transport, nationality codes, US 316 | state code information 317 | 318 | 319 | Addressing other scenarios 320 | --- 321 | 322 | ### Data Increased by 100x: 323 | - I am using columnar format of redshift, so querying will not be slower 324 | - Incremental update is provided so that the full data set is not 325 | re-inserted every time. Whoever wants to insert immigration data into the database 326 | can just drop their sas7bdat files into the temp_input 327 | folder 328 | - Spark is used where heavy data is read and parsed, so distributed 329 | processing is also involved 330 | - Spark memory and processors are configurable to handle more pressure 331 | - S3 storage is used which is scalable and easily accessible with other 332 | AWS infrastructure 333 | 334 | 335 | ### The pipelines would be run on a daily basis by 7 am every day: 336 | - The pipeline is scheduled as per requirements 337 | 338 | ### The database needed to be accessed by 100+ people: 339 | - People are granted usage on schema, so not everyone but people who 340 | have access to the data can use it as necessary, below are the 341 | necessary commands you can use in the Redshift query editor, that's why it 342 | is purely optional to use it as a task in the pipeline: 343 | 344 | We can create a group of users, called _webappusers_, who will use the 345 | functionality of the schema but cannot take admin decisions and 346 | we can add individual users with their name and init
password. 347 | 348 | ```bash 349 | create group webappusers; 350 | create user webappuser1 password 'webAppuser1pass' in group webappusers; 351 | grant usage on schema project to group webappusers; 352 | ``` 353 | 354 | We can create a group of users called __webdevusers__, who will have 355 | admin privileges on the schema, we can add those individual users with 356 | their name and init password 357 | ``` 358 | create group webdevusers; 359 | create user webappdevuser1 password 'webAppdev1pass' in group webdevusers; 360 | grant all on schema webapp to group webdevusers; 361 | ``` 362 | 363 | Defending Decisions 364 | --- 365 | 366 | ### The choice of tools, technologies: 367 | - Airflow to view, monitor and log flow of information: Extremely useful tool to control end to end ETL processing 368 | - S3 Storage to store data on a large scale: Never complain about storage and most importantly when it stores big data 369 | - Redshift to make advantage of columnar format and faster querying strategies: Query from anywhere and anytime 370 | - Spark for distributed processing of heavy data: Best in-memory faster processing 371 | - Pandas for cleaning data frames: absolutely neccessary 372 | 373 | ### Links for Airflow 374 | 375 | --- 376 | **Context Variables** 377 | https://airflow.apache.org/macros.html 378 | 379 | **Hacks for airflow** 380 | https://medium.com/datareply/airflow-lesser-known-tips-tricks-and-best-practises-cf4d4a90f8f 381 | https://medium.com/handy-tech/airflow-tips-tricks-and-pitfalls-9ba53fba14eb 382 | https://www.astronomer.io/guides/dag-best-practices/ 383 | 384 | ### Technologies Used 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | -------------------------------------------------------------------------------- /airflow/dags/subdags/subdag_for_dimensions.py: -------------------------------------------------------------------------------- 1 | from airflow import DAG 2 | from airflow.operators import StageToRedshiftOperator 3 | from helpers import 
SqlQueries 4 | from airflow.models import Variable 5 | from airflow.hooks.postgres_hook import PostgresHook 6 | from airflow.operators.python_operator import PythonOperator 7 | import logging 8 | import boto3 9 | from airflow.contrib.hooks.aws_hook import AwsHook 10 | 11 | 12 | def load_dimension_subdag( 13 | parent_dag_name, 14 | task_id, 15 | redshift_conn_id, 16 | *args, **kwargs): 17 | """ 18 | A python function with arguments, which creates a dag 19 | :param parent_dag_name: imp ({parent_dag_name}.{task_id}) 20 | :param task_id: imp {task_id} 21 | :param redshift_conn_id: {any connection id} 22 | :param args: {verbose} 23 | :param kwargs: {verbose and context variables} 24 | :return: 25 | """ 26 | dag = DAG( 27 | f"{parent_dag_name}.{task_id}", 28 | **kwargs 29 | ) 30 | 31 | copy_ports = StageToRedshiftOperator( 32 | task_id='copy_ports', 33 | dag=dag, 34 | redshift_conn_id="redshift", 35 | aws_credentials_id="aws_default", 36 | file='i94port.csv', 37 | delimiter=',', 38 | table='i94ports', 39 | s3_bucket=Variable.get("s3_bucket"), 40 | s3_key="csv", 41 | sql_stmt=SqlQueries.copy_csv_cmd, 42 | provide_context=True) 43 | 44 | copy_visa = StageToRedshiftOperator( 45 | task_id='copy_visa', 46 | dag=dag, 47 | redshift_conn_id="redshift", 48 | aws_credentials_id="aws_default", 49 | file='i94visa.csv', 50 | delimiter=',', 51 | table='i94visa', 52 | s3_bucket=Variable.get("s3_bucket"), 53 | s3_key="csv", 54 | sql_stmt=SqlQueries.copy_csv_cmd, 55 | provide_context=True) 56 | 57 | copy_modes = StageToRedshiftOperator( 58 | task_id='copy_modes', 59 | dag=dag, 60 | redshift_conn_id="redshift", 61 | aws_credentials_id="aws_default", 62 | file='i94mode.csv', 63 | delimiter=',', 64 | table='i94mode', 65 | s3_bucket=Variable.get("s3_bucket"), 66 | s3_key="csv", 67 | sql_stmt=SqlQueries.copy_csv_cmd, 68 | provide_context=True) 69 | 70 | copy_addr = StageToRedshiftOperator( 71 | task_id='copy_addr', 72 | dag=dag, 73 | redshift_conn_id="redshift", 74 | 
aws_credentials_id="aws_default", 75 | file='i94addr.csv', 76 | delimiter=',', 77 | table='i94addr', 78 | s3_bucket=Variable.get("s3_bucket"), 79 | s3_key="csv", 80 | sql_stmt=SqlQueries.copy_csv_cmd, 81 | provide_context=True) 82 | 83 | copy_country_codes = StageToRedshiftOperator( 84 | task_id='copy_country_codes', 85 | dag=dag, 86 | redshift_conn_id="redshift", 87 | aws_credentials_id="aws_default", 88 | file='i94cit&i94res.csv', 89 | delimiter=',', 90 | table='i94res', 91 | s3_bucket=Variable.get("s3_bucket"), 92 | s3_key="csv", 93 | sql_stmt=SqlQueries.copy_csv_cmd, 94 | provide_context=True) 95 | 96 | copy_cities_demographics = StageToRedshiftOperator( 97 | task_id='copy_cities_demographics', 98 | dag=dag, 99 | redshift_conn_id="redshift", 100 | aws_credentials_id="aws_default", 101 | file='us-cities-demographics.csv', 102 | delimiter=';', 103 | table='us_cities_demographics', 104 | s3_bucket=Variable.get("s3_bucket"), 105 | s3_key="csv", 106 | sql_stmt=SqlQueries.copy_csv_cmd, 107 | provide_context=True) 108 | 109 | copy_airports = StageToRedshiftOperator( 110 | task_id='copy_airports', 111 | dag=dag, 112 | redshift_conn_id="redshift", 113 | aws_credentials_id="aws_default", 114 | file='airport-codes_csv.csv', 115 | delimiter=',', 116 | table='airport_codes', 117 | s3_bucket=Variable.get("s3_bucket"), 118 | s3_key="csv", 119 | sql_stmt=SqlQueries.copy_csv_cmd, 120 | provide_context=True) 121 | 122 | def parquet_to_redshift(table, s3_bucket, s3_key, iam_role, 123 | sql_stmt, redshift_conn_id, **kwargs): 124 | """ 125 | This function reads parquet files and copies them to redshift 126 | schema.db 127 | :param table: 128 | :param s3_bucket: 129 | :param s3_key: 130 | :param iam_role: 131 | :param sql_stmt: 132 | :param redshift_conn_id: 133 | :param kwargs: 134 | :return: 135 | """ 136 | redshift = PostgresHook(postgres_conn_id=redshift_conn_id) 137 | logging.info("Copying data from S3 to Redshift") 138 | s3_path = "s3://{}/{}".format(s3_bucket, s3_key) 139 | 
formatted_sql = sql_stmt.format( 140 | table, 141 | s3_path, 142 | iam_role 143 | ) 144 | redshift.run(formatted_sql) 145 | aws_hook = AwsHook("aws_default") 146 | credentials = aws_hook.get_credentials() 147 | client = boto3.client('s3', 148 | aws_access_key_id=credentials.access_key, 149 | aws_secret_access_key=credentials.secret_key) 150 | objects_to_delete = client.list_objects( 151 | Bucket=Variable.get("s3_bucket"), Prefix="parquet") 152 | delete_keys = {'Objects': []} 153 | delete_keys['Objects'] = [{'Key': k} for k in 154 | [obj['Key'] for obj in 155 | objects_to_delete.get('Contents', 156 | [])]] 157 | client.delete_objects(Bucket=Variable.get("s3_bucket"), 158 | Delete=delete_keys) 159 | 160 | copy_immigration = PythonOperator( 161 | task_id='copy_immigration', 162 | python_callable=parquet_to_redshift, # changed 163 | provide_context=True, 164 | op_kwargs={'table': "immigration", 165 | 's3_bucket': Variable.get("s3_bucket"), 166 | 's3_key': 'parquet', 167 | 'iam_role': Variable.get('iam_role'), 168 | 'sql_stmt': SqlQueries.copy_parquet_cmd, 169 | 'redshift_conn_id': 'redshift'}, 170 | dag=dag 171 | ) 172 | 173 | copy_ports 174 | copy_visa 175 | copy_modes 176 | copy_addr 177 | copy_country_codes 178 | copy_airports 179 | copy_cities_demographics 180 | copy_immigration 181 | 182 | return dag -------------------------------------------------------------------------------- /airflow/dags/temp_input/I94_SAS_Labels_Descriptions.SAS: -------------------------------------------------------------------------------- 1 | libname library 'Your file location' ; 2 | proc format library=library ; 3 | 4 | /* I94YR - 4 digit year */ 5 | 6 | /* I94MON - Numeric month */ 7 | 8 | /* I94CIT & I94RES - This format shows all the valid and invalid codes for processing */ 9 | value i94cntyl 10 | 582 = 'MEXICO Air Sea, and Not Reported (I-94, no land arrivals)' 11 | 236 = 'AFGHANISTAN' 12 | 101 = 'ALBANIA' 13 | 316 = 'ALGERIA' 14 | 102 = 'ANDORRA' 15 | 324 = 'ANGOLA' 16 | 529 = 
'ANGUILLA' 17 | 518 = 'ANTIGUA-BARBUDA' 18 | 687 = 'ARGENTINA ' 19 | 151 = 'ARMENIA' 20 | 532 = 'ARUBA' 21 | 438 = 'AUSTRALIA' 22 | 103 = 'AUSTRIA' 23 | 152 = 'AZERBAIJAN' 24 | 512 = 'BAHAMAS' 25 | 298 = 'BAHRAIN' 26 | 274 = 'BANGLADESH' 27 | 513 = 'BARBADOS' 28 | 104 = 'BELGIUM' 29 | 581 = 'BELIZE' 30 | 386 = 'BENIN' 31 | 509 = 'BERMUDA' 32 | 153 = 'BELARUS' 33 | 242 = 'BHUTAN' 34 | 688 = 'BOLIVIA' 35 | 717 = 'BONAIRE, ST EUSTATIUS, SABA' 36 | 164 = 'BOSNIA-HERZEGOVINA' 37 | 336 = 'BOTSWANA' 38 | 689 = 'BRAZIL' 39 | 525 = 'BRITISH VIRGIN ISLANDS' 40 | 217 = 'BRUNEI' 41 | 105 = 'BULGARIA' 42 | 393 = 'BURKINA FASO' 43 | 243 = 'BURMA' 44 | 375 = 'BURUNDI' 45 | 310 = 'CAMEROON' 46 | 326 = 'CAPE VERDE' 47 | 526 = 'CAYMAN ISLANDS' 48 | 383 = 'CENTRAL AFRICAN REPUBLIC' 49 | 384 = 'CHAD' 50 | 690 = 'CHILE' 51 | 245 = 'CHINA, PRC' 52 | 721 = 'CURACAO' 53 | 270 = 'CHRISTMAS ISLAND' 54 | 271 = 'COCOS ISLANDS' 55 | 691 = 'COLOMBIA' 56 | 317 = 'COMOROS' 57 | 385 = 'CONGO' 58 | 467 = 'COOK ISLANDS' 59 | 575 = 'COSTA RICA' 60 | 165 = 'CROATIA' 61 | 584 = 'CUBA' 62 | 218 = 'CYPRUS' 63 | 140 = 'CZECH REPUBLIC' 64 | 723 = 'FAROE ISLANDS (PART OF DENMARK)' 65 | 108 = 'DENMARK' 66 | 322 = 'DJIBOUTI' 67 | 519 = 'DOMINICA' 68 | 585 = 'DOMINICAN REPUBLIC' 69 | 240 = 'EAST TIMOR' 70 | 692 = 'ECUADOR' 71 | 368 = 'EGYPT' 72 | 576 = 'EL SALVADOR' 73 | 399 = 'EQUATORIAL GUINEA' 74 | 372 = 'ERITREA' 75 | 109 = 'ESTONIA' 76 | 369 = 'ETHIOPIA' 77 | 604 = 'FALKLAND ISLANDS' 78 | 413 = 'FIJI' 79 | 110 = 'FINLAND' 80 | 111 = 'FRANCE' 81 | 601 = 'FRENCH GUIANA' 82 | 411 = 'FRENCH POLYNESIA' 83 | 387 = 'GABON' 84 | 338 = 'GAMBIA' 85 | 758 = 'GAZA STRIP' 86 | 154 = 'GEORGIA' 87 | 112 = 'GERMANY' 88 | 339 = 'GHANA' 89 | 143 = 'GIBRALTAR' 90 | 113 = 'GREECE' 91 | 520 = 'GRENADA' 92 | 507 = 'GUADELOUPE' 93 | 577 = 'GUATEMALA' 94 | 382 = 'GUINEA' 95 | 327 = 'GUINEA-BISSAU' 96 | 603 = 'GUYANA' 97 | 586 = 'HAITI' 98 | 726 = 'HEARD AND MCDONALD IS.' 
99 | 149 = 'HOLY SEE/VATICAN' 100 | 528 = 'HONDURAS' 101 | 206 = 'HONG KONG' 102 | 114 = 'HUNGARY' 103 | 115 = 'ICELAND' 104 | 213 = 'INDIA' 105 | 759 = 'INDIAN OCEAN AREAS (FRENCH)' 106 | 729 = 'INDIAN OCEAN TERRITORY' 107 | 204 = 'INDONESIA' 108 | 249 = 'IRAN' 109 | 250 = 'IRAQ' 110 | 116 = 'IRELAND' 111 | 251 = 'ISRAEL' 112 | 117 = 'ITALY' 113 | 388 = 'IVORY COAST' 114 | 514 = 'JAMAICA' 115 | 209 = 'JAPAN' 116 | 253 = 'JORDAN' 117 | 201 = 'KAMPUCHEA' 118 | 155 = 'KAZAKHSTAN' 119 | 340 = 'KENYA' 120 | 414 = 'KIRIBATI' 121 | 732 = 'KOSOVO' 122 | 272 = 'KUWAIT' 123 | 156 = 'KYRGYZSTAN' 124 | 203 = 'LAOS' 125 | 118 = 'LATVIA' 126 | 255 = 'LEBANON' 127 | 335 = 'LESOTHO' 128 | 370 = 'LIBERIA' 129 | 381 = 'LIBYA' 130 | 119 = 'LIECHTENSTEIN' 131 | 120 = 'LITHUANIA' 132 | 121 = 'LUXEMBOURG' 133 | 214 = 'MACAU' 134 | 167 = 'MACEDONIA' 135 | 320 = 'MADAGASCAR' 136 | 345 = 'MALAWI' 137 | 273 = 'MALAYSIA' 138 | 220 = 'MALDIVES' 139 | 392 = 'MALI' 140 | 145 = 'MALTA' 141 | 472 = 'MARSHALL ISLANDS' 142 | 511 = 'MARTINIQUE' 143 | 389 = 'MAURITANIA' 144 | 342 = 'MAURITIUS' 145 | 760 = 'MAYOTTE (AFRICA - FRENCH)' 146 | 473 = 'MICRONESIA, FED. 
STATES OF' 147 | 157 = 'MOLDOVA' 148 | 122 = 'MONACO' 149 | 299 = 'MONGOLIA' 150 | 735 = 'MONTENEGRO' 151 | 521 = 'MONTSERRAT' 152 | 332 = 'MOROCCO' 153 | 329 = 'MOZAMBIQUE' 154 | 371 = 'NAMIBIA' 155 | 440 = 'NAURU' 156 | 257 = 'NEPAL' 157 | 123 = 'NETHERLANDS' 158 | 508 = 'NETHERLANDS ANTILLES' 159 | 409 = 'NEW CALEDONIA' 160 | 464 = 'NEW ZEALAND' 161 | 579 = 'NICARAGUA' 162 | 390 = 'NIGER' 163 | 343 = 'NIGERIA' 164 | 470 = 'NIUE' 165 | 275 = 'NORTH KOREA' 166 | 124 = 'NORWAY' 167 | 256 = 'OMAN' 168 | 258 = 'PAKISTAN' 169 | 474 = 'PALAU' 170 | 743 = 'PALESTINE' 171 | 504 = 'PANAMA' 172 | 441 = 'PAPUA NEW GUINEA' 173 | 693 = 'PARAGUAY' 174 | 694 = 'PERU' 175 | 260 = 'PHILIPPINES' 176 | 416 = 'PITCAIRN ISLANDS' 177 | 107 = 'POLAND' 178 | 126 = 'PORTUGAL' 179 | 297 = 'QATAR' 180 | 748 = 'REPUBLIC OF SOUTH SUDAN' 181 | 321 = 'REUNION' 182 | 127 = 'ROMANIA' 183 | 158 = 'RUSSIA' 184 | 376 = 'RWANDA' 185 | 128 = 'SAN MARINO' 186 | 330 = 'SAO TOME AND PRINCIPE' 187 | 261 = 'SAUDI ARABIA' 188 | 391 = 'SENEGAL' 189 | 142 = 'SERBIA AND MONTENEGRO' 190 | 745 = 'SERBIA' 191 | 347 = 'SEYCHELLES' 192 | 348 = 'SIERRA LEONE' 193 | 207 = 'SINGAPORE' 194 | 141 = 'SLOVAKIA' 195 | 166 = 'SLOVENIA' 196 | 412 = 'SOLOMON ISLANDS' 197 | 397 = 'SOMALIA' 198 | 373 = 'SOUTH AFRICA' 199 | 276 = 'SOUTH KOREA' 200 | 129 = 'SPAIN' 201 | 244 = 'SRI LANKA' 202 | 346 = 'ST. HELENA' 203 | 522 = 'ST. KITTS-NEVIS' 204 | 523 = 'ST. LUCIA' 205 | 502 = 'ST. PIERRE AND MIQUELON' 206 | 524 = 'ST. 
VINCENT-GRENADINES' 207 | 716 = 'SAINT BARTHELEMY' 208 | 736 = 'SAINT MARTIN' 209 | 749 = 'SAINT MAARTEN' 210 | 350 = 'SUDAN' 211 | 602 = 'SURINAME' 212 | 351 = 'SWAZILAND' 213 | 130 = 'SWEDEN' 214 | 131 = 'SWITZERLAND' 215 | 262 = 'SYRIA' 216 | 268 = 'TAIWAN' 217 | 159 = 'TAJIKISTAN' 218 | 353 = 'TANZANIA' 219 | 263 = 'THAILAND' 220 | 304 = 'TOGO' 221 | 417 = 'TONGA' 222 | 516 = 'TRINIDAD AND TOBAGO' 223 | 323 = 'TUNISIA' 224 | 264 = 'TURKEY' 225 | 161 = 'TURKMENISTAN' 226 | 527 = 'TURKS AND CAICOS ISLANDS' 227 | 420 = 'TUVALU' 228 | 352 = 'UGANDA' 229 | 162 = 'UKRAINE' 230 | 296 = 'UNITED ARAB EMIRATES' 231 | 135 = 'UNITED KINGDOM' 232 | 695 = 'URUGUAY' 233 | 163 = 'UZBEKISTAN' 234 | 410 = 'VANUATU' 235 | 696 = 'VENEZUELA' 236 | 266 = 'VIETNAM' 237 | 469 = 'WALLIS AND FUTUNA ISLANDS' 238 | 757 = 'WEST INDIES (FRENCH)' 239 | 333 = 'WESTERN SAHARA' 240 | 465 = 'WESTERN SAMOA' 241 | 216 = 'YEMEN' 242 | 139 = 'YUGOSLAVIA' 243 | 301 = 'ZAIRE' 244 | 344 = 'ZAMBIA' 245 | 315 = 'ZIMBABWE' 246 | 403 = 'INVALID: AMERICAN SAMOA' 247 | 712 = 'INVALID: ANTARCTICA' 248 | 700 = 'INVALID: BORN ON BOARD SHIP' 249 | 719 = 'INVALID: BOUVET ISLAND (ANTARCTICA/NORWAY TERR.)' 250 | 574 = 'INVALID: CANADA' 251 | 720 = 'INVALID: CANTON AND ENDERBURY ISLS' 252 | 106 = 'INVALID: CZECHOSLOVAKIA' 253 | 739 = 'INVALID: DRONNING MAUD LAND (ANTARCTICA-NORWAY)' 254 | 394 = 'INVALID: FRENCH SOUTHERN AND ANTARCTIC' 255 | 501 = 'INVALID: GREENLAND' 256 | 404 = 'INVALID: GUAM' 257 | 730 = 'INVALID: INTERNATIONAL WATERS' 258 | 731 = 'INVALID: JOHNSON ISLAND' 259 | 471 = 'INVALID: MARIANA ISLANDS, NORTHERN' 260 | 737 = 'INVALID: MIDWAY ISLANDS' 261 | 753 = 'INVALID: MINOR OUTLYING ISLANDS - USA' 262 | 740 = 'INVALID: NEUTRAL ZONE (S. 
ARABIA/IRAQ)' 263 | 710 = 'INVALID: NON-QUOTA IMMIGRANT' 264 | 505 = 'INVALID: PUERTO RICO' 265 | 0 = 'INVALID: STATELESS' 266 | 705 = 'INVALID: STATELESS' 267 | 583 = 'INVALID: UNITED STATES' 268 | 407 = 'INVALID: UNITED STATES' 269 | 999 = 'INVALID: UNKNOWN' 270 | 239 = 'INVALID: UNKNOWN COUNTRY' 271 | 134 = 'INVALID: USSR' 272 | 506 = 'INVALID: U.S. VIRGIN ISLANDS' 273 | 755 = 'INVALID: WAKE ISLAND' 274 | 311 = 'Collapsed Tanzania (should not show)' 275 | 741 = 'Collapsed Curacao (should not show)' 276 | 54 = 'No Country Code (54)' 277 | 100 = 'No Country Code (100)' 278 | 187 = 'No Country Code (187)' 279 | 190 = 'No Country Code (190)' 280 | 200 = 'No Country Code (200)' 281 | 219 = 'No Country Code (219)' 282 | 238 = 'No Country Code (238)' 283 | 277 = 'No Country Code (277)' 284 | 293 = 'No Country Code (293)' 285 | 300 = 'No Country Code (300)' 286 | 319 = 'No Country Code (319)' 287 | 365 = 'No Country Code (365)' 288 | 395 = 'No Country Code (395)' 289 | 400 = 'No Country Code (400)' 290 | 485 = 'No Country Code (485)' 291 | 503 = 'No Country Code (503)' 292 | 589 = 'No Country Code (589)' 293 | 592 = 'No Country Code (592)' 294 | 791 = 'No Country Code (791)' 295 | 849 = 'No Country Code (849)' 296 | 914 = 'No Country Code (914)' 297 | 944 = 'No Country Code (944)' 298 | 996 = 'No Country Code (996)' ; 299 | 300 | 301 | /* I94PORT - This format shows all the valid and invalid codes for processing */ 302 | value $i94prtl 303 | 'ALC' = 'ALCAN, AK ' 304 | 'ANC' = 'ANCHORAGE, AK ' 305 | 'BAR' = 'BAKER AAF - BAKER ISLAND, AK' 306 | 'DAC' = 'DALTONS CACHE, AK ' 307 | 'PIZ' = 'DEW STATION PT LAY DEW, AK' 308 | 'DTH' = 'DUTCH HARBOR, AK ' 309 | 'EGL' = 'EAGLE, AK ' 310 | 'FRB' = 'FAIRBANKS, AK ' 311 | 'HOM' = 'HOMER, AK ' 312 | 'HYD' = 'HYDER, AK ' 313 | 'JUN' = 'JUNEAU, AK ' 314 | '5KE' = 'KETCHIKAN, AK' 315 | 'KET' = 'KETCHIKAN, AK ' 316 | 'MOS' = 'MOSES POINT INTERMEDIATE, AK' 317 | 'NIK' = 'NIKISKI, AK ' 318 | 'NOM' = 'NOM, AK ' 319 | 'PKC' = 'POKER CREEK, 
AK ' 320 | 'ORI' = 'PORT LIONS SPB, AK' 321 | 'SKA' = 'SKAGWAY, AK ' 322 | 'SNP' = 'ST. PAUL ISLAND, AK' 323 | 'TKI' = 'TOKEEN, AK' 324 | 'WRA' = 'WRANGELL, AK ' 325 | 'HSV' = 'MADISON COUNTY - HUNTSVILLE, AL' 326 | 'MOB' = 'MOBILE, AL ' 327 | 'LIA' = 'LITTLE ROCK, AR (BPS)' 328 | 'ROG' = 'ROGERS ARPT, AR' 329 | 'DOU' = 'DOUGLAS, AZ ' 330 | 'LUK' = 'LUKEVILLE, AZ ' 331 | 'MAP' = 'MARIPOSA AZ ' 332 | 'NAC' = 'NACO, AZ ' 333 | 'NOG' = 'NOGALES, AZ ' 334 | 'PHO' = 'PHOENIX, AZ ' 335 | 'POR' = 'PORTAL, AZ' 336 | 'SLU' = 'SAN LUIS, AZ ' 337 | 'SAS' = 'SASABE, AZ ' 338 | 'TUC' = 'TUCSON, AZ ' 339 | 'YUI' = 'YUMA, AZ ' 340 | 'AND' = 'ANDRADE, CA ' 341 | 'BUR' = 'BURBANK, CA' 342 | 'CAL' = 'CALEXICO, CA ' 343 | 'CAO' = 'CAMPO, CA ' 344 | 'FRE' = 'FRESNO, CA ' 345 | 'ICP' = 'IMPERIAL COUNTY, CA ' 346 | 'LNB' = 'LONG BEACH, CA ' 347 | 'LOS' = 'LOS ANGELES, CA ' 348 | 'BFL' = 'MEADOWS FIELD - BAKERSFIELD, CA' 349 | 'OAK' = 'OAKLAND, CA ' 350 | 'ONT' = 'ONTARIO, CA' 351 | 'OTM' = 'OTAY MESA, CA ' 352 | 'BLT' = 'PACIFIC, HWY. 
STATION, CA ' 353 | 'PSP' = 'PALM SPRINGS, CA' 354 | 'SAC' = 'SACRAMENTO, CA ' 355 | 'SLS' = 'SALINAS, CA (BPS)' 356 | 'SDP' = 'SAN DIEGO, CA' 357 | 'SFR' = 'SAN FRANCISCO, CA ' 358 | 'SNJ' = 'SAN JOSE, CA ' 359 | 'SLO' = 'SAN LUIS OBISPO, CA ' 360 | 'SLI' = 'SAN LUIS OBISPO, CA (BPS)' 361 | 'SPC' = 'SAN PEDRO, CA ' 362 | 'SYS' = 'SAN YSIDRO, CA ' 363 | 'SAA' = 'SANTA ANA, CA ' 364 | 'STO' = 'STOCKTON, CA (BPS)' 365 | 'TEC' = 'TECATE, CA ' 366 | 'TRV' = 'TRAVIS-AFB, CA ' 367 | 'APA' = 'ARAPAHOE COUNTY, CO' 368 | 'ASE' = 'ASPEN, CO #ARPT' 369 | 'COS' = 'COLORADO SPRINGS, CO' 370 | 'DEN' = 'DENVER, CO ' 371 | 'DRO' = 'LA PLATA - DURANGO, CO' 372 | 'BDL' = 'BRADLEY INTERNATIONAL, CT' 373 | 'BGC' = 'BRIDGEPORT, CT ' 374 | 'GRT' = 'GROTON, CT ' 375 | 'HAR' = 'HARTFORD, CT ' 376 | 'NWH' = 'NEW HAVEN, CT ' 377 | 'NWL' = 'NEW LONDON, CT ' 378 | 'TST' = 'NEWINGTON DATA CENTER TEST, CT' 379 | 'WAS' = 'WASHINGTON DC ' 380 | 'DOV' = 'DOVER AFB, DE' 381 | 'DVD' = 'DOVER-AFB, DE ' 382 | 'WLL' = 'WILMINGTON, DE ' 383 | 'BOC' = 'BOCAGRANDE, FL ' 384 | 'SRQ' = 'BRADENTON - SARASOTA, FL' 385 | 'CAN' = 'CAPE CANAVERAL, FL ' 386 | 'DAB' = 'DAYTONA BEACH INTERNATIONAL, FL' 387 | 'FRN' = 'FERNANDINA, FL ' 388 | 'FTL' = 'FORT LAUDERDALE, FL ' 389 | 'FMY' = 'FORT MYERS, FL ' 390 | 'FPF' = 'FORT PIERCE, FL ' 391 | 'HUR' = 'HURLBURT FIELD, FL' 392 | 'GNV' = 'J R ALISON MUNI - GAINESVILLE, FL' 393 | 'JAC' = 'JACKSONVILLE, FL ' 394 | 'KEY' = 'KEY WEST, FL ' 395 | 'LEE' = 'LEESBURG MUNICIPAL AIRPORT, FL' 396 | 'MLB' = 'MELBOURNE, FL' 397 | 'MIA' = 'MIAMI, FL ' 398 | 'APF' = 'NAPLES, FL #ARPT' 399 | 'OPF' = 'OPA LOCKA, FL' 400 | 'ORL' = 'ORLANDO, FL ' 401 | 'PAN' = 'PANAMA CITY, FL ' 402 | 'PEN' = 'PENSACOLA, FL ' 403 | 'PCF' = 'PORT CANAVERAL, FL ' 404 | 'PEV' = 'PORT EVERGLADES, FL ' 405 | 'PSJ' = 'PORT ST JOE, FL ' 406 | 'SFB' = 'SANFORD, FL ' 407 | 'SGJ' = 'ST AUGUSTINE ARPT, FL' 408 | 'SAU' = 'ST AUGUSTINE, FL ' 409 | 'FPR' = 'ST LUCIE COUNTY, FL' 410 | 'SPE' = 'ST PETERSBURG, FL ' 411 | 
'TAM' = 'TAMPA, FL ' 412 | 'WPB' = 'WEST PALM BEACH, FL ' 413 | 'ATL' = 'ATLANTA, GA ' 414 | 'BRU' = 'BRUNSWICK, GA ' 415 | 'AGS' = 'BUSH FIELD - AUGUSTA, GA' 416 | 'SAV' = 'SAVANNAH, GA ' 417 | 'AGA' = 'AGANA, GU ' 418 | 'HHW' = 'HONOLULU, HI ' 419 | 'OGG' = 'KAHULUI - MAUI, HI' 420 | 'KOA' = 'KEAHOLE-KONA, HI ' 421 | 'LIH' = 'LIHUE, HI ' 422 | 'CID' = 'CEDAR RAPIDS/IOWA CITY, IA' 423 | 'DSM' = 'DES MOINES, IA' 424 | 'BOI' = 'AIR TERM. (GOWEN FLD) BOISE, ID' 425 | 'EPI' = 'EASTPORT, ID ' 426 | 'IDA' = 'FANNING FIELD - IDAHO FALLS, ID' 427 | 'PTL' = 'PORTHILL, ID ' 428 | 'SPI' = 'CAPITAL - SPRINGFIELD, IL' 429 | 'CHI' = 'CHICAGO, IL ' 430 | 'DPA' = 'DUPAGE COUNTY, IL' 431 | 'PIA' = 'GREATER PEORIA, IL' 432 | 'RFD' = 'GREATER ROCKFORD, IL' 433 | 'UGN' = 'MEMORIAL - WAUKEGAN, IL' 434 | 'GAR' = 'GARY, IN ' 435 | 'HMM' = 'HAMMOND, IN ' 436 | 'INP' = 'INDIANAPOLIS, IN ' 437 | 'MRL' = 'MERRILLVILLE, IN ' 438 | 'SBN' = 'SOUTH BEND, IN' 439 | 'ICT' = 'MID-CONTINENT - WITCHITA, KS' 440 | 'LEX' = 'BLUE GRASS - LEXINGTON, KY' 441 | 'LOU' = 'LOUISVILLE, KY ' 442 | 'BTN' = 'BATON ROUGE, LA ' 443 | 'LKC' = 'LAKE CHARLES, LA ' 444 | 'LAK' = 'LAKE CHARLES, LA (BPS)' 445 | 'MLU' = 'MONROE, LA' 446 | 'MGC' = 'MORGAN CITY, LA ' 447 | 'NOL' = 'NEW ORLEANS, LA ' 448 | 'BOS' = 'BOSTON, MA ' 449 | 'GLO' = 'GLOUCESTER, MA ' 450 | 'BED' = 'HANSCOM FIELD - BEDFORD, MA' 451 | 'LYN' = 'LYNDEN, WA ' 452 | 'ADW' = 'ANDREWS AFB, MD' 453 | 'BAL' = 'BALTIMORE, MD ' 454 | 'MKG' = 'MUSKEGON, MD' 455 | 'PAX' = 'PATUXENT RIVER, MD ' 456 | 'BGM' = 'BANGOR, ME ' 457 | 'BOO' = 'BOOTHBAY HARBOR, ME ' 458 | 'BWM' = 'BRIDGEWATER, ME ' 459 | 'BCK' = 'BUCKPORT, ME ' 460 | 'CLS' = 'CALAIS, ME ' 461 | 'CRB' = 'CARIBOU, ME ' 462 | 'COB' = 'COBURN GORE, ME ' 463 | 'EST' = 'EASTCOURT, ME ' 464 | 'EPT' = 'EASTPORT MUNICIPAL, ME' 465 | 'EPM' = 'EASTPORT, ME ' 466 | 'FOR' = 'FOREST CITY, ME ' 467 | 'FTF' = 'FORT FAIRFIELD, ME ' 468 | 'FTK' = 'FORT KENT, ME ' 469 | 'HML' = 'HAMIIN, ME ' 470 | 'HTM' = 'HOULTON, ME ' 
471 | 'JKM' = 'JACKMAN, ME ' 472 | 'KAL' = 'KALISPEL, MT ' 473 | 'LIM' = 'LIMESTONE, ME ' 474 | 'LUB' = 'LUBEC, ME ' 475 | 'MAD' = 'MADAWASKA, ME ' 476 | 'POM' = 'PORTLAND, ME ' 477 | 'RGM' = 'RANGELEY, ME (BPS)' 478 | 'SBR' = 'SOUTH BREWER, ME ' 479 | 'SRL' = 'ST AURELIE, ME ' 480 | 'SPA' = 'ST PAMPILE, ME ' 481 | 'VNB' = 'VAN BUREN, ME ' 482 | 'VCB' = 'VANCEBORO, ME ' 483 | 'AGN' = 'ALGONAC, MI ' 484 | 'ALP' = 'ALPENA, MI ' 485 | 'BCY' = 'BAY CITY, MI ' 486 | 'DET' = 'DETROIT, MI ' 487 | 'GRP' = 'GRAND RAPIDS, MI' 488 | 'GRO' = 'GROSSE ISLE, MI ' 489 | 'ISL' = 'ISLE ROYALE, MI ' 490 | 'MRC' = 'MARINE CITY, MI ' 491 | 'MRY' = 'MARYSVILLE, MI ' 492 | 'PTK' = 'OAKLAND COUNTY - PONTIAC, MI' 493 | 'PHU' = 'PORT HURON, MI ' 494 | 'RBT' = 'ROBERTS LANDING, MI ' 495 | 'SAG' = 'SAGINAW, MI ' 496 | 'SSM' = 'SAULT STE. MARIE, MI ' 497 | 'SCL' = 'ST CLAIR, MI ' 498 | 'YIP' = 'WILLOW RUN - YPSILANTI, MI' 499 | 'BAU' = 'BAUDETTE, MN ' 500 | 'CAR' = 'CARIBOU MUNICIPAL AIRPORT, MN' 501 | 'GTF' = 'Collapsed into INT, MN' 502 | 'INL' = 'Collapsed into INT, MN' 503 | 'CRA' = 'CRANE LAKE, MN ' 504 | 'MIC' = 'CRYSTAL MUNICIPAL AIRPORT, MN' 505 | 'DUL' = 'DULUTH, MN ' 506 | 'ELY' = 'ELY, MN ' 507 | 'GPM' = 'GRAND PORTAGE, MN ' 508 | 'SVC' = 'GRANT COUNTY - SILVER CITY, MN' 509 | 'INT' = 'INT''L FALLS, MN ' 510 | 'LAN' = 'LANCASTER, MN ' 511 | 'MSP' = 'MINN./ST PAUL, MN ' 512 | 'LIN' = 'NORTHERN SVC CENTER, MN ' 513 | 'NOY' = 'NOYES, MN ' 514 | 'PIN' = 'PINE CREEK, MN ' 515 | '48Y' = 'PINECREEK BORDER ARPT, MN' 516 | 'RAN' = 'RAINER, MN ' 517 | 'RST' = 'ROCHESTER, MN' 518 | 'ROS' = 'ROSEAU, MN ' 519 | 'SPM' = 'ST PAUL, MN ' 520 | 'WSB' = 'WARROAD INTL, SPB, MN' 521 | 'WAR' = 'WARROAD, MN ' 522 | 'KAN' = 'KANSAS CITY, MO ' 523 | 'SGF' = 'SPRINGFIELD-BRANSON, MO' 524 | 'STL' = 'ST LOUIS, MO ' 525 | 'WHI' = 'WHITETAIL, MT ' 526 | 'WHM' = 'WILD HORSE, MT ' 527 | 'GPT' = 'BILOXI REGIONAL, MS' 528 | 'GTR' = 'GOLDEN TRIANGLE LOWNDES CNTY, MS' 529 | 'GUL' = 'GULFPORT, MS ' 530 | 'PAS' = 
'PASCAGOULA, MS ' 531 | 'JAN' = 'THOMPSON FIELD - JACKSON, MS' 532 | 'BIL' = 'BILLINGS, MT ' 533 | 'BTM' = 'BUTTE, MT ' 534 | 'CHF' = 'CHIEF MT, MT ' 535 | 'CTB' = 'CUT BANK MUNICIPAL, MT' 536 | 'CUT' = 'CUT BANK, MT ' 537 | 'DLB' = 'DEL BONITA, MT ' 538 | 'EUR' = 'EUREKA, MT (BPS)' 539 | 'BZN' = 'GALLATIN FIELD - BOZEMAN, MT' 540 | 'FCA' = 'GLACIER NATIONAL PARK, MT' 541 | 'GGW' = 'GLASGOW, MT ' 542 | 'GRE' = 'GREAT FALLS, MT ' 543 | 'HVR' = 'HAVRE, MT ' 544 | 'HEL' = 'HELENA, MT ' 545 | 'LWT' = 'LEWISTON, MT ' 546 | 'MGM' = 'MORGAN, MT ' 547 | 'OPH' = 'OPHEIM, MT ' 548 | 'PIE' = 'PIEGAN, MT ' 549 | 'RAY' = 'RAYMOND, MT ' 550 | 'ROO' = 'ROOSVILLE, MT ' 551 | 'SCO' = 'SCOBEY, MT ' 552 | 'SWE' = 'SWEETGTASS, MT ' 553 | 'TRL' = 'TRIAL CREEK, MT ' 554 | 'TUR' = 'TURNER, MT ' 555 | 'WCM' = 'WILLOW CREEK, MT ' 556 | 'CLT' = 'CHARLOTTE, NC ' 557 | 'FAY' = 'FAYETTEVILLE, NC' 558 | 'MRH' = 'MOREHEAD CITY, NC ' 559 | 'FOP' = 'MORRIS FIELDS AAF, NC' 560 | 'GSO' = 'PIEDMONT TRIAD INTL AIRPORT, NC' 561 | 'RDU' = 'RALEIGH/DURHAM, NC ' 562 | 'SSC' = 'SHAW AFB - SUMTER, NC' 563 | 'WIL' = 'WILMINGTON, NC ' 564 | 'AMB' = 'AMBROSE, ND ' 565 | 'ANT' = 'ANTLER, ND ' 566 | 'CRY' = 'CARBURY, ND ' 567 | 'DNS' = 'DUNSEITH, ND ' 568 | 'FAR' = 'FARGO, ND ' 569 | 'FRT' = 'FORTUNA, ND ' 570 | 'GRF' = 'GRAND FORKS, ND ' 571 | 'HNN' = 'HANNAH, ND ' 572 | 'HNS' = 'HANSBORO, ND ' 573 | 'MAI' = 'MAIDA, ND ' 574 | 'MND' = 'MINOT, ND ' 575 | 'NEC' = 'NECHE, ND ' 576 | 'NOO' = 'NOONAN, ND ' 577 | 'NRG' = 'NORTHGATE, ND ' 578 | 'PEM' = 'PEMBINA, ND ' 579 | 'SAR' = 'SARLES, ND ' 580 | 'SHR' = 'SHERWOOD, ND ' 581 | 'SJO' = 'ST JOHN, ND ' 582 | 'WAL' = 'WALHALLA, ND ' 583 | 'WHO' = 'WESTHOPE, ND ' 584 | 'WND' = 'WILLISTON, ND ' 585 | 'OMA' = 'OMAHA, NE ' 586 | 'LEB' = 'LEBANON, NH ' 587 | 'MHT' = 'MANCHESTER, NH' 588 | 'PNH' = 'PITTSBURG, NH ' 589 | 'PSM' = 'PORTSMOUTH, NH ' 590 | 'BYO' = 'BAYONNE, NJ ' 591 | 'CNJ' = 'CAMDEN, NJ ' 592 | 'HOB' = 'HOBOKEN, NJ ' 593 | 'JER' = 'JERSEY CITY, NJ ' 594 | 'WRI' 
= 'MC GUIRE AFB - WRIGHTSOWN, NJ' 595 | 'MMU' = 'MORRISTOWN, NJ' 596 | 'NEW' = 'NEWARK/TETERBORO, NJ ' 597 | 'PER' = 'PERTH AMBOY, NJ ' 598 | 'ACY' = 'POMONA FIELD - ATLANTIC CITY, NJ' 599 | 'ALA' = 'ALAMAGORDO, NM (BPS)' 600 | 'ABQ' = 'ALBUQUERQUE, NM ' 601 | 'ANP' = 'ANTELOPE WELLS, NM ' 602 | 'CRL' = 'CARLSBAD, NM ' 603 | 'COL' = 'COLUMBUS, NM ' 604 | 'CDD' = 'CRANE LAKE - ST. LOUIS CNTY, NM' 605 | 'DNM' = 'DEMING, NM (BPS)' 606 | 'LAS' = 'LAS CRUCES, NM ' 607 | 'LOB' = 'LORDSBURG, NM (BPS)' 608 | 'RUI' = 'RUIDOSO, NM' 609 | 'STR' = 'SANTA TERESA, NM ' 610 | 'RNO' = 'CANNON INTL - RENO/TAHOE, NV' 611 | 'FLX' = 'FALLON MUNICIPAL AIRPORT, NV' 612 | 'LVG' = 'LAS VEGAS, NV ' 613 | 'REN' = 'RENO, NV ' 614 | 'ALB' = 'ALBANY, NY ' 615 | 'AXB' = 'ALEXANDRIA BAY, NY ' 616 | 'BUF' = 'BUFFALO, NY ' 617 | 'CNH' = 'CANNON CORNERS, NY' 618 | 'CAP' = 'CAPE VINCENT, NY ' 619 | 'CHM' = 'CHAMPLAIN, NY ' 620 | 'CHT' = 'CHATEAUGAY, NY ' 621 | 'CLA' = 'CLAYTON, NY ' 622 | 'FTC' = 'FORT COVINGTON, NY ' 623 | 'LAG' = 'LA GUARDIA, NY ' 624 | 'LEW' = 'LEWISTON, NY ' 625 | 'MAS' = 'MASSENA, NY ' 626 | 'MAG' = 'MCGUIRE AFB, NY ' 627 | 'MOO' = 'MOORES, NY ' 628 | 'MRR' = 'MORRISTOWN, NY ' 629 | 'NYC' = 'NEW YORK, NY ' 630 | 'NIA' = 'NIAGARA FALLS, NY ' 631 | 'OGD' = 'OGDENSBURG, NY ' 632 | 'OSW' = 'OSWEGO, NY ' 633 | 'ELM' = 'REGIONAL ARPT - HORSEHEAD, NY' 634 | 'ROC' = 'ROCHESTER, NY ' 635 | 'ROU' = 'ROUSES POINT, NY ' 636 | 'SWF' = 'STEWART - ORANGE CNTY, NY' 637 | 'SYR' = 'SYRACUSE, NY ' 638 | 'THO' = 'THOUSAND ISLAND BRIDGE, NY' 639 | 'TRO' = 'TROUT RIVER, NY ' 640 | 'WAT' = 'WATERTOWN, NY ' 641 | 'HPN' = 'WESTCHESTER - WHITE PLAINS, NY' 642 | 'WRB' = 'WHIRLPOOL BRIDGE, NY' 643 | 'YOU' = 'YOUNGSTOWN, NY ' 644 | 'AKR' = 'AKRON, OH ' 645 | 'ATB' = 'ASHTABULA, OH ' 646 | 'CIN' = 'CINCINNATI, OH ' 647 | 'CLE' = 'CLEVELAND, OH ' 648 | 'CLM' = 'COLUMBUS, OH ' 649 | 'LOR' = 'LORAIN, OH ' 650 | 'MBO' = 'MARBLE HEADS, OH ' 651 | 'SDY' = 'SANDUSKY, OH ' 652 | 'TOL' = 'TOLEDO, OH ' 653 | 'OKC' = 
'OKLAHOMA CITY, OK ' 654 | 'TUL' = 'TULSA, OK' 655 | 'AST' = 'ASTORIA, OR ' 656 | 'COO' = 'COOS BAY, OR ' 657 | 'HIO' = 'HILLSBORO, OR' 658 | 'MED' = 'MEDFORD, OR ' 659 | 'NPT' = 'NEWPORT, OR ' 660 | 'POO' = 'PORTLAND, OR ' 661 | 'PUT' = 'PUT-IN-BAY, OH ' 662 | 'RDM' = 'ROBERTS FIELDS - REDMOND, OR' 663 | 'ERI' = 'ERIE, PA ' 664 | 'MDT' = 'HARRISBURG, PA' 665 | 'HSB' = 'HARRISONBURG, PA ' 666 | 'PHI' = 'PHILADELPHIA, PA ' 667 | 'PIT' = 'PITTSBURG, PA ' 668 | 'AGU' = 'AGUADILLA, PR ' 669 | 'BQN' = 'BORINQUEN - AGUADILLO, PR' 670 | 'JCP' = 'CULEBRA - BENJAMIN RIVERA, PR' 671 | 'ENS' = 'ENSENADA, PR ' 672 | 'FAJ' = 'FAJARDO, PR ' 673 | 'HUM' = 'HUMACAO, PR ' 674 | 'JOB' = 'JOBOS, PR ' 675 | 'MAY' = 'MAYAGUEZ, PR ' 676 | 'PON' = 'PONCE, PR ' 677 | 'PSE' = 'PONCE-MERCEDITA, PR' 678 | 'SAJ' = 'SAN JUAN, PR ' 679 | 'VQS' = 'VIEQUES-ARPT, PR' 680 | 'PRO' = 'PROVIDENCE, RI ' 681 | 'PVD' = 'THEODORE FRANCIS - WARWICK, RI' 682 | 'CHL' = 'CHARLESTON, SC ' 683 | 'CAE' = 'COLUMBIA, SC #ARPT' 684 | 'GEO' = 'GEORGETOWN, SC ' 685 | 'GSP' = 'GREENVILLE, SC' 686 | 'GRR' = 'GREER, SC' 687 | 'MYR' = 'MYRTLE BEACH, SC' 688 | 'SPF' = 'BLACK HILLS, SPEARFISH, SD' 689 | 'HON' = 'HOWES REGIONAL ARPT - HURON, SD' 690 | 'SAI' = 'SAIPAN, SPN ' 691 | 'TYS' = 'MC GHEE TYSON - ALCOA, TN' 692 | 'MEM' = 'MEMPHIS, TN ' 693 | 'NSV' = 'NASHVILLE, TN ' 694 | 'TRI' = 'TRI CITY ARPT, TN' 695 | 'ADS' = 'ADDISON AIRPORT- ADDISON, TX' 696 | 'ADT' = 'AMISTAD DAM, TX ' 697 | 'ANZ' = 'ANZALDUAS, TX' 698 | 'AUS' = 'AUSTIN, TX ' 699 | 'BEA' = 'BEAUMONT, TX ' 700 | 'BBP' = 'BIG BEND PARK, TX (BPS)' 701 | 'SCC' = 'BP SPEC COORD. 
CTR, TX' 702 | 'BTC' = 'BP TACTICAL UNIT, TX ' 703 | 'BOA' = 'BRIDGE OF AMERICAS, TX' 704 | 'BRO' = 'BROWNSVILLE, TX ' 705 | 'CRP' = 'CORPUS CHRISTI, TX ' 706 | 'DAL' = 'DALLAS, TX ' 707 | 'DLR' = 'DEL RIO, TX ' 708 | 'DNA' = 'DONNA, TX' 709 | 'EGP' = 'EAGLE PASS, TX ' 710 | 'ELP' = 'EL PASO, TX ' 711 | 'FAB' = 'FABENS, TX ' 712 | 'FAL' = 'FALCON HEIGHTS, TX ' 713 | 'FTH' = 'FORT HANCOCK, TX ' 714 | 'AFW' = 'FORT WORTH ALLIANCE, TX' 715 | 'FPT' = 'FREEPORT, TX ' 716 | 'GAL' = 'GALVESTON, TX ' 717 | 'HLG' = 'HARLINGEN, TX ' 718 | 'HID' = 'HIDALGO, TX ' 719 | 'HOU' = 'HOUSTON, TX ' 720 | 'SGR' = 'HULL FIELD, SUGAR LAND ARPT, TX' 721 | 'LLB' = 'JUAREZ-LINCOLN BRIDGE, TX' 722 | 'LCB' = 'LAREDO COLUMBIA BRIDGE, TX' 723 | 'LRN' = 'LAREDO NORTH, TX ' 724 | 'LAR' = 'LAREDO, TX ' 725 | 'LSE' = 'LOS EBANOS, TX ' 726 | 'IND' = 'LOS INDIOS, TX' 727 | 'LOI' = 'LOS INDIOS, TX ' 728 | 'MRS' = 'MARFA, TX (BPS)' 729 | 'MCA' = 'MCALLEN, TX ' 730 | 'MAF' = 'ODESSA REGIONAL, TX' 731 | 'PDN' = 'PASO DEL NORTE,TX ' 732 | 'PBB' = 'PEACE BRIDGE, NY ' 733 | 'PHR' = 'PHARR, TX ' 734 | 'PAR' = 'PORT ARTHUR, TX ' 735 | 'ISB' = 'PORT ISABEL, TX ' 736 | 'POE' = 'PORT OF EL PASO, TX ' 737 | 'PRE' = 'PRESIDIO, TX ' 738 | 'PGR' = 'PROGRESO, TX ' 739 | 'RIO' = 'RIO GRANDE CITY, TX ' 740 | 'ROM' = 'ROMA, TX ' 741 | 'SNA' = 'SAN ANTONIO, TX ' 742 | 'SNN' = 'SANDERSON, TX ' 743 | 'VIB' = 'VETERAN INTL BRIDGE, TX' 744 | 'YSL' = 'YSLETA, TX ' 745 | 'CHA' = 'CHARLOTTE AMALIE, VI ' 746 | 'CHR' = 'CHRISTIANSTED, VI ' 747 | 'CRU' = 'CRUZ BAY, ST JOHN, VI ' 748 | 'FRK' = 'FREDERIKSTED, VI ' 749 | 'STT' = 'ST THOMAS, VI ' 750 | 'LGU' = 'CACHE AIRPORT - LOGAN, UT' 751 | 'SLC' = 'SALT LAKE CITY, UT ' 752 | 'CHO' = 'ALBEMARLE CHARLOTTESVILLE, VA' 753 | 'DAA' = 'DAVISON AAF - FAIRFAX CNTY, VA' 754 | 'HOP' = 'HOPEWELL, VA ' 755 | 'HEF' = 'MANASSAS, VA #ARPT' 756 | 'NWN' = 'NEWPORT, VA ' 757 | 'NOR' = 'NORFOLK, VA ' 758 | 'RCM' = 'RICHMOND, VA ' 759 | 'ABS' = 'ALBURG SPRINGS, VT ' 760 | 'ABG' = 'ALBURG, VT ' 761 | 
'BEB' = 'BEEBE PLAIN, VT ' 762 | 'BEE' = 'BEECHER FALLS, VT ' 763 | 'BRG' = 'BURLINGTON, VT ' 764 | 'CNA' = 'CANAAN, VT ' 765 | 'DER' = 'DERBY LINE, VT (I-91) ' 766 | 'DLV' = 'DERBY LINE, VT (RT. 5)' 767 | 'ERC' = 'EAST RICHFORD, VT ' 768 | 'HIG' = 'HIGHGATE SPRINGS, VT ' 769 | 'MOR' = 'MORSES LINE, VT ' 770 | 'NPV' = 'NEWPORT, VT ' 771 | 'NRT' = 'NORTH TROY, VT ' 772 | 'NRN' = 'NORTON, VT ' 773 | 'PIV' = 'PINNACLE ROAD, VT ' 774 | 'RIF' = 'RICHFORT, VT ' 775 | 'STA' = 'ST ALBANS, VT ' 776 | 'SWB' = 'SWANTON, VT (BP - SECTOR HQ)' 777 | 'WBE' = 'WEST BERKSHIRE, VT ' 778 | 'ABE' = 'ABERDEEN, WA ' 779 | 'ANA' = 'ANACORTES, WA ' 780 | 'BEL' = 'BELLINGHAM, WA ' 781 | 'BLI' = 'BELLINGHAM, WASHINGTON #INTL' 782 | 'BLA' = 'BLAINE, WA ' 783 | 'BWA' = 'BOUNDARY, WA ' 784 | 'CUR' = 'CURLEW, WA (BPS)' 785 | 'DVL' = 'DANVILLE, WA ' 786 | 'EVE' = 'EVERETT, WA ' 787 | 'FER' = 'FERRY, WA ' 788 | 'FRI' = 'FRIDAY HARBOR, WA ' 789 | 'FWA' = 'FRONTIER, WA ' 790 | 'KLM' = 'KALAMA, WA ' 791 | 'LAU' = 'LAURIER, WA ' 792 | 'LON' = 'LONGVIEW, WA ' 793 | 'MET' = 'METALINE FALLS, WA ' 794 | 'MWH' = 'MOSES LAKE GRANT COUNTY ARPT, WA' 795 | 'NEA' = 'NEAH BAY, WA ' 796 | 'NIG' = 'NIGHTHAWK, WA ' 797 | 'OLY' = 'OLYMPIA, WA ' 798 | 'ORO' = 'OROVILLE, WA ' 799 | 'PWB' = 'PASCO, WA ' 800 | 'PIR' = 'POINT ROBERTS, WA ' 801 | 'PNG' = 'PORT ANGELES, WA ' 802 | 'PTO' = 'PORT TOWNSEND, WA ' 803 | 'SEA' = 'SEATTLE, WA ' 804 | 'SPO' = 'SPOKANE, WA ' 805 | 'SUM' = 'SUMAS, WA ' 806 | 'TAC' = 'TACOMA, WA ' 807 | 'PSC' = 'TRI-CITIES - PASCO, WA' 808 | 'VAN' = 'VANCOUVER, WA ' 809 | 'AGM' = 'ALGOMA, WI ' 810 | 'BAY' = 'BAYFIELD, WI ' 811 | 'GRB' = 'GREEN BAY, WI ' 812 | 'MNW' = 'MANITOWOC, WI ' 813 | 'MIL' = 'MILWAUKEE, WI ' 814 | 'MSN' = 'TRUAX FIELD - DANE COUNTY, WI' 815 | 'CHS' = 'CHARLESTON, WV ' 816 | 'CLK' = 'CLARKSBURG, WV ' 817 | 'BLF' = 'MERCER COUNTY, WV' 818 | 'CSP' = 'CASPER, WY ' 819 | 'XXX' = 'NOT REPORTED/UNKNOWN ' 820 | '888' = 'UNIDENTIFED AIR / SEAPORT' 821 | 'UNK' = 'UNKNOWN POE ' 822 | 
'CLG' = 'CALGARY, CANADA ' 823 | 'EDA' = 'EDMONTON, CANADA ' 824 | 'YHC' = 'HAKAI PASS, CANADA' 825 | 'HAL' = 'Halifax, NS, Canada ' 826 | 'MON' = 'MONTREAL, CANADA ' 827 | 'OTT' = 'OTTAWA, CANADA ' 828 | 'YXE' = 'SASKATOON, CANADA' 829 | 'TOR' = 'TORONTO, CANADA ' 830 | 'VCV' = 'VANCOUVER, CANADA ' 831 | 'VIC' = 'VICTORIA, CANADA ' 832 | 'WIN' = 'WINNIPEG, CANADA ' 833 | 'AMS' = 'AMSTERDAM-SCHIPHOL, NETHERLANDS' 834 | 'ARB' = 'ARUBA, NETH ANTILLES ' 835 | 'BAN' = 'BANKOK, THAILAND ' 836 | 'BEI' = 'BEICA #ARPT, ETHIOPIA' 837 | 'PEK' = 'BEIJING CAPITAL INTL, PRC' 838 | 'BDA' = 'KINDLEY FIELD, BERMUDA' 839 | 'BOG' = 'BOGOTA, EL DORADO #ARPT, COLOMBIA' 840 | 'EZE' = 'BUENOS AIRES, MINISTRO PIST, ARGENTINA' 841 | 'CUN' = 'CANCUN, MEXICO' 842 | 'CRQ' = 'CARAVELAS, BA #ARPT, BRAZIL' 843 | 'MVD' = 'CARRASCO, URUGUAY' 844 | 'DUB' = 'DUBLIN, IRELAND ' 845 | 'FOU' = 'FOUGAMOU #ARPT, GABON' 846 | 'FBA' = 'FREEPORT, BAHAMAS ' 847 | 'MTY' = 'GEN M. ESCOBEDO, Monterrey, MX' 848 | 'HMO' = 'GEN PESQUEIRA GARCIA, MX' 849 | 'GCM' = 'GRAND CAYMAN, CAYMAN ISLAND' 850 | 'GDL' = 'GUADALAJARA, MIGUEL HIDAL, MX' 851 | 'HAM' = 'HAMILTON, BERMUDA ' 852 | 'ICN' = 'INCHON, SEOUL KOREA' 853 | 'IWA' = 'INVALID - IWAKUNI, JAPAN' 854 | 'CND' = 'KOGALNICEANU, ROMANIA' 855 | 'LAH' = 'LABUHA ARPT, INDONESIA' 856 | 'DUR' = 'LOUIS BOTHA, SOUTH AFRICA' 857 | 'MAL' = 'MANGOLE ARPT, INDONESIA' 858 | 'MDE' = 'MEDELLIN, COLOMBIA' 859 | 'MEX' = 'JUAREZ INTL, MEXICO CITY, MX' 860 | 'LHR' = 'MIDDLESEX, ENGLAND' 861 | 'NBO' = 'NAIROBI, KENYA ' 862 | 'NAS' = 'NASSAU, BAHAMAS ' 863 | 'NCA' = 'NORTH CAICOS, TURK & CAIMAN' 864 | 'PTY' = 'OMAR TORRIJOS, PANAMA' 865 | 'SPV' = 'PAPUA, NEW GUINEA' 866 | 'UIO' = 'QUITO (MARISCAL SUCR), ECUADOR' 867 | 'RIT' = 'ROME, ITALY ' 868 | 'SNO' = 'SAKON NAKHON #ARPT, THAILAND' 869 | 'SLP' = 'SAN LUIS POTOSI #ARPT, MEXICO' 870 | 'SAN' = 'SAN SALVADOR, EL SALVADOR' 871 | 'SRO' = 'SANTANA RAMOS #ARPT, COLOMBIA' 872 | 'GRU' = 'GUARULHOS INTL, SAO PAULO, BRAZIL' 873 | 'SHA' = 
'SHANNON, IRELAND ' 874 | 'HIL' = 'SHILLAVO, ETHIOPIA' 875 | 'TOK' = 'TOROKINA #ARPT, PAPUA, NEW GUINEA' 876 | 'VER' = 'VERACRUZ, MEXICO' 877 | 'LGW' = 'WEST SUSSEX, ENGLAND ' 878 | 'ZZZ' = 'MEXICO Land (Banco de Mexico) ' 879 | 'CHN' = 'No PORT Code (CHN)' 880 | 'CNC' = 'CANNON CORNERS, NY' 881 | 'MAA' = 'Abu Dhabi' 882 | 'AG0' = 'MAGNOLIA, AR' 883 | 'BHM' = 'BAR HARBOR, ME' 884 | 'BHX' = 'BIRMINGHAM, AL' 885 | 'CAK' = 'AKRON, OH' 886 | 'FOK' = 'SUFFOLK COUNTY, NY' 887 | 'LND' = 'LANDER, WY' 888 | 'MAR' = 'MARFA, TX' 889 | 'MLI' = 'MOLINE, IL' 890 | 'RIV' = 'RIVERSIDE, CA' 891 | 'RME' = 'ROME, NY' 892 | 'VNY' = 'VAN NUYS, CA' 893 | 'YUM' = 'YUMA, AZ' 894 | 'FRG' = 'Collapsed (FOK) 06/15' 895 | 'HRL' = 'Collapsed (HLG) 06/15' 896 | 'ISP' = 'Collapsed (FOK) 06/15' 897 | 'JSJ' = 'Collapsed (SAJ) 06/15' 898 | 'BUS' = 'Collapsed (BUF) 06/15' 899 | 'IAG' = 'Collapsed (NIA) 06/15' 900 | 'PHN' = 'Collapsed (PHU) 06/15' 901 | 'STN' = 'Collapsed (STR) 06/15' 902 | 'VMB' = 'Collapsed (VNB) 06/15' 903 | 'T01' = 'Collapsed (SEA) 06/15' 904 | 'PHF' = 'No PORT Code (PHF)' 905 | 'DRV' = 'No PORT Code (DRV)' 906 | 'FTB' = 'No PORT Code (FTB)' 907 | 'GAC' = 'No PORT Code (GAC)' 908 | 'GMT' = 'No PORT Code (GMT)' 909 | 'JFA' = 'No PORT Code (JFA)' 910 | 'JMZ' = 'No PORT Code (JMZ)' 911 | 'NC8' = 'No PORT Code (NC8)' 912 | 'NYL' = 'No PORT Code (NYL)' 913 | 'OAI' = 'No PORT Code (OAI)' 914 | 'PCW' = 'No PORT Code (PCW)' 915 | 'WA5' = 'No PORT Code (WAS)' 916 | 'WTR' = 'No PORT Code (WTR)' 917 | 'X96' = 'No PORT Code (X96)' 918 | 'XNA' = 'No PORT Code (XNA)' 919 | 'YGF' = 'No PORT Code (YGF)' 920 | '5T6' = 'No PORT Code (5T6)' 921 | '060' = 'No PORT Code (60)' 922 | 'SP0' = 'No PORT Code (SP0)' 923 | 'W55' = 'No PORT Code (W55)' 924 | 'X44' = 'No PORT Code (X44)' 925 | 'AUH' = 'No PORT Code (AUH)' 926 | 'RYY' = 'No PORT Code (RYY)' 927 | 'SUS' = 'No PORT Code (SUS)' 928 | '74S' = 'No PORT Code (74S)' 929 | 'ATW' = 'No PORT Code (ATW)' 930 | 'CPX' = 'No PORT Code (CPX)' 931 | 'MTH' = 
'No PORT Code (MTH)' 932 | 'PFN' = 'No PORT Code (PFN)' 933 | 'SCH' = 'No PORT Code (SCH)' 934 | 'ASI' = 'No PORT Code (ASI)' 935 | 'BKF' = 'No PORT Code (BKF)' 936 | 'DAY' = 'No PORT Code (DAY)' 937 | 'Y62' = 'No PORT Code (Y62)' 938 | 'AG' = 'No PORT Code (AG)' 939 | 'BCM' = 'No PORT Code (BCM)' 940 | 'DEC' = 'No PORT Code (DEC)' 941 | 'PLB' = 'No PORT Code (PLB)' 942 | 'CXO' = 'No PORT Code (CXO)' 943 | 'JBQ' = 'No PORT Code (JBQ)' 944 | 'JIG' = 'No PORT Code (JIG)' 945 | 'OGS' = 'No PORT Code (OGS)' 946 | 'TIW' = 'No PORT Code (TIW)' 947 | 'OTS' = 'No PORT Code (OTS)' 948 | 'AMT' = 'No PORT Code (AMT)' 949 | 'EGE' = 'No PORT Code (EGE)' 950 | 'GPI' = 'No PORT Code (GPI)' 951 | 'NGL' = 'No PORT Code (NGL)' 952 | 'OLM' = 'No PORT Code (OLM)' 953 | '.GA' = 'No PORT Code (.GA)' 954 | 'CLX' = 'No PORT Code (CLX)' 955 | 'CP ' = 'No PORT Code (CP)' 956 | 'FSC' = 'No PORT Code (FSC)' 957 | 'NK' = 'No PORT Code (NK)' 958 | 'ADU' = 'No PORT Code (ADU)' 959 | 'AKT' = 'No PORT Code (AKT)' 960 | 'LIT' = 'No PORT Code (LIT)' 961 | 'A2A' = 'No PORT Code (A2A)' 962 | 'OSN' = 'No PORT Code (OSN)' 963 | ; 964 | 965 | 966 | /* ARRDATE is the Arrival Date in the USA. It is a SAS date numeric field to which a 967 | permanent format has not been applied. Please apply whichever date format 968 | works for you. */ 969 | 970 | 971 | /* I94MODE - There are missing values as well as not reported (9) */ 972 | value i94model 973 | 1 = 'Air' 974 | 2 = 'Sea' 975 | 3 = 'Land' 976 | 9 = 'Not reported' ; 977 | 978 | 979 | /* I94ADDR - There are lots of invalid codes in this variable and the list below 980 | shows what we have found to be valid, everything else goes into 'other' */ 981 | value i94addrl 982 | 'AL'='ALABAMA' 983 | 'AK'='ALASKA' 984 | 'AZ'='ARIZONA' 985 | 'AR'='ARKANSAS' 986 | 'CA'='CALIFORNIA' 987 | 'CO'='COLORADO' 988 | 'CT'='CONNECTICUT' 989 | 'DE'='DELAWARE' 990 | 'DC'='DIST. 
OF COLUMBIA' 991 | 'FL'='FLORIDA' 992 | 'GA'='GEORGIA' 993 | 'GU'='GUAM' 994 | 'HI'='HAWAII' 995 | 'ID'='IDAHO' 996 | 'IL'='ILLINOIS' 997 | 'IN'='INDIANA' 998 | 'IA'='IOWA' 999 | 'KS'='KANSAS' 1000 | 'KY'='KENTUCKY' 1001 | 'LA'='LOUISIANA' 1002 | 'ME'='MAINE' 1003 | 'MD'='MARYLAND' 1004 | 'MA'='MASSACHUSETTS' 1005 | 'MI'='MICHIGAN' 1006 | 'MN'='MINNESOTA' 1007 | 'MS'='MISSISSIPPI' 1008 | 'MO'='MISSOURI' 1009 | 'MT'='MONTANA' 1010 | 'NC'='N. CAROLINA' 1011 | 'ND'='N. DAKOTA' 1012 | 'NE'='NEBRASKA' 1013 | 'NV'='NEVADA' 1014 | 'NH'='NEW HAMPSHIRE' 1015 | 'NJ'='NEW JERSEY' 1016 | 'NM'='NEW MEXICO' 1017 | 'NY'='NEW YORK' 1018 | 'OH'='OHIO' 1019 | 'OK'='OKLAHOMA' 1020 | 'OR'='OREGON' 1021 | 'PA'='PENNSYLVANIA' 1022 | 'PR'='PUERTO RICO' 1023 | 'RI'='RHODE ISLAND' 1024 | 'SC'='S. CAROLINA' 1025 | 'SD'='S. DAKOTA' 1026 | 'TN'='TENNESSEE' 1027 | 'TX'='TEXAS' 1028 | 'UT'='UTAH' 1029 | 'VT'='VERMONT' 1030 | 'VI'='VIRGIN ISLANDS' 1031 | 'VA'='VIRGINIA' 1032 | 'WV'='W. VIRGINIA' 1033 | 'WA'='WASHINGTON' 1034 | 'WI'='WISCONSON' 1035 | 'WY'='WYOMING' 1036 | '99'='All Other Codes' ; 1037 | 1038 | /* DEPDATE is the Departure Date from the USA. It is a SAS date numeric field to which 1039 | a permanent format has not been applied. Please apply whichever date format 1040 | works for you. */ 1041 | 1042 | 1043 | /* I94BIR - Age of Respondent in Years */ 1044 | 1045 | 1046 | /* I94VISA - Visa codes collapsed into three categories: 1047 | 1 = Business 1048 | 2 = Pleasure 1049 | 3 = Student 1050 | */ 1051 | 1052 | 1053 | /* COUNT - Used for summary statistics */ 1054 | 1055 | 1056 | /* DTADFILE - Character Date Field - Date added to I-94 Files - CIC does not use */ 1057 | 1058 | 1059 | /* VISAPOST - Department of State where Visa was issued - CIC does not use */ 1060 | 1061 | 1062 | /* OCCUP - Occupation that will be performed in U.S. - CIC does not use */ 1063 | 1064 | 1065 | /* ENTDEPA - Arrival Flag - admitted or paroled into the U.S. 
- CIC does not use */ 1066 | 1067 | 1068 | /* ENTDEPD - Departure Flag - Departed, lost I-94 or is deceased - CIC does not use */ 1069 | 1070 | 1071 | /* ENTDEPU - Update Flag - Either apprehended, overstayed, adjusted to perm residence - CIC does not use */ 1072 | 1073 | 1074 | /* MATFLAG - Match flag - Match of arrival and departure records */ 1075 | 1076 | 1077 | /* BIRYEAR - 4 digit year of birth */ 1078 | 1079 | 1080 | /* DTADDTO - Character Date Field - Date to which admitted to U.S. (allowed to stay until) - CIC does not use */ 1081 | 1082 | 1083 | /* GENDER - Non-immigrant sex */ 1084 | 1085 | 1086 | /* INSNUM - INS number */ 1087 | 1088 | 1089 | /* AIRLINE - Airline used to arrive in U.S. */ 1090 | 1091 | 1092 | /* ADMNUM - Admission Number */ 1093 | 1094 | 1095 | /* FLTNO - Flight number of Airline used to arrive in U.S. */ 1096 | 1097 | 1098 | /* VISATYPE - Class of admission legally admitting the non-immigrant to temporarily stay in U.S. */ 1099 | run ; 1100 | 1101 | -------------------------------------------------------------------------------- /airflow/dags/temp_output/i94addr.csv: -------------------------------------------------------------------------------- 1 | state_code,state_name 2 | AL,Alabama 3 | AK,Alaska 4 | AZ,Arizona 5 | AR,Arkansas 6 | CA,California 7 | CO,Colorado 8 | CT,Connecticut 9 | DE,Delaware 10 | DC,Dist. Of Columbia 11 | FL,Florida 12 | GA,Georgia 13 | GU,Guam 14 | HI,Hawaii 15 | ID,Idaho 16 | IL,Illinois 17 | IN,Indiana 18 | IA,Iowa 19 | KS,Kansas 20 | KY,Kentucky 21 | LA,Louisiana 22 | ME,Maine 23 | MD,Maryland 24 | MA,Massachusetts 25 | MI,Michigan 26 | MN,Minnesota 27 | MS,Mississippi 28 | MO,Missouri 29 | MT,Montana 30 | NC,N. Carolina 31 | ND,N. Dakota 32 | NE,Nebraska 33 | NV,Nevada 34 | NH,New Hampshire 35 | NJ,New Jersey 36 | NM,New Mexico 37 | NY,New York 38 | OH,Ohio 39 | OK,Oklahoma 40 | OR,Oregon 41 | PA,Pennsylvania 42 | PR,Puerto Rico 43 | RI,Rhode Island 44 | SC,S. Carolina 45 | SD,S. 
Dakota 46 | TN,Tennessee 47 | TX,Texas 48 | UT,Utah 49 | VT,Vermont 50 | VI,Virgin Islands 51 | VA,Virginia 52 | WV,W. Virginia 53 | WA,Washington 54 | WI,Wisconson 55 | WY,Wyoming 56 | 99,All Other Codes 57 | -------------------------------------------------------------------------------- /airflow/dags/temp_output/i94cit&i94res.csv: -------------------------------------------------------------------------------- 1 | country_code,country_name 2 | 582,"Mexico Air Sea, And Not Reported (I-94, No Land Arrivals)" 3 | 236,Afghanistan 4 | 101,Albania 5 | 316,Algeria 6 | 102,Andorra 7 | 324,Angola 8 | 529,Anguilla 9 | 518,Antigua-Barbuda 10 | 687,Argentina 11 | 151,Armenia 12 | 532,Aruba 13 | 438,Australia 14 | 103,Austria 15 | 152,Azerbaijan 16 | 512,Bahamas 17 | 298,Bahrain 18 | 274,Bangladesh 19 | 513,Barbados 20 | 104,Belgium 21 | 581,Belize 22 | 386,Benin 23 | 509,Bermuda 24 | 153,Belarus 25 | 242,Bhutan 26 | 688,Bolivia 27 | 717,"Bonaire, St Eustatius, Saba" 28 | 164,Bosnia-Herzegovina 29 | 336,Botswana 30 | 689,Brazil 31 | 525,British Virgin Islands 32 | 217,Brunei 33 | 105,Bulgaria 34 | 393,Burkina Faso 35 | 243,Burma 36 | 375,Burundi 37 | 310,Cameroon 38 | 326,Cape Verde 39 | 526,Cayman Islands 40 | 383,Central African Republic 41 | 384,Chad 42 | 690,Chile 43 | 245,"China, Prc" 44 | 721,Curacao 45 | 270,Christmas Island 46 | 271,Cocos Islands 47 | 691,Colombia 48 | 317,Comoros 49 | 385,Congo 50 | 467,Cook Islands 51 | 575,Costa Rica 52 | 165,Croatia 53 | 584,Cuba 54 | 218,Cyprus 55 | 140,Czech Republic 56 | 723,Faroe Islands (Part Of Denmark) 57 | 108,Denmark 58 | 322,Djibouti 59 | 519,Dominica 60 | 585,Dominican Republic 61 | 240,East Timor 62 | 692,Ecuador 63 | 368,Egypt 64 | 576,El Salvador 65 | 399,Equatorial Guinea 66 | 372,Eritrea 67 | 109,Estonia 68 | 369,Ethiopia 69 | 604,Falkland Islands 70 | 413,Fiji 71 | 110,Finland 72 | 111,France 73 | 601,French Guiana 74 | 411,French Polynesia 75 | 387,Gabon 76 | 338,Gambia 77 | 758,Gaza Strip 78 | 154,Georgia 79 | 
112,Germany 80 | 339,Ghana 81 | 143,Gibraltar 82 | 113,Greece 83 | 520,Grenada 84 | 507,Guadeloupe 85 | 577,Guatemala 86 | 382,Guinea 87 | 327,Guinea-Bissau 88 | 603,Guyana 89 | 586,Haiti 90 | 726,Heard And Mcdonald Is. 91 | 149,Holy See/Vatican 92 | 528,Honduras 93 | 206,Hong Kong 94 | 114,Hungary 95 | 115,Iceland 96 | 213,India 97 | 759,Indian Ocean Areas (French) 98 | 729,Indian Ocean Territory 99 | 204,Indonesia 100 | 249,Iran 101 | 250,Iraq 102 | 116,Ireland 103 | 251,Israel 104 | 117,Italy 105 | 388,Ivory Coast 106 | 514,Jamaica 107 | 209,Japan 108 | 253,Jordan 109 | 201,Kampuchea 110 | 155,Kazakhstan 111 | 340,Kenya 112 | 414,Kiribati 113 | 732,Kosovo 114 | 272,Kuwait 115 | 156,Kyrgyzstan 116 | 203,Laos 117 | 118,Latvia 118 | 255,Lebanon 119 | 335,Lesotho 120 | 370,Liberia 121 | 381,Libya 122 | 119,Liechtenstein 123 | 120,Lithuania 124 | 121,Luxembourg 125 | 214,Macau 126 | 167,Macedonia 127 | 320,Madagascar 128 | 345,Malawi 129 | 273,Malaysia 130 | 220,Maldives 131 | 392,Mali 132 | 145,Malta 133 | 472,Marshall Islands 134 | 511,Martinique 135 | 389,Mauritania 136 | 342,Mauritius 137 | 760,Mayotte (Africa - French) 138 | 473,"Micronesia, Fed. 
States Of" 139 | 157,Moldova 140 | 122,Monaco 141 | 299,Mongolia 142 | 735,Montenegro 143 | 521,Montserrat 144 | 332,Morocco 145 | 329,Mozambique 146 | 371,Namibia 147 | 440,Nauru 148 | 257,Nepal 149 | 123,Netherlands 150 | 508,Netherlands Antilles 151 | 409,New Caledonia 152 | 464,New Zealand 153 | 579,Nicaragua 154 | 390,Niger 155 | 343,Nigeria 156 | 470,Niue 157 | 275,North Korea 158 | 124,Norway 159 | 256,Oman 160 | 258,Pakistan 161 | 474,Palau 162 | 743,Palestine 163 | 504,Panama 164 | 441,Papua New Guinea 165 | 693,Paraguay 166 | 694,Peru 167 | 260,Philippines 168 | 416,Pitcairn Islands 169 | 107,Poland 170 | 126,Portugal 171 | 297,Qatar 172 | 748,Republic Of South Sudan 173 | 321,Reunion 174 | 127,Romania 175 | 158,Russia 176 | 376,Rwanda 177 | 128,San Marino 178 | 330,Sao Tome And Principe 179 | 261,Saudi Arabia 180 | 391,Senegal 181 | 142,Serbia And Montenegro 182 | 745,Serbia 183 | 347,Seychelles 184 | 348,Sierra Leone 185 | 207,Singapore 186 | 141,Slovakia 187 | 166,Slovenia 188 | 412,Solomon Islands 189 | 397,Somalia 190 | 373,South Africa 191 | 276,South Korea 192 | 129,Spain 193 | 244,Sri Lanka 194 | 346,St. Helena 195 | 522,St. Kitts-Nevis 196 | 523,St. Lucia 197 | 502,St. Pierre And Miquelon 198 | 524,St. 
Vincent-Grenadines 199 | 716,Saint Barthelemy 200 | 736,Saint Martin 201 | 749,Saint Maarten 202 | 350,Sudan 203 | 602,Suriname 204 | 351,Swaziland 205 | 130,Sweden 206 | 131,Switzerland 207 | 262,Syria 208 | 268,Taiwan 209 | 159,Tajikistan 210 | 353,Tanzania 211 | 263,Thailand 212 | 304,Togo 213 | 417,Tonga 214 | 516,Trinidad And Tobago 215 | 323,Tunisia 216 | 264,Turkey 217 | 161,Turkmenistan 218 | 527,Turks And Caicos Islands 219 | 420,Tuvalu 220 | 352,Uganda 221 | 162,Ukraine 222 | 296,United Arab Emirates 223 | 135,United Kingdom 224 | 695,Uruguay 225 | 163,Uzbekistan 226 | 410,Vanuatu 227 | 696,Venezuela 228 | 266,Vietnam 229 | 469,Wallis And Futuna Islands 230 | 757,West Indies (French) 231 | 333,Western Sahara 232 | 465,Western Samoa 233 | 216,Yemen 234 | 139,Yugoslavia 235 | 301,Zaire 236 | 344,Zambia 237 | 315,Zimbabwe 238 | 403,Invalid: American Samoa 239 | 712,Invalid: Antarctica 240 | 700,Invalid: Born On Board Ship 241 | 719,Invalid: Bouvet Island (Antarctica/Norway Terr.) 242 | 574,Invalid: Canada 243 | 720,Invalid: Canton And Enderbury Isls 244 | 106,Invalid: Czechoslovakia 245 | 739,Invalid: Dronning Maud Land (Antarctica-Norway) 246 | 394,Invalid: French Southern And Antarctic 247 | 501,Invalid: Greenland 248 | 404,Invalid: Guam 249 | 730,Invalid: International Waters 250 | 731,Invalid: Johnson Island 251 | 471,"Invalid: Mariana Islands, Northern" 252 | 737,Invalid: Midway Islands 253 | 753,Invalid: Minor Outlying Islands - Usa 254 | 740,Invalid: Neutral Zone (S. Arabia/Iraq) 255 | 710,Invalid: Non-Quota Immigrant 256 | 505,Invalid: Puerto Rico 257 | 0,Invalid: Stateless 258 | 705,Invalid: Stateless 259 | 583,Invalid: United States 260 | 407,Invalid: United States 261 | 999,Invalid: Unknown 262 | 239,Invalid: Unknown Country 263 | 134,Invalid: Ussr 264 | 506,Invalid: U.S. 
Virgin Islands 265 | 755,Invalid: Wake Island 266 | 311,Collapsed Tanzania (Should Not Show) 267 | 741,Collapsed Curacao (Should Not Show) 268 | 54,No Country Code (54) 269 | 100,No Country Code (100) 270 | 187,No Country Code (187) 271 | 190,No Country Code (190) 272 | 200,No Country Code (200) 273 | 219,No Country Code (219) 274 | 238,No Country Code (238) 275 | 277,No Country Code (277) 276 | 293,No Country Code (293) 277 | 300,No Country Code (300) 278 | 319,No Country Code (319) 279 | 365,No Country Code (365) 280 | 395,No Country Code (395) 281 | 400,No Country Code (400) 282 | 485,No Country Code (485) 283 | 503,No Country Code (503) 284 | 589,No Country Code (589) 285 | 592,No Country Code (592) 286 | 791,No Country Code (791) 287 | 849,No Country Code (849) 288 | 914,No Country Code (914) 289 | 944,No Country Code (944) 290 | 996,No Country Code (996) 291 | -------------------------------------------------------------------------------- /airflow/dags/temp_output/i94mode.csv: -------------------------------------------------------------------------------- 1 | trans_code,trans_name 2 | 1,Air 3 | 2,Sea 4 | 3,Land 5 | 9,Not Reported 6 | -------------------------------------------------------------------------------- /airflow/dags/temp_output/i94port.csv: -------------------------------------------------------------------------------- 1 | port_code,port_name,port_city,port_state 2 | ALC,"Alcan, Ak",Alcan, AK 3 | ANC,"Anchorage, Ak",Anchorage, AK 4 | BAR,"Baker Aaf - Baker Island, Ak",Baker Aaf - Baker Island, AK 5 | DAC,"Daltons Cache, Ak",Daltons Cache, AK 6 | PIZ,"Dew Station Pt Lay Dew, Ak",Dew Station Pt Lay Dew, AK 7 | DTH,"Dutch Harbor, Ak",Dutch Harbor, AK 8 | EGL,"Eagle, Ak",Eagle, AK 9 | FRB,"Fairbanks, Ak",Fairbanks, AK 10 | HOM,"Homer, Ak",Homer, AK 11 | HYD,"Hyder, Ak",Hyder, AK 12 | JUN,"Juneau, Ak",Juneau, AK 13 | 5KE,"Ketchikan, Ak",Ketchikan, AK 14 | KET,"Ketchikan, Ak",Ketchikan, AK 15 | MOS,"Moses Point Intermediate, Ak",Moses Point 
Intermediate, AK 16 | NIK,"Nikiski, Ak",Nikiski, AK 17 | NOM,"Nom, Ak",Nom, AK 18 | PKC,"Poker Creek, Ak",Poker Creek, AK 19 | ORI,"Port Lions Spb, Ak",Port Lions Spb, AK 20 | SKA,"Skagway, Ak",Skagway, AK 21 | SNP,"St. Paul Island, Ak",St. Paul Island, AK 22 | TKI,"Tokeen, Ak",Tokeen, AK 23 | WRA,"Wrangell, Ak",Wrangell, AK 24 | HSV,"Madison County - Huntsville, Al",Madison County - Huntsville, AL 25 | MOB,"Mobile, Al",Mobile, AL 26 | LIA,"Little Rock, Ar (Bps)",Little Rock, AR (BPS) 27 | ROG,"Rogers Arpt, Ar",Rogers Arpt, AR 28 | DOU,"Douglas, Az",Douglas, AZ 29 | LUK,"Lukeville, Az",Lukeville, AZ 30 | MAP,Mariposa Az,Mariposa Az, 31 | NAC,"Naco, Az",Naco, AZ 32 | NOG,"Nogales, Az",Nogales, AZ 33 | PHO,"Phoenix, Az",Phoenix, AZ 34 | POR,"Portal, Az",Portal, AZ 35 | SLU,"San Luis, Az",San Luis, AZ 36 | SAS,"Sasabe, Az",Sasabe, AZ 37 | TUC,"Tucson, Az",Tucson, AZ 38 | YUI,"Yuma, Az",Yuma, AZ 39 | AND,"Andrade, Ca",Andrade, CA 40 | BUR,"Burbank, Ca",Burbank, CA 41 | CAL,"Calexico, Ca",Calexico, CA 42 | CAO,"Campo, Ca",Campo, CA 43 | FRE,"Fresno, Ca",Fresno, CA 44 | ICP,"Imperial County, Ca",Imperial County, CA 45 | LNB,"Long Beach, Ca",Long Beach, CA 46 | LOS,"Los Angeles, Ca",Los Angeles, CA 47 | BFL,"Meadows Field - Bakersfield, Ca",Meadows Field - Bakersfield, CA 48 | OAK,"Oakland, Ca",Oakland, CA 49 | ONT,"Ontario, Ca",Ontario, CA 50 | OTM,"Otay Mesa, Ca",Otay Mesa, CA 51 | BLT,"Pacific, Hwy. Station, Ca","Pacific, Hwy. 
Station", CA 52 | PSP,"Palm Springs, Ca",Palm Springs, CA 53 | SAC,"Sacramento, Ca",Sacramento, CA 54 | SLS,"Salinas, Ca (Bps)",Salinas, CA (BPS) 55 | SDP,"San Diego, Ca",San Diego, CA 56 | SFR,"San Francisco, Ca",San Francisco, CA 57 | SNJ,"San Jose, Ca",San Jose, CA 58 | SLO,"San Luis Obispo, Ca",San Luis Obispo, CA 59 | SLI,"San Luis Obispo, Ca (Bps)",San Luis Obispo, CA (BPS) 60 | SPC,"San Pedro, Ca",San Pedro, CA 61 | SYS,"San Ysidro, Ca",San Ysidro, CA 62 | SAA,"Santa Ana, Ca",Santa Ana, CA 63 | STO,"Stockton, Ca (Bps)",Stockton, CA (BPS) 64 | TEC,"Tecate, Ca",Tecate, CA 65 | TRV,"Travis-Afb, Ca",Travis-Afb, CA 66 | APA,"Arapahoe County, Co",Arapahoe County, CO 67 | ASE,"Aspen, Co #Arpt",Aspen, CO #ARPT 68 | COS,"Colorado Springs, Co",Colorado Springs, CO 69 | DEN,"Denver, Co",Denver, CO 70 | DRO,"La Plata - Durango, Co",La Plata - Durango, CO 71 | BDL,"Bradley International, Ct",Bradley International, CT 72 | BGC,"Bridgeport, Ct",Bridgeport, CT 73 | GRT,"Groton, Ct",Groton, CT 74 | HAR,"Hartford, Ct",Hartford, CT 75 | NWH,"New Haven, Ct",New Haven, CT 76 | NWL,"New London, Ct",New London, CT 77 | TST,"Newington Data Center Test, Ct",Newington Data Center Test, CT 78 | WAS,Washington Dc,Washington Dc, 79 | DOV,"Dover Afb, De",Dover Afb, DE 80 | DVD,"Dover-Afb, De",Dover-Afb, DE 81 | WLL,"Wilmington, De",Wilmington, DE 82 | BOC,"Bocagrande, Fl",Bocagrande, FL 83 | SRQ,"Bradenton - Sarasota, Fl",Bradenton - Sarasota, FL 84 | CAN,"Cape Canaveral, Fl",Cape Canaveral, FL 85 | DAB,"Daytona Beach International, Fl",Daytona Beach International, FL 86 | FRN,"Fernandina, Fl",Fernandina, FL 87 | FTL,"Fort Lauderdale, Fl",Fort Lauderdale, FL 88 | FMY,"Fort Myers, Fl",Fort Myers, FL 89 | FPF,"Fort Pierce, Fl",Fort Pierce, FL 90 | HUR,"Hurlburt Field, Fl",Hurlburt Field, FL 91 | GNV,"J R Alison Muni - Gainesville, Fl",J R Alison Muni - Gainesville, FL 92 | JAC,"Jacksonville, Fl",Jacksonville, FL 93 | KEY,"Key West, Fl",Key West, FL 94 | LEE,"Leesburg Municipal Airport, 
Fl",Leesburg Municipal Airport, FL 95 | MLB,"Melbourne, Fl",Melbourne, FL 96 | MIA,"Miami, Fl",Miami, FL 97 | APF,"Naples, Fl #Arpt",Naples, FL #ARPT 98 | OPF,"Opa Locka, Fl",Opa Locka, FL 99 | ORL,"Orlando, Fl",Orlando, FL 100 | PAN,"Panama City, Fl",Panama City, FL 101 | PEN,"Pensacola, Fl",Pensacola, FL 102 | PCF,"Port Canaveral, Fl",Port Canaveral, FL 103 | PEV,"Port Everglades, Fl",Port Everglades, FL 104 | PSJ,"Port St Joe, Fl",Port St Joe, FL 105 | SFB,"Sanford, Fl",Sanford, FL 106 | SGJ,"St Augustine Arpt, Fl",St Augustine Arpt, FL 107 | SAU,"St Augustine, Fl",St Augustine, FL 108 | FPR,"St Lucie County, Fl",St Lucie County, FL 109 | SPE,"St Petersburg, Fl",St Petersburg, FL 110 | TAM,"Tampa, Fl",Tampa, FL 111 | WPB,"West Palm Beach, Fl",West Palm Beach, FL 112 | ATL,"Atlanta, Ga",Atlanta, GA 113 | BRU,"Brunswick, Ga",Brunswick, GA 114 | AGS,"Bush Field - Augusta, Ga",Bush Field - Augusta, GA 115 | SAV,"Savannah, Ga",Savannah, GA 116 | AGA,"Agana, Gu",Agana, GU 117 | HHW,"Honolulu, Hi",Honolulu, HI 118 | OGG,"Kahului - Maui, Hi",Kahului - Maui, HI 119 | KOA,"Keahole-Kona, Hi",Keahole-Kona, HI 120 | LIH,"Lihue, Hi",Lihue, HI 121 | CID,"Cedar Rapids/Iowa City, Ia",Cedar Rapids/Iowa City, IA 122 | DSM,"Des Moines, Ia",Des Moines, IA 123 | BOI,"Air Term. (Gowen Fld) Boise, Id",Air Term. 
(Gowen Fld) Boise, ID 124 | EPI,"Eastport, Id",Eastport, ID 125 | IDA,"Fanning Field - Idaho Falls, Id",Fanning Field - Idaho Falls, ID 126 | PTL,"Porthill, Id",Porthill, ID 127 | SPI,"Capital - Springfield, Il",Capital - Springfield, IL 128 | CHI,"Chicago, Il",Chicago, IL 129 | DPA,"Dupage County, Il",Dupage County, IL 130 | PIA,"Greater Peoria, Il",Greater Peoria, IL 131 | RFD,"Greater Rockford, Il",Greater Rockford, IL 132 | UGN,"Memorial - Waukegan, Il",Memorial - Waukegan, IL 133 | GAR,"Gary, In",Gary, IN 134 | HMM,"Hammond, In",Hammond, IN 135 | INP,"Indianapolis, In",Indianapolis, IN 136 | MRL,"Merrillville, In",Merrillville, IN 137 | SBN,"South Bend, In",South Bend, IN 138 | ICT,"Mid-Continent - Witchita, Ks",Mid-Continent - Witchita, KS 139 | LEX,"Blue Grass - Lexington, Ky",Blue Grass - Lexington, KY 140 | LOU,"Louisville, Ky",Louisville, KY 141 | BTN,"Baton Rouge, La",Baton Rouge, LA 142 | LKC,"Lake Charles, La",Lake Charles, LA 143 | LAK,"Lake Charles, La (Bps)",Lake Charles, LA (BPS) 144 | MLU,"Monroe, La",Monroe, LA 145 | MGC,"Morgan City, La",Morgan City, LA 146 | NOL,"New Orleans, La",New Orleans, LA 147 | BOS,"Boston, Ma",Boston, MA 148 | GLO,"Gloucester, Ma",Gloucester, MA 149 | BED,"Hanscom Field - Bedford, Ma",Hanscom Field - Bedford, MA 150 | LYN,"Lynden, Wa",Lynden, WA 151 | ADW,"Andrews Afb, Md",Andrews Afb, MD 152 | BAL,"Baltimore, Md",Baltimore, MD 153 | MKG,"Muskegon, Md",Muskegon, MD 154 | PAX,"Patuxent River, Md",Patuxent River, MD 155 | BGM,"Bangor, Me",Bangor, ME 156 | BOO,"Boothbay Harbor, Me",Boothbay Harbor, ME 157 | BWM,"Bridgewater, Me",Bridgewater, ME 158 | BCK,"Buckport, Me",Buckport, ME 159 | CLS,"Calais, Me",Calais, ME 160 | CRB,"Caribou, Me",Caribou, ME 161 | COB,"Coburn Gore, Me",Coburn Gore, ME 162 | EST,"Eastcourt, Me",Eastcourt, ME 163 | EPT,"Eastport Municipal, Me",Eastport Municipal, ME 164 | EPM,"Eastport, Me",Eastport, ME 165 | FOR,"Forest City, Me",Forest City, ME 166 | FTF,"Fort Fairfield, Me",Fort Fairfield, ME 167 
| FTK,"Fort Kent, Me",Fort Kent, ME 168 | HML,"Hamiin, Me",Hamiin, ME 169 | HTM,"Houlton, Me",Houlton, ME 170 | JKM,"Jackman, Me",Jackman, ME 171 | KAL,"Kalispel, Mt",Kalispel, MT 172 | LIM,"Limestone, Me",Limestone, ME 173 | LUB,"Lubec, Me",Lubec, ME 174 | MAD,"Madawaska, Me",Madawaska, ME 175 | POM,"Portland, Me",Portland, ME 176 | RGM,"Rangeley, Me (Bps)",Rangeley, ME (BPS) 177 | SBR,"South Brewer, Me",South Brewer, ME 178 | SRL,"St Aurelie, Me",St Aurelie, ME 179 | SPA,"St Pampile, Me",St Pampile, ME 180 | VNB,"Van Buren, Me",Van Buren, ME 181 | VCB,"Vanceboro, Me",Vanceboro, ME 182 | AGN,"Algonac, Mi",Algonac, MI 183 | ALP,"Alpena, Mi",Alpena, MI 184 | BCY,"Bay City, Mi",Bay City, MI 185 | DET,"Detroit, Mi",Detroit, MI 186 | GRP,"Grand Rapids, Mi",Grand Rapids, MI 187 | GRO,"Grosse Isle, Mi",Grosse Isle, MI 188 | ISL,"Isle Royale, Mi",Isle Royale, MI 189 | MRC,"Marine City, Mi",Marine City, MI 190 | MRY,"Marysville, Mi",Marysville, MI 191 | PTK,"Oakland County - Pontiac, Mi",Oakland County - Pontiac, MI 192 | PHU,"Port Huron, Mi",Port Huron, MI 193 | RBT,"Roberts Landing, Mi",Roberts Landing, MI 194 | SAG,"Saginaw, Mi",Saginaw, MI 195 | SSM,"Sault Ste. Marie, Mi",Sault Ste. 
Marie, MI 196 | SCL,"St Clair, Mi",St Clair, MI 197 | YIP,"Willow Run - Ypsilanti, Mi",Willow Run - Ypsilanti, MI 198 | BAU,"Baudette, Mn",Baudette, MN 199 | CAR,"Caribou Municipal Airport, Mn",Caribou Municipal Airport, MN 200 | GTF,"Collapsed Into Int, Mn",Collapsed Into Int, MN 201 | INL,"Collapsed Into Int, Mn",Collapsed Into Int, MN 202 | CRA,"Crane Lake, Mn",Crane Lake, MN 203 | MIC,"Crystal Municipal Airport, Mn",Crystal Municipal Airport, MN 204 | DUL,"Duluth, Mn",Duluth, MN 205 | ELY,"Ely, Mn",Ely, MN 206 | GPM,"Grand Portage, Mn",Grand Portage, MN 207 | SVC,"Grant County - Silver City, Mn",Grant County - Silver City, MN 208 | INT,"Intl Falls, Mn",Intl Falls, MN 209 | LAN,"Lancaster, Mn",Lancaster, MN 210 | MSP,"Minn./St Paul, Mn",Minn./St Paul, MN 211 | LIN,"Northern Svc Center, Mn",Northern Svc Center, MN 212 | NOY,"Noyes, Mn",Noyes, MN 213 | PIN,"Pine Creek, Mn",Pine Creek, MN 214 | 48Y,"Pinecreek Border Arpt, Mn",Pinecreek Border Arpt, MN 215 | RAN,"Rainer, Mn",Rainer, MN 216 | RST,"Rochester, Mn",Rochester, MN 217 | ROS,"Roseau, Mn",Roseau, MN 218 | SPM,"St Paul, Mn",St Paul, MN 219 | WSB,"Warroad Intl, Spb, Mn","Warroad Intl, Spb", MN 220 | WAR,"Warroad, Mn",Warroad, MN 221 | KAN,"Kansas City, Mo",Kansas City, MO 222 | SGF,"Springfield-Branson, Mo",Springfield-Branson, MO 223 | STL,"St Louis, Mo",St Louis, MO 224 | WHI,"Whitetail, Mt",Whitetail, MT 225 | WHM,"Wild Horse, Mt",Wild Horse, MT 226 | GPT,"Biloxi Regional, Ms",Biloxi Regional, MS 227 | GTR,"Golden Triangle Lowndes Cnty, Ms",Golden Triangle Lowndes Cnty, MS 228 | GUL,"Gulfport, Ms",Gulfport, MS 229 | PAS,"Pascagoula, Ms",Pascagoula, MS 230 | JAN,"Thompson Field - Jackson, Ms",Thompson Field - Jackson, MS 231 | BIL,"Billings, Mt",Billings, MT 232 | BTM,"Butte, Mt",Butte, MT 233 | CHF,"Chief Mt, Mt",Chief Mt, MT 234 | CTB,"Cut Bank Municipal, Mt",Cut Bank Municipal, MT 235 | CUT,"Cut Bank, Mt",Cut Bank, MT 236 | DLB,"Del Bonita, Mt",Del Bonita, MT 237 | EUR,"Eureka, Mt (Bps)",Eureka, MT (BPS) 
238 | BZN,"Gallatin Field - Bozeman, Mt",Gallatin Field - Bozeman, MT 239 | FCA,"Glacier National Park, Mt",Glacier National Park, MT 240 | GGW,"Glasgow, Mt",Glasgow, MT 241 | GRE,"Great Falls, Mt",Great Falls, MT 242 | HVR,"Havre, Mt",Havre, MT 243 | HEL,"Helena, Mt",Helena, MT 244 | LWT,"Lewiston, Mt",Lewiston, MT 245 | MGM,"Morgan, Mt",Morgan, MT 246 | OPH,"Opheim, Mt",Opheim, MT 247 | PIE,"Piegan, Mt",Piegan, MT 248 | RAY,"Raymond, Mt",Raymond, MT 249 | ROO,"Roosville, Mt",Roosville, MT 250 | SCO,"Scobey, Mt",Scobey, MT 251 | SWE,"Sweetgtass, Mt",Sweetgtass, MT 252 | TRL,"Trial Creek, Mt",Trial Creek, MT 253 | TUR,"Turner, Mt",Turner, MT 254 | WCM,"Willow Creek, Mt",Willow Creek, MT 255 | CLT,"Charlotte, Nc",Charlotte, NC 256 | FAY,"Fayetteville, Nc",Fayetteville, NC 257 | MRH,"Morehead City, Nc",Morehead City, NC 258 | FOP,"Morris Fields Aaf, Nc",Morris Fields Aaf, NC 259 | GSO,"Piedmont Triad Intl Airport, Nc",Piedmont Triad Intl Airport, NC 260 | RDU,"Raleigh/Durham, Nc",Raleigh/Durham, NC 261 | SSC,"Shaw Afb - Sumter, Nc",Shaw Afb - Sumter, NC 262 | WIL,"Wilmington, Nc",Wilmington, NC 263 | AMB,"Ambrose, Nd",Ambrose, ND 264 | ANT,"Antler, Nd",Antler, ND 265 | CRY,"Carbury, Nd",Carbury, ND 266 | DNS,"Dunseith, Nd",Dunseith, ND 267 | FAR,"Fargo, Nd",Fargo, ND 268 | FRT,"Fortuna, Nd",Fortuna, ND 269 | GRF,"Grand Forks, Nd",Grand Forks, ND 270 | HNN,"Hannah, Nd",Hannah, ND 271 | HNS,"Hansboro, Nd",Hansboro, ND 272 | MAI,"Maida, Nd",Maida, ND 273 | MND,"Minot, Nd",Minot, ND 274 | NEC,"Neche, Nd",Neche, ND 275 | NOO,"Noonan, Nd",Noonan, ND 276 | NRG,"Northgate, Nd",Northgate, ND 277 | PEM,"Pembina, Nd",Pembina, ND 278 | SAR,"Sarles, Nd",Sarles, ND 279 | SHR,"Sherwood, Nd",Sherwood, ND 280 | SJO,"St John, Nd",St John, ND 281 | WAL,"Walhalla, Nd",Walhalla, ND 282 | WHO,"Westhope, Nd",Westhope, ND 283 | WND,"Williston, Nd",Williston, ND 284 | OMA,"Omaha, Ne",Omaha, NE 285 | LEB,"Lebanon, Nh",Lebanon, NH 286 | MHT,"Manchester, Nh",Manchester, NH 287 | PNH,"Pittsburg, 
Nh",Pittsburg, NH 288 | PSM,"Portsmouth, Nh",Portsmouth, NH 289 | BYO,"Bayonne, Nj",Bayonne, NJ 290 | CNJ,"Camden, Nj",Camden, NJ 291 | HOB,"Hoboken, Nj",Hoboken, NJ 292 | JER,"Jersey City, Nj",Jersey City, NJ 293 | WRI,"Mc Guire Afb - Wrightsown, Nj",Mc Guire Afb - Wrightsown, NJ 294 | MMU,"Morristown, Nj",Morristown, NJ 295 | NEW,"Newark/Teterboro, Nj",Newark/Teterboro, NJ 296 | PER,"Perth Amboy, Nj",Perth Amboy, NJ 297 | ACY,"Pomona Field - Atlantic City, Nj",Pomona Field - Atlantic City, NJ 298 | ALA,"Alamagordo, Nm (Bps)",Alamagordo, NM (BPS) 299 | ABQ,"Albuquerque, Nm",Albuquerque, NM 300 | ANP,"Antelope Wells, Nm",Antelope Wells, NM 301 | CRL,"Carlsbad, Nm",Carlsbad, NM 302 | COL,"Columbus, Nm",Columbus, NM 303 | CDD,"Crane Lake - St. Louis Cnty, Nm",Crane Lake - St. Louis Cnty, NM 304 | DNM,"Deming, Nm (Bps)",Deming, NM (BPS) 305 | LAS,"Las Cruces, Nm",Las Cruces, NM 306 | LOB,"Lordsburg, Nm (Bps)",Lordsburg, NM (BPS) 307 | RUI,"Ruidoso, Nm",Ruidoso, NM 308 | STR,"Santa Teresa, Nm",Santa Teresa, NM 309 | RNO,"Cannon Intl - Reno/Tahoe, Nv",Cannon Intl - Reno/Tahoe, NV 310 | FLX,"Fallon Municipal Airport, Nv",Fallon Municipal Airport, NV 311 | LVG,"Las Vegas, Nv",Las Vegas, NV 312 | REN,"Reno, Nv",Reno, NV 313 | ALB,"Albany, Ny",Albany, NY 314 | AXB,"Alexandria Bay, Ny",Alexandria Bay, NY 315 | BUF,"Buffalo, Ny",Buffalo, NY 316 | CNH,"Cannon Corners, Ny",Cannon Corners, NY 317 | CAP,"Cape Vincent, Ny",Cape Vincent, NY 318 | CHM,"Champlain, Ny",Champlain, NY 319 | CHT,"Chateaugay, Ny",Chateaugay, NY 320 | CLA,"Clayton, Ny",Clayton, NY 321 | FTC,"Fort Covington, Ny",Fort Covington, NY 322 | LAG,"La Guardia, Ny",La Guardia, NY 323 | LEW,"Lewiston, Ny",Lewiston, NY 324 | MAS,"Massena, Ny",Massena, NY 325 | MAG,"Mcguire Afb, Ny",Mcguire Afb, NY 326 | MOO,"Moores, Ny",Moores, NY 327 | MRR,"Morristown, Ny",Morristown, NY 328 | NYC,"New York, Ny",New York, NY 329 | NIA,"Niagara Falls, Ny",Niagara Falls, NY 330 | OGD,"Ogdensburg, Ny",Ogdensburg, NY 331 | OSW,"Oswego, 
Ny",Oswego, NY 332 | ELM,"Regional Arpt - Horsehead, Ny",Regional Arpt - Horsehead, NY 333 | ROC,"Rochester, Ny",Rochester, NY 334 | ROU,"Rouses Point, Ny",Rouses Point, NY 335 | SWF,"Stewart - Orange Cnty, Ny",Stewart - Orange Cnty, NY 336 | SYR,"Syracuse, Ny",Syracuse, NY 337 | THO,"Thousand Island Bridge, Ny",Thousand Island Bridge, NY 338 | TRO,"Trout River, Ny",Trout River, NY 339 | WAT,"Watertown, Ny",Watertown, NY 340 | HPN,"Westchester - White Plains, Ny",Westchester - White Plains, NY 341 | WRB,"Whirlpool Bridge, Ny",Whirlpool Bridge, NY 342 | YOU,"Youngstown, Ny",Youngstown, NY 343 | AKR,"Akron, Oh",Akron, OH 344 | ATB,"Ashtabula, Oh",Ashtabula, OH 345 | CIN,"Cincinnati, Oh",Cincinnati, OH 346 | CLE,"Cleveland, Oh",Cleveland, OH 347 | CLM,"Columbus, Oh",Columbus, OH 348 | LOR,"Lorain, Oh",Lorain, OH 349 | MBO,"Marble Heads, Oh",Marble Heads, OH 350 | SDY,"Sandusky, Oh",Sandusky, OH 351 | TOL,"Toledo, Oh",Toledo, OH 352 | OKC,"Oklahoma City, Ok",Oklahoma City, OK 353 | TUL,"Tulsa, Ok",Tulsa, OK 354 | AST,"Astoria, Or",Astoria, OR 355 | COO,"Coos Bay, Or",Coos Bay, OR 356 | HIO,"Hillsboro, Or",Hillsboro, OR 357 | MED,"Medford, Or",Medford, OR 358 | NPT,"Newport, Or",Newport, OR 359 | POO,"Portland, Or",Portland, OR 360 | PUT,"Put-In-Bay, Oh",Put-In-Bay, OH 361 | RDM,"Roberts Fields - Redmond, Or",Roberts Fields - Redmond, OR 362 | ERI,"Erie, Pa",Erie, PA 363 | MDT,"Harrisburg, Pa",Harrisburg, PA 364 | HSB,"Harrisonburg, Pa",Harrisonburg, PA 365 | PHI,"Philadelphia, Pa",Philadelphia, PA 366 | PIT,"Pittsburg, Pa",Pittsburg, PA 367 | AGU,"Aguadilla, Pr",Aguadilla, PR 368 | BQN,"Borinquen - Aguadillo, Pr",Borinquen - Aguadillo, PR 369 | JCP,"Culebra - Benjamin Rivera, Pr",Culebra - Benjamin Rivera, PR 370 | ENS,"Ensenada, Pr",Ensenada, PR 371 | FAJ,"Fajardo, Pr",Fajardo, PR 372 | HUM,"Humacao, Pr",Humacao, PR 373 | JOB,"Jobos, Pr",Jobos, PR 374 | MAY,"Mayaguez, Pr",Mayaguez, PR 375 | PON,"Ponce, Pr",Ponce, PR 376 | PSE,"Ponce-Mercedita, Pr",Ponce-Mercedita, PR 
377 | SAJ,"San Juan, Pr",San Juan, PR 378 | VQS,"Vieques-Arpt, Pr",Vieques-Arpt, PR 379 | PRO,"Providence, Ri",Providence, RI 380 | PVD,"Theodore Francis - Warwick, Ri",Theodore Francis - Warwick, RI 381 | CHL,"Charleston, Sc",Charleston, SC 382 | CAE,"Columbia, Sc #Arpt",Columbia, SC #ARPT 383 | GEO,"Georgetown, Sc",Georgetown, SC 384 | GSP,"Greenville, Sc",Greenville, SC 385 | GRR,"Greer, Sc",Greer, SC 386 | MYR,"Myrtle Beach, Sc",Myrtle Beach, SC 387 | SPF,"Black Hills, Spearfish, Sd","Black Hills, Spearfish", SD 388 | HON,"Howes Regional Arpt - Huron, Sd",Howes Regional Arpt - Huron, SD 389 | SAI,"Saipan, Spn",Saipan, SPN 390 | TYS,"Mc Ghee Tyson - Alcoa, Tn",Mc Ghee Tyson - Alcoa, TN 391 | MEM,"Memphis, Tn",Memphis, TN 392 | NSV,"Nashville, Tn",Nashville, TN 393 | TRI,"Tri City Arpt, Tn",Tri City Arpt, TN 394 | ADS,"Addison Airport- Addison, Tx",Addison Airport- Addison, TX 395 | ADT,"Amistad Dam, Tx",Amistad Dam, TX 396 | ANZ,"Anzalduas, Tx",Anzalduas, TX 397 | AUS,"Austin, Tx",Austin, TX 398 | BEA,"Beaumont, Tx",Beaumont, TX 399 | BBP,"Big Bend Park, Tx (Bps)",Big Bend Park, TX (BPS) 400 | SCC,"Bp Spec Coord. Ctr, Tx",Bp Spec Coord. 
Ctr, TX 401 | BTC,"Bp Tactical Unit, Tx",Bp Tactical Unit, TX 402 | BOA,"Bridge Of Americas, Tx",Bridge Of Americas, TX 403 | BRO,"Brownsville, Tx",Brownsville, TX 404 | CRP,"Corpus Christi, Tx",Corpus Christi, TX 405 | DAL,"Dallas, Tx",Dallas, TX 406 | DLR,"Del Rio, Tx",Del Rio, TX 407 | DNA,"Donna, Tx",Donna, TX 408 | EGP,"Eagle Pass, Tx",Eagle Pass, TX 409 | ELP,"El Paso, Tx",El Paso, TX 410 | FAB,"Fabens, Tx",Fabens, TX 411 | FAL,"Falcon Heights, Tx",Falcon Heights, TX 412 | FTH,"Fort Hancock, Tx",Fort Hancock, TX 413 | AFW,"Fort Worth Alliance, Tx",Fort Worth Alliance, TX 414 | FPT,"Freeport, Tx",Freeport, TX 415 | GAL,"Galveston, Tx",Galveston, TX 416 | HLG,"Harlingen, Tx",Harlingen, TX 417 | HID,"Hidalgo, Tx",Hidalgo, TX 418 | HOU,"Houston, Tx",Houston, TX 419 | SGR,"Hull Field, Sugar Land Arpt, Tx","Hull Field, Sugar Land Arpt", TX 420 | LLB,"Juarez-Lincoln Bridge, Tx",Juarez-Lincoln Bridge, TX 421 | LCB,"Laredo Columbia Bridge, Tx",Laredo Columbia Bridge, TX 422 | LRN,"Laredo North, Tx",Laredo North, TX 423 | LAR,"Laredo, Tx",Laredo, TX 424 | LSE,"Los Ebanos, Tx",Los Ebanos, TX 425 | IND,"Los Indios, Tx",Los Indios, TX 426 | LOI,"Los Indios, Tx",Los Indios, TX 427 | MRS,"Marfa, Tx (Bps)",Marfa, TX (BPS) 428 | MCA,"Mcallen, Tx",Mcallen, TX 429 | MAF,"Odessa Regional, Tx",Odessa Regional, TX 430 | PDN,"Paso Del Norte,Tx",Paso Del Norte,TX 431 | PBB,"Peace Bridge, Ny",Peace Bridge, NY 432 | PHR,"Pharr, Tx",Pharr, TX 433 | PAR,"Port Arthur, Tx",Port Arthur, TX 434 | ISB,"Port Isabel, Tx",Port Isabel, TX 435 | POE,"Port Of El Paso, Tx",Port Of El Paso, TX 436 | PRE,"Presidio, Tx",Presidio, TX 437 | PGR,"Progreso, Tx",Progreso, TX 438 | RIO,"Rio Grande City, Tx",Rio Grande City, TX 439 | ROM,"Roma, Tx",Roma, TX 440 | SNA,"San Antonio, Tx",San Antonio, TX 441 | SNN,"Sanderson, Tx",Sanderson, TX 442 | VIB,"Veteran Intl Bridge, Tx",Veteran Intl Bridge, TX 443 | YSL,"Ysleta, Tx",Ysleta, TX 444 | CHA,"Charlotte Amalie, Vi",Charlotte Amalie, VI 445 | 
CHR,"Christiansted, Vi",Christiansted, VI 446 | CRU,"Cruz Bay, St John, Vi","Cruz Bay, St John", VI 447 | FRK,"Frederiksted, Vi",Frederiksted, VI 448 | STT,"St Thomas, Vi",St Thomas, VI 449 | LGU,"Cache Airport - Logan, Ut",Cache Airport - Logan, UT 450 | SLC,"Salt Lake City, Ut",Salt Lake City, UT 451 | CHO,"Albemarle Charlottesville, Va",Albemarle Charlottesville, VA 452 | DAA,"Davison Aaf - Fairfax Cnty, Va",Davison Aaf - Fairfax Cnty, VA 453 | HOP,"Hopewell, Va",Hopewell, VA 454 | HEF,"Manassas, Va #Arpt",Manassas, VA #ARPT 455 | NWN,"Newport, Va",Newport, VA 456 | NOR,"Norfolk, Va",Norfolk, VA 457 | RCM,"Richmond, Va",Richmond, VA 458 | ABS,"Alburg Springs, Vt",Alburg Springs, VT 459 | ABG,"Alburg, Vt",Alburg, VT 460 | BEB,"Beebe Plain, Vt",Beebe Plain, VT 461 | BEE,"Beecher Falls, Vt",Beecher Falls, VT 462 | BRG,"Burlington, Vt",Burlington, VT 463 | CNA,"Canaan, Vt",Canaan, VT 464 | DER,"Derby Line, Vt (I-91)",Derby Line, VT (I-91) 465 | DLV,"Derby Line, Vt (Rt. 5)",Derby Line, VT (RT. 
5) 466 | ERC,"East Richford, Vt",East Richford, VT 467 | HIG,"Highgate Springs, Vt",Highgate Springs, VT 468 | MOR,"Morses Line, Vt",Morses Line, VT 469 | NPV,"Newport, Vt",Newport, VT 470 | NRT,"North Troy, Vt",North Troy, VT 471 | NRN,"Norton, Vt",Norton, VT 472 | PIV,"Pinnacle Road, Vt",Pinnacle Road, VT 473 | RIF,"Richfort, Vt",Richfort, VT 474 | STA,"St Albans, Vt",St Albans, VT 475 | SWB,"Swanton, Vt (Bp - Sector Hq)",Swanton, VT (BP - SECTOR HQ) 476 | WBE,"West Berkshire, Vt",West Berkshire, VT 477 | ABE,"Aberdeen, Wa",Aberdeen, WA 478 | ANA,"Anacortes, Wa",Anacortes, WA 479 | BEL,"Bellingham, Wa",Bellingham, WA 480 | BLI,"Bellingham, Washington #Intl",Bellingham, WASHINGTON #INTL 481 | BLA,"Blaine, Wa",Blaine, WA 482 | BWA,"Boundary, Wa",Boundary, WA 483 | CUR,"Curlew, Wa (Bps)",Curlew, WA (BPS) 484 | DVL,"Danville, Wa",Danville, WA 485 | EVE,"Everett, Wa",Everett, WA 486 | FER,"Ferry, Wa",Ferry, WA 487 | FRI,"Friday Harbor, Wa",Friday Harbor, WA 488 | FWA,"Frontier, Wa",Frontier, WA 489 | KLM,"Kalama, Wa",Kalama, WA 490 | LAU,"Laurier, Wa",Laurier, WA 491 | LON,"Longview, Wa",Longview, WA 492 | MET,"Metaline Falls, Wa",Metaline Falls, WA 493 | MWH,"Moses Lake Grant County Arpt, Wa",Moses Lake Grant County Arpt, WA 494 | NEA,"Neah Bay, Wa",Neah Bay, WA 495 | NIG,"Nighthawk, Wa",Nighthawk, WA 496 | OLY,"Olympia, Wa",Olympia, WA 497 | ORO,"Oroville, Wa",Oroville, WA 498 | PWB,"Pasco, Wa",Pasco, WA 499 | PIR,"Point Roberts, Wa",Point Roberts, WA 500 | PNG,"Port Angeles, Wa",Port Angeles, WA 501 | PTO,"Port Townsend, Wa",Port Townsend, WA 502 | SEA,"Seattle, Wa",Seattle, WA 503 | SPO,"Spokane, Wa",Spokane, WA 504 | SUM,"Sumas, Wa",Sumas, WA 505 | TAC,"Tacoma, Wa",Tacoma, WA 506 | PSC,"Tri-Cities - Pasco, Wa",Tri-Cities - Pasco, WA 507 | VAN,"Vancouver, Wa",Vancouver, WA 508 | AGM,"Algoma, Wi",Algoma, WI 509 | BAY,"Bayfield, Wi",Bayfield, WI 510 | GRB,"Green Bay, Wi",Green Bay, WI 511 | MNW,"Manitowoc, Wi",Manitowoc, WI 512 | MIL,"Milwaukee, Wi",Milwaukee, WI 
513 | MSN,"Truax Field - Dane County, Wi",Truax Field - Dane County, WI 514 | CHS,"Charleston, Wv",Charleston, WV 515 | CLK,"Clarksburg, Wv",Clarksburg, WV 516 | BLF,"Mercer County, Wv",Mercer County, WV 517 | CSP,"Casper, Wy",Casper, WY 518 | XXX,Not Reported/Unknown,Not Reported/Unknown, 519 | 888,Unidentifed Air / Seaport,Unidentifed Air / Seaport, 520 | UNK,Unknown Poe,Unknown Poe, 521 | CLG,"Calgary, Canada",Calgary, CANADA 522 | EDA,"Edmonton, Canada",Edmonton, CANADA 523 | YHC,"Hakai Pass, Canada",Hakai Pass, CANADA 524 | HAL,"Halifax, Ns, Canada","Halifax, Ns", CANADA 525 | MON,"Montreal, Canada",Montreal, CANADA 526 | OTT,"Ottawa, Canada",Ottawa, CANADA 527 | YXE,"Saskatoon, Canada",Saskatoon, CANADA 528 | TOR,"Toronto, Canada",Toronto, CANADA 529 | VCV,"Vancouver, Canada",Vancouver, CANADA 530 | VIC,"Victoria, Canada",Victoria, CANADA 531 | WIN,"Winnipeg, Canada",Winnipeg, CANADA 532 | AMS,"Amsterdam-Schiphol, Netherlands",Amsterdam-Schiphol, NETHERLANDS 533 | ARB,"Aruba, Neth Antilles",Aruba, NETH ANTILLES 534 | BAN,"Bankok, Thailand",Bankok, THAILAND 535 | BEI,"Beica #Arpt, Ethiopia",Beica #Arpt, ETHIOPIA 536 | PEK,"Beijing Capital Intl, Prc",Beijing Capital Intl, PRC 537 | BDA,"Kindley Field, Bermuda",Kindley Field, BERMUDA 538 | BOG,"Bogota, El Dorado #Arpt, Colombia","Bogota, El Dorado #Arpt", COLOMBIA 539 | EZE,"Buenos Aires, Ministro Pist, Argentina","Buenos Aires, Ministro Pist", ARGENTINA 540 | CUN,"Cancun, Mexico",Cancun, MEXICO 541 | CRQ,"Caravelas, Ba #Arpt, Brazil","Caravelas, Ba #Arpt", BRAZIL 542 | MVD,"Carrasco, Uruguay",Carrasco, URUGUAY 543 | DUB,"Dublin, Ireland",Dublin, IRELAND 544 | FOU,"Fougamou #Arpt, Gabon",Fougamou #Arpt, GABON 545 | FBA,"Freeport, Bahamas",Freeport, BAHAMAS 546 | MTY,"Gen M. Escobedo, Monterrey, Mx","Gen M. 
Escobedo, Monterrey", MX 547 | HMO,"Gen Pesqueira Garcia, Mx",Gen Pesqueira Garcia, MX 548 | GCM,"Grand Cayman, Cayman Island",Grand Cayman, CAYMAN ISLAND 549 | GDL,"Guadalajara, Miguel Hidal, Mx","Guadalajara, Miguel Hidal", MX 550 | HAM,"Hamilton, Bermuda",Hamilton, BERMUDA 551 | ICN,"Inchon, Seoul Korea",Inchon, SEOUL KOREA 552 | IWA,"Invalid - Iwakuni, Japan",Invalid - Iwakuni, JAPAN 553 | CND,"Kogalniceanu, Romania",Kogalniceanu, ROMANIA 554 | LAH,"Labuha Arpt, Indonesia",Labuha Arpt, INDONESIA 555 | DUR,"Louis Botha, South Africa",Louis Botha, SOUTH AFRICA 556 | MAL,"Mangole Arpt, Indonesia",Mangole Arpt, INDONESIA 557 | MDE,"Medellin, Colombia",Medellin, COLOMBIA 558 | MEX,"Juarez Intl, Mexico City, Mx","Juarez Intl, Mexico City", MX 559 | LHR,"Middlesex, England",Middlesex, ENGLAND 560 | NBO,"Nairobi, Kenya",Nairobi, KENYA 561 | NAS,"Nassau, Bahamas",Nassau, BAHAMAS 562 | NCA,"North Caicos, Turk & Caiman",North Caicos, TURK & CAIMAN 563 | PTY,"Omar Torrijos, Panama",Omar Torrijos, PANAMA 564 | SPV,"Papua, New Guinea",Papua, NEW GUINEA 565 | UIO,"Quito (Mariscal Sucr), Ecuador",Quito (Mariscal Sucr), ECUADOR 566 | RIT,"Rome, Italy",Rome, ITALY 567 | SNO,"Sakon Nakhon #Arpt, Thailand",Sakon Nakhon #Arpt, THAILAND 568 | SLP,"San Luis Potosi #Arpt, Mexico",San Luis Potosi #Arpt, MEXICO 569 | SAN,"San Salvador, El Salvador",San Salvador, EL SALVADOR 570 | SRO,"Santana Ramos #Arpt, Colombia",Santana Ramos #Arpt, COLOMBIA 571 | GRU,"Guarulhos Intl, Sao Paulo, Brazil","Guarulhos Intl, Sao Paulo", BRAZIL 572 | SHA,"Shannon, Ireland",Shannon, IRELAND 573 | HIL,"Shillavo, Ethiopia",Shillavo, ETHIOPIA 574 | TOK,"Torokina #Arpt, Papua, New Guinea","Torokina #Arpt, Papua", NEW GUINEA 575 | VER,"Veracruz, Mexico",Veracruz, MEXICO 576 | LGW,"West Sussex, England",West Sussex, ENGLAND 577 | ZZZ,Mexico Land (Banco De Mexico),Mexico Land (Banco De Mexico), 578 | CHN,No Port Code (Chn),No Port Code (Chn), 579 | CNC,"Cannon Corners, Ny",Cannon Corners, NY 580 | MAA,Abu 
Dhabi,Abu Dhabi, 581 | AG0,"Magnolia, Ar",Magnolia, AR 582 | BHM,"Bar Harbor, Me",Bar Harbor, ME 583 | BHX,"Birmingham, Al",Birmingham, AL 584 | CAK,"Akron, Oh",Akron, OH 585 | FOK,"Suffolk County, Ny",Suffolk County, NY 586 | LND,"Lander, Wy",Lander, WY 587 | MAR,"Marfa, Tx",Marfa, TX 588 | MLI,"Moline, Il",Moline, IL 589 | RIV,"Riverside, Ca",Riverside, CA 590 | RME,"Rome, Ny",Rome, NY 591 | VNY,"Van Nuys, Ca",Van Nuys, CA 592 | YUM,"Yuma, Az",Yuma, AZ 593 | FRG,Collapsed (Fok) 06/15,Collapsed (Fok) 06/15, 594 | HRL,Collapsed (Hlg) 06/15,Collapsed (Hlg) 06/15, 595 | ISP,Collapsed (Fok) 06/15,Collapsed (Fok) 06/15, 596 | JSJ,Collapsed (Saj) 06/15,Collapsed (Saj) 06/15, 597 | BUS,Collapsed (Buf) 06/15,Collapsed (Buf) 06/15, 598 | IAG,Collapsed (Nia) 06/15,Collapsed (Nia) 06/15, 599 | PHN,Collapsed (Phu) 06/15,Collapsed (Phu) 06/15, 600 | STN,Collapsed (Str) 06/15,Collapsed (Str) 06/15, 601 | VMB,Collapsed (Vnb) 06/15,Collapsed (Vnb) 06/15, 602 | T01,Collapsed (Sea) 06/15,Collapsed (Sea) 06/15, 603 | PHF,No Port Code (Phf),No Port Code (Phf), 604 | DRV,No Port Code (Drv),No Port Code (Drv), 605 | FTB,No Port Code (Ftb),No Port Code (Ftb), 606 | GAC,No Port Code (Gac),No Port Code (Gac), 607 | GMT,No Port Code (Gmt),No Port Code (Gmt), 608 | JFA,No Port Code (Jfa),No Port Code (Jfa), 609 | JMZ,No Port Code (Jmz),No Port Code (Jmz), 610 | NC8,No Port Code (Nc8),No Port Code (Nc8), 611 | NYL,No Port Code (Nyl),No Port Code (Nyl), 612 | OAI,No Port Code (Oai),No Port Code (Oai), 613 | PCW,No Port Code (Pcw),No Port Code (Pcw), 614 | WA5,No Port Code (Was),No Port Code (Was), 615 | WTR,No Port Code (Wtr),No Port Code (Wtr), 616 | X96,No Port Code (X96),No Port Code (X96), 617 | XNA,No Port Code (Xna),No Port Code (Xna), 618 | YGF,No Port Code (Ygf),No Port Code (Ygf), 619 | 5T6,No Port Code (5T6),No Port Code (5T6), 620 | 060,No Port Code (60),No Port Code (60), 621 | SP0,No Port Code (Sp0),No Port Code (Sp0), 622 | W55,No Port Code (W55),No Port Code (W55), 623 | X44,No 
Port Code (X44),No Port Code (X44), 624 | AUH,No Port Code (Auh),No Port Code (Auh), 625 | RYY,No Port Code (Ryy),No Port Code (Ryy), 626 | SUS,No Port Code (Sus),No Port Code (Sus), 627 | 74S,No Port Code (74S),No Port Code (74S), 628 | ATW,No Port Code (Atw),No Port Code (Atw), 629 | CPX,No Port Code (Cpx),No Port Code (Cpx), 630 | MTH,No Port Code (Mth),No Port Code (Mth), 631 | PFN,No Port Code (Pfn),No Port Code (Pfn), 632 | SCH,No Port Code (Sch),No Port Code (Sch), 633 | ASI,No Port Code (Asi),No Port Code (Asi), 634 | BKF,No Port Code (Bkf),No Port Code (Bkf), 635 | DAY,No Port Code (Day),No Port Code (Day), 636 | Y62,No Port Code (Y62),No Port Code (Y62), 637 | AG,No Port Code (Ag),No Port Code (Ag), 638 | BCM,No Port Code (Bcm),No Port Code (Bcm), 639 | DEC,No Port Code (Dec),No Port Code (Dec), 640 | PLB,No Port Code (Plb),No Port Code (Plb), 641 | CXO,No Port Code (Cxo),No Port Code (Cxo), 642 | JBQ,No Port Code (Jbq),No Port Code (Jbq), 643 | JIG,No Port Code (Jig),No Port Code (Jig), 644 | OGS,No Port Code (Ogs),No Port Code (Ogs), 645 | TIW,No Port Code (Tiw),No Port Code (Tiw), 646 | OTS,No Port Code (Ots),No Port Code (Ots), 647 | AMT,No Port Code (Amt),No Port Code (Amt), 648 | EGE,No Port Code (Ege),No Port Code (Ege), 649 | GPI,No Port Code (Gpi),No Port Code (Gpi), 650 | NGL,No Port Code (Ngl),No Port Code (Ngl), 651 | OLM,No Port Code (Olm),No Port Code (Olm), 652 | .GA,No Port Code (.Ga),No Port Code (.Ga), 653 | CLX,No Port Code (Clx),No Port Code (Clx), 654 | CP,No Port Code (Cp),No Port Code (Cp), 655 | FSC,No Port Code (Fsc),No Port Code (Fsc), 656 | NK,No Port Code (Nk),No Port Code (Nk), 657 | ADU,No Port Code (Adu),No Port Code (Adu), 658 | AKT,No Port Code (Akt),No Port Code (Akt), 659 | LIT,No Port Code (Lit),No Port Code (Lit), 660 | A2A,No Port Code (A2A),No Port Code (A2A), 661 | OSN,No Port Code (Osn),No Port Code (Osn), 662 | -------------------------------------------------------------------------------- 
"""Airflow DAG for the Data Engineering Capstone project.

Pipeline order (see the dependency wiring at the bottom of the file):

1. Convert the SAS label descriptor to CSV lookup files and convert the
   sas7bdat immigration files to parquet (two parallel branches).
2. Upload both outputs to S3.
3. Create the Redshift schema, drop and re-create the tables.
4. Load the tables through the ``load_dimensions`` sub-DAG.
5. Run the data-quality checks and remove the local parquet output.
"""
# generic
from datetime import datetime, timedelta
import os
import shutil
import logging
# airflow
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.postgres_operator import PostgresOperator
from airflow.operators.python_operator import PythonOperator
from airflow.operators import (SASToCSVOperator, TransferToS3Operator,
                               SAS7ToParquet, StageToRedshiftOperator,
                               DataQualityOperator)
from subdags.subdag_for_dimensions import load_dimension_subdag
from airflow.models import Variable
from helpers import SqlQueries
from airflow.operators.subdag_operator import SubDagOperator


# Task-level defaults handed to every operator of the DAG.
# NOTE: ``catchup`` is intentionally NOT listed here. It is an argument of
# the DAG itself, not of the tasks, so placing it in ``default_args`` has no
# effect and the scheduler would still backfill every interval since
# ``start_date``.
default_args = {
    'owner': 'udacity',
    'start_date': datetime(2019, 8, 22, 7),
    'end_date': datetime(2019, 12, 31, 7),
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
    'depends_on_past': True,
    'wait_for_downstream': True
}

# dag definition; catchup=False disables backfilling of past intervals
dag = DAG('udacity_capstone',
          default_args=default_args,
          description='Data Engineering Capstone Project',
          schedule_interval='@daily',
          catchup=False
          )

# dummy for node 0
start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

# convert sas descriptor to csv
convert_sas_to_csv = SASToCSVOperator(
    task_id='sas_to_csv',
    dag=dag,
    input_path=Variable.get("sas_file"),
    output_path=Variable.get("temp_output"),
    provide_context=True
)

# upload the generated csv files to S3
transfer_to_s3_csv = TransferToS3Operator(
    task_id='transfer_to_s3_csv',
    dag=dag,
    aws_credentials_id="aws_default",
    input_path=Variable.get("temp_output"),
    bucket_name=Variable.get("s3_bucket"),
    file_ext="csv",
    provide_context=True
)

# convert the sas7bdat immigration files to parquet
sas7bdat_to_parquet = SAS7ToParquet(
    task_id='sas7bdat_to_parquet',
    dag=dag,
    input_path=Variable.get("temp_input"),
    output_path=Variable.get("spark_path"),
    provide_context=True
)

# upload the generated parquet files to S3
transfer_to_s3_parquet = TransferToS3Operator(
    task_id='transfer_to_s3_parquet',
    dag=dag,
    aws_credentials_id="aws_default",
    input_path=Variable.get("spark_path"),
    bucket_name=Variable.get("s3_bucket"),
    file_ext="parquet",
    provide_context=True
)

# create the target schema if it does not exist yet
task_create_schema = PostgresOperator(
    task_id="create_schema",
    postgres_conn_id="redshift",
    sql=SqlQueries.create_schema,
    dag=dag
)

# drop the tables so every run starts from empty tables
task_drop_table = PostgresOperator(
    task_id="drop_table",
    postgres_conn_id="redshift",
    sql=SqlQueries.drop_tables,
    dag=dag
)

# re-create the tables
task_create_table = PostgresOperator(
    task_id="create_table",
    postgres_conn_id="redshift",
    sql=SqlQueries.create_tables,
    dag=dag
)

# load the tables through a sub-DAG
load_dimension_subdag_task = SubDagOperator(
    subdag=load_dimension_subdag(
        parent_dag_name="udacity_capstone",
        task_id="load_dimensions",
        redshift_conn_id="redshift",
        start_date=datetime(2018, 1, 1)
    ),
    task_id="load_dimensions",
    dag=dag
)

# run quality check
run_quality_checks = DataQualityOperator(
    task_id='Run_data_quality_checks',
    dag=dag,
    redshift_conn_id="redshift",
    sql_stmt=SqlQueries.count_check,
    tables=SqlQueries.tables
)


def cleaning(**kwargs):
    """Remove every file and sub-directory inside the ``spark_path`` folder.

    The folder itself is kept. Removal is best effort: a failure on one
    entry is logged and the remaining entries are still processed.

    :param kwargs: Airflow task context (unused).
    """
    folder = Variable.get("spark_path")
    for the_file in os.listdir(folder):
        file_path = os.path.join(folder, the_file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            # best effort: do not fail the DAG run because of a leftover file
            logging.info(e)


clean_temp_files = PythonOperator(
    task_id='clean_temp_files',
    python_callable=cleaning,
    provide_context=True,
    dag=dag
)

# grant_access = """
# create group webappusers;
# create user webappuser1 password 'webAppuser1pass' in group webappusers;
# grant usage on schema project to group webappusers;
# """
# grant_access_to_users = PostgresOperator(
#     task_id="grant_access",
#     postgres_conn_id="redshift",
#     sql=grant_access,
#     dag=dag
# )

# dummy for node end
end_operator = DummyOperator(task_id='Stop_execution', dag=dag)

# order: both conversion branches must finish before the schema is created
start_operator >> convert_sas_to_csv >> transfer_to_s3_csv >> task_create_schema
start_operator >> sas7bdat_to_parquet >> transfer_to_s3_parquet >> task_create_schema
task_create_schema >> task_drop_table >> task_create_table >> load_dimension_subdag_task >> run_quality_checks >> clean_temp_files >> end_operator
class SqlQueries:
    """SQL statements used by the DAG, all targeting the ``project`` schema.

    Attributes:
        immigration, airports, i94ports, i94visa, i94mode, i94addr, i94res,
        us_cities_demographics: one ``CREATE TABLE IF NOT EXISTS`` statement
            per table.
        drop_tables: ``DROP TABLE IF EXISTS`` for every table in ``tables``.
        create_tables: concatenation of all create statements.
        tables: names of all tables, as used by the data-quality check.
        copy_csv_cmd: COPY template for CSV files; placeholders in order:
            table, S3 path, access key id, secret access key, delimiter.
        copy_parquet_cmd: COPY template for parquet files; placeholders in
            order: table, S3 path, IAM role.
        create_schema: creates the ``project`` schema.
        count_check: template (placeholder: table) returning 1 when the
            table holds at least one row and 0 otherwise.
    """

    immigration = """
    CREATE TABLE IF NOT EXISTS project.immigration (
        cicid FLOAT,
        i94yr FLOAT,
        i94mon FLOAT,
        i94cit FLOAT,
        i94res FLOAT,
        i94port VARCHAR,
        arrdate FLOAT,
        i94mode FLOAT,
        i94addr varchar,
        depdate FLOAT,
        i94bir FLOAT,
        i94visa FLOAT,
        count FLOAT,
        dtadfile VARCHAR,
        visapost VARCHAR,
        occup VARCHAR,
        entdepa VARCHAR,
        entdepd VARCHAR,
        entdepu VARCHAR,
        matflag VARCHAR,
        biryear FLOAT,
        dtaddto VARCHAR,
        gender VARCHAR,
        insnum VARCHAR,
        airline VARCHAR,
        admnum FLOAT,
        fltno VARCHAR,
        visatype VARCHAR
    );
    """

    airports = """
    CREATE TABLE IF NOT EXISTS project.airport_codes (
        id INT,
        ident VARCHAR,
        type VARCHAR,
        name VARCHAR,
        latitude_deg FLOAT,
        longitude_deg FLOAT,
        elevation_ft FLOAT,
        continent VARCHAR,
        iso_country VARCHAR,
        iso_region VARCHAR,
        municipality VARCHAR,
        scheduled_service VARCHAR,
        gps_code VARCHAR,
        iata_code VARCHAR,
        local_code VARCHAR,
        home_link VARCHAR,
        wikipedia_link VARCHAR,
        keywords VARCHAR
    );
    """

    i94ports = """
    CREATE TABLE IF NOT EXISTS project.i94ports (
        port_code VARCHAR,
        port_of_entry VARCHAR,
        port_city VARCHAR,
        port_state_or_country VARCHAR
    );
    """

    i94visa = """
    CREATE TABLE IF NOT EXISTS project.i94visa (
        visa_code INT,
        visa_reason VARCHAR
    );
    """

    i94mode = """
    CREATE TABLE IF NOT EXISTS project.i94mode (
        trans_code INT,
        trans_name VARCHAR
    );
    """

    i94addr = """
    CREATE TABLE IF NOT EXISTS project.i94addr (
        state_code VARCHAR,
        state_name VARCHAR
    );
    """

    i94res = """
    CREATE TABLE IF NOT EXISTS project.i94res (
        country_code INT,
        country_name VARCHAR
    );
    """

    us_cities_demographics = """
    CREATE TABLE IF NOT EXISTS project.us_cities_demographics (
        city VARCHAR,
        state VARCHAR,
        median_age FLOAT,
        male_population FLOAT,
        female_population FLOAT,
        total_population FLOAT,
        number_of_veterans FLOAT,
        foreign_born FLOAT,
        average_household_size FLOAT,
        state_code VARCHAR,
        race VARCHAR,
        count INT
    );
    """

    # Every name here must match a created table; the ports table is
    # ``i94ports`` (plural), the same name used in ``tables`` below.
    drop_tables = """
    DROP TABLE IF EXISTS project.immigration;
    DROP TABLE IF EXISTS project.airport_codes;
    DROP TABLE IF EXISTS project.i94ports;
    DROP TABLE IF EXISTS project.i94visa;
    DROP TABLE IF EXISTS project.i94mode;
    DROP TABLE IF EXISTS project.i94addr;
    DROP TABLE IF EXISTS project.i94res;
    DROP TABLE IF EXISTS project.us_cities_demographics;
    """

    create_tables = immigration + airports + i94ports + i94visa + i94mode + i94addr + i94res + us_cities_demographics
    tables = ["immigration", "airport_codes", "i94ports", "i94visa",
              "i94mode", "i94addr", "i94res", "us_cities_demographics"]

    copy_csv_cmd = """
    COPY project.{} FROM '{}'
    CREDENTIALS 'aws_access_key_id={};aws_secret_access_key={}'
    IGNOREHEADER 1
    DELIMITER '{}'
    COMPUPDATE OFF
    TRUNCATECOLUMNS
    CSV;
    """
    create_schema = """
    CREATE schema IF NOT EXISTS project;
    """
    # "> 0": a table holding exactly one row is not empty.
    count_check = """SELECT CASE WHEN COUNT(*) > 0 THEN 1 ELSE 0 END AS non_empty FROM project.{}"""

    copy_parquet_cmd = """
    COPY project.{} FROM '{}'
    IAM_ROLE '{}'
    FORMAT AS PARQUET;
    """
-------------------------------------------------------------------------------- /airflow/plugins/operators/sas7bdat_to_parquet.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import SparkSession 2 | from os import listdir 3 | from os.path import isfile, join 4 | from pyspark.sql.types import * 5 | from airflow.models import BaseOperator 6 | from airflow.utils.decorators import apply_defaults 7 | import logging 8 | import shutil 9 | import os 10 | 11 | 12 | class SAS7ToParquet(BaseOperator): 13 | 14 | ui_color = '#87CEFA' 15 | 16 | @apply_defaults 17 | def __init__(self, 18 | input_path, 19 | output_path, 20 | *args, **kwargs): 21 | 22 | super(SAS7ToParquet, self).__init__(*args, **kwargs) 23 | self.input_path = input_path 24 | self.output_path = output_path 25 | 26 | def execute(self, context): 27 | logging.info("Creating spark session ...") 28 | spark = SparkSession.builder \ 29 | .config("spark.jars.packages", 30 | "saurfang:spark-sas7bdat:2.0.0-s_2.11") \ 31 | .enableHiveSupport() \ 32 | .getOrCreate() 33 | 34 | # spark context 35 | sc = spark.sparkContext 36 | 37 | # column names 38 | logging.info('Defining column names and resulting schema ... 
') 39 | columns = ['cicid', 40 | 'i94yr', 41 | 'i94mon', 42 | 'i94cit', 43 | 'i94res', 44 | 'i94port', 45 | 'arrdate', 46 | 'i94mode', 47 | 'i94addr', 48 | 'depdate', 49 | 'i94bir', 50 | 'i94visa', 51 | 'count', 52 | 'dtadfile', 53 | 'visapost', 54 | 'occup', 55 | 'entdepa', 56 | 'entdepd', 57 | 'entdepu', 58 | 'matflag', 59 | 'biryear', 60 | 'dtaddto', 61 | 'gender', 62 | 'insnum', 63 | 'airline', 64 | 'admnum', 65 | 'fltno', 66 | 'visatype'] 67 | 68 | # schema definition 69 | schema = StructType([ 70 | StructField('cicid', DoubleType(), True), 71 | StructField('i94yr', DoubleType(), True), 72 | StructField('i94mon', DoubleType(), True), 73 | StructField('i94cit', DoubleType(), True), 74 | StructField('i94res', DoubleType(), True), 75 | StructField('i94port', StringType(), True), 76 | StructField('arrdate', DoubleType(), True), 77 | StructField('i94mode', DoubleType(), True), 78 | StructField('i94addr', StringType(), True), 79 | StructField('depdate', DoubleType(), True), 80 | StructField('i94bir', DoubleType(), True), 81 | StructField('i94visa', DoubleType(), True), 82 | StructField('count', DoubleType(), True), 83 | StructField('dtadfile', StringType(), True), 84 | StructField('visapost', StringType(), True), 85 | StructField('occup', StringType(), True), 86 | StructField('entdepa', StringType(), True), 87 | StructField('entdepd', StringType(), True), 88 | StructField('entdepu', StringType(), True), 89 | StructField('matflag', StringType(), True), 90 | StructField('biryear', DoubleType(), True), 91 | StructField('dtaddto', StringType(), True), 92 | StructField('gender', StringType(), True), 93 | StructField('insnum', StringType(), True), 94 | StructField('airline', StringType(), True), 95 | StructField('admnum', DoubleType(), True), 96 | StructField('fltno', StringType(), True), 97 | StructField('visatype', StringType(), True) 98 | ]) 99 | 100 | df_all = spark.createDataFrame(sc.emptyRDD(), schema) 101 | 102 | logging.info('Reading sas7bdat files from disc ... 
class SASToCSVOperator(BaseOperator):
    """Extract the lookup tables of a SAS label description file to CSV.

    The input file is scanned for ``/* NAME - description */`` headers
    followed by ``code = label`` lines. The sections ``i94cit&i94res``,
    ``i94port``, ``i94mode``, ``i94addr`` and ``i94visa`` are turned into
    data frames and written as ``<section>.csv`` into ``output_path``.

    :param input_path: path of the SAS label description file
    :param output_path: existing directory receiving the CSV files
    """

    ui_color = '#FFC0CB'

    @apply_defaults
    def __init__(self,
                 input_path,
                 output_path,
                 *args, **kwargs):

        super(SASToCSVOperator, self).__init__(*args, **kwargs)
        self.input_path = input_path
        self.output_path = output_path

    @staticmethod
    def _parse_labels(text):
        """Return ``{section: {'desc': str, 'data': [[code, label], ...]}}``.

        ``'data'`` is only present for sections that were followed by at
        least one ``=`` line and then by a line without ``=``.
        """
        labels = {}
        pending_rows = []
        attr_name = ''

        for line in text.split("\n"):
            # collapse every whitespace run into a single blank
            line = re.sub(r"\s+|\t+|\r+", " ", line)

            if "/*" in line and "-" in line:
                # section header: "/* NAME - description */"
                attr_name, attr_desc = [
                    item.strip(" ")
                    for item in line.split("*")[1].split("-", 1)]
                attr_name = attr_name.replace(' & ', '&').lower()
                if attr_name != '':
                    labels[attr_name] = {'desc': attr_desc}
            elif '=' in line:
                # data row: "'code' = 'label'"
                pending_rows.append(
                    [item.strip(';').strip(" ").replace(
                        '\'', '').strip().title()
                     for item in line.split('=')])
            elif len(pending_rows) > 0:
                # first line without '=' closes the current data run
                if attr_name != '':
                    labels[attr_name]['data'] = pending_rows
                    pending_rows = []
        return labels

    def execute(self, context):
        """Parse the SAS label file and write one CSV per lookup table.

        :param context: Airflow task context (unused)
        :raises KeyError: when an expected section or its data is missing
        """
        with open(self.input_path, "r", encoding='utf-8') as main_file:
            file = main_file.read()

        logging.info("reading file ...")
        sas_label_ext = self._parse_labels(file)

        # country
        logging.info("preparing country codes ...")
        sas_label_ext['i94cit&i94res']['df'] = pd.DataFrame(
            sas_label_ext['i94cit&i94res']['data'],
            columns=['country_code', 'country_name'])

        # port
        logging.info("preparing port codes ...")
        tempdf = pd.DataFrame(sas_label_ext['i94port']['data'],
                              columns=['port_code', 'port_name'])
        tempdf['port_code'] = tempdf['port_code'].str.upper()
        # ``n`` must be passed by keyword: pandas 2.x made every argument of
        # ``str.rsplit`` except ``pat`` keyword-only.
        tempdf[['port_city', 'port_state']] = tempdf[
            'port_name'].str.rsplit(',', n=1, expand=True)
        # the text after the last comma starts with a blank ("City, St");
        # strip it so the state/country code carries no leading whitespace
        tempdf['port_state'] = tempdf['port_state'].str.strip().str.upper()
        sas_label_ext['i94port']['df'] = tempdf

        # mode
        logging.info("preparing transport modes ...")
        sas_label_ext['i94mode']['df'] = pd.DataFrame(
            sas_label_ext['i94mode']['data'],
            columns=['trans_code', 'trans_name'])

        # address
        logging.info("preparing state codes ...")
        tempdf = pd.DataFrame(sas_label_ext['i94addr']['data'],
                              columns=['state_code', 'state_name'])
        tempdf['state_code'] = tempdf['state_code'].str.upper()
        sas_label_ext['i94addr']['df'] = tempdf

        # visa
        logging.info("preparing visa codes ...")
        sas_label_ext['i94visa']['df'] = pd.DataFrame(
            sas_label_ext['i94visa']['data'],
            columns=['reason_code', 'reason_travel'])

        # write to csv
        logging.info("writing to csv files ...")
        for table, content in sas_label_ext.items():
            if 'df' not in content:
                continue
            csv_path = os.path.join(self.output_path, table + ".csv")
            # explicit encoding/newline keep the output platform independent
            with open(csv_path, "w", encoding='utf-8',
                      newline='') as output_file:
                content['df'].to_csv(output_file, index=False)
class StageToRedshiftOperator(BaseOperator):
    """Run a COPY statement that loads one S3 object into a Redshift table.

    The statement template ``sql_stmt`` is filled positionally with: table,
    S3 path, AWS access key, AWS secret key and delimiter.

    :param redshift_conn_id: Airflow connection id of the Redshift cluster
    :param aws_credentials_id: Airflow connection id holding AWS credentials
    :param file: object name appended to the rendered S3 key
    :param delimiter: field delimiter handed to the statement template
    :param table: target table name
    :param s3_bucket: source bucket name
    :param s3_key: key template, rendered with the task context
    :param sql_stmt: COPY statement template
    """

    @apply_defaults
    def __init__(self,
                 redshift_conn_id,
                 aws_credentials_id,
                 file,
                 delimiter,
                 table,
                 s3_bucket,
                 s3_key,
                 sql_stmt,
                 *args, **kwargs):

        super(StageToRedshiftOperator, self).__init__(*args, **kwargs)
        # connections
        self.redshift_conn_id = redshift_conn_id
        self.aws_credentials_id = aws_credentials_id
        # source location
        self.s3_bucket = s3_bucket
        self.s3_key = s3_key
        self.file = file
        # target and statement
        self.table = table
        self.delimiter = delimiter
        self.sql_stmt = sql_stmt

    def execute(self, context):
        """Render the S3 path and the COPY statement, then execute it.

        :param context: Airflow task context, used to render ``s3_key``
        """
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info("Copying data from S3 to Redshift")
        rendered_key = self.s3_key.format(**context)
        s3_path = "s3://{}/{}/{}".format(self.s3_bucket, rendered_key,
                                         self.file)

        # positional values expected by the statement template
        copy_values = (
            self.table,
            s3_path,
            credentials.access_key,
            credentials.secret_key,
            self.delimiter,
        )
        redshift.run(self.sql_stmt.format(*copy_values))
class TransferToS3Operator(BaseOperator):
    """Upload every file with a given extension below a folder to S3.

    Each matching file is stored under the key ``<file_ext>/<file name>``.

    :param aws_credentials_id: Airflow connection id holding AWS credentials
    :param input_path: local folder that is walked recursively
    :param bucket_name: target S3 bucket
    :param file_ext: extension (without dot) of the files to upload
    """

    ui_color = '#89DA59'

    @apply_defaults
    def __init__(self,
                 aws_credentials_id,
                 input_path,
                 bucket_name,
                 file_ext,
                 *args, **kwargs):

        super(TransferToS3Operator, self).__init__(*args, **kwargs)
        self.aws_credentials_id = aws_credentials_id
        self.input_path = input_path
        self.bucket_name = bucket_name
        self.file_ext = file_ext

    def execute(self, context):
        """Walk ``input_path`` and upload the matching files.

        :param context: Airflow task context (unused)
        """
        logging.info('Reading AWS Credentials ... ')
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        s3_client = boto3.client(
            's3',
            aws_access_key_id=credentials.access_key,
            aws_secret_access_key=credentials.secret_key)
        uploader = S3Transfer(s3_client)

        logging.info('Copying Files ... ')
        wanted_suffix = '.' + self.file_ext
        for current_dir, _dirs, names in os.walk(self.input_path):
            for name in names:
                stem, suffix = os.path.splitext(name)
                if suffix != wanted_suffix:
                    # not the requested file type
                    continue
                logging.info(
                    "transferring file {}".format(stem))
                uploader.upload_file(os.path.join(current_dir, name),
                                     self.bucket_name,
                                     self.file_ext + '/' + name)
        logging.info('Successfully finished copying all the files ... ')
"""Project settings read from ``dwh.cfg`` in the current working directory."""
import configparser

config = configparser.ConfigParser()
# Open through a context manager so the file handle is closed right after
# parsing; a missing file still raises FileNotFoundError as before.
with open('dwh.cfg') as cfg_file:
    config.read_file(cfg_file)

# amazon aws
KEY = config.get('AWS', 'key')
SECRET = config.get('AWS', 'secret')

# Redshift
DWH_CLUSTER_TYPE = config.get('DWH', 'DWH_CLUSTER_TYPE')
DWH_NUM_NODES = config.get('DWH', 'DWH_NUM_NODES')
DWH_NODE_TYPE = config.get('DWH', 'DWH_NODE_TYPE')

DWH_IAM_ROLE_NAME = config.get('DWH', 'DWH_IAM_ROLE_NAME')
DWH_CLUSTER_IDENTIFIER = config.get('DWH', 'DWH_CLUSTER_IDENTIFIER')
DWH_DB = config.get('DWH', 'DWH_DB')
DWH_DB_USER = config.get('DWH', 'DWH_DB_USER')
DWH_DB_PASSWORD = config.get('DWH', 'DWH_DB_PASSWORD')
DWH_PORT = config.get('DWH', 'DWH_PORT')
DWH_SCHEMA = config.get('DWH', 'DWH_SCHEMA')
DWH_REGION = config.get('DWH', 'DWH_REGION')

# S3
LOG_JSON_FORMAT = config.get('S3', 'LOG_JSON_FORMAT')
S3_BUCKET_LOG_JSON_PATH = config.get('S3', 'S3_BUCKET_LOG_JSON_PATH')
S3_BUCKET_SONG_JSON_PATH = config.get('S3', 'S3_BUCKET_SONG_JSON_PATH')
KEY= 3 | SECRET= 4 | 5 | [DWH] 6 | DWH_CLUSTER_TYPE=multi-node 7 | DWH_NUM_NODES=4 8 | DWH_NODE_TYPE=dc2.large 9 | 10 | DWH_IAM_ROLE_NAME= 11 | DWH_CLUSTER_IDENTIFIER= 12 | DWH_DB=udacity 13 | DWH_DB_USER= 14 | DWH_DB_PASSWORD= 15 | DWH_PORT=5439 16 | DWH_SCHEMA=public 17 | DWH_REGION=us-west-2 18 | 19 | [S3] 20 | LOG_JSON_FORMAT= 21 | S3_BUCKET_LOG_JSON_PATH= 22 | S3_BUCKET_SONG_JSON_PATH= 23 | 24 | [Spark] 25 | SPARK_SUBMIT=/opt/spark-2.4.3-bin-hadoop2.7/bin/spark-submit -------------------------------------------------------------------------------- /env.yml: -------------------------------------------------------------------------------- 1 | name: CapstoneProject 2 | channels: 3 | - anaconda 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - jupyter==1.0.0=py36_0 8 | - psycopg2==2.7.4=py36_0 9 | - geopandas 10 | - pip: 11 | - apache-airflow 12 | - matplotlib 13 | - pandas 14 | - descartes 15 | - shutil 16 | - ipython-sql 17 | - pandas-redshift 18 | - pyspark 19 | - boto3 20 | - zappa 21 | - smart_open 22 | -------------------------------------------------------------------------------- /img/city_intake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/city_intake.png -------------------------------------------------------------------------------- /img/diff_airports.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/diff_airports.png -------------------------------------------------------------------------------- /img/graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/graph.png 
-------------------------------------------------------------------------------- /img/marker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/marker.png -------------------------------------------------------------------------------- /img/no_of_immigrants.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/no_of_immigrants.png -------------------------------------------------------------------------------- /img/pipeline-tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/pipeline-tree.png -------------------------------------------------------------------------------- /img/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/pipeline.png -------------------------------------------------------------------------------- /img/schema.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/schema.PNG -------------------------------------------------------------------------------- /img/state_airports.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/state_airports.png 
class AWSLoad:
    """Uploads files from the local file system to an S3 bucket."""

    def __init__(self, access_key, secret_key):
        """
        Keep the AWS credentials used later to build the S3 client
        :param access_key: AWS access key id
        :param secret_key: AWS secret access key
        """
        self.access_key = access_key
        self.secret_key = secret_key

    def transfer_files_to_s3(self, input_path, bucket_name, file_ext):
        """
        Walk input_path recursively and upload every file whose extension
        equals file_ext to the given S3 bucket
        :param input_path: local root directory that is searched
        :param bucket_name: name of the destination bucket
        :param file_ext: extension to upload, without the leading dot
        :return: None
        """
        s3_client = boto3.client('s3',
                                 aws_access_key_id=self.access_key,
                                 aws_secret_access_key=self.secret_key)
        uploader = S3Transfer(s3_client)
        for folder, _subfolders, names in os.walk(input_path):
            for name in names:
                stem, suffix = os.path.splitext(name)
                if suffix != '.' + file_ext:
                    continue
                local_path = os.path.join(folder, name)
                logging.info("transferring file {}".format(stem))
                # the object key is "<file_ext>/<file name>"; the local
                # sub-directory is not part of the key
                object_key = file_ext + '/' + name
                uploader.upload_file(local_path, bucket_name, object_key)
def create_redshift_cluster(self, role_arn):
    """
    This function creates a Redshift cluster from the DWH_* settings and
    blocks until the cluster reports the status 'available'
    :param role_arn: ARN of the IAM role attached to the cluster
                     (used for s3 access)
    :return: endpoint address of the cluster
    """
    logging.info("1.1 Client is created ...")
    redshift = boto3.client('redshift',
                            region_name="us-west-2",
                            aws_access_key_id=self.access_key,
                            aws_secret_access_key=self.secret_key
                            )
    try:
        logging.info("1.2 Cluster config is being created ...")
        redshift.create_cluster(
            # HW
            ClusterType=DWH_CLUSTER_TYPE,
            NodeType=DWH_NODE_TYPE,
            NumberOfNodes=int(DWH_NUM_NODES),

            # Identifiers & Credentials
            DBName=DWH_DB,
            ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
            MasterUsername=DWH_DB_USER,
            MasterUserPassword=DWH_DB_PASSWORD,

            # Roles (for s3 access)
            IamRoles=[role_arn])
    except ClientError as e:
        # A failed create request is only logged; the polling below still
        # runs (presumably so an already existing cluster can be reused
        # -- TODO confirm this is the intent).
        logging.info(f'ERROR: {e}')

    logging.info("1.3 Cluster is being created ...")
    while True:
        cluster = redshift.describe_clusters(
            ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters'][0]
        if cluster['ClusterStatus'] == 'available':
            break
        # animate() sleeps between frames, so it also paces the polling
        AWSUtils.animate()

    logging.info("\r1.4 Cluster is created successfully ...")
    # Return the address itself.  Previously the subscript chain was split
    # over two statements, so the whole cluster description was returned
    # and ['Endpoint']['Address'] was never applied.
    return cluster['Endpoint']['Address']
def main():
    """
    Command line entry point for creating or destroying the warehouse.

    ``--action start`` creates the IAM role and the Redshift cluster and
    prints the role ARN and the value returned by
    ``create_redshift_cluster``.  ``--action stop`` deletes the cluster
    and then detaches and deletes the IAM role.  Any other value, or no
    ``--action`` at all, prints the usage help.

    Credentials and the role name are read from ``dwh.cfg`` in the
    current working directory.
    """
    parser = argparse.ArgumentParser(description='start/stop')
    parser.add_argument('--action', type=str, help='type an action')
    args = parser.parse_args()
    action = args.action

    config = configparser.ConfigParser()
    # context manager so the config file handle is closed after parsing
    with open('dwh.cfg') as cfg_file:
        config.read_file(cfg_file)
    access_key = config.get('AWS', 'key')
    secret_key = config.get('AWS', 'secret')
    iam_role_name = config.get('DWH', 'DWH_IAM_ROLE_NAME')
    aws_utils_instance = AWSUtils(access_key, secret_key)

    if action == "start":
        role_arn = aws_utils_instance.create_iam_role(iam_role_name)
        dwh_endpoint = aws_utils_instance.create_redshift_cluster(role_arn)
        print('Please copy these values for future reference')
        print('DWH_ROLE_ARN={}'.format(role_arn))
        print('dwh_endpoint={}'.format(dwh_endpoint))

    elif action == "stop":
        print('destroying the cluster')
        aws_utils_instance.delete_redshift_cluster()
        print('detaching role')
        aws_utils_instance.detach_iam_role(iam_role_name)

    else:
        # Nothing was requested, or the action is unknown: show the usage
        # instead of exiting silently.
        parser.print_help()


if __name__ == "__main__":
    main()