├── .gitignore
├── ColumnDesc.txt
├── README.md
├── Tester.ipynb
├── airflow
├── dags
│ ├── subdags
│ │ └── subdag_for_dimensions.py
│ ├── temp_input
│ │ └── I94_SAS_Labels_Descriptions.SAS
│ ├── temp_output
│ │ ├── airport-codes_csv.csv
│ │ ├── i94addr.csv
│ │ ├── i94cit&i94res.csv
│ │ ├── i94mode.csv
│ │ ├── i94port.csv
│ │ ├── i94visa.csv
│ │ └── us-cities-demographics.csv
│ └── udacity_capstone.py
└── plugins
│ ├── __init__.py
│ ├── helpers
│ ├── __init__.py
│ └── sql_queries.py
│ └── operators
│ ├── __init__.py
│ ├── data_quality.py
│ ├── sas7bdat_to_parquet.py
│ ├── sas_to_csv.py
│ ├── stage_redshift.py
│ └── transfer_to_s3.py
├── airflow_start.sh
├── config.py
├── cryptosetup.py
├── dwh.cfg
├── env.yml
├── img
├── city_intake.png
├── diff_airports.png
├── graph.png
├── marker.png
├── no_of_immigrants.png
├── pipeline-tree.png
├── pipeline.png
├── schema.PNG
└── state_airports.png
└── load
├── __init__.py
├── aws_load.py
├── aws_utils.py
└── example_usage.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### JetBrains template
3 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
4 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
5 |
6 | # User-specific stuff
7 | .idea/**/workspace.xml
8 | .idea/**/tasks.xml
9 | .idea/**/dictionaries
10 | .idea/**/shelf
11 |
12 | # Sensitive or high-churn files
13 | .idea/**/dataSources/
14 | .idea/**/dataSources.ids
15 | .idea/**/dataSources.local.xml
16 | .idea/**/sqlDataSources.xml
17 | .idea/**/dynamic.xml
18 | .idea/**/uiDesigner.xml
19 | .idea/**/dbnavigator.xml
20 |
21 | # Gradle
22 | .idea/**/gradle.xml
23 | .idea/**/libraries
24 |
25 | # CMake
26 | cmake-build-debug/
27 | cmake-build-release/
28 |
29 | # Mongo Explorer plugin
30 | .idea/**/mongoSettings.xml
31 |
32 | # File-based project format
33 | *.iws
34 |
35 | # IntelliJ
36 | out/
37 |
38 | # mpeltonen/sbt-idea plugin
39 | .idea_modules/
40 |
41 | # JIRA plugin
42 | atlassian-ide-plugin.xml
43 |
44 | # Cursive Clojure plugin
45 | .idea/replstate.xml
46 |
47 | # Crashlytics plugin (for Android Studio and IntelliJ)
48 | com_crashlytics_export_strings.xml
49 | crashlytics.properties
50 | crashlytics-build.properties
51 | fabric.properties
52 |
53 | # Editor-based Rest Client
54 | .idea/httpRequests
55 | ### Python template
56 | # Byte-compiled / optimized / DLL files
57 | __pycache__/
58 | *.py[cod]
59 | *$py.class
60 |
61 | # C extensions
62 | *.so
63 |
64 | # Distribution / packaging
65 | .Python
66 | build/
67 | develop-eggs/
68 | dist/
69 | downloads/
70 | eggs/
71 | .eggs/
72 | lib/
73 | lib64/
74 | parts/
75 | sdist/
76 | var/
77 | wheels/
78 | *.egg-info/
79 | .installed.cfg
80 | *.egg
81 | MANIFEST
82 |
83 | # PyInstaller
84 | # Usually these files are written by a python script from a template
85 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
86 | *.manifest
87 | *.spec
88 |
89 | # Installer logs
90 | pip-log.txt
91 | pip-delete-this-directory.txt
92 |
93 | # Unit test / coverage reports
94 | htmlcov/
95 | .tox/
96 | .coverage
97 | .coverage.*
98 | .cache
99 | nosetests.xml
100 | coverage.xml
101 | *.cover
102 | .hypothesis/
103 | .pytest_cache/
104 |
105 | # Translations
106 | *.mo
107 | *.pot
108 |
109 | # Django stuff:
110 | *.log
111 | local_settings.py
112 | db.sqlite3
113 |
114 | # Flask stuff:
115 | instance/
116 | .webassets-cache
117 |
118 | # Scrapy stuff:
119 | .scrapy
120 |
121 | # Sphinx documentation
122 | docs/_build/
123 |
124 | # PyBuilder
125 | target/
126 |
127 | # Jupyter Notebook
128 | .ipynb_checkpoints
129 |
130 | # pyenv
131 | .python-version
132 |
133 | # celery beat schedule file
134 | celerybeat-schedule
135 |
136 | # SageMath parsed files
137 | *.sage.py
138 |
139 | # Environments
140 | .env
141 | .venv
142 | env/
143 | venv/
144 | ENV/
145 | env.bak/
146 | venv.bak/
147 |
148 | # Spyder project settings
149 | .spyderproject
150 | .spyproject
151 |
152 | # Rope project settings
153 | .ropeproject
154 |
155 | # mkdocs documentation
156 | /site
157 |
158 | # mypy
159 | .mypy_cache/
160 |
161 | ### JetBrains template
162 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
163 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
164 |
165 | # User-specific stuff
166 | .idea/**/workspace.xml
167 | .idea/**/tasks.xml
168 | .idea/**/dictionaries
169 | .idea/**/shelf
170 |
171 | # Sensitive or high-churn files
172 | .idea/**/dataSources/
173 | .idea/**/dataSources.ids
174 | .idea/**/dataSources.local.xml
175 | .idea/**/sqlDataSources.xml
176 | .idea/**/dynamic.xml
177 | .idea/**/uiDesigner.xml
178 | .idea/**/dbnavigator.xml
179 |
180 | # Gradle
181 | .idea/**/gradle.xml
182 | .idea/**/libraries
183 |
184 | # CMake
185 | cmake-build-debug/
186 | cmake-build-release/
187 |
188 | # Mongo Explorer plugin
189 | .idea/**/mongoSettings.xml
190 |
191 | # File-based project format
192 | *.iws
193 |
194 | # IntelliJ
195 | out/
196 |
197 | # mpeltonen/sbt-idea plugin
198 | .idea_modules/
199 |
200 | # JIRA plugin
201 | atlassian-ide-plugin.xml
202 |
203 | # Cursive Clojure plugin
204 | .idea/replstate.xml
205 |
206 | # Crashlytics plugin (for Android Studio and IntelliJ)
207 | com_crashlytics_export_strings.xml
208 | crashlytics.properties
209 | crashlytics-build.properties
210 | fabric.properties
211 |
212 | # Editor-based Rest Client
213 | .idea/httpRequests
214 | ### Python template
215 | # Byte-compiled / optimized / DLL files
216 | __pycache__/
217 | *.py[cod]
218 | *$py.class
219 |
220 | # C extensions
221 | *.so
222 |
223 | # Distribution / packaging
224 | .Python
225 | build/
226 | develop-eggs/
227 | dist/
228 | downloads/
229 | eggs/
230 | .eggs/
231 | lib/
232 | lib64/
233 | parts/
234 | sdist/
235 | var/
236 | wheels/
237 | *.egg-info/
238 | .installed.cfg
239 | *.egg
240 | MANIFEST
241 |
242 | # PyInstaller
243 | # Usually these files are written by a python script from a template
244 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
245 | *.manifest
246 | *.spec
247 |
248 | # Installer logs
249 | pip-log.txt
250 | pip-delete-this-directory.txt
251 |
252 | # Unit test / coverage reports
253 | htmlcov/
254 | .tox/
255 | .coverage
256 | .coverage.*
257 | .cache
258 | nosetests.xml
259 | coverage.xml
260 | *.cover
261 | .hypothesis/
262 | .pytest_cache/
263 |
264 | # Translations
265 | *.mo
266 | *.pot
267 |
268 | # Django stuff:
269 | *.log
270 | local_settings.py
271 | db.sqlite3
272 |
273 | # Flask stuff:
274 | instance/
275 | .webassets-cache
276 |
277 | # Scrapy stuff:
278 | .scrapy
279 |
280 | # Sphinx documentation
281 | docs/_build/
282 |
283 | # PyBuilder
284 | target/
285 |
286 | # Jupyter Notebook
287 | .ipynb_checkpoints
288 |
289 | # pyenv
290 | .python-version
291 |
292 | # celery beat schedule file
293 | celerybeat-schedule
294 |
295 | # SageMath parsed files
296 | *.sage.py
297 |
298 | # Environments
299 | .env
300 | .venv
301 | env/
302 | venv/
303 | ENV/
304 | env.bak/
305 | venv.bak/
306 |
307 | # Spyder project settings
308 | .spyderproject
309 | .spyproject
310 |
311 | # Rope project settings
312 | .ropeproject
313 |
314 | # mkdocs documentation
315 | /site
316 |
317 | # mypy
318 | .mypy_cache/
319 |
320 | /extraction/
321 | /load/
322 | /test.py
323 |
--------------------------------------------------------------------------------
/ColumnDesc.txt:
--------------------------------------------------------------------------------
1 | ColumnName | ColumnDesc | ColumnUse
2 | ----------------|----------------------------------------------------------------------------------------|-----------------
3 | CICID - -
4 | I94YR - 4 digit year -
5 | I94MON - Numeric month -
6 | I94CIT - This format shows all the valid and invalid country codes for processing -
7 | I94RES - This format shows all the valid and invalid country codes for processing -
8 | I94PORT - This format shows all the valid and invalid port codes for processing -
9 | ARRDATE - This is the Arrival Date in the USA -
10 | I94MODE - This is the mode of transport (1 = 'Air';2 = 'Sea';3 = 'Land';9 = 'Not reported') -
11 | I94ADDR - The state codes; everything else goes into 'other' -
12 | DEPDATE - This is the Departure Date from the USA -
13 | I94BIR - Age of Respondent in Years -
14 | I94VISA - Visa codes collapsed into three categories:(1 = Business;2 = Pleasure;3 = Student) -
15 | COUNT - Used for summary statistics -
16 | DTADFILE - Character Date Field - Date added to I-94 Files - CIC does not use
17 | VISAPOST - Department of State where Visa was issued - CIC does not use
18 | OCCUP - Occupation that will be performed in U.S. - CIC does not use
19 | ENTDEPA - Arrival Flag - admitted or paroled into the U.S. - CIC does not use
20 | ENTDEPD - Departure Flag - Departed, lost I-94 or is deceased - CIC does not use
21 | ENTDEPU - Update Flag - Either apprehended, overstayed, adjusted to perm residence - CIC does not use
22 | MATFLAG - Match flag - Match of arrival and departure records -
23 | BIRYEAR - 4 digit year of birth -
24 | DTADDTO - Character Date Field - Date to which admitted to U.S. (allowed to stay until) - CIC does not use
25 | GENDER - Non-immigrant sex -
26 | INSNUM - INS number -
27 | AIRLINE - Airline used to arrive in U.S. -
28 | ADMNUM - Admission Number -
29 | FLTNO - Flight number of Airline used to arrive in U.S. -
30 | VISATYPE - Class of admission legally admitting the non-immigrant to temporarily stay in U.S. -
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Data Engineering Capstone Project for Udacity
4 |
5 | ### Objective
6 |
7 | ---
8 | In this project we are going to work with US immigration data from the
9 | year 1994. We have facts such as visa types, transport modes, landing
10 | ports, us state codes, country codes. Apart from the sas7bdat formatted
11 | immigration data we have us airport information and us demographics
12 | data. We are going to parse SAS descriptor files for all the dimensions
13 | and sas7bdat files for all the facts. The tools we are going to use here
14 | are Apache Spark, Apache Airflow, Amazon Redshift, Amazon S3.
15 |
16 | We will be reading, parsing and cleaning the data from local file
17 | systems, Amazon S3 and transferring data to redshift tables in AWS. We
18 | will be orchestrating the flow of data through Apache Airflow DAGs.
19 |
20 | Finally we will be using some SQL queries to extract some valuable stats
21 | and graphs from the data itself.
22 |
23 | ### Data Model
24 |
25 | ---
26 | 
27 |
28 | ### Data Pipeline
29 |
30 | ___
31 | 
32 | 
33 | 
34 |
35 | ### Installing and starting
36 |
37 | ---
38 |
39 | #### Installing Python Dependencies
40 | You need to install these Python dependencies.
41 | In Terminal/CommandPrompt:
42 |
43 | without anaconda you can do this:
44 | ```
45 | $ python3 -m venv virtual-env-name
46 | $ source virtual-env-name/bin/activate
47 | $ pip install -r requirements.txt
48 | ```
49 | with anaconda you can do this (in Windows):
50 | ```
51 | $ conda env create -f env.yml
52 | $ source activate
53 | ```
54 | or (in Others)
55 | ```
56 | conda create -y -n python==3.6
57 | conda install -f -y -q -n -c conda-forge --file requirements.txt
58 | [source activate/ conda activate]
59 | ```
60 | #### Fixing/Configuring Airflow
61 | ```
62 | $ pip install --upgrade Flask
63 | $ pip install zappa
64 | $ mkdir airflow_home
65 | $ export AIRFLOW_HOME=./airflow_home
66 | $ cd airflow_home
67 | $ airflow initdb
68 | $ airflow webserver
69 | $ airflow scheduler
70 | ```
71 |
72 | #### More Airflow commands
73 | To list existing dags registered with airflow
74 | ```
75 | $ airflow list_dags
76 | ```
77 |
78 | #### Secure/Encrypt your connections and hooks
79 | **Run**
80 | ```bash
81 | $ python cryptosetup.py
82 | ```
83 | Copy the generated key into *airflow.cfg* as the value of
84 | fernet_key = ************
85 |
86 | #### Setting up connections and variables in Airflow UI for AWS
87 | TODO: There is no code to modify in this exercise. We're going to
88 | create a connection and a variable.
89 |
90 | **S3**
91 | 1. Open your browser to localhost:8080 and open Admin->Variables
92 | 2. Click "Create"
93 | 3. Set "Key" equal to "s3_bucket" and set "Val" equal to "udacity-dend"
94 | 4. Set "Key" equal to "s3_prefix" and set "Val" equal to "data-pipelines"
95 | 5. Click save
96 |
97 | **AWS**
98 | 1. Open Admin->Connections
99 | 2. Click "Create"
100 | 3. Set "Conn Id" to "aws_credentials", "Conn Type" to "Amazon Web Services"
101 | 4. Set "Login" to your aws_access_key_id and "Password" to your aws_secret_key
102 | 5. Click save
103 | 6. If it doesn't work then in "Extra" field put:
104 | {"region_name": "your_aws_region", "aws_access_key_id":"your_aws_access_key_id", "aws_secret_access_key": "your_aws_secret_access_key", "aws_iam_user": "your_created_iam_user"}
105 | 7. These are all you can put:
106 | - aws_account_id: AWS account ID for the connection
107 | - aws_iam_role: AWS IAM role for the connection
108 | - external_id: AWS external ID for the connection
109 | - host: Endpoint URL for the connection
110 | - region_name: AWS region for the connection
111 | - role_arn: AWS role ARN for the connection
112 |
113 | **Redshift**
114 | 1. Open Admin->Connections
115 | 2. Click "Create"
116 | 3. Set "Conn Id" to "redshift", "Conn Type" to "postgres"
117 | 4. Set "Login" to your master_username for your cluster and "Password"
118 | to your master_password for your cluster
119 | 5. Click save
120 |
121 | #### Optional
122 | If you haven't setup your AWS Redshift Cluster yet
123 | (or don't want to create one manually), then use the files
124 | inside 'aws' folder
125 | - To create cluster and IAM role: Run the below code in terminal from 'aws' folder to create your Redshift database and an
126 | iam_role in aws having read access to Amazon S3 and permissions
127 | attached to the created cluster
128 | ```bash
129 | $ python aws_operate.py --action start
130 | ```
131 | Copy the DWH_ENDPOINT and DWH_ROLE_ARN values
132 | from the print statements
133 | - To create Tables: Run the below code in terminal from project dir to create tables in your Redshift database
134 | in aws
135 | ```bash
136 | $ python create_table.py --host <DWH_ENDPOINT>
137 | ```
138 | - To Stop: Run the below code in terminal from 'aws' directory to destroy your Redshift database and
139 | detach iam_role from the cluster
140 | ```bash
141 | $ python aws_operate.py --action stop
142 | ```
143 |
144 | ### About the data
145 |
146 | ---
147 | #### I94 Immigration Data:
148 | This data comes from the US National Tourism and Trade Office.
149 | [This](https://travel.trade.gov/research/reports/i94/historical/2016.html)
150 | is where the data comes from. There's a sample file so you can take a look
151 | at the data in csv format before reading it all in. The report contains
152 | international visitor arrival statistics by world regions and selected
153 | countries (including top 20), type of visa, mode of transportation,
154 | age groups, states visited (first intended address only), and the top
155 | ports of entry (for select countries)
156 |
157 | #### World Temperature Data:
158 | This dataset came from Kaggle. You can read more about it [here](https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data).
159 |
160 | #### U.S. City Demographic Data:
161 | This data comes from OpenDataSoft. You can read more about it [here](https://public.opendatasoft.com/explore/dataset/us-cities-demographics/export/).
162 |
163 | #### Airport Code Table:
164 | This is a simple table of airport codes and corresponding cities. It comes from [here](https://datahub.io/core/airport-codes#data).
165 |
166 | ### Run the project
167 |
168 | ---
169 | 1. Follow all the setup mentioned above
170 | 2. Create a bucket in region 'us-west-2' in Amazon S3
171 | 3. You have to setup all the connections and variables in the Airflow
172 | admin
173 | i. Setup aws connection with user credentials (access_key and
174 | secret_key with login and password). Make sure the region is 'us-west-2'
175 | ii. Setup Redshift connection with user, password, host, port,
176 | schema, db
177 | iii. Setup iam_role for your aws account
178 | iv. Setup variables for 'temp_input', 'temp_output', 'spark_path' (spark
179 | manipulation path for parquet files), sas_file (sas7bdat descriptor
180 | files)
181 | v. Place all the csv inputs inside temp_output directory
182 | vi. Create a folder called 'spark_path' inside \airflow\dags\
183 | vii. Create variable called 's3_bucket' (make sure the bucket in
184 | AWS is in region 'us-west-2')
185 |
186 | Example:
187 |
188 | | variable | example value |
189 | |:-------------|-------------:|
190 | | iam_role | #### |
191 | | s3_bucket | #### |
192 | | sas_file | /home/workspace/airflow/dags/temp_input/I94_SAS_Labels_Descriptions.SAS |
193 | | spark_path | /home/workspace/airflow/dags/spark_path |
194 | | temp_input | /home/workspace/airflow/dags/temp_input/ |
195 | | temp_output | /home/workspace/airflow/dags/temp_output/ |
196 |
197 | 4. Data Location for input files:
198 | i. Put all your sas7bdat formatted files in temp_input directory
199 | (whenever you want to process/insert them into the db, when you are
200 | done remove the .sas7bdat file/files and drop new files)
201 | ii. Put SAS descriptor file in temp_input directory
202 | iii. Put airport-codes_csv.csv file in temp_output directory
203 |
204 |
205 | ### Test it Yourself!
206 |
207 | ---
208 |
209 | Here are some example queries we test to see the uploaded results into
210 | the Redshift schema
211 |
212 | **Example Queries**
213 | #### City from where immigrants arrived
214 | ```
215 | SELECT TOP 10 b.port_city, b.port_state_or_country, COUNT(cicid) AS count
216 | FROM project.immigration a INNER JOIN project.i94ports b ON a.i94port=b.port_code
217 | GROUP BY b.port_city, b.port_state_or_country
218 | ORDER BY COUNT(cicid) DESC
219 | ```
220 |
221 | #### Different kinds of airports
222 | ```
223 | SELECT top 10 distinct type, count(*) AS count_type
224 | FROM project.airport_codes
225 | WHERE iso_country = 'US'
226 | GROUP BY type
227 | ORDER BY count_type DESC
228 | ```
229 |
230 | #### Immigrants from different countries
231 | ```
232 | SELECT top 10 SUBSTRING(b.country_name, 0, 15) as country_name, COUNT(cicid) as count
233 | FROM project.immigration a INNER JOIN project.i94res b ON a.i94res=b.country_code
234 | GROUP BY b.country_name
235 | ORDER BY COUNT(cicid) DESC
236 | ```
237 |
238 | #### Small airports from different states
239 | ```
240 | SELECT a.state_name AS State, airports.count AS Count_of_Airports
241 | FROM
242 | (SELECT top 10 distinct substring(iso_region, 4, length(iso_region)) AS state, count(*)
243 | FROM project.airport_codes
244 | WHERE iso_country = 'US' AND type='small_airport'
245 | GROUP BY iso_region) airports INNER JOIN project.i94addr a ON airports.state=a.state_code
246 | ORDER BY airports.count DESC
247 | ```
248 |
249 | #### Small airport locations
250 | ```
251 | SELECT a.longitude_deg, a.latitude_deg
252 | FROM project.airport_codes a
253 | WHERE a.iso_country = 'US' AND a.type = 'small_airport'
254 | ```
255 | ### Stats and Graphs
256 |
257 | ---
258 | #### City from where immigrants arrived
259 | 
260 |
261 | #### Different kinds of airports
262 | 
263 |
264 | #### Immigrants from different countries
265 | 
266 |
267 | #### Small airports from different states
268 | 
269 |
270 | #### Small airports locations in different states
271 | 
272 |
273 | Scoping the Project
274 | ---
275 |
276 | The purpose is to produce interesting stats from the US immigration
277 | data, airports around the world, and different dimensions such as visa
278 | type, transport mode, nationality etc.
279 |
280 | ### Steps Taken:
281 | The steps taken are in the following order:
282 | **Gather the data**:
283 | This took a while as different kinds of formats were chosen, I
284 | needed to fix my mindset on which data I will actually use in
285 | future for my analysis and queries. I fixated on .sas7bdat
286 | formatted immigration data which fulfills the minimum number of
287 | rows requirements, the cleaned airport data for dimensions and
288 | SAS descriptor file for fulfilling the different kind of formats
289 | to be chosen for the project
290 | **Study the data**:
291 | This took a while as I needed to understand what kind of
292 | pre-processing I would use to clean the individual datasets
293 | mentioned above. Dropping rows on a condition, filtering rows
294 | according to other dimensions and facts etc.
295 | **Choice of infrastructure**:
296 | After studying the data I decided upon certain tools and
297 | technologies, to the point where I am comfortable; I made use of
298 | maximum number of skills that I think I learnt throughout the
299 | process.
300 | **Implementation and Testing**:
301 | Once my pipeline started running, I did all kinds of quality
302 | checks to ensure that data is processed correctly and provided a
303 | Jupyter notebook to test the project.
304 |
305 | ### Purpose of Final Data Model:
306 | Gather interesting insights like demographic population based on certain
307 | dimensions based upon some filter conditions.
308 | e.g.
309 | - Compare immigration of different nationalities
310 | - Compare number of airports by state
311 | - Different kinds of airport statistics
312 | - Aggregate flow of immigrants through different cities
313 |
314 | So I am using the airport codes, US immigration data of '94 and
315 | dimensions such as visa type, mode of transport, nationality codes, US
316 | state code information
317 |
318 |
319 | Addressing other scenarios
320 | ---
321 |
322 | ### Data Increased by 100x:
323 | - I am using columnar format of redshift, so querying will not be slower
324 | - Incremental updates are supported, so the full data set does not have
325 | to be re-inserted every time. Whoever wants to insert immigration data
326 | into the database can just drop their sas7bdat files into the temp_input
327 | folder
328 | - Spark is used where heavy data is read and parsed, so distributed
329 | processing is also involved
330 | - Spark memory and processors are configurable to handle more pressure
331 | - S3 storage is used which is scalable and easily accessible with other
332 | AWS infrastructure
333 |
334 |
335 | ### The pipelines would be run on a daily basis by 7 am every day:
336 | - The pipeline is scheduled as per requirements
337 |
338 | ### The database needed to be accessed by 100+ people:
339 | - People are granted usage on schema, so not everyone but people who
340 | have access to the data can use it as necessary. Below are the
341 | necessary commands to run in the Redshift query editor; because they are
342 | run there, adding them as a task in the pipeline is purely optional:
343 |
344 | We can create a group of users, called _webappusers_, who will use
345 | the functionality of the schema but cannot take admin decisions and
346 | we can add individual users with their name and init password.
347 |
348 | ```bash
349 | create group webappusers;
350 | create user webappuser1 password 'webAppuser1pass' in group webappusers;
351 | grant usage on schema project to group webappusers;
352 | ```
353 |
354 | We can create a group of users called __webdevusers__, who will have
355 | admin privileges on the schema, we can add those individual users with
356 | their name and init password
357 | ```
358 | create group webdevusers;
359 | create user webappdevuser1 password 'webAppdev1pass' in group webdevusers;
360 | grant all on schema webapp to group webdevusers;
361 | ```
362 |
363 | Defending Decisions
364 | ---
365 |
366 | ### The choice of tools, technologies:
367 | - Airflow to view, monitor and log flow of information: Extremely useful tool to control end to end ETL processing
368 | - S3 Storage to store data on a large scale: Never complain about storage and most importantly when it stores big data
369 | - Redshift to make advantage of columnar format and faster querying strategies: Query from anywhere and anytime
370 | - Spark for distributed processing of heavy data: Best in-memory faster processing
371 | - Pandas for cleaning data frames: absolutely necessary
372 |
373 | ### Links for Airflow
374 |
375 | ---
376 | **Context Variables**
377 | https://airflow.apache.org/macros.html
378 |
379 | **Hacks for airflow**
380 | https://medium.com/datareply/airflow-lesser-known-tips-tricks-and-best-practises-cf4d4a90f8f
381 | https://medium.com/handy-tech/airflow-tips-tricks-and-pitfalls-9ba53fba14eb
382 | https://www.astronomer.io/guides/dag-best-practices/
383 |
384 | ### Technologies Used
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
--------------------------------------------------------------------------------
/airflow/dags/subdags/subdag_for_dimensions.py:
--------------------------------------------------------------------------------
1 | from airflow import DAG
2 | from airflow.operators import StageToRedshiftOperator
3 | from helpers import SqlQueries
4 | from airflow.models import Variable
5 | from airflow.hooks.postgres_hook import PostgresHook
6 | from airflow.operators.python_operator import PythonOperator
7 | import logging
8 | import boto3
9 | from airflow.contrib.hooks.aws_hook import AwsHook
10 |
11 |
def load_dimension_subdag(
        parent_dag_name,
        task_id,
        redshift_conn_id,
        *args, **kwargs):
    """
    Build the sub-DAG that copies staged files from S3 into Redshift.

    One ``StageToRedshiftOperator`` task is created per CSV file and one
    ``PythonOperator`` task copies the immigration parquet data. No
    dependencies are declared between the tasks.

    :param parent_dag_name: first half of the sub-DAG id
        (``{parent_dag_name}.{task_id}``)
    :param task_id: second half of the sub-DAG id
    :param redshift_conn_id: NOTE(review): accepted but currently unused,
        every task below uses the literal connection id ``"redshift"``;
        confirm what callers pass before wiring it through
    :param args: accepted for call compatibility, not used
    :param kwargs: forwarded unchanged to the ``DAG`` constructor
    :return: the populated ``DAG``
    """
    dag = DAG(
        f"{parent_dag_name}.{task_id}",
        **kwargs
    )

    # Read the bucket name once instead of once per task definition.
    s3_bucket = Variable.get("s3_bucket")

    # One row per CSV file: (task_id, file, delimiter, target table).
    csv_dimensions = (
        ('copy_ports', 'i94port.csv', ',', 'i94ports'),
        ('copy_visa', 'i94visa.csv', ',', 'i94visa'),
        ('copy_modes', 'i94mode.csv', ',', 'i94mode'),
        ('copy_addr', 'i94addr.csv', ',', 'i94addr'),
        ('copy_country_codes', 'i94cit&i94res.csv', ',', 'i94res'),
        ('copy_cities_demographics', 'us-cities-demographics.csv', ';',
         'us_cities_demographics'),
        ('copy_airports', 'airport-codes_csv.csv', ',', 'airport_codes'),
    )
    for copy_task_id, file_name, delimiter, table in csv_dimensions:
        # Passing dag= registers the task; no local reference is needed.
        StageToRedshiftOperator(
            task_id=copy_task_id,
            dag=dag,
            redshift_conn_id="redshift",
            aws_credentials_id="aws_default",
            file=file_name,
            delimiter=delimiter,
            table=table,
            s3_bucket=s3_bucket,
            s3_key="csv",
            sql_stmt=SqlQueries.copy_csv_cmd,
            provide_context=True)

    def parquet_to_redshift(table, s3_bucket, s3_key, iam_role,
                            sql_stmt, redshift_conn_id, **kwargs):
        """
        Run the parquet COPY statement on Redshift, then delete the
        objects under the copied prefix from S3.

        :param table: first positional value formatted into ``sql_stmt``
        :param s3_bucket: bucket holding the parquet objects
        :param s3_key: key prefix of the parquet objects
        :param iam_role: third positional value formatted into ``sql_stmt``
        :param sql_stmt: format string taking (table, s3 path, iam role)
        :param redshift_conn_id: Airflow connection id for ``PostgresHook``
        :param kwargs: Airflow context (``provide_context=True``), unused
        :return: None
        """
        redshift = PostgresHook(postgres_conn_id=redshift_conn_id)
        logging.info("Copying data from S3 to Redshift")
        s3_path = "s3://{}/{}".format(s3_bucket, s3_key)
        redshift.run(sql_stmt.format(table, s3_path, iam_role))

        credentials = AwsHook("aws_default").get_credentials()
        client = boto3.client(
            's3',
            aws_access_key_id=credentials.access_key,
            aws_secret_access_key=credentials.secret_key)

        # Clean up exactly the location that was just copied. A single
        # list_objects call returns at most 1000 keys, so page through the
        # listing; each page also fits the 1000-key limit of one
        # delete_objects request.
        paginator = client.get_paginator('list_objects')
        for page in paginator.paginate(Bucket=s3_bucket, Prefix=s3_key):
            keys = [{'Key': obj['Key']}
                    for obj in page.get('Contents', [])]
            # S3 rejects a delete request with an empty object list.
            if keys:
                client.delete_objects(Bucket=s3_bucket,
                                      Delete={'Objects': keys})

    PythonOperator(
        task_id='copy_immigration',
        python_callable=parquet_to_redshift,
        provide_context=True,
        op_kwargs={'table': "immigration",
                   's3_bucket': s3_bucket,
                   's3_key': 'parquet',
                   'iam_role': Variable.get('iam_role'),
                   'sql_stmt': SqlQueries.copy_parquet_cmd,
                   'redshift_conn_id': 'redshift'},
        dag=dag
    )

    return dag
--------------------------------------------------------------------------------
/airflow/dags/temp_input/I94_SAS_Labels_Descriptions.SAS:
--------------------------------------------------------------------------------
1 | libname library 'Your file location' ;
2 | proc format library=library ;
3 |
4 | /* I94YR - 4 digit year */
5 |
6 | /* I94MON - Numeric month */
7 |
8 | /* I94CIT & I94RES - This format shows all the valid and invalid codes for processing */
9 | value i94cntyl
10 | 582 = 'MEXICO Air Sea, and Not Reported (I-94, no land arrivals)'
11 | 236 = 'AFGHANISTAN'
12 | 101 = 'ALBANIA'
13 | 316 = 'ALGERIA'
14 | 102 = 'ANDORRA'
15 | 324 = 'ANGOLA'
16 | 529 = 'ANGUILLA'
17 | 518 = 'ANTIGUA-BARBUDA'
18 | 687 = 'ARGENTINA '
19 | 151 = 'ARMENIA'
20 | 532 = 'ARUBA'
21 | 438 = 'AUSTRALIA'
22 | 103 = 'AUSTRIA'
23 | 152 = 'AZERBAIJAN'
24 | 512 = 'BAHAMAS'
25 | 298 = 'BAHRAIN'
26 | 274 = 'BANGLADESH'
27 | 513 = 'BARBADOS'
28 | 104 = 'BELGIUM'
29 | 581 = 'BELIZE'
30 | 386 = 'BENIN'
31 | 509 = 'BERMUDA'
32 | 153 = 'BELARUS'
33 | 242 = 'BHUTAN'
34 | 688 = 'BOLIVIA'
35 | 717 = 'BONAIRE, ST EUSTATIUS, SABA'
36 | 164 = 'BOSNIA-HERZEGOVINA'
37 | 336 = 'BOTSWANA'
38 | 689 = 'BRAZIL'
39 | 525 = 'BRITISH VIRGIN ISLANDS'
40 | 217 = 'BRUNEI'
41 | 105 = 'BULGARIA'
42 | 393 = 'BURKINA FASO'
43 | 243 = 'BURMA'
44 | 375 = 'BURUNDI'
45 | 310 = 'CAMEROON'
46 | 326 = 'CAPE VERDE'
47 | 526 = 'CAYMAN ISLANDS'
48 | 383 = 'CENTRAL AFRICAN REPUBLIC'
49 | 384 = 'CHAD'
50 | 690 = 'CHILE'
51 | 245 = 'CHINA, PRC'
52 | 721 = 'CURACAO'
53 | 270 = 'CHRISTMAS ISLAND'
54 | 271 = 'COCOS ISLANDS'
55 | 691 = 'COLOMBIA'
56 | 317 = 'COMOROS'
57 | 385 = 'CONGO'
58 | 467 = 'COOK ISLANDS'
59 | 575 = 'COSTA RICA'
60 | 165 = 'CROATIA'
61 | 584 = 'CUBA'
62 | 218 = 'CYPRUS'
63 | 140 = 'CZECH REPUBLIC'
64 | 723 = 'FAROE ISLANDS (PART OF DENMARK)'
65 | 108 = 'DENMARK'
66 | 322 = 'DJIBOUTI'
67 | 519 = 'DOMINICA'
68 | 585 = 'DOMINICAN REPUBLIC'
69 | 240 = 'EAST TIMOR'
70 | 692 = 'ECUADOR'
71 | 368 = 'EGYPT'
72 | 576 = 'EL SALVADOR'
73 | 399 = 'EQUATORIAL GUINEA'
74 | 372 = 'ERITREA'
75 | 109 = 'ESTONIA'
76 | 369 = 'ETHIOPIA'
77 | 604 = 'FALKLAND ISLANDS'
78 | 413 = 'FIJI'
79 | 110 = 'FINLAND'
80 | 111 = 'FRANCE'
81 | 601 = 'FRENCH GUIANA'
82 | 411 = 'FRENCH POLYNESIA'
83 | 387 = 'GABON'
84 | 338 = 'GAMBIA'
85 | 758 = 'GAZA STRIP'
86 | 154 = 'GEORGIA'
87 | 112 = 'GERMANY'
88 | 339 = 'GHANA'
89 | 143 = 'GIBRALTAR'
90 | 113 = 'GREECE'
91 | 520 = 'GRENADA'
92 | 507 = 'GUADELOUPE'
93 | 577 = 'GUATEMALA'
94 | 382 = 'GUINEA'
95 | 327 = 'GUINEA-BISSAU'
96 | 603 = 'GUYANA'
97 | 586 = 'HAITI'
98 | 726 = 'HEARD AND MCDONALD IS.'
99 | 149 = 'HOLY SEE/VATICAN'
100 | 528 = 'HONDURAS'
101 | 206 = 'HONG KONG'
102 | 114 = 'HUNGARY'
103 | 115 = 'ICELAND'
104 | 213 = 'INDIA'
105 | 759 = 'INDIAN OCEAN AREAS (FRENCH)'
106 | 729 = 'INDIAN OCEAN TERRITORY'
107 | 204 = 'INDONESIA'
108 | 249 = 'IRAN'
109 | 250 = 'IRAQ'
110 | 116 = 'IRELAND'
111 | 251 = 'ISRAEL'
112 | 117 = 'ITALY'
113 | 388 = 'IVORY COAST'
114 | 514 = 'JAMAICA'
115 | 209 = 'JAPAN'
116 | 253 = 'JORDAN'
117 | 201 = 'KAMPUCHEA'
118 | 155 = 'KAZAKHSTAN'
119 | 340 = 'KENYA'
120 | 414 = 'KIRIBATI'
121 | 732 = 'KOSOVO'
122 | 272 = 'KUWAIT'
123 | 156 = 'KYRGYZSTAN'
124 | 203 = 'LAOS'
125 | 118 = 'LATVIA'
126 | 255 = 'LEBANON'
127 | 335 = 'LESOTHO'
128 | 370 = 'LIBERIA'
129 | 381 = 'LIBYA'
130 | 119 = 'LIECHTENSTEIN'
131 | 120 = 'LITHUANIA'
132 | 121 = 'LUXEMBOURG'
133 | 214 = 'MACAU'
134 | 167 = 'MACEDONIA'
135 | 320 = 'MADAGASCAR'
136 | 345 = 'MALAWI'
137 | 273 = 'MALAYSIA'
138 | 220 = 'MALDIVES'
139 | 392 = 'MALI'
140 | 145 = 'MALTA'
141 | 472 = 'MARSHALL ISLANDS'
142 | 511 = 'MARTINIQUE'
143 | 389 = 'MAURITANIA'
144 | 342 = 'MAURITIUS'
145 | 760 = 'MAYOTTE (AFRICA - FRENCH)'
146 | 473 = 'MICRONESIA, FED. STATES OF'
147 | 157 = 'MOLDOVA'
148 | 122 = 'MONACO'
149 | 299 = 'MONGOLIA'
150 | 735 = 'MONTENEGRO'
151 | 521 = 'MONTSERRAT'
152 | 332 = 'MOROCCO'
153 | 329 = 'MOZAMBIQUE'
154 | 371 = 'NAMIBIA'
155 | 440 = 'NAURU'
156 | 257 = 'NEPAL'
157 | 123 = 'NETHERLANDS'
158 | 508 = 'NETHERLANDS ANTILLES'
159 | 409 = 'NEW CALEDONIA'
160 | 464 = 'NEW ZEALAND'
161 | 579 = 'NICARAGUA'
162 | 390 = 'NIGER'
163 | 343 = 'NIGERIA'
164 | 470 = 'NIUE'
165 | 275 = 'NORTH KOREA'
166 | 124 = 'NORWAY'
167 | 256 = 'OMAN'
168 | 258 = 'PAKISTAN'
169 | 474 = 'PALAU'
170 | 743 = 'PALESTINE'
171 | 504 = 'PANAMA'
172 | 441 = 'PAPUA NEW GUINEA'
173 | 693 = 'PARAGUAY'
174 | 694 = 'PERU'
175 | 260 = 'PHILIPPINES'
176 | 416 = 'PITCAIRN ISLANDS'
177 | 107 = 'POLAND'
178 | 126 = 'PORTUGAL'
179 | 297 = 'QATAR'
180 | 748 = 'REPUBLIC OF SOUTH SUDAN'
181 | 321 = 'REUNION'
182 | 127 = 'ROMANIA'
183 | 158 = 'RUSSIA'
184 | 376 = 'RWANDA'
185 | 128 = 'SAN MARINO'
186 | 330 = 'SAO TOME AND PRINCIPE'
187 | 261 = 'SAUDI ARABIA'
188 | 391 = 'SENEGAL'
189 | 142 = 'SERBIA AND MONTENEGRO'
190 | 745 = 'SERBIA'
191 | 347 = 'SEYCHELLES'
192 | 348 = 'SIERRA LEONE'
193 | 207 = 'SINGAPORE'
194 | 141 = 'SLOVAKIA'
195 | 166 = 'SLOVENIA'
196 | 412 = 'SOLOMON ISLANDS'
197 | 397 = 'SOMALIA'
198 | 373 = 'SOUTH AFRICA'
199 | 276 = 'SOUTH KOREA'
200 | 129 = 'SPAIN'
201 | 244 = 'SRI LANKA'
202 | 346 = 'ST. HELENA'
203 | 522 = 'ST. KITTS-NEVIS'
204 | 523 = 'ST. LUCIA'
205 | 502 = 'ST. PIERRE AND MIQUELON'
206 | 524 = 'ST. VINCENT-GRENADINES'
207 | 716 = 'SAINT BARTHELEMY'
208 | 736 = 'SAINT MARTIN'
209 | 749 = 'SAINT MAARTEN'
210 | 350 = 'SUDAN'
211 | 602 = 'SURINAME'
212 | 351 = 'SWAZILAND'
213 | 130 = 'SWEDEN'
214 | 131 = 'SWITZERLAND'
215 | 262 = 'SYRIA'
216 | 268 = 'TAIWAN'
217 | 159 = 'TAJIKISTAN'
218 | 353 = 'TANZANIA'
219 | 263 = 'THAILAND'
220 | 304 = 'TOGO'
221 | 417 = 'TONGA'
222 | 516 = 'TRINIDAD AND TOBAGO'
223 | 323 = 'TUNISIA'
224 | 264 = 'TURKEY'
225 | 161 = 'TURKMENISTAN'
226 | 527 = 'TURKS AND CAICOS ISLANDS'
227 | 420 = 'TUVALU'
228 | 352 = 'UGANDA'
229 | 162 = 'UKRAINE'
230 | 296 = 'UNITED ARAB EMIRATES'
231 | 135 = 'UNITED KINGDOM'
232 | 695 = 'URUGUAY'
233 | 163 = 'UZBEKISTAN'
234 | 410 = 'VANUATU'
235 | 696 = 'VENEZUELA'
236 | 266 = 'VIETNAM'
237 | 469 = 'WALLIS AND FUTUNA ISLANDS'
238 | 757 = 'WEST INDIES (FRENCH)'
239 | 333 = 'WESTERN SAHARA'
240 | 465 = 'WESTERN SAMOA'
241 | 216 = 'YEMEN'
242 | 139 = 'YUGOSLAVIA'
243 | 301 = 'ZAIRE'
244 | 344 = 'ZAMBIA'
245 | 315 = 'ZIMBABWE'
246 | 403 = 'INVALID: AMERICAN SAMOA'
247 | 712 = 'INVALID: ANTARCTICA'
248 | 700 = 'INVALID: BORN ON BOARD SHIP'
249 | 719 = 'INVALID: BOUVET ISLAND (ANTARCTICA/NORWAY TERR.)'
250 | 574 = 'INVALID: CANADA'
251 | 720 = 'INVALID: CANTON AND ENDERBURY ISLS'
252 | 106 = 'INVALID: CZECHOSLOVAKIA'
253 | 739 = 'INVALID: DRONNING MAUD LAND (ANTARCTICA-NORWAY)'
254 | 394 = 'INVALID: FRENCH SOUTHERN AND ANTARCTIC'
255 | 501 = 'INVALID: GREENLAND'
256 | 404 = 'INVALID: GUAM'
257 | 730 = 'INVALID: INTERNATIONAL WATERS'
258 | 731 = 'INVALID: JOHNSON ISLAND'
259 | 471 = 'INVALID: MARIANA ISLANDS, NORTHERN'
260 | 737 = 'INVALID: MIDWAY ISLANDS'
261 | 753 = 'INVALID: MINOR OUTLYING ISLANDS - USA'
262 | 740 = 'INVALID: NEUTRAL ZONE (S. ARABIA/IRAQ)'
263 | 710 = 'INVALID: NON-QUOTA IMMIGRANT'
264 | 505 = 'INVALID: PUERTO RICO'
265 | 0 = 'INVALID: STATELESS'
266 | 705 = 'INVALID: STATELESS'
267 | 583 = 'INVALID: UNITED STATES'
268 | 407 = 'INVALID: UNITED STATES'
269 | 999 = 'INVALID: UNKNOWN'
270 | 239 = 'INVALID: UNKNOWN COUNTRY'
271 | 134 = 'INVALID: USSR'
272 | 506 = 'INVALID: U.S. VIRGIN ISLANDS'
273 | 755 = 'INVALID: WAKE ISLAND'
274 | 311 = 'Collapsed Tanzania (should not show)'
275 | 741 = 'Collapsed Curacao (should not show)'
276 | 54 = 'No Country Code (54)'
277 | 100 = 'No Country Code (100)'
278 | 187 = 'No Country Code (187)'
279 | 190 = 'No Country Code (190)'
280 | 200 = 'No Country Code (200)'
281 | 219 = 'No Country Code (219)'
282 | 238 = 'No Country Code (238)'
283 | 277 = 'No Country Code (277)'
284 | 293 = 'No Country Code (293)'
285 | 300 = 'No Country Code (300)'
286 | 319 = 'No Country Code (319)'
287 | 365 = 'No Country Code (365)'
288 | 395 = 'No Country Code (395)'
289 | 400 = 'No Country Code (400)'
290 | 485 = 'No Country Code (485)'
291 | 503 = 'No Country Code (503)'
292 | 589 = 'No Country Code (589)'
293 | 592 = 'No Country Code (592)'
294 | 791 = 'No Country Code (791)'
295 | 849 = 'No Country Code (849)'
296 | 914 = 'No Country Code (914)'
297 | 944 = 'No Country Code (944)'
298 | 996 = 'No Country Code (996)' ;
299 |
300 |
301 | /* I94PORT - This format shows all the valid and invalid codes for processing */
302 | value $i94prtl
303 | 'ALC' = 'ALCAN, AK '
304 | 'ANC' = 'ANCHORAGE, AK '
305 | 'BAR' = 'BAKER AAF - BAKER ISLAND, AK'
306 | 'DAC' = 'DALTONS CACHE, AK '
307 | 'PIZ' = 'DEW STATION PT LAY DEW, AK'
308 | 'DTH' = 'DUTCH HARBOR, AK '
309 | 'EGL' = 'EAGLE, AK '
310 | 'FRB' = 'FAIRBANKS, AK '
311 | 'HOM' = 'HOMER, AK '
312 | 'HYD' = 'HYDER, AK '
313 | 'JUN' = 'JUNEAU, AK '
314 | '5KE' = 'KETCHIKAN, AK'
315 | 'KET' = 'KETCHIKAN, AK '
316 | 'MOS' = 'MOSES POINT INTERMEDIATE, AK'
317 | 'NIK' = 'NIKISKI, AK '
318 | 'NOM' = 'NOM, AK '
319 | 'PKC' = 'POKER CREEK, AK '
320 | 'ORI' = 'PORT LIONS SPB, AK'
321 | 'SKA' = 'SKAGWAY, AK '
322 | 'SNP' = 'ST. PAUL ISLAND, AK'
323 | 'TKI' = 'TOKEEN, AK'
324 | 'WRA' = 'WRANGELL, AK '
325 | 'HSV' = 'MADISON COUNTY - HUNTSVILLE, AL'
326 | 'MOB' = 'MOBILE, AL '
327 | 'LIA' = 'LITTLE ROCK, AR (BPS)'
328 | 'ROG' = 'ROGERS ARPT, AR'
329 | 'DOU' = 'DOUGLAS, AZ '
330 | 'LUK' = 'LUKEVILLE, AZ '
331 | 'MAP' = 'MARIPOSA AZ '
332 | 'NAC' = 'NACO, AZ '
333 | 'NOG' = 'NOGALES, AZ '
334 | 'PHO' = 'PHOENIX, AZ '
335 | 'POR' = 'PORTAL, AZ'
336 | 'SLU' = 'SAN LUIS, AZ '
337 | 'SAS' = 'SASABE, AZ '
338 | 'TUC' = 'TUCSON, AZ '
339 | 'YUI' = 'YUMA, AZ '
340 | 'AND' = 'ANDRADE, CA '
341 | 'BUR' = 'BURBANK, CA'
342 | 'CAL' = 'CALEXICO, CA '
343 | 'CAO' = 'CAMPO, CA '
344 | 'FRE' = 'FRESNO, CA '
345 | 'ICP' = 'IMPERIAL COUNTY, CA '
346 | 'LNB' = 'LONG BEACH, CA '
347 | 'LOS' = 'LOS ANGELES, CA '
348 | 'BFL' = 'MEADOWS FIELD - BAKERSFIELD, CA'
349 | 'OAK' = 'OAKLAND, CA '
350 | 'ONT' = 'ONTARIO, CA'
351 | 'OTM' = 'OTAY MESA, CA '
352 | 'BLT' = 'PACIFIC, HWY. STATION, CA '
353 | 'PSP' = 'PALM SPRINGS, CA'
354 | 'SAC' = 'SACRAMENTO, CA '
355 | 'SLS' = 'SALINAS, CA (BPS)'
356 | 'SDP' = 'SAN DIEGO, CA'
357 | 'SFR' = 'SAN FRANCISCO, CA '
358 | 'SNJ' = 'SAN JOSE, CA '
359 | 'SLO' = 'SAN LUIS OBISPO, CA '
360 | 'SLI' = 'SAN LUIS OBISPO, CA (BPS)'
361 | 'SPC' = 'SAN PEDRO, CA '
362 | 'SYS' = 'SAN YSIDRO, CA '
363 | 'SAA' = 'SANTA ANA, CA '
364 | 'STO' = 'STOCKTON, CA (BPS)'
365 | 'TEC' = 'TECATE, CA '
366 | 'TRV' = 'TRAVIS-AFB, CA '
367 | 'APA' = 'ARAPAHOE COUNTY, CO'
368 | 'ASE' = 'ASPEN, CO #ARPT'
369 | 'COS' = 'COLORADO SPRINGS, CO'
370 | 'DEN' = 'DENVER, CO '
371 | 'DRO' = 'LA PLATA - DURANGO, CO'
372 | 'BDL' = 'BRADLEY INTERNATIONAL, CT'
373 | 'BGC' = 'BRIDGEPORT, CT '
374 | 'GRT' = 'GROTON, CT '
375 | 'HAR' = 'HARTFORD, CT '
376 | 'NWH' = 'NEW HAVEN, CT '
377 | 'NWL' = 'NEW LONDON, CT '
378 | 'TST' = 'NEWINGTON DATA CENTER TEST, CT'
379 | 'WAS' = 'WASHINGTON DC '
380 | 'DOV' = 'DOVER AFB, DE'
381 | 'DVD' = 'DOVER-AFB, DE '
382 | 'WLL' = 'WILMINGTON, DE '
383 | 'BOC' = 'BOCAGRANDE, FL '
384 | 'SRQ' = 'BRADENTON - SARASOTA, FL'
385 | 'CAN' = 'CAPE CANAVERAL, FL '
386 | 'DAB' = 'DAYTONA BEACH INTERNATIONAL, FL'
387 | 'FRN' = 'FERNANDINA, FL '
388 | 'FTL' = 'FORT LAUDERDALE, FL '
389 | 'FMY' = 'FORT MYERS, FL '
390 | 'FPF' = 'FORT PIERCE, FL '
391 | 'HUR' = 'HURLBURT FIELD, FL'
392 | 'GNV' = 'J R ALISON MUNI - GAINESVILLE, FL'
393 | 'JAC' = 'JACKSONVILLE, FL '
394 | 'KEY' = 'KEY WEST, FL '
395 | 'LEE' = 'LEESBURG MUNICIPAL AIRPORT, FL'
396 | 'MLB' = 'MELBOURNE, FL'
397 | 'MIA' = 'MIAMI, FL '
398 | 'APF' = 'NAPLES, FL #ARPT'
399 | 'OPF' = 'OPA LOCKA, FL'
400 | 'ORL' = 'ORLANDO, FL '
401 | 'PAN' = 'PANAMA CITY, FL '
402 | 'PEN' = 'PENSACOLA, FL '
403 | 'PCF' = 'PORT CANAVERAL, FL '
404 | 'PEV' = 'PORT EVERGLADES, FL '
405 | 'PSJ' = 'PORT ST JOE, FL '
406 | 'SFB' = 'SANFORD, FL '
407 | 'SGJ' = 'ST AUGUSTINE ARPT, FL'
408 | 'SAU' = 'ST AUGUSTINE, FL '
409 | 'FPR' = 'ST LUCIE COUNTY, FL'
410 | 'SPE' = 'ST PETERSBURG, FL '
411 | 'TAM' = 'TAMPA, FL '
412 | 'WPB' = 'WEST PALM BEACH, FL '
413 | 'ATL' = 'ATLANTA, GA '
414 | 'BRU' = 'BRUNSWICK, GA '
415 | 'AGS' = 'BUSH FIELD - AUGUSTA, GA'
416 | 'SAV' = 'SAVANNAH, GA '
417 | 'AGA' = 'AGANA, GU '
418 | 'HHW' = 'HONOLULU, HI '
419 | 'OGG' = 'KAHULUI - MAUI, HI'
420 | 'KOA' = 'KEAHOLE-KONA, HI '
421 | 'LIH' = 'LIHUE, HI '
422 | 'CID' = 'CEDAR RAPIDS/IOWA CITY, IA'
423 | 'DSM' = 'DES MOINES, IA'
424 | 'BOI' = 'AIR TERM. (GOWEN FLD) BOISE, ID'
425 | 'EPI' = 'EASTPORT, ID '
426 | 'IDA' = 'FANNING FIELD - IDAHO FALLS, ID'
427 | 'PTL' = 'PORTHILL, ID '
428 | 'SPI' = 'CAPITAL - SPRINGFIELD, IL'
429 | 'CHI' = 'CHICAGO, IL '
430 | 'DPA' = 'DUPAGE COUNTY, IL'
431 | 'PIA' = 'GREATER PEORIA, IL'
432 | 'RFD' = 'GREATER ROCKFORD, IL'
433 | 'UGN' = 'MEMORIAL - WAUKEGAN, IL'
434 | 'GAR' = 'GARY, IN '
435 | 'HMM' = 'HAMMOND, IN '
436 | 'INP' = 'INDIANAPOLIS, IN '
437 | 'MRL' = 'MERRILLVILLE, IN '
438 | 'SBN' = 'SOUTH BEND, IN'
439 | 'ICT' = 'MID-CONTINENT - WITCHITA, KS'
440 | 'LEX' = 'BLUE GRASS - LEXINGTON, KY'
441 | 'LOU' = 'LOUISVILLE, KY '
442 | 'BTN' = 'BATON ROUGE, LA '
443 | 'LKC' = 'LAKE CHARLES, LA '
444 | 'LAK' = 'LAKE CHARLES, LA (BPS)'
445 | 'MLU' = 'MONROE, LA'
446 | 'MGC' = 'MORGAN CITY, LA '
447 | 'NOL' = 'NEW ORLEANS, LA '
448 | 'BOS' = 'BOSTON, MA '
449 | 'GLO' = 'GLOUCESTER, MA '
450 | 'BED' = 'HANSCOM FIELD - BEDFORD, MA'
451 | 'LYN' = 'LYNDEN, WA '
452 | 'ADW' = 'ANDREWS AFB, MD'
453 | 'BAL' = 'BALTIMORE, MD '
454 | 'MKG' = 'MUSKEGON, MD'
455 | 'PAX' = 'PATUXENT RIVER, MD '
456 | 'BGM' = 'BANGOR, ME '
457 | 'BOO' = 'BOOTHBAY HARBOR, ME '
458 | 'BWM' = 'BRIDGEWATER, ME '
459 | 'BCK' = 'BUCKPORT, ME '
460 | 'CLS' = 'CALAIS, ME '
461 | 'CRB' = 'CARIBOU, ME '
462 | 'COB' = 'COBURN GORE, ME '
463 | 'EST' = 'EASTCOURT, ME '
464 | 'EPT' = 'EASTPORT MUNICIPAL, ME'
465 | 'EPM' = 'EASTPORT, ME '
466 | 'FOR' = 'FOREST CITY, ME '
467 | 'FTF' = 'FORT FAIRFIELD, ME '
468 | 'FTK' = 'FORT KENT, ME '
469 | 'HML' = 'HAMIIN, ME '
470 | 'HTM' = 'HOULTON, ME '
471 | 'JKM' = 'JACKMAN, ME '
472 | 'KAL' = 'KALISPEL, MT '
473 | 'LIM' = 'LIMESTONE, ME '
474 | 'LUB' = 'LUBEC, ME '
475 | 'MAD' = 'MADAWASKA, ME '
476 | 'POM' = 'PORTLAND, ME '
477 | 'RGM' = 'RANGELEY, ME (BPS)'
478 | 'SBR' = 'SOUTH BREWER, ME '
479 | 'SRL' = 'ST AURELIE, ME '
480 | 'SPA' = 'ST PAMPILE, ME '
481 | 'VNB' = 'VAN BUREN, ME '
482 | 'VCB' = 'VANCEBORO, ME '
483 | 'AGN' = 'ALGONAC, MI '
484 | 'ALP' = 'ALPENA, MI '
485 | 'BCY' = 'BAY CITY, MI '
486 | 'DET' = 'DETROIT, MI '
487 | 'GRP' = 'GRAND RAPIDS, MI'
488 | 'GRO' = 'GROSSE ISLE, MI '
489 | 'ISL' = 'ISLE ROYALE, MI '
490 | 'MRC' = 'MARINE CITY, MI '
491 | 'MRY' = 'MARYSVILLE, MI '
492 | 'PTK' = 'OAKLAND COUNTY - PONTIAC, MI'
493 | 'PHU' = 'PORT HURON, MI '
494 | 'RBT' = 'ROBERTS LANDING, MI '
495 | 'SAG' = 'SAGINAW, MI '
496 | 'SSM' = 'SAULT STE. MARIE, MI '
497 | 'SCL' = 'ST CLAIR, MI '
498 | 'YIP' = 'WILLOW RUN - YPSILANTI, MI'
499 | 'BAU' = 'BAUDETTE, MN '
500 | 'CAR' = 'CARIBOU MUNICIPAL AIRPORT, MN'
501 | 'GTF' = 'Collapsed into INT, MN'
502 | 'INL' = 'Collapsed into INT, MN'
503 | 'CRA' = 'CRANE LAKE, MN '
504 | 'MIC' = 'CRYSTAL MUNICIPAL AIRPORT, MN'
505 | 'DUL' = 'DULUTH, MN '
506 | 'ELY' = 'ELY, MN '
507 | 'GPM' = 'GRAND PORTAGE, MN '
508 | 'SVC' = 'GRANT COUNTY - SILVER CITY, MN'
509 | 'INT' = 'INT''L FALLS, MN '
510 | 'LAN' = 'LANCASTER, MN '
511 | 'MSP' = 'MINN./ST PAUL, MN '
512 | 'LIN' = 'NORTHERN SVC CENTER, MN '
513 | 'NOY' = 'NOYES, MN '
514 | 'PIN' = 'PINE CREEK, MN '
515 | '48Y' = 'PINECREEK BORDER ARPT, MN'
516 | 'RAN' = 'RAINER, MN '
517 | 'RST' = 'ROCHESTER, MN'
518 | 'ROS' = 'ROSEAU, MN '
519 | 'SPM' = 'ST PAUL, MN '
520 | 'WSB' = 'WARROAD INTL, SPB, MN'
521 | 'WAR' = 'WARROAD, MN '
522 | 'KAN' = 'KANSAS CITY, MO '
523 | 'SGF' = 'SPRINGFIELD-BRANSON, MO'
524 | 'STL' = 'ST LOUIS, MO '
525 | 'WHI' = 'WHITETAIL, MT '
526 | 'WHM' = 'WILD HORSE, MT '
527 | 'GPT' = 'BILOXI REGIONAL, MS'
528 | 'GTR' = 'GOLDEN TRIANGLE LOWNDES CNTY, MS'
529 | 'GUL' = 'GULFPORT, MS '
530 | 'PAS' = 'PASCAGOULA, MS '
531 | 'JAN' = 'THOMPSON FIELD - JACKSON, MS'
532 | 'BIL' = 'BILLINGS, MT '
533 | 'BTM' = 'BUTTE, MT '
534 | 'CHF' = 'CHIEF MT, MT '
535 | 'CTB' = 'CUT BANK MUNICIPAL, MT'
536 | 'CUT' = 'CUT BANK, MT '
537 | 'DLB' = 'DEL BONITA, MT '
538 | 'EUR' = 'EUREKA, MT (BPS)'
539 | 'BZN' = 'GALLATIN FIELD - BOZEMAN, MT'
540 | 'FCA' = 'GLACIER NATIONAL PARK, MT'
541 | 'GGW' = 'GLASGOW, MT '
542 | 'GRE' = 'GREAT FALLS, MT '
543 | 'HVR' = 'HAVRE, MT '
544 | 'HEL' = 'HELENA, MT '
545 | 'LWT' = 'LEWISTON, MT '
546 | 'MGM' = 'MORGAN, MT '
547 | 'OPH' = 'OPHEIM, MT '
548 | 'PIE' = 'PIEGAN, MT '
549 | 'RAY' = 'RAYMOND, MT '
550 | 'ROO' = 'ROOSVILLE, MT '
551 | 'SCO' = 'SCOBEY, MT '
552 | 'SWE' = 'SWEETGTASS, MT '
553 | 'TRL' = 'TRIAL CREEK, MT '
554 | 'TUR' = 'TURNER, MT '
555 | 'WCM' = 'WILLOW CREEK, MT '
556 | 'CLT' = 'CHARLOTTE, NC '
557 | 'FAY' = 'FAYETTEVILLE, NC'
558 | 'MRH' = 'MOREHEAD CITY, NC '
559 | 'FOP' = 'MORRIS FIELDS AAF, NC'
560 | 'GSO' = 'PIEDMONT TRIAD INTL AIRPORT, NC'
561 | 'RDU' = 'RALEIGH/DURHAM, NC '
562 | 'SSC' = 'SHAW AFB - SUMTER, NC'
563 | 'WIL' = 'WILMINGTON, NC '
564 | 'AMB' = 'AMBROSE, ND '
565 | 'ANT' = 'ANTLER, ND '
566 | 'CRY' = 'CARBURY, ND '
567 | 'DNS' = 'DUNSEITH, ND '
568 | 'FAR' = 'FARGO, ND '
569 | 'FRT' = 'FORTUNA, ND '
570 | 'GRF' = 'GRAND FORKS, ND '
571 | 'HNN' = 'HANNAH, ND '
572 | 'HNS' = 'HANSBORO, ND '
573 | 'MAI' = 'MAIDA, ND '
574 | 'MND' = 'MINOT, ND '
575 | 'NEC' = 'NECHE, ND '
576 | 'NOO' = 'NOONAN, ND '
577 | 'NRG' = 'NORTHGATE, ND '
578 | 'PEM' = 'PEMBINA, ND '
579 | 'SAR' = 'SARLES, ND '
580 | 'SHR' = 'SHERWOOD, ND '
581 | 'SJO' = 'ST JOHN, ND '
582 | 'WAL' = 'WALHALLA, ND '
583 | 'WHO' = 'WESTHOPE, ND '
584 | 'WND' = 'WILLISTON, ND '
585 | 'OMA' = 'OMAHA, NE '
586 | 'LEB' = 'LEBANON, NH '
587 | 'MHT' = 'MANCHESTER, NH'
588 | 'PNH' = 'PITTSBURG, NH '
589 | 'PSM' = 'PORTSMOUTH, NH '
590 | 'BYO' = 'BAYONNE, NJ '
591 | 'CNJ' = 'CAMDEN, NJ '
592 | 'HOB' = 'HOBOKEN, NJ '
593 | 'JER' = 'JERSEY CITY, NJ '
594 | 'WRI' = 'MC GUIRE AFB - WRIGHTSOWN, NJ'
595 | 'MMU' = 'MORRISTOWN, NJ'
596 | 'NEW' = 'NEWARK/TETERBORO, NJ '
597 | 'PER' = 'PERTH AMBOY, NJ '
598 | 'ACY' = 'POMONA FIELD - ATLANTIC CITY, NJ'
599 | 'ALA' = 'ALAMAGORDO, NM (BPS)'
600 | 'ABQ' = 'ALBUQUERQUE, NM '
601 | 'ANP' = 'ANTELOPE WELLS, NM '
602 | 'CRL' = 'CARLSBAD, NM '
603 | 'COL' = 'COLUMBUS, NM '
604 | 'CDD' = 'CRANE LAKE - ST. LOUIS CNTY, NM'
605 | 'DNM' = 'DEMING, NM (BPS)'
606 | 'LAS' = 'LAS CRUCES, NM '
607 | 'LOB' = 'LORDSBURG, NM (BPS)'
608 | 'RUI' = 'RUIDOSO, NM'
609 | 'STR' = 'SANTA TERESA, NM '
610 | 'RNO' = 'CANNON INTL - RENO/TAHOE, NV'
611 | 'FLX' = 'FALLON MUNICIPAL AIRPORT, NV'
612 | 'LVG' = 'LAS VEGAS, NV '
613 | 'REN' = 'RENO, NV '
614 | 'ALB' = 'ALBANY, NY '
615 | 'AXB' = 'ALEXANDRIA BAY, NY '
616 | 'BUF' = 'BUFFALO, NY '
617 | 'CNH' = 'CANNON CORNERS, NY'
618 | 'CAP' = 'CAPE VINCENT, NY '
619 | 'CHM' = 'CHAMPLAIN, NY '
620 | 'CHT' = 'CHATEAUGAY, NY '
621 | 'CLA' = 'CLAYTON, NY '
622 | 'FTC' = 'FORT COVINGTON, NY '
623 | 'LAG' = 'LA GUARDIA, NY '
624 | 'LEW' = 'LEWISTON, NY '
625 | 'MAS' = 'MASSENA, NY '
626 | 'MAG' = 'MCGUIRE AFB, NY '
627 | 'MOO' = 'MOORES, NY '
628 | 'MRR' = 'MORRISTOWN, NY '
629 | 'NYC' = 'NEW YORK, NY '
630 | 'NIA' = 'NIAGARA FALLS, NY '
631 | 'OGD' = 'OGDENSBURG, NY '
632 | 'OSW' = 'OSWEGO, NY '
633 | 'ELM' = 'REGIONAL ARPT - HORSEHEAD, NY'
634 | 'ROC' = 'ROCHESTER, NY '
635 | 'ROU' = 'ROUSES POINT, NY '
636 | 'SWF' = 'STEWART - ORANGE CNTY, NY'
637 | 'SYR' = 'SYRACUSE, NY '
638 | 'THO' = 'THOUSAND ISLAND BRIDGE, NY'
639 | 'TRO' = 'TROUT RIVER, NY '
640 | 'WAT' = 'WATERTOWN, NY '
641 | 'HPN' = 'WESTCHESTER - WHITE PLAINS, NY'
642 | 'WRB' = 'WHIRLPOOL BRIDGE, NY'
643 | 'YOU' = 'YOUNGSTOWN, NY '
644 | 'AKR' = 'AKRON, OH '
645 | 'ATB' = 'ASHTABULA, OH '
646 | 'CIN' = 'CINCINNATI, OH '
647 | 'CLE' = 'CLEVELAND, OH '
648 | 'CLM' = 'COLUMBUS, OH '
649 | 'LOR' = 'LORAIN, OH '
650 | 'MBO' = 'MARBLE HEADS, OH '
651 | 'SDY' = 'SANDUSKY, OH '
652 | 'TOL' = 'TOLEDO, OH '
653 | 'OKC' = 'OKLAHOMA CITY, OK '
654 | 'TUL' = 'TULSA, OK'
655 | 'AST' = 'ASTORIA, OR '
656 | 'COO' = 'COOS BAY, OR '
657 | 'HIO' = 'HILLSBORO, OR'
658 | 'MED' = 'MEDFORD, OR '
659 | 'NPT' = 'NEWPORT, OR '
660 | 'POO' = 'PORTLAND, OR '
661 | 'PUT' = 'PUT-IN-BAY, OH '
662 | 'RDM' = 'ROBERTS FIELDS - REDMOND, OR'
663 | 'ERI' = 'ERIE, PA '
664 | 'MDT' = 'HARRISBURG, PA'
665 | 'HSB' = 'HARRISONBURG, PA '
666 | 'PHI' = 'PHILADELPHIA, PA '
667 | 'PIT' = 'PITTSBURG, PA '
668 | 'AGU' = 'AGUADILLA, PR '
669 | 'BQN' = 'BORINQUEN - AGUADILLO, PR'
670 | 'JCP' = 'CULEBRA - BENJAMIN RIVERA, PR'
671 | 'ENS' = 'ENSENADA, PR '
672 | 'FAJ' = 'FAJARDO, PR '
673 | 'HUM' = 'HUMACAO, PR '
674 | 'JOB' = 'JOBOS, PR '
675 | 'MAY' = 'MAYAGUEZ, PR '
676 | 'PON' = 'PONCE, PR '
677 | 'PSE' = 'PONCE-MERCEDITA, PR'
678 | 'SAJ' = 'SAN JUAN, PR '
679 | 'VQS' = 'VIEQUES-ARPT, PR'
680 | 'PRO' = 'PROVIDENCE, RI '
681 | 'PVD' = 'THEODORE FRANCIS - WARWICK, RI'
682 | 'CHL' = 'CHARLESTON, SC '
683 | 'CAE' = 'COLUMBIA, SC #ARPT'
684 | 'GEO' = 'GEORGETOWN, SC '
685 | 'GSP' = 'GREENVILLE, SC'
686 | 'GRR' = 'GREER, SC'
687 | 'MYR' = 'MYRTLE BEACH, SC'
688 | 'SPF' = 'BLACK HILLS, SPEARFISH, SD'
689 | 'HON' = 'HOWES REGIONAL ARPT - HURON, SD'
690 | 'SAI' = 'SAIPAN, SPN '
691 | 'TYS' = 'MC GHEE TYSON - ALCOA, TN'
692 | 'MEM' = 'MEMPHIS, TN '
693 | 'NSV' = 'NASHVILLE, TN '
694 | 'TRI' = 'TRI CITY ARPT, TN'
695 | 'ADS' = 'ADDISON AIRPORT- ADDISON, TX'
696 | 'ADT' = 'AMISTAD DAM, TX '
697 | 'ANZ' = 'ANZALDUAS, TX'
698 | 'AUS' = 'AUSTIN, TX '
699 | 'BEA' = 'BEAUMONT, TX '
700 | 'BBP' = 'BIG BEND PARK, TX (BPS)'
701 | 'SCC' = 'BP SPEC COORD. CTR, TX'
702 | 'BTC' = 'BP TACTICAL UNIT, TX '
703 | 'BOA' = 'BRIDGE OF AMERICAS, TX'
704 | 'BRO' = 'BROWNSVILLE, TX '
705 | 'CRP' = 'CORPUS CHRISTI, TX '
706 | 'DAL' = 'DALLAS, TX '
707 | 'DLR' = 'DEL RIO, TX '
708 | 'DNA' = 'DONNA, TX'
709 | 'EGP' = 'EAGLE PASS, TX '
710 | 'ELP' = 'EL PASO, TX '
711 | 'FAB' = 'FABENS, TX '
712 | 'FAL' = 'FALCON HEIGHTS, TX '
713 | 'FTH' = 'FORT HANCOCK, TX '
714 | 'AFW' = 'FORT WORTH ALLIANCE, TX'
715 | 'FPT' = 'FREEPORT, TX '
716 | 'GAL' = 'GALVESTON, TX '
717 | 'HLG' = 'HARLINGEN, TX '
718 | 'HID' = 'HIDALGO, TX '
719 | 'HOU' = 'HOUSTON, TX '
720 | 'SGR' = 'HULL FIELD, SUGAR LAND ARPT, TX'
721 | 'LLB' = 'JUAREZ-LINCOLN BRIDGE, TX'
722 | 'LCB' = 'LAREDO COLUMBIA BRIDGE, TX'
723 | 'LRN' = 'LAREDO NORTH, TX '
724 | 'LAR' = 'LAREDO, TX '
725 | 'LSE' = 'LOS EBANOS, TX '
726 | 'IND' = 'LOS INDIOS, TX'
727 | 'LOI' = 'LOS INDIOS, TX '
728 | 'MRS' = 'MARFA, TX (BPS)'
729 | 'MCA' = 'MCALLEN, TX '
730 | 'MAF' = 'ODESSA REGIONAL, TX'
731 | 'PDN' = 'PASO DEL NORTE,TX '
732 | 'PBB' = 'PEACE BRIDGE, NY '
733 | 'PHR' = 'PHARR, TX '
734 | 'PAR' = 'PORT ARTHUR, TX '
735 | 'ISB' = 'PORT ISABEL, TX '
736 | 'POE' = 'PORT OF EL PASO, TX '
737 | 'PRE' = 'PRESIDIO, TX '
738 | 'PGR' = 'PROGRESO, TX '
739 | 'RIO' = 'RIO GRANDE CITY, TX '
740 | 'ROM' = 'ROMA, TX '
741 | 'SNA' = 'SAN ANTONIO, TX '
742 | 'SNN' = 'SANDERSON, TX '
743 | 'VIB' = 'VETERAN INTL BRIDGE, TX'
744 | 'YSL' = 'YSLETA, TX '
745 | 'CHA' = 'CHARLOTTE AMALIE, VI '
746 | 'CHR' = 'CHRISTIANSTED, VI '
747 | 'CRU' = 'CRUZ BAY, ST JOHN, VI '
748 | 'FRK' = 'FREDERIKSTED, VI '
749 | 'STT' = 'ST THOMAS, VI '
750 | 'LGU' = 'CACHE AIRPORT - LOGAN, UT'
751 | 'SLC' = 'SALT LAKE CITY, UT '
752 | 'CHO' = 'ALBEMARLE CHARLOTTESVILLE, VA'
753 | 'DAA' = 'DAVISON AAF - FAIRFAX CNTY, VA'
754 | 'HOP' = 'HOPEWELL, VA '
755 | 'HEF' = 'MANASSAS, VA #ARPT'
756 | 'NWN' = 'NEWPORT, VA '
757 | 'NOR' = 'NORFOLK, VA '
758 | 'RCM' = 'RICHMOND, VA '
759 | 'ABS' = 'ALBURG SPRINGS, VT '
760 | 'ABG' = 'ALBURG, VT '
761 | 'BEB' = 'BEEBE PLAIN, VT '
762 | 'BEE' = 'BEECHER FALLS, VT '
763 | 'BRG' = 'BURLINGTON, VT '
764 | 'CNA' = 'CANAAN, VT '
765 | 'DER' = 'DERBY LINE, VT (I-91) '
766 | 'DLV' = 'DERBY LINE, VT (RT. 5)'
767 | 'ERC' = 'EAST RICHFORD, VT '
768 | 'HIG' = 'HIGHGATE SPRINGS, VT '
769 | 'MOR' = 'MORSES LINE, VT '
770 | 'NPV' = 'NEWPORT, VT '
771 | 'NRT' = 'NORTH TROY, VT '
772 | 'NRN' = 'NORTON, VT '
773 | 'PIV' = 'PINNACLE ROAD, VT '
774 | 'RIF' = 'RICHFORT, VT '
775 | 'STA' = 'ST ALBANS, VT '
776 | 'SWB' = 'SWANTON, VT (BP - SECTOR HQ)'
777 | 'WBE' = 'WEST BERKSHIRE, VT '
778 | 'ABE' = 'ABERDEEN, WA '
779 | 'ANA' = 'ANACORTES, WA '
780 | 'BEL' = 'BELLINGHAM, WA '
781 | 'BLI' = 'BELLINGHAM, WASHINGTON #INTL'
782 | 'BLA' = 'BLAINE, WA '
783 | 'BWA' = 'BOUNDARY, WA '
784 | 'CUR' = 'CURLEW, WA (BPS)'
785 | 'DVL' = 'DANVILLE, WA '
786 | 'EVE' = 'EVERETT, WA '
787 | 'FER' = 'FERRY, WA '
788 | 'FRI' = 'FRIDAY HARBOR, WA '
789 | 'FWA' = 'FRONTIER, WA '
790 | 'KLM' = 'KALAMA, WA '
791 | 'LAU' = 'LAURIER, WA '
792 | 'LON' = 'LONGVIEW, WA '
793 | 'MET' = 'METALINE FALLS, WA '
794 | 'MWH' = 'MOSES LAKE GRANT COUNTY ARPT, WA'
795 | 'NEA' = 'NEAH BAY, WA '
796 | 'NIG' = 'NIGHTHAWK, WA '
797 | 'OLY' = 'OLYMPIA, WA '
798 | 'ORO' = 'OROVILLE, WA '
799 | 'PWB' = 'PASCO, WA '
800 | 'PIR' = 'POINT ROBERTS, WA '
801 | 'PNG' = 'PORT ANGELES, WA '
802 | 'PTO' = 'PORT TOWNSEND, WA '
803 | 'SEA' = 'SEATTLE, WA '
804 | 'SPO' = 'SPOKANE, WA '
805 | 'SUM' = 'SUMAS, WA '
806 | 'TAC' = 'TACOMA, WA '
807 | 'PSC' = 'TRI-CITIES - PASCO, WA'
808 | 'VAN' = 'VANCOUVER, WA '
809 | 'AGM' = 'ALGOMA, WI '
810 | 'BAY' = 'BAYFIELD, WI '
811 | 'GRB' = 'GREEN BAY, WI '
812 | 'MNW' = 'MANITOWOC, WI '
813 | 'MIL' = 'MILWAUKEE, WI '
814 | 'MSN' = 'TRUAX FIELD - DANE COUNTY, WI'
815 | 'CHS' = 'CHARLESTON, WV '
816 | 'CLK' = 'CLARKSBURG, WV '
817 | 'BLF' = 'MERCER COUNTY, WV'
818 | 'CSP' = 'CASPER, WY '
819 | 'XXX' = 'NOT REPORTED/UNKNOWN '
820 | '888' = 'UNIDENTIFED AIR / SEAPORT'
821 | 'UNK' = 'UNKNOWN POE '
822 | 'CLG' = 'CALGARY, CANADA '
823 | 'EDA' = 'EDMONTON, CANADA '
824 | 'YHC' = 'HAKAI PASS, CANADA'
825 | 'HAL' = 'Halifax, NS, Canada '
826 | 'MON' = 'MONTREAL, CANADA '
827 | 'OTT' = 'OTTAWA, CANADA '
828 | 'YXE' = 'SASKATOON, CANADA'
829 | 'TOR' = 'TORONTO, CANADA '
830 | 'VCV' = 'VANCOUVER, CANADA '
831 | 'VIC' = 'VICTORIA, CANADA '
832 | 'WIN' = 'WINNIPEG, CANADA '
833 | 'AMS' = 'AMSTERDAM-SCHIPHOL, NETHERLANDS'
834 | 'ARB' = 'ARUBA, NETH ANTILLES '
835 | 'BAN' = 'BANKOK, THAILAND '
836 | 'BEI' = 'BEICA #ARPT, ETHIOPIA'
837 | 'PEK' = 'BEIJING CAPITAL INTL, PRC'
838 | 'BDA' = 'KINDLEY FIELD, BERMUDA'
839 | 'BOG' = 'BOGOTA, EL DORADO #ARPT, COLOMBIA'
840 | 'EZE' = 'BUENOS AIRES, MINISTRO PIST, ARGENTINA'
841 | 'CUN' = 'CANCUN, MEXICO'
842 | 'CRQ' = 'CARAVELAS, BA #ARPT, BRAZIL'
843 | 'MVD' = 'CARRASCO, URUGUAY'
844 | 'DUB' = 'DUBLIN, IRELAND '
845 | 'FOU' = 'FOUGAMOU #ARPT, GABON'
846 | 'FBA' = 'FREEPORT, BAHAMAS '
847 | 'MTY' = 'GEN M. ESCOBEDO, Monterrey, MX'
848 | 'HMO' = 'GEN PESQUEIRA GARCIA, MX'
849 | 'GCM' = 'GRAND CAYMAN, CAYMAN ISLAND'
850 | 'GDL' = 'GUADALAJARA, MIGUEL HIDAL, MX'
851 | 'HAM' = 'HAMILTON, BERMUDA '
852 | 'ICN' = 'INCHON, SEOUL KOREA'
853 | 'IWA' = 'INVALID - IWAKUNI, JAPAN'
854 | 'CND' = 'KOGALNICEANU, ROMANIA'
855 | 'LAH' = 'LABUHA ARPT, INDONESIA'
856 | 'DUR' = 'LOUIS BOTHA, SOUTH AFRICA'
857 | 'MAL' = 'MANGOLE ARPT, INDONESIA'
858 | 'MDE' = 'MEDELLIN, COLOMBIA'
859 | 'MEX' = 'JUAREZ INTL, MEXICO CITY, MX'
860 | 'LHR' = 'MIDDLESEX, ENGLAND'
861 | 'NBO' = 'NAIROBI, KENYA '
862 | 'NAS' = 'NASSAU, BAHAMAS '
863 | 'NCA' = 'NORTH CAICOS, TURK & CAIMAN'
864 | 'PTY' = 'OMAR TORRIJOS, PANAMA'
865 | 'SPV' = 'PAPUA, NEW GUINEA'
866 | 'UIO' = 'QUITO (MARISCAL SUCR), ECUADOR'
867 | 'RIT' = 'ROME, ITALY '
868 | 'SNO' = 'SAKON NAKHON #ARPT, THAILAND'
869 | 'SLP' = 'SAN LUIS POTOSI #ARPT, MEXICO'
870 | 'SAN' = 'SAN SALVADOR, EL SALVADOR'
871 | 'SRO' = 'SANTANA RAMOS #ARPT, COLOMBIA'
872 | 'GRU' = 'GUARULHOS INTL, SAO PAULO, BRAZIL'
873 | 'SHA' = 'SHANNON, IRELAND '
874 | 'HIL' = 'SHILLAVO, ETHIOPIA'
875 | 'TOK' = 'TOROKINA #ARPT, PAPUA, NEW GUINEA'
876 | 'VER' = 'VERACRUZ, MEXICO'
877 | 'LGW' = 'WEST SUSSEX, ENGLAND '
878 | 'ZZZ' = 'MEXICO Land (Banco de Mexico) '
879 | 'CHN' = 'No PORT Code (CHN)'
880 | 'CNC' = 'CANNON CORNERS, NY'
881 | 'MAA' = 'Abu Dhabi'
882 | 'AG0' = 'MAGNOLIA, AR'
883 | 'BHM' = 'BAR HARBOR, ME'
884 | 'BHX' = 'BIRMINGHAM, AL'
885 | 'CAK' = 'AKRON, OH'
886 | 'FOK' = 'SUFFOLK COUNTY, NY'
887 | 'LND' = 'LANDER, WY'
888 | 'MAR' = 'MARFA, TX'
889 | 'MLI' = 'MOLINE, IL'
890 | 'RIV' = 'RIVERSIDE, CA'
891 | 'RME' = 'ROME, NY'
892 | 'VNY' = 'VAN NUYS, CA'
893 | 'YUM' = 'YUMA, AZ'
894 | 'FRG' = 'Collapsed (FOK) 06/15'
895 | 'HRL' = 'Collapsed (HLG) 06/15'
896 | 'ISP' = 'Collapsed (FOK) 06/15'
897 | 'JSJ' = 'Collapsed (SAJ) 06/15'
898 | 'BUS' = 'Collapsed (BUF) 06/15'
899 | 'IAG' = 'Collapsed (NIA) 06/15'
900 | 'PHN' = 'Collapsed (PHU) 06/15'
901 | 'STN' = 'Collapsed (STR) 06/15'
902 | 'VMB' = 'Collapsed (VNB) 06/15'
903 | 'T01' = 'Collapsed (SEA) 06/15'
904 | 'PHF' = 'No PORT Code (PHF)'
905 | 'DRV' = 'No PORT Code (DRV)'
906 | 'FTB' = 'No PORT Code (FTB)'
907 | 'GAC' = 'No PORT Code (GAC)'
908 | 'GMT' = 'No PORT Code (GMT)'
909 | 'JFA' = 'No PORT Code (JFA)'
910 | 'JMZ' = 'No PORT Code (JMZ)'
911 | 'NC8' = 'No PORT Code (NC8)'
912 | 'NYL' = 'No PORT Code (NYL)'
913 | 'OAI' = 'No PORT Code (OAI)'
914 | 'PCW' = 'No PORT Code (PCW)'
915 | 'WA5' = 'No PORT Code (WAS)'
916 | 'WTR' = 'No PORT Code (WTR)'
917 | 'X96' = 'No PORT Code (X96)'
918 | 'XNA' = 'No PORT Code (XNA)'
919 | 'YGF' = 'No PORT Code (YGF)'
920 | '5T6' = 'No PORT Code (5T6)'
921 | '060' = 'No PORT Code (60)'
922 | 'SP0' = 'No PORT Code (SP0)'
923 | 'W55' = 'No PORT Code (W55)'
924 | 'X44' = 'No PORT Code (X44)'
925 | 'AUH' = 'No PORT Code (AUH)'
926 | 'RYY' = 'No PORT Code (RYY)'
927 | 'SUS' = 'No PORT Code (SUS)'
928 | '74S' = 'No PORT Code (74S)'
929 | 'ATW' = 'No PORT Code (ATW)'
930 | 'CPX' = 'No PORT Code (CPX)'
931 | 'MTH' = 'No PORT Code (MTH)'
932 | 'PFN' = 'No PORT Code (PFN)'
933 | 'SCH' = 'No PORT Code (SCH)'
934 | 'ASI' = 'No PORT Code (ASI)'
935 | 'BKF' = 'No PORT Code (BKF)'
936 | 'DAY' = 'No PORT Code (DAY)'
937 | 'Y62' = 'No PORT Code (Y62)'
938 | 'AG' = 'No PORT Code (AG)'
939 | 'BCM' = 'No PORT Code (BCM)'
940 | 'DEC' = 'No PORT Code (DEC)'
941 | 'PLB' = 'No PORT Code (PLB)'
942 | 'CXO' = 'No PORT Code (CXO)'
943 | 'JBQ' = 'No PORT Code (JBQ)'
944 | 'JIG' = 'No PORT Code (JIG)'
945 | 'OGS' = 'No PORT Code (OGS)'
946 | 'TIW' = 'No PORT Code (TIW)'
947 | 'OTS' = 'No PORT Code (OTS)'
948 | 'AMT' = 'No PORT Code (AMT)'
949 | 'EGE' = 'No PORT Code (EGE)'
950 | 'GPI' = 'No PORT Code (GPI)'
951 | 'NGL' = 'No PORT Code (NGL)'
952 | 'OLM' = 'No PORT Code (OLM)'
953 | '.GA' = 'No PORT Code (.GA)'
954 | 'CLX' = 'No PORT Code (CLX)'
955 | 'CP ' = 'No PORT Code (CP)'
956 | 'FSC' = 'No PORT Code (FSC)'
957 | 'NK' = 'No PORT Code (NK)'
958 | 'ADU' = 'No PORT Code (ADU)'
959 | 'AKT' = 'No PORT Code (AKT)'
960 | 'LIT' = 'No PORT Code (LIT)'
961 | 'A2A' = 'No PORT Code (A2A)'
962 | 'OSN' = 'No PORT Code (OSN)'
963 | ;
964 |
965 |
966 | /* ARRDATE is the Arrival Date in the USA. It is a SAS date numeric field to which
967 | a permanent format has not been applied. Please apply whichever date format
968 | works for you. */
969 |
970 |
971 | /* I94MODE - There are missing values as well as not reported (9) */
972 | value i94model
973 | 1 = 'Air'
974 | 2 = 'Sea'
975 | 3 = 'Land'
976 | 9 = 'Not reported' ;
977 |
978 |
979 | /* I94ADDR - There are lots of invalid codes in this variable and the list below
980 | shows what we have found to be valid, everything else goes into 'other' */
981 | value i94addrl
982 | 'AL'='ALABAMA'
983 | 'AK'='ALASKA'
984 | 'AZ'='ARIZONA'
985 | 'AR'='ARKANSAS'
986 | 'CA'='CALIFORNIA'
987 | 'CO'='COLORADO'
988 | 'CT'='CONNECTICUT'
989 | 'DE'='DELAWARE'
990 | 'DC'='DIST. OF COLUMBIA'
991 | 'FL'='FLORIDA'
992 | 'GA'='GEORGIA'
993 | 'GU'='GUAM'
994 | 'HI'='HAWAII'
995 | 'ID'='IDAHO'
996 | 'IL'='ILLINOIS'
997 | 'IN'='INDIANA'
998 | 'IA'='IOWA'
999 | 'KS'='KANSAS'
1000 | 'KY'='KENTUCKY'
1001 | 'LA'='LOUISIANA'
1002 | 'ME'='MAINE'
1003 | 'MD'='MARYLAND'
1004 | 'MA'='MASSACHUSETTS'
1005 | 'MI'='MICHIGAN'
1006 | 'MN'='MINNESOTA'
1007 | 'MS'='MISSISSIPPI'
1008 | 'MO'='MISSOURI'
1009 | 'MT'='MONTANA'
1010 | 'NC'='N. CAROLINA'
1011 | 'ND'='N. DAKOTA'
1012 | 'NE'='NEBRASKA'
1013 | 'NV'='NEVADA'
1014 | 'NH'='NEW HAMPSHIRE'
1015 | 'NJ'='NEW JERSEY'
1016 | 'NM'='NEW MEXICO'
1017 | 'NY'='NEW YORK'
1018 | 'OH'='OHIO'
1019 | 'OK'='OKLAHOMA'
1020 | 'OR'='OREGON'
1021 | 'PA'='PENNSYLVANIA'
1022 | 'PR'='PUERTO RICO'
1023 | 'RI'='RHODE ISLAND'
1024 | 'SC'='S. CAROLINA'
1025 | 'SD'='S. DAKOTA'
1026 | 'TN'='TENNESSEE'
1027 | 'TX'='TEXAS'
1028 | 'UT'='UTAH'
1029 | 'VT'='VERMONT'
1030 | 'VI'='VIRGIN ISLANDS'
1031 | 'VA'='VIRGINIA'
1032 | 'WV'='W. VIRGINIA'
1033 | 'WA'='WASHINGTON'
1034 | 'WI'='WISCONSON'
1035 | 'WY'='WYOMING'
1036 | '99'='All Other Codes' ;
1037 |
1038 | /* DEPDATE is the Departure Date from the USA. It is a SAS date numeric field to which
1039 | a permanent format has not been applied. Please apply whichever date format
1040 | works for you. */
1041 |
1042 |
1043 | /* I94BIR - Age of Respondent in Years */
1044 |
1045 |
1046 | /* I94VISA - Visa codes collapsed into three categories:
1047 | 1 = Business
1048 | 2 = Pleasure
1049 | 3 = Student
1050 | */
1051 |
1052 |
1053 | /* COUNT - Used for summary statistics */
1054 |
1055 |
1056 | /* DTADFILE - Character Date Field - Date added to I-94 Files - CIC does not use */
1057 |
1058 |
1059 | /* VISAPOST - Department of State where Visa was issued - CIC does not use */
1060 |
1061 |
1062 | /* OCCUP - Occupation that will be performed in U.S. - CIC does not use */
1063 |
1064 |
1065 | /* ENTDEPA - Arrival Flag - admitted or paroled into the U.S. - CIC does not use */
1066 |
1067 |
1068 | /* ENTDEPD - Departure Flag - Departed, lost I-94 or is deceased - CIC does not use */
1069 |
1070 |
1071 | /* ENTDEPU - Update Flag - Either apprehended, overstayed, adjusted to perm residence - CIC does not use */
1072 |
1073 |
1074 | /* MATFLAG - Match flag - Match of arrival and departure records */
1075 |
1076 |
1077 | /* BIRYEAR - 4 digit year of birth */
1078 |
1079 |
1080 | /* DTADDTO - Character Date Field - Date to which admitted to U.S. (allowed to stay until) - CIC does not use */
1081 |
1082 |
1083 | /* GENDER - Non-immigrant sex */
1084 |
1085 |
1086 | /* INSNUM - INS number */
1087 |
1088 |
1089 | /* AIRLINE - Airline used to arrive in U.S. */
1090 |
1091 |
1092 | /* ADMNUM - Admission Number */
1093 |
1094 |
1095 | /* FLTNO - Flight number of Airline used to arrive in U.S. */
1096 |
1097 |
1098 | /* VISATYPE - Class of admission legally admitting the non-immigrant to temporarily stay in U.S. */
1099 | run ;
1100 |
1101 |
--------------------------------------------------------------------------------
/airflow/dags/temp_output/i94addr.csv:
--------------------------------------------------------------------------------
1 | state_code,state_name
2 | AL,Alabama
3 | AK,Alaska
4 | AZ,Arizona
5 | AR,Arkansas
6 | CA,California
7 | CO,Colorado
8 | CT,Connecticut
9 | DE,Delaware
10 | DC,Dist. Of Columbia
11 | FL,Florida
12 | GA,Georgia
13 | GU,Guam
14 | HI,Hawaii
15 | ID,Idaho
16 | IL,Illinois
17 | IN,Indiana
18 | IA,Iowa
19 | KS,Kansas
20 | KY,Kentucky
21 | LA,Louisiana
22 | ME,Maine
23 | MD,Maryland
24 | MA,Massachusetts
25 | MI,Michigan
26 | MN,Minnesota
27 | MS,Mississippi
28 | MO,Missouri
29 | MT,Montana
30 | NC,N. Carolina
31 | ND,N. Dakota
32 | NE,Nebraska
33 | NV,Nevada
34 | NH,New Hampshire
35 | NJ,New Jersey
36 | NM,New Mexico
37 | NY,New York
38 | OH,Ohio
39 | OK,Oklahoma
40 | OR,Oregon
41 | PA,Pennsylvania
42 | PR,Puerto Rico
43 | RI,Rhode Island
44 | SC,S. Carolina
45 | SD,S. Dakota
46 | TN,Tennessee
47 | TX,Texas
48 | UT,Utah
49 | VT,Vermont
50 | VI,Virgin Islands
51 | VA,Virginia
52 | WV,W. Virginia
53 | WA,Washington
54 | WI,Wisconsin
55 | WY,Wyoming
56 | 99,All Other Codes
57 |
--------------------------------------------------------------------------------
/airflow/dags/temp_output/i94cit&i94res.csv:
--------------------------------------------------------------------------------
1 | country_code,country_name
2 | 582,"Mexico Air Sea, And Not Reported (I-94, No Land Arrivals)"
3 | 236,Afghanistan
4 | 101,Albania
5 | 316,Algeria
6 | 102,Andorra
7 | 324,Angola
8 | 529,Anguilla
9 | 518,Antigua-Barbuda
10 | 687,Argentina
11 | 151,Armenia
12 | 532,Aruba
13 | 438,Australia
14 | 103,Austria
15 | 152,Azerbaijan
16 | 512,Bahamas
17 | 298,Bahrain
18 | 274,Bangladesh
19 | 513,Barbados
20 | 104,Belgium
21 | 581,Belize
22 | 386,Benin
23 | 509,Bermuda
24 | 153,Belarus
25 | 242,Bhutan
26 | 688,Bolivia
27 | 717,"Bonaire, St Eustatius, Saba"
28 | 164,Bosnia-Herzegovina
29 | 336,Botswana
30 | 689,Brazil
31 | 525,British Virgin Islands
32 | 217,Brunei
33 | 105,Bulgaria
34 | 393,Burkina Faso
35 | 243,Burma
36 | 375,Burundi
37 | 310,Cameroon
38 | 326,Cape Verde
39 | 526,Cayman Islands
40 | 383,Central African Republic
41 | 384,Chad
42 | 690,Chile
43 | 245,"China, Prc"
44 | 721,Curacao
45 | 270,Christmas Island
46 | 271,Cocos Islands
47 | 691,Colombia
48 | 317,Comoros
49 | 385,Congo
50 | 467,Cook Islands
51 | 575,Costa Rica
52 | 165,Croatia
53 | 584,Cuba
54 | 218,Cyprus
55 | 140,Czech Republic
56 | 723,Faroe Islands (Part Of Denmark)
57 | 108,Denmark
58 | 322,Djibouti
59 | 519,Dominica
60 | 585,Dominican Republic
61 | 240,East Timor
62 | 692,Ecuador
63 | 368,Egypt
64 | 576,El Salvador
65 | 399,Equatorial Guinea
66 | 372,Eritrea
67 | 109,Estonia
68 | 369,Ethiopia
69 | 604,Falkland Islands
70 | 413,Fiji
71 | 110,Finland
72 | 111,France
73 | 601,French Guiana
74 | 411,French Polynesia
75 | 387,Gabon
76 | 338,Gambia
77 | 758,Gaza Strip
78 | 154,Georgia
79 | 112,Germany
80 | 339,Ghana
81 | 143,Gibraltar
82 | 113,Greece
83 | 520,Grenada
84 | 507,Guadeloupe
85 | 577,Guatemala
86 | 382,Guinea
87 | 327,Guinea-Bissau
88 | 603,Guyana
89 | 586,Haiti
90 | 726,Heard And Mcdonald Is.
91 | 149,Holy See/Vatican
92 | 528,Honduras
93 | 206,Hong Kong
94 | 114,Hungary
95 | 115,Iceland
96 | 213,India
97 | 759,Indian Ocean Areas (French)
98 | 729,Indian Ocean Territory
99 | 204,Indonesia
100 | 249,Iran
101 | 250,Iraq
102 | 116,Ireland
103 | 251,Israel
104 | 117,Italy
105 | 388,Ivory Coast
106 | 514,Jamaica
107 | 209,Japan
108 | 253,Jordan
109 | 201,Kampuchea
110 | 155,Kazakhstan
111 | 340,Kenya
112 | 414,Kiribati
113 | 732,Kosovo
114 | 272,Kuwait
115 | 156,Kyrgyzstan
116 | 203,Laos
117 | 118,Latvia
118 | 255,Lebanon
119 | 335,Lesotho
120 | 370,Liberia
121 | 381,Libya
122 | 119,Liechtenstein
123 | 120,Lithuania
124 | 121,Luxembourg
125 | 214,Macau
126 | 167,Macedonia
127 | 320,Madagascar
128 | 345,Malawi
129 | 273,Malaysia
130 | 220,Maldives
131 | 392,Mali
132 | 145,Malta
133 | 472,Marshall Islands
134 | 511,Martinique
135 | 389,Mauritania
136 | 342,Mauritius
137 | 760,Mayotte (Africa - French)
138 | 473,"Micronesia, Fed. States Of"
139 | 157,Moldova
140 | 122,Monaco
141 | 299,Mongolia
142 | 735,Montenegro
143 | 521,Montserrat
144 | 332,Morocco
145 | 329,Mozambique
146 | 371,Namibia
147 | 440,Nauru
148 | 257,Nepal
149 | 123,Netherlands
150 | 508,Netherlands Antilles
151 | 409,New Caledonia
152 | 464,New Zealand
153 | 579,Nicaragua
154 | 390,Niger
155 | 343,Nigeria
156 | 470,Niue
157 | 275,North Korea
158 | 124,Norway
159 | 256,Oman
160 | 258,Pakistan
161 | 474,Palau
162 | 743,Palestine
163 | 504,Panama
164 | 441,Papua New Guinea
165 | 693,Paraguay
166 | 694,Peru
167 | 260,Philippines
168 | 416,Pitcairn Islands
169 | 107,Poland
170 | 126,Portugal
171 | 297,Qatar
172 | 748,Republic Of South Sudan
173 | 321,Reunion
174 | 127,Romania
175 | 158,Russia
176 | 376,Rwanda
177 | 128,San Marino
178 | 330,Sao Tome And Principe
179 | 261,Saudi Arabia
180 | 391,Senegal
181 | 142,Serbia And Montenegro
182 | 745,Serbia
183 | 347,Seychelles
184 | 348,Sierra Leone
185 | 207,Singapore
186 | 141,Slovakia
187 | 166,Slovenia
188 | 412,Solomon Islands
189 | 397,Somalia
190 | 373,South Africa
191 | 276,South Korea
192 | 129,Spain
193 | 244,Sri Lanka
194 | 346,St. Helena
195 | 522,St. Kitts-Nevis
196 | 523,St. Lucia
197 | 502,St. Pierre And Miquelon
198 | 524,St. Vincent-Grenadines
199 | 716,Saint Barthelemy
200 | 736,Saint Martin
201 | 749,Saint Maarten
202 | 350,Sudan
203 | 602,Suriname
204 | 351,Swaziland
205 | 130,Sweden
206 | 131,Switzerland
207 | 262,Syria
208 | 268,Taiwan
209 | 159,Tajikistan
210 | 353,Tanzania
211 | 263,Thailand
212 | 304,Togo
213 | 417,Tonga
214 | 516,Trinidad And Tobago
215 | 323,Tunisia
216 | 264,Turkey
217 | 161,Turkmenistan
218 | 527,Turks And Caicos Islands
219 | 420,Tuvalu
220 | 352,Uganda
221 | 162,Ukraine
222 | 296,United Arab Emirates
223 | 135,United Kingdom
224 | 695,Uruguay
225 | 163,Uzbekistan
226 | 410,Vanuatu
227 | 696,Venezuela
228 | 266,Vietnam
229 | 469,Wallis And Futuna Islands
230 | 757,West Indies (French)
231 | 333,Western Sahara
232 | 465,Western Samoa
233 | 216,Yemen
234 | 139,Yugoslavia
235 | 301,Zaire
236 | 344,Zambia
237 | 315,Zimbabwe
238 | 403,Invalid: American Samoa
239 | 712,Invalid: Antarctica
240 | 700,Invalid: Born On Board Ship
241 | 719,Invalid: Bouvet Island (Antarctica/Norway Terr.)
242 | 574,Invalid: Canada
243 | 720,Invalid: Canton And Enderbury Isls
244 | 106,Invalid: Czechoslovakia
245 | 739,Invalid: Dronning Maud Land (Antarctica-Norway)
246 | 394,Invalid: French Southern And Antarctic
247 | 501,Invalid: Greenland
248 | 404,Invalid: Guam
249 | 730,Invalid: International Waters
250 | 731,Invalid: Johnson Island
251 | 471,"Invalid: Mariana Islands, Northern"
252 | 737,Invalid: Midway Islands
253 | 753,Invalid: Minor Outlying Islands - Usa
254 | 740,Invalid: Neutral Zone (S. Arabia/Iraq)
255 | 710,Invalid: Non-Quota Immigrant
256 | 505,Invalid: Puerto Rico
257 | 0,Invalid: Stateless
258 | 705,Invalid: Stateless
259 | 583,Invalid: United States
260 | 407,Invalid: United States
261 | 999,Invalid: Unknown
262 | 239,Invalid: Unknown Country
263 | 134,Invalid: Ussr
264 | 506,Invalid: U.S. Virgin Islands
265 | 755,Invalid: Wake Island
266 | 311,Collapsed Tanzania (Should Not Show)
267 | 741,Collapsed Curacao (Should Not Show)
268 | 54,No Country Code (54)
269 | 100,No Country Code (100)
270 | 187,No Country Code (187)
271 | 190,No Country Code (190)
272 | 200,No Country Code (200)
273 | 219,No Country Code (219)
274 | 238,No Country Code (238)
275 | 277,No Country Code (277)
276 | 293,No Country Code (293)
277 | 300,No Country Code (300)
278 | 319,No Country Code (319)
279 | 365,No Country Code (365)
280 | 395,No Country Code (395)
281 | 400,No Country Code (400)
282 | 485,No Country Code (485)
283 | 503,No Country Code (503)
284 | 589,No Country Code (589)
285 | 592,No Country Code (592)
286 | 791,No Country Code (791)
287 | 849,No Country Code (849)
288 | 914,No Country Code (914)
289 | 944,No Country Code (944)
290 | 996,No Country Code (996)
291 |
--------------------------------------------------------------------------------
/airflow/dags/temp_output/i94mode.csv:
--------------------------------------------------------------------------------
1 | trans_code,trans_name
2 | 1,Air
3 | 2,Sea
4 | 3,Land
5 | 9,Not Reported
6 |
--------------------------------------------------------------------------------
/airflow/dags/temp_output/i94port.csv:
--------------------------------------------------------------------------------
1 | port_code,port_name,port_city,port_state
2 | ALC,"Alcan, Ak",Alcan, AK
3 | ANC,"Anchorage, Ak",Anchorage, AK
4 | BAR,"Baker Aaf - Baker Island, Ak",Baker Aaf - Baker Island, AK
5 | DAC,"Daltons Cache, Ak",Daltons Cache, AK
6 | PIZ,"Dew Station Pt Lay Dew, Ak",Dew Station Pt Lay Dew, AK
7 | DTH,"Dutch Harbor, Ak",Dutch Harbor, AK
8 | EGL,"Eagle, Ak",Eagle, AK
9 | FRB,"Fairbanks, Ak",Fairbanks, AK
10 | HOM,"Homer, Ak",Homer, AK
11 | HYD,"Hyder, Ak",Hyder, AK
12 | JUN,"Juneau, Ak",Juneau, AK
13 | 5KE,"Ketchikan, Ak",Ketchikan, AK
14 | KET,"Ketchikan, Ak",Ketchikan, AK
15 | MOS,"Moses Point Intermediate, Ak",Moses Point Intermediate, AK
16 | NIK,"Nikiski, Ak",Nikiski, AK
17 | NOM,"Nom, Ak",Nom, AK
18 | PKC,"Poker Creek, Ak",Poker Creek, AK
19 | ORI,"Port Lions Spb, Ak",Port Lions Spb, AK
20 | SKA,"Skagway, Ak",Skagway, AK
21 | SNP,"St. Paul Island, Ak",St. Paul Island, AK
22 | TKI,"Tokeen, Ak",Tokeen, AK
23 | WRA,"Wrangell, Ak",Wrangell, AK
24 | HSV,"Madison County - Huntsville, Al",Madison County - Huntsville, AL
25 | MOB,"Mobile, Al",Mobile, AL
26 | LIA,"Little Rock, Ar (Bps)",Little Rock, AR (BPS)
27 | ROG,"Rogers Arpt, Ar",Rogers Arpt, AR
28 | DOU,"Douglas, Az",Douglas, AZ
29 | LUK,"Lukeville, Az",Lukeville, AZ
30 | MAP,Mariposa Az,Mariposa Az,
31 | NAC,"Naco, Az",Naco, AZ
32 | NOG,"Nogales, Az",Nogales, AZ
33 | PHO,"Phoenix, Az",Phoenix, AZ
34 | POR,"Portal, Az",Portal, AZ
35 | SLU,"San Luis, Az",San Luis, AZ
36 | SAS,"Sasabe, Az",Sasabe, AZ
37 | TUC,"Tucson, Az",Tucson, AZ
38 | YUI,"Yuma, Az",Yuma, AZ
39 | AND,"Andrade, Ca",Andrade, CA
40 | BUR,"Burbank, Ca",Burbank, CA
41 | CAL,"Calexico, Ca",Calexico, CA
42 | CAO,"Campo, Ca",Campo, CA
43 | FRE,"Fresno, Ca",Fresno, CA
44 | ICP,"Imperial County, Ca",Imperial County, CA
45 | LNB,"Long Beach, Ca",Long Beach, CA
46 | LOS,"Los Angeles, Ca",Los Angeles, CA
47 | BFL,"Meadows Field - Bakersfield, Ca",Meadows Field - Bakersfield, CA
48 | OAK,"Oakland, Ca",Oakland, CA
49 | ONT,"Ontario, Ca",Ontario, CA
50 | OTM,"Otay Mesa, Ca",Otay Mesa, CA
51 | BLT,"Pacific, Hwy. Station, Ca","Pacific, Hwy. Station", CA
52 | PSP,"Palm Springs, Ca",Palm Springs, CA
53 | SAC,"Sacramento, Ca",Sacramento, CA
54 | SLS,"Salinas, Ca (Bps)",Salinas, CA (BPS)
55 | SDP,"San Diego, Ca",San Diego, CA
56 | SFR,"San Francisco, Ca",San Francisco, CA
57 | SNJ,"San Jose, Ca",San Jose, CA
58 | SLO,"San Luis Obispo, Ca",San Luis Obispo, CA
59 | SLI,"San Luis Obispo, Ca (Bps)",San Luis Obispo, CA (BPS)
60 | SPC,"San Pedro, Ca",San Pedro, CA
61 | SYS,"San Ysidro, Ca",San Ysidro, CA
62 | SAA,"Santa Ana, Ca",Santa Ana, CA
63 | STO,"Stockton, Ca (Bps)",Stockton, CA (BPS)
64 | TEC,"Tecate, Ca",Tecate, CA
65 | TRV,"Travis-Afb, Ca",Travis-Afb, CA
66 | APA,"Arapahoe County, Co",Arapahoe County, CO
67 | ASE,"Aspen, Co #Arpt",Aspen, CO #ARPT
68 | COS,"Colorado Springs, Co",Colorado Springs, CO
69 | DEN,"Denver, Co",Denver, CO
70 | DRO,"La Plata - Durango, Co",La Plata - Durango, CO
71 | BDL,"Bradley International, Ct",Bradley International, CT
72 | BGC,"Bridgeport, Ct",Bridgeport, CT
73 | GRT,"Groton, Ct",Groton, CT
74 | HAR,"Hartford, Ct",Hartford, CT
75 | NWH,"New Haven, Ct",New Haven, CT
76 | NWL,"New London, Ct",New London, CT
77 | TST,"Newington Data Center Test, Ct",Newington Data Center Test, CT
78 | WAS,Washington Dc,Washington Dc,
79 | DOV,"Dover Afb, De",Dover Afb, DE
80 | DVD,"Dover-Afb, De",Dover-Afb, DE
81 | WLL,"Wilmington, De",Wilmington, DE
82 | BOC,"Bocagrande, Fl",Bocagrande, FL
83 | SRQ,"Bradenton - Sarasota, Fl",Bradenton - Sarasota, FL
84 | CAN,"Cape Canaveral, Fl",Cape Canaveral, FL
85 | DAB,"Daytona Beach International, Fl",Daytona Beach International, FL
86 | FRN,"Fernandina, Fl",Fernandina, FL
87 | FTL,"Fort Lauderdale, Fl",Fort Lauderdale, FL
88 | FMY,"Fort Myers, Fl",Fort Myers, FL
89 | FPF,"Fort Pierce, Fl",Fort Pierce, FL
90 | HUR,"Hurlburt Field, Fl",Hurlburt Field, FL
91 | GNV,"J R Alison Muni - Gainesville, Fl",J R Alison Muni - Gainesville, FL
92 | JAC,"Jacksonville, Fl",Jacksonville, FL
93 | KEY,"Key West, Fl",Key West, FL
94 | LEE,"Leesburg Municipal Airport, Fl",Leesburg Municipal Airport, FL
95 | MLB,"Melbourne, Fl",Melbourne, FL
96 | MIA,"Miami, Fl",Miami, FL
97 | APF,"Naples, Fl #Arpt",Naples, FL #ARPT
98 | OPF,"Opa Locka, Fl",Opa Locka, FL
99 | ORL,"Orlando, Fl",Orlando, FL
100 | PAN,"Panama City, Fl",Panama City, FL
101 | PEN,"Pensacola, Fl",Pensacola, FL
102 | PCF,"Port Canaveral, Fl",Port Canaveral, FL
103 | PEV,"Port Everglades, Fl",Port Everglades, FL
104 | PSJ,"Port St Joe, Fl",Port St Joe, FL
105 | SFB,"Sanford, Fl",Sanford, FL
106 | SGJ,"St Augustine Arpt, Fl",St Augustine Arpt, FL
107 | SAU,"St Augustine, Fl",St Augustine, FL
108 | FPR,"St Lucie County, Fl",St Lucie County, FL
109 | SPE,"St Petersburg, Fl",St Petersburg, FL
110 | TAM,"Tampa, Fl",Tampa, FL
111 | WPB,"West Palm Beach, Fl",West Palm Beach, FL
112 | ATL,"Atlanta, Ga",Atlanta, GA
113 | BRU,"Brunswick, Ga",Brunswick, GA
114 | AGS,"Bush Field - Augusta, Ga",Bush Field - Augusta, GA
115 | SAV,"Savannah, Ga",Savannah, GA
116 | AGA,"Agana, Gu",Agana, GU
117 | HHW,"Honolulu, Hi",Honolulu, HI
118 | OGG,"Kahului - Maui, Hi",Kahului - Maui, HI
119 | KOA,"Keahole-Kona, Hi",Keahole-Kona, HI
120 | LIH,"Lihue, Hi",Lihue, HI
121 | CID,"Cedar Rapids/Iowa City, Ia",Cedar Rapids/Iowa City, IA
122 | DSM,"Des Moines, Ia",Des Moines, IA
123 | BOI,"Air Term. (Gowen Fld) Boise, Id",Air Term. (Gowen Fld) Boise, ID
124 | EPI,"Eastport, Id",Eastport, ID
125 | IDA,"Fanning Field - Idaho Falls, Id",Fanning Field - Idaho Falls, ID
126 | PTL,"Porthill, Id",Porthill, ID
127 | SPI,"Capital - Springfield, Il",Capital - Springfield, IL
128 | CHI,"Chicago, Il",Chicago, IL
129 | DPA,"Dupage County, Il",Dupage County, IL
130 | PIA,"Greater Peoria, Il",Greater Peoria, IL
131 | RFD,"Greater Rockford, Il",Greater Rockford, IL
132 | UGN,"Memorial - Waukegan, Il",Memorial - Waukegan, IL
133 | GAR,"Gary, In",Gary, IN
134 | HMM,"Hammond, In",Hammond, IN
135 | INP,"Indianapolis, In",Indianapolis, IN
136 | MRL,"Merrillville, In",Merrillville, IN
137 | SBN,"South Bend, In",South Bend, IN
138 | ICT,"Mid-Continent - Witchita, Ks",Mid-Continent - Witchita, KS
139 | LEX,"Blue Grass - Lexington, Ky",Blue Grass - Lexington, KY
140 | LOU,"Louisville, Ky",Louisville, KY
141 | BTN,"Baton Rouge, La",Baton Rouge, LA
142 | LKC,"Lake Charles, La",Lake Charles, LA
143 | LAK,"Lake Charles, La (Bps)",Lake Charles, LA (BPS)
144 | MLU,"Monroe, La",Monroe, LA
145 | MGC,"Morgan City, La",Morgan City, LA
146 | NOL,"New Orleans, La",New Orleans, LA
147 | BOS,"Boston, Ma",Boston, MA
148 | GLO,"Gloucester, Ma",Gloucester, MA
149 | BED,"Hanscom Field - Bedford, Ma",Hanscom Field - Bedford, MA
150 | LYN,"Lynden, Wa",Lynden, WA
151 | ADW,"Andrews Afb, Md",Andrews Afb, MD
152 | BAL,"Baltimore, Md",Baltimore, MD
153 | MKG,"Muskegon, Md",Muskegon, MD
154 | PAX,"Patuxent River, Md",Patuxent River, MD
155 | BGM,"Bangor, Me",Bangor, ME
156 | BOO,"Boothbay Harbor, Me",Boothbay Harbor, ME
157 | BWM,"Bridgewater, Me",Bridgewater, ME
158 | BCK,"Buckport, Me",Buckport, ME
159 | CLS,"Calais, Me",Calais, ME
160 | CRB,"Caribou, Me",Caribou, ME
161 | COB,"Coburn Gore, Me",Coburn Gore, ME
162 | EST,"Eastcourt, Me",Eastcourt, ME
163 | EPT,"Eastport Municipal, Me",Eastport Municipal, ME
164 | EPM,"Eastport, Me",Eastport, ME
165 | FOR,"Forest City, Me",Forest City, ME
166 | FTF,"Fort Fairfield, Me",Fort Fairfield, ME
167 | FTK,"Fort Kent, Me",Fort Kent, ME
168 | HML,"Hamiin, Me",Hamiin, ME
169 | HTM,"Houlton, Me",Houlton, ME
170 | JKM,"Jackman, Me",Jackman, ME
171 | KAL,"Kalispel, Mt",Kalispel, MT
172 | LIM,"Limestone, Me",Limestone, ME
173 | LUB,"Lubec, Me",Lubec, ME
174 | MAD,"Madawaska, Me",Madawaska, ME
175 | POM,"Portland, Me",Portland, ME
176 | RGM,"Rangeley, Me (Bps)",Rangeley, ME (BPS)
177 | SBR,"South Brewer, Me",South Brewer, ME
178 | SRL,"St Aurelie, Me",St Aurelie, ME
179 | SPA,"St Pampile, Me",St Pampile, ME
180 | VNB,"Van Buren, Me",Van Buren, ME
181 | VCB,"Vanceboro, Me",Vanceboro, ME
182 | AGN,"Algonac, Mi",Algonac, MI
183 | ALP,"Alpena, Mi",Alpena, MI
184 | BCY,"Bay City, Mi",Bay City, MI
185 | DET,"Detroit, Mi",Detroit, MI
186 | GRP,"Grand Rapids, Mi",Grand Rapids, MI
187 | GRO,"Grosse Isle, Mi",Grosse Isle, MI
188 | ISL,"Isle Royale, Mi",Isle Royale, MI
189 | MRC,"Marine City, Mi",Marine City, MI
190 | MRY,"Marysville, Mi",Marysville, MI
191 | PTK,"Oakland County - Pontiac, Mi",Oakland County - Pontiac, MI
192 | PHU,"Port Huron, Mi",Port Huron, MI
193 | RBT,"Roberts Landing, Mi",Roberts Landing, MI
194 | SAG,"Saginaw, Mi",Saginaw, MI
195 | SSM,"Sault Ste. Marie, Mi",Sault Ste. Marie, MI
196 | SCL,"St Clair, Mi",St Clair, MI
197 | YIP,"Willow Run - Ypsilanti, Mi",Willow Run - Ypsilanti, MI
198 | BAU,"Baudette, Mn",Baudette, MN
199 | CAR,"Caribou Municipal Airport, Mn",Caribou Municipal Airport, MN
200 | GTF,"Collapsed Into Int, Mn",Collapsed Into Int, MN
201 | INL,"Collapsed Into Int, Mn",Collapsed Into Int, MN
202 | CRA,"Crane Lake, Mn",Crane Lake, MN
203 | MIC,"Crystal Municipal Airport, Mn",Crystal Municipal Airport, MN
204 | DUL,"Duluth, Mn",Duluth, MN
205 | ELY,"Ely, Mn",Ely, MN
206 | GPM,"Grand Portage, Mn",Grand Portage, MN
207 | SVC,"Grant County - Silver City, Mn",Grant County - Silver City, MN
208 | INT,"Intl Falls, Mn",Intl Falls, MN
209 | LAN,"Lancaster, Mn",Lancaster, MN
210 | MSP,"Minn./St Paul, Mn",Minn./St Paul, MN
211 | LIN,"Northern Svc Center, Mn",Northern Svc Center, MN
212 | NOY,"Noyes, Mn",Noyes, MN
213 | PIN,"Pine Creek, Mn",Pine Creek, MN
214 | 48Y,"Pinecreek Border Arpt, Mn",Pinecreek Border Arpt, MN
215 | RAN,"Rainer, Mn",Rainer, MN
216 | RST,"Rochester, Mn",Rochester, MN
217 | ROS,"Roseau, Mn",Roseau, MN
218 | SPM,"St Paul, Mn",St Paul, MN
219 | WSB,"Warroad Intl, Spb, Mn","Warroad Intl, Spb", MN
220 | WAR,"Warroad, Mn",Warroad, MN
221 | KAN,"Kansas City, Mo",Kansas City, MO
222 | SGF,"Springfield-Branson, Mo",Springfield-Branson, MO
223 | STL,"St Louis, Mo",St Louis, MO
224 | WHI,"Whitetail, Mt",Whitetail, MT
225 | WHM,"Wild Horse, Mt",Wild Horse, MT
226 | GPT,"Biloxi Regional, Ms",Biloxi Regional, MS
227 | GTR,"Golden Triangle Lowndes Cnty, Ms",Golden Triangle Lowndes Cnty, MS
228 | GUL,"Gulfport, Ms",Gulfport, MS
229 | PAS,"Pascagoula, Ms",Pascagoula, MS
230 | JAN,"Thompson Field - Jackson, Ms",Thompson Field - Jackson, MS
231 | BIL,"Billings, Mt",Billings, MT
232 | BTM,"Butte, Mt",Butte, MT
233 | CHF,"Chief Mt, Mt",Chief Mt, MT
234 | CTB,"Cut Bank Municipal, Mt",Cut Bank Municipal, MT
235 | CUT,"Cut Bank, Mt",Cut Bank, MT
236 | DLB,"Del Bonita, Mt",Del Bonita, MT
237 | EUR,"Eureka, Mt (Bps)",Eureka, MT (BPS)
238 | BZN,"Gallatin Field - Bozeman, Mt",Gallatin Field - Bozeman, MT
239 | FCA,"Glacier National Park, Mt",Glacier National Park, MT
240 | GGW,"Glasgow, Mt",Glasgow, MT
241 | GRE,"Great Falls, Mt",Great Falls, MT
242 | HVR,"Havre, Mt",Havre, MT
243 | HEL,"Helena, Mt",Helena, MT
244 | LWT,"Lewiston, Mt",Lewiston, MT
245 | MGM,"Morgan, Mt",Morgan, MT
246 | OPH,"Opheim, Mt",Opheim, MT
247 | PIE,"Piegan, Mt",Piegan, MT
248 | RAY,"Raymond, Mt",Raymond, MT
249 | ROO,"Roosville, Mt",Roosville, MT
250 | SCO,"Scobey, Mt",Scobey, MT
251 | SWE,"Sweetgtass, Mt",Sweetgtass, MT
252 | TRL,"Trial Creek, Mt",Trial Creek, MT
253 | TUR,"Turner, Mt",Turner, MT
254 | WCM,"Willow Creek, Mt",Willow Creek, MT
255 | CLT,"Charlotte, Nc",Charlotte, NC
256 | FAY,"Fayetteville, Nc",Fayetteville, NC
257 | MRH,"Morehead City, Nc",Morehead City, NC
258 | FOP,"Morris Fields Aaf, Nc",Morris Fields Aaf, NC
259 | GSO,"Piedmont Triad Intl Airport, Nc",Piedmont Triad Intl Airport, NC
260 | RDU,"Raleigh/Durham, Nc",Raleigh/Durham, NC
261 | SSC,"Shaw Afb - Sumter, Nc",Shaw Afb - Sumter, NC
262 | WIL,"Wilmington, Nc",Wilmington, NC
263 | AMB,"Ambrose, Nd",Ambrose, ND
264 | ANT,"Antler, Nd",Antler, ND
265 | CRY,"Carbury, Nd",Carbury, ND
266 | DNS,"Dunseith, Nd",Dunseith, ND
267 | FAR,"Fargo, Nd",Fargo, ND
268 | FRT,"Fortuna, Nd",Fortuna, ND
269 | GRF,"Grand Forks, Nd",Grand Forks, ND
270 | HNN,"Hannah, Nd",Hannah, ND
271 | HNS,"Hansboro, Nd",Hansboro, ND
272 | MAI,"Maida, Nd",Maida, ND
273 | MND,"Minot, Nd",Minot, ND
274 | NEC,"Neche, Nd",Neche, ND
275 | NOO,"Noonan, Nd",Noonan, ND
276 | NRG,"Northgate, Nd",Northgate, ND
277 | PEM,"Pembina, Nd",Pembina, ND
278 | SAR,"Sarles, Nd",Sarles, ND
279 | SHR,"Sherwood, Nd",Sherwood, ND
280 | SJO,"St John, Nd",St John, ND
281 | WAL,"Walhalla, Nd",Walhalla, ND
282 | WHO,"Westhope, Nd",Westhope, ND
283 | WND,"Williston, Nd",Williston, ND
284 | OMA,"Omaha, Ne",Omaha, NE
285 | LEB,"Lebanon, Nh",Lebanon, NH
286 | MHT,"Manchester, Nh",Manchester, NH
287 | PNH,"Pittsburg, Nh",Pittsburg, NH
288 | PSM,"Portsmouth, Nh",Portsmouth, NH
289 | BYO,"Bayonne, Nj",Bayonne, NJ
290 | CNJ,"Camden, Nj",Camden, NJ
291 | HOB,"Hoboken, Nj",Hoboken, NJ
292 | JER,"Jersey City, Nj",Jersey City, NJ
293 | WRI,"Mc Guire Afb - Wrightsown, Nj",Mc Guire Afb - Wrightsown, NJ
294 | MMU,"Morristown, Nj",Morristown, NJ
295 | NEW,"Newark/Teterboro, Nj",Newark/Teterboro, NJ
296 | PER,"Perth Amboy, Nj",Perth Amboy, NJ
297 | ACY,"Pomona Field - Atlantic City, Nj",Pomona Field - Atlantic City, NJ
298 | ALA,"Alamagordo, Nm (Bps)",Alamagordo, NM (BPS)
299 | ABQ,"Albuquerque, Nm",Albuquerque, NM
300 | ANP,"Antelope Wells, Nm",Antelope Wells, NM
301 | CRL,"Carlsbad, Nm",Carlsbad, NM
302 | COL,"Columbus, Nm",Columbus, NM
303 | CDD,"Crane Lake - St. Louis Cnty, Nm",Crane Lake - St. Louis Cnty, NM
304 | DNM,"Deming, Nm (Bps)",Deming, NM (BPS)
305 | LAS,"Las Cruces, Nm",Las Cruces, NM
306 | LOB,"Lordsburg, Nm (Bps)",Lordsburg, NM (BPS)
307 | RUI,"Ruidoso, Nm",Ruidoso, NM
308 | STR,"Santa Teresa, Nm",Santa Teresa, NM
309 | RNO,"Cannon Intl - Reno/Tahoe, Nv",Cannon Intl - Reno/Tahoe, NV
310 | FLX,"Fallon Municipal Airport, Nv",Fallon Municipal Airport, NV
311 | LVG,"Las Vegas, Nv",Las Vegas, NV
312 | REN,"Reno, Nv",Reno, NV
313 | ALB,"Albany, Ny",Albany, NY
314 | AXB,"Alexandria Bay, Ny",Alexandria Bay, NY
315 | BUF,"Buffalo, Ny",Buffalo, NY
316 | CNH,"Cannon Corners, Ny",Cannon Corners, NY
317 | CAP,"Cape Vincent, Ny",Cape Vincent, NY
318 | CHM,"Champlain, Ny",Champlain, NY
319 | CHT,"Chateaugay, Ny",Chateaugay, NY
320 | CLA,"Clayton, Ny",Clayton, NY
321 | FTC,"Fort Covington, Ny",Fort Covington, NY
322 | LAG,"La Guardia, Ny",La Guardia, NY
323 | LEW,"Lewiston, Ny",Lewiston, NY
324 | MAS,"Massena, Ny",Massena, NY
325 | MAG,"Mcguire Afb, Ny",Mcguire Afb, NY
326 | MOO,"Moores, Ny",Moores, NY
327 | MRR,"Morristown, Ny",Morristown, NY
328 | NYC,"New York, Ny",New York, NY
329 | NIA,"Niagara Falls, Ny",Niagara Falls, NY
330 | OGD,"Ogdensburg, Ny",Ogdensburg, NY
331 | OSW,"Oswego, Ny",Oswego, NY
332 | ELM,"Regional Arpt - Horsehead, Ny",Regional Arpt - Horsehead, NY
333 | ROC,"Rochester, Ny",Rochester, NY
334 | ROU,"Rouses Point, Ny",Rouses Point, NY
335 | SWF,"Stewart - Orange Cnty, Ny",Stewart - Orange Cnty, NY
336 | SYR,"Syracuse, Ny",Syracuse, NY
337 | THO,"Thousand Island Bridge, Ny",Thousand Island Bridge, NY
338 | TRO,"Trout River, Ny",Trout River, NY
339 | WAT,"Watertown, Ny",Watertown, NY
340 | HPN,"Westchester - White Plains, Ny",Westchester - White Plains, NY
341 | WRB,"Whirlpool Bridge, Ny",Whirlpool Bridge, NY
342 | YOU,"Youngstown, Ny",Youngstown, NY
343 | AKR,"Akron, Oh",Akron, OH
344 | ATB,"Ashtabula, Oh",Ashtabula, OH
345 | CIN,"Cincinnati, Oh",Cincinnati, OH
346 | CLE,"Cleveland, Oh",Cleveland, OH
347 | CLM,"Columbus, Oh",Columbus, OH
348 | LOR,"Lorain, Oh",Lorain, OH
349 | MBO,"Marble Heads, Oh",Marble Heads, OH
350 | SDY,"Sandusky, Oh",Sandusky, OH
351 | TOL,"Toledo, Oh",Toledo, OH
352 | OKC,"Oklahoma City, Ok",Oklahoma City, OK
353 | TUL,"Tulsa, Ok",Tulsa, OK
354 | AST,"Astoria, Or",Astoria, OR
355 | COO,"Coos Bay, Or",Coos Bay, OR
356 | HIO,"Hillsboro, Or",Hillsboro, OR
357 | MED,"Medford, Or",Medford, OR
358 | NPT,"Newport, Or",Newport, OR
359 | POO,"Portland, Or",Portland, OR
360 | PUT,"Put-In-Bay, Oh",Put-In-Bay, OH
361 | RDM,"Roberts Fields - Redmond, Or",Roberts Fields - Redmond, OR
362 | ERI,"Erie, Pa",Erie, PA
363 | MDT,"Harrisburg, Pa",Harrisburg, PA
364 | HSB,"Harrisonburg, Pa",Harrisonburg, PA
365 | PHI,"Philadelphia, Pa",Philadelphia, PA
366 | PIT,"Pittsburg, Pa",Pittsburg, PA
367 | AGU,"Aguadilla, Pr",Aguadilla, PR
368 | BQN,"Borinquen - Aguadillo, Pr",Borinquen - Aguadillo, PR
369 | JCP,"Culebra - Benjamin Rivera, Pr",Culebra - Benjamin Rivera, PR
370 | ENS,"Ensenada, Pr",Ensenada, PR
371 | FAJ,"Fajardo, Pr",Fajardo, PR
372 | HUM,"Humacao, Pr",Humacao, PR
373 | JOB,"Jobos, Pr",Jobos, PR
374 | MAY,"Mayaguez, Pr",Mayaguez, PR
375 | PON,"Ponce, Pr",Ponce, PR
376 | PSE,"Ponce-Mercedita, Pr",Ponce-Mercedita, PR
377 | SAJ,"San Juan, Pr",San Juan, PR
378 | VQS,"Vieques-Arpt, Pr",Vieques-Arpt, PR
379 | PRO,"Providence, Ri",Providence, RI
380 | PVD,"Theodore Francis - Warwick, Ri",Theodore Francis - Warwick, RI
381 | CHL,"Charleston, Sc",Charleston, SC
382 | CAE,"Columbia, Sc #Arpt",Columbia, SC #ARPT
383 | GEO,"Georgetown, Sc",Georgetown, SC
384 | GSP,"Greenville, Sc",Greenville, SC
385 | GRR,"Greer, Sc",Greer, SC
386 | MYR,"Myrtle Beach, Sc",Myrtle Beach, SC
387 | SPF,"Black Hills, Spearfish, Sd","Black Hills, Spearfish", SD
388 | HON,"Howes Regional Arpt - Huron, Sd",Howes Regional Arpt - Huron, SD
389 | SAI,"Saipan, Spn",Saipan, SPN
390 | TYS,"Mc Ghee Tyson - Alcoa, Tn",Mc Ghee Tyson - Alcoa, TN
391 | MEM,"Memphis, Tn",Memphis, TN
392 | NSV,"Nashville, Tn",Nashville, TN
393 | TRI,"Tri City Arpt, Tn",Tri City Arpt, TN
394 | ADS,"Addison Airport- Addison, Tx",Addison Airport- Addison, TX
395 | ADT,"Amistad Dam, Tx",Amistad Dam, TX
396 | ANZ,"Anzalduas, Tx",Anzalduas, TX
397 | AUS,"Austin, Tx",Austin, TX
398 | BEA,"Beaumont, Tx",Beaumont, TX
399 | BBP,"Big Bend Park, Tx (Bps)",Big Bend Park, TX (BPS)
400 | SCC,"Bp Spec Coord. Ctr, Tx",Bp Spec Coord. Ctr, TX
401 | BTC,"Bp Tactical Unit, Tx",Bp Tactical Unit, TX
402 | BOA,"Bridge Of Americas, Tx",Bridge Of Americas, TX
403 | BRO,"Brownsville, Tx",Brownsville, TX
404 | CRP,"Corpus Christi, Tx",Corpus Christi, TX
405 | DAL,"Dallas, Tx",Dallas, TX
406 | DLR,"Del Rio, Tx",Del Rio, TX
407 | DNA,"Donna, Tx",Donna, TX
408 | EGP,"Eagle Pass, Tx",Eagle Pass, TX
409 | ELP,"El Paso, Tx",El Paso, TX
410 | FAB,"Fabens, Tx",Fabens, TX
411 | FAL,"Falcon Heights, Tx",Falcon Heights, TX
412 | FTH,"Fort Hancock, Tx",Fort Hancock, TX
413 | AFW,"Fort Worth Alliance, Tx",Fort Worth Alliance, TX
414 | FPT,"Freeport, Tx",Freeport, TX
415 | GAL,"Galveston, Tx",Galveston, TX
416 | HLG,"Harlingen, Tx",Harlingen, TX
417 | HID,"Hidalgo, Tx",Hidalgo, TX
418 | HOU,"Houston, Tx",Houston, TX
419 | SGR,"Hull Field, Sugar Land Arpt, Tx","Hull Field, Sugar Land Arpt", TX
420 | LLB,"Juarez-Lincoln Bridge, Tx",Juarez-Lincoln Bridge, TX
421 | LCB,"Laredo Columbia Bridge, Tx",Laredo Columbia Bridge, TX
422 | LRN,"Laredo North, Tx",Laredo North, TX
423 | LAR,"Laredo, Tx",Laredo, TX
424 | LSE,"Los Ebanos, Tx",Los Ebanos, TX
425 | IND,"Los Indios, Tx",Los Indios, TX
426 | LOI,"Los Indios, Tx",Los Indios, TX
427 | MRS,"Marfa, Tx (Bps)",Marfa, TX (BPS)
428 | MCA,"Mcallen, Tx",Mcallen, TX
429 | MAF,"Odessa Regional, Tx",Odessa Regional, TX
430 | PDN,"Paso Del Norte,Tx",Paso Del Norte,TX
431 | PBB,"Peace Bridge, Ny",Peace Bridge, NY
432 | PHR,"Pharr, Tx",Pharr, TX
433 | PAR,"Port Arthur, Tx",Port Arthur, TX
434 | ISB,"Port Isabel, Tx",Port Isabel, TX
435 | POE,"Port Of El Paso, Tx",Port Of El Paso, TX
436 | PRE,"Presidio, Tx",Presidio, TX
437 | PGR,"Progreso, Tx",Progreso, TX
438 | RIO,"Rio Grande City, Tx",Rio Grande City, TX
439 | ROM,"Roma, Tx",Roma, TX
440 | SNA,"San Antonio, Tx",San Antonio, TX
441 | SNN,"Sanderson, Tx",Sanderson, TX
442 | VIB,"Veteran Intl Bridge, Tx",Veteran Intl Bridge, TX
443 | YSL,"Ysleta, Tx",Ysleta, TX
444 | CHA,"Charlotte Amalie, Vi",Charlotte Amalie, VI
445 | CHR,"Christiansted, Vi",Christiansted, VI
446 | CRU,"Cruz Bay, St John, Vi","Cruz Bay, St John", VI
447 | FRK,"Frederiksted, Vi",Frederiksted, VI
448 | STT,"St Thomas, Vi",St Thomas, VI
449 | LGU,"Cache Airport - Logan, Ut",Cache Airport - Logan, UT
450 | SLC,"Salt Lake City, Ut",Salt Lake City, UT
451 | CHO,"Albemarle Charlottesville, Va",Albemarle Charlottesville, VA
452 | DAA,"Davison Aaf - Fairfax Cnty, Va",Davison Aaf - Fairfax Cnty, VA
453 | HOP,"Hopewell, Va",Hopewell, VA
454 | HEF,"Manassas, Va #Arpt",Manassas, VA #ARPT
455 | NWN,"Newport, Va",Newport, VA
456 | NOR,"Norfolk, Va",Norfolk, VA
457 | RCM,"Richmond, Va",Richmond, VA
458 | ABS,"Alburg Springs, Vt",Alburg Springs, VT
459 | ABG,"Alburg, Vt",Alburg, VT
460 | BEB,"Beebe Plain, Vt",Beebe Plain, VT
461 | BEE,"Beecher Falls, Vt",Beecher Falls, VT
462 | BRG,"Burlington, Vt",Burlington, VT
463 | CNA,"Canaan, Vt",Canaan, VT
464 | DER,"Derby Line, Vt (I-91)",Derby Line, VT (I-91)
465 | DLV,"Derby Line, Vt (Rt. 5)",Derby Line, VT (RT. 5)
466 | ERC,"East Richford, Vt",East Richford, VT
467 | HIG,"Highgate Springs, Vt",Highgate Springs, VT
468 | MOR,"Morses Line, Vt",Morses Line, VT
469 | NPV,"Newport, Vt",Newport, VT
470 | NRT,"North Troy, Vt",North Troy, VT
471 | NRN,"Norton, Vt",Norton, VT
472 | PIV,"Pinnacle Road, Vt",Pinnacle Road, VT
473 | RIF,"Richfort, Vt",Richfort, VT
474 | STA,"St Albans, Vt",St Albans, VT
475 | SWB,"Swanton, Vt (Bp - Sector Hq)",Swanton, VT (BP - SECTOR HQ)
476 | WBE,"West Berkshire, Vt",West Berkshire, VT
477 | ABE,"Aberdeen, Wa",Aberdeen, WA
478 | ANA,"Anacortes, Wa",Anacortes, WA
479 | BEL,"Bellingham, Wa",Bellingham, WA
480 | BLI,"Bellingham, Washington #Intl",Bellingham, WASHINGTON #INTL
481 | BLA,"Blaine, Wa",Blaine, WA
482 | BWA,"Boundary, Wa",Boundary, WA
483 | CUR,"Curlew, Wa (Bps)",Curlew, WA (BPS)
484 | DVL,"Danville, Wa",Danville, WA
485 | EVE,"Everett, Wa",Everett, WA
486 | FER,"Ferry, Wa",Ferry, WA
487 | FRI,"Friday Harbor, Wa",Friday Harbor, WA
488 | FWA,"Frontier, Wa",Frontier, WA
489 | KLM,"Kalama, Wa",Kalama, WA
490 | LAU,"Laurier, Wa",Laurier, WA
491 | LON,"Longview, Wa",Longview, WA
492 | MET,"Metaline Falls, Wa",Metaline Falls, WA
493 | MWH,"Moses Lake Grant County Arpt, Wa",Moses Lake Grant County Arpt, WA
494 | NEA,"Neah Bay, Wa",Neah Bay, WA
495 | NIG,"Nighthawk, Wa",Nighthawk, WA
496 | OLY,"Olympia, Wa",Olympia, WA
497 | ORO,"Oroville, Wa",Oroville, WA
498 | PWB,"Pasco, Wa",Pasco, WA
499 | PIR,"Point Roberts, Wa",Point Roberts, WA
500 | PNG,"Port Angeles, Wa",Port Angeles, WA
501 | PTO,"Port Townsend, Wa",Port Townsend, WA
502 | SEA,"Seattle, Wa",Seattle, WA
503 | SPO,"Spokane, Wa",Spokane, WA
504 | SUM,"Sumas, Wa",Sumas, WA
505 | TAC,"Tacoma, Wa",Tacoma, WA
506 | PSC,"Tri-Cities - Pasco, Wa",Tri-Cities - Pasco, WA
507 | VAN,"Vancouver, Wa",Vancouver, WA
508 | AGM,"Algoma, Wi",Algoma, WI
509 | BAY,"Bayfield, Wi",Bayfield, WI
510 | GRB,"Green Bay, Wi",Green Bay, WI
511 | MNW,"Manitowoc, Wi",Manitowoc, WI
512 | MIL,"Milwaukee, Wi",Milwaukee, WI
513 | MSN,"Truax Field - Dane County, Wi",Truax Field - Dane County, WI
514 | CHS,"Charleston, Wv",Charleston, WV
515 | CLK,"Clarksburg, Wv",Clarksburg, WV
516 | BLF,"Mercer County, Wv",Mercer County, WV
517 | CSP,"Casper, Wy",Casper, WY
518 | XXX,Not Reported/Unknown,Not Reported/Unknown,
519 | 888,Unidentifed Air / Seaport,Unidentifed Air / Seaport,
520 | UNK,Unknown Poe,Unknown Poe,
521 | CLG,"Calgary, Canada",Calgary, CANADA
522 | EDA,"Edmonton, Canada",Edmonton, CANADA
523 | YHC,"Hakai Pass, Canada",Hakai Pass, CANADA
524 | HAL,"Halifax, Ns, Canada","Halifax, Ns", CANADA
525 | MON,"Montreal, Canada",Montreal, CANADA
526 | OTT,"Ottawa, Canada",Ottawa, CANADA
527 | YXE,"Saskatoon, Canada",Saskatoon, CANADA
528 | TOR,"Toronto, Canada",Toronto, CANADA
529 | VCV,"Vancouver, Canada",Vancouver, CANADA
530 | VIC,"Victoria, Canada",Victoria, CANADA
531 | WIN,"Winnipeg, Canada",Winnipeg, CANADA
532 | AMS,"Amsterdam-Schiphol, Netherlands",Amsterdam-Schiphol, NETHERLANDS
533 | ARB,"Aruba, Neth Antilles",Aruba, NETH ANTILLES
534 | BAN,"Bankok, Thailand",Bankok, THAILAND
535 | BEI,"Beica #Arpt, Ethiopia",Beica #Arpt, ETHIOPIA
536 | PEK,"Beijing Capital Intl, Prc",Beijing Capital Intl, PRC
537 | BDA,"Kindley Field, Bermuda",Kindley Field, BERMUDA
538 | BOG,"Bogota, El Dorado #Arpt, Colombia","Bogota, El Dorado #Arpt", COLOMBIA
539 | EZE,"Buenos Aires, Ministro Pist, Argentina","Buenos Aires, Ministro Pist", ARGENTINA
540 | CUN,"Cancun, Mexico",Cancun, MEXICO
541 | CRQ,"Caravelas, Ba #Arpt, Brazil","Caravelas, Ba #Arpt", BRAZIL
542 | MVD,"Carrasco, Uruguay",Carrasco, URUGUAY
543 | DUB,"Dublin, Ireland",Dublin, IRELAND
544 | FOU,"Fougamou #Arpt, Gabon",Fougamou #Arpt, GABON
545 | FBA,"Freeport, Bahamas",Freeport, BAHAMAS
546 | MTY,"Gen M. Escobedo, Monterrey, Mx","Gen M. Escobedo, Monterrey", MX
547 | HMO,"Gen Pesqueira Garcia, Mx",Gen Pesqueira Garcia, MX
548 | GCM,"Grand Cayman, Cayman Island",Grand Cayman, CAYMAN ISLAND
549 | GDL,"Guadalajara, Miguel Hidal, Mx","Guadalajara, Miguel Hidal", MX
550 | HAM,"Hamilton, Bermuda",Hamilton, BERMUDA
551 | ICN,"Inchon, Seoul Korea",Inchon, SEOUL KOREA
552 | IWA,"Invalid - Iwakuni, Japan",Invalid - Iwakuni, JAPAN
553 | CND,"Kogalniceanu, Romania",Kogalniceanu, ROMANIA
554 | LAH,"Labuha Arpt, Indonesia",Labuha Arpt, INDONESIA
555 | DUR,"Louis Botha, South Africa",Louis Botha, SOUTH AFRICA
556 | MAL,"Mangole Arpt, Indonesia",Mangole Arpt, INDONESIA
557 | MDE,"Medellin, Colombia",Medellin, COLOMBIA
558 | MEX,"Juarez Intl, Mexico City, Mx","Juarez Intl, Mexico City", MX
559 | LHR,"Middlesex, England",Middlesex, ENGLAND
560 | NBO,"Nairobi, Kenya",Nairobi, KENYA
561 | NAS,"Nassau, Bahamas",Nassau, BAHAMAS
562 | NCA,"North Caicos, Turk & Caiman",North Caicos, TURK & CAIMAN
563 | PTY,"Omar Torrijos, Panama",Omar Torrijos, PANAMA
564 | SPV,"Papua, New Guinea",Papua, NEW GUINEA
565 | UIO,"Quito (Mariscal Sucr), Ecuador",Quito (Mariscal Sucr), ECUADOR
566 | RIT,"Rome, Italy",Rome, ITALY
567 | SNO,"Sakon Nakhon #Arpt, Thailand",Sakon Nakhon #Arpt, THAILAND
568 | SLP,"San Luis Potosi #Arpt, Mexico",San Luis Potosi #Arpt, MEXICO
569 | SAN,"San Salvador, El Salvador",San Salvador, EL SALVADOR
570 | SRO,"Santana Ramos #Arpt, Colombia",Santana Ramos #Arpt, COLOMBIA
571 | GRU,"Guarulhos Intl, Sao Paulo, Brazil","Guarulhos Intl, Sao Paulo", BRAZIL
572 | SHA,"Shannon, Ireland",Shannon, IRELAND
573 | HIL,"Shillavo, Ethiopia",Shillavo, ETHIOPIA
574 | TOK,"Torokina #Arpt, Papua, New Guinea","Torokina #Arpt, Papua", NEW GUINEA
575 | VER,"Veracruz, Mexico",Veracruz, MEXICO
576 | LGW,"West Sussex, England",West Sussex, ENGLAND
577 | ZZZ,Mexico Land (Banco De Mexico),Mexico Land (Banco De Mexico),
578 | CHN,No Port Code (Chn),No Port Code (Chn),
579 | CNC,"Cannon Corners, Ny",Cannon Corners, NY
580 | MAA,Abu Dhabi,Abu Dhabi,
581 | AG0,"Magnolia, Ar",Magnolia, AR
582 | BHM,"Bar Harbor, Me",Bar Harbor, ME
583 | BHX,"Birmingham, Al",Birmingham, AL
584 | CAK,"Akron, Oh",Akron, OH
585 | FOK,"Suffolk County, Ny",Suffolk County, NY
586 | LND,"Lander, Wy",Lander, WY
587 | MAR,"Marfa, Tx",Marfa, TX
588 | MLI,"Moline, Il",Moline, IL
589 | RIV,"Riverside, Ca",Riverside, CA
590 | RME,"Rome, Ny",Rome, NY
591 | VNY,"Van Nuys, Ca",Van Nuys, CA
592 | YUM,"Yuma, Az",Yuma, AZ
593 | FRG,Collapsed (Fok) 06/15,Collapsed (Fok) 06/15,
594 | HRL,Collapsed (Hlg) 06/15,Collapsed (Hlg) 06/15,
595 | ISP,Collapsed (Fok) 06/15,Collapsed (Fok) 06/15,
596 | JSJ,Collapsed (Saj) 06/15,Collapsed (Saj) 06/15,
597 | BUS,Collapsed (Buf) 06/15,Collapsed (Buf) 06/15,
598 | IAG,Collapsed (Nia) 06/15,Collapsed (Nia) 06/15,
599 | PHN,Collapsed (Phu) 06/15,Collapsed (Phu) 06/15,
600 | STN,Collapsed (Str) 06/15,Collapsed (Str) 06/15,
601 | VMB,Collapsed (Vnb) 06/15,Collapsed (Vnb) 06/15,
602 | T01,Collapsed (Sea) 06/15,Collapsed (Sea) 06/15,
603 | PHF,No Port Code (Phf),No Port Code (Phf),
604 | DRV,No Port Code (Drv),No Port Code (Drv),
605 | FTB,No Port Code (Ftb),No Port Code (Ftb),
606 | GAC,No Port Code (Gac),No Port Code (Gac),
607 | GMT,No Port Code (Gmt),No Port Code (Gmt),
608 | JFA,No Port Code (Jfa),No Port Code (Jfa),
609 | JMZ,No Port Code (Jmz),No Port Code (Jmz),
610 | NC8,No Port Code (Nc8),No Port Code (Nc8),
611 | NYL,No Port Code (Nyl),No Port Code (Nyl),
612 | OAI,No Port Code (Oai),No Port Code (Oai),
613 | PCW,No Port Code (Pcw),No Port Code (Pcw),
614 | WA5,No Port Code (Was),No Port Code (Was),
615 | WTR,No Port Code (Wtr),No Port Code (Wtr),
616 | X96,No Port Code (X96),No Port Code (X96),
617 | XNA,No Port Code (Xna),No Port Code (Xna),
618 | YGF,No Port Code (Ygf),No Port Code (Ygf),
619 | 5T6,No Port Code (5T6),No Port Code (5T6),
620 | 060,No Port Code (60),No Port Code (60),
621 | SP0,No Port Code (Sp0),No Port Code (Sp0),
622 | W55,No Port Code (W55),No Port Code (W55),
623 | X44,No Port Code (X44),No Port Code (X44),
624 | AUH,No Port Code (Auh),No Port Code (Auh),
625 | RYY,No Port Code (Ryy),No Port Code (Ryy),
626 | SUS,No Port Code (Sus),No Port Code (Sus),
627 | 74S,No Port Code (74S),No Port Code (74S),
628 | ATW,No Port Code (Atw),No Port Code (Atw),
629 | CPX,No Port Code (Cpx),No Port Code (Cpx),
630 | MTH,No Port Code (Mth),No Port Code (Mth),
631 | PFN,No Port Code (Pfn),No Port Code (Pfn),
632 | SCH,No Port Code (Sch),No Port Code (Sch),
633 | ASI,No Port Code (Asi),No Port Code (Asi),
634 | BKF,No Port Code (Bkf),No Port Code (Bkf),
635 | DAY,No Port Code (Day),No Port Code (Day),
636 | Y62,No Port Code (Y62),No Port Code (Y62),
637 | AG,No Port Code (Ag),No Port Code (Ag),
638 | BCM,No Port Code (Bcm),No Port Code (Bcm),
639 | DEC,No Port Code (Dec),No Port Code (Dec),
640 | PLB,No Port Code (Plb),No Port Code (Plb),
641 | CXO,No Port Code (Cxo),No Port Code (Cxo),
642 | JBQ,No Port Code (Jbq),No Port Code (Jbq),
643 | JIG,No Port Code (Jig),No Port Code (Jig),
644 | OGS,No Port Code (Ogs),No Port Code (Ogs),
645 | TIW,No Port Code (Tiw),No Port Code (Tiw),
646 | OTS,No Port Code (Ots),No Port Code (Ots),
647 | AMT,No Port Code (Amt),No Port Code (Amt),
648 | EGE,No Port Code (Ege),No Port Code (Ege),
649 | GPI,No Port Code (Gpi),No Port Code (Gpi),
650 | NGL,No Port Code (Ngl),No Port Code (Ngl),
651 | OLM,No Port Code (Olm),No Port Code (Olm),
652 | .GA,No Port Code (.Ga),No Port Code (.Ga),
653 | CLX,No Port Code (Clx),No Port Code (Clx),
654 | CP,No Port Code (Cp),No Port Code (Cp),
655 | FSC,No Port Code (Fsc),No Port Code (Fsc),
656 | NK,No Port Code (Nk),No Port Code (Nk),
657 | ADU,No Port Code (Adu),No Port Code (Adu),
658 | AKT,No Port Code (Akt),No Port Code (Akt),
659 | LIT,No Port Code (Lit),No Port Code (Lit),
660 | A2A,No Port Code (A2A),No Port Code (A2A),
661 | OSN,No Port Code (Osn),No Port Code (Osn),
662 |
--------------------------------------------------------------------------------
/airflow/dags/temp_output/i94visa.csv:
--------------------------------------------------------------------------------
1 | reason_code,reason_travel
2 | 1,Business
3 | 2,Pleasure
4 | 3,Student
5 |
--------------------------------------------------------------------------------
/airflow/dags/udacity_capstone.py:
--------------------------------------------------------------------------------
1 | # generic
2 | from datetime import datetime, timedelta
3 | import os
4 | import shutil
5 | import logging
6 | # airflow
7 | from airflow import DAG
8 | from airflow.operators.dummy_operator import DummyOperator
9 | from airflow.operators.postgres_operator import PostgresOperator
10 | from airflow.operators.python_operator import PythonOperator
11 | from airflow.operators import (SASToCSVOperator, TransferToS3Operator, SAS7ToParquet, StageToRedshiftOperator, DataQualityOperator)
12 | from subdags.subdag_for_dimensions import load_dimension_subdag
13 | from airflow.models import Variable
14 | from helpers import SqlQueries
15 | from airflow.operators.subdag_operator import SubDagOperator
16 |
17 |
# Arguments applied to every task of the DAG.
# NOTE: 'catchup' is a DAG-level setting. Airflow does not read it from
# default_args, so it is passed to the DAG constructor below as well. The key
# is kept here so that any code inspecting this dict keeps working.
default_args = {
    'owner': 'udacity',
    'start_date': datetime(2019, 8, 22, 7),
    'end_date': datetime(2019, 12, 31, 7),
    'email_on_retry': False,
    'retries': 3,
    'catchup': False,
    'retry_delay': timedelta(minutes=5),
    'depends_on_past': True,
    'wait_for_downstream': True
}

# Daily pipeline; catchup=False prevents the scheduler from back-filling
# every interval between start_date and the current date.
dag = DAG('udacity_capstone',
          default_args=default_args,
          description='Data Engineering Capstone Project',
          schedule_interval='@daily',
          catchup=False
          )
36 |
# Entry node of the DAG
start_operator = DummyOperator(dag=dag, task_id='Begin_execution')

# Convert the SAS label descriptor file into CSV lookup tables
convert_sas_to_csv = SASToCSVOperator(
    dag=dag,
    task_id='sas_to_csv',
    input_path=Variable.get("sas_file"),
    output_path=Variable.get("temp_output"),
    provide_context=True,
)

# Upload the generated CSV files to S3
transfer_to_s3_csv = TransferToS3Operator(
    dag=dag,
    task_id='transfer_to_s3_csv',
    aws_credentials_id="aws_default",
    input_path=Variable.get("temp_output"),
    bucket_name=Variable.get("s3_bucket"),
    file_ext="csv",
    provide_context=True,
)

# Convert the sas7bdat input files into a parquet dataset
sas7bdat_to_parquet = SAS7ToParquet(
    dag=dag,
    task_id='sas7bdat_to_parquet',
    input_path=Variable.get("temp_input"),
    output_path=Variable.get("spark_path"),
    provide_context=True,
)

# Upload the parquet files to S3
transfer_to_s3_parquet = TransferToS3Operator(
    dag=dag,
    task_id='transfer_to_s3_parquet',
    aws_credentials_id="aws_default",
    input_path=Variable.get("spark_path"),
    bucket_name=Variable.get("s3_bucket"),
    file_ext="parquet",
    provide_context=True,
)

# Redshift DDL: create the schema, drop old tables, create fresh tables
task_create_schema = PostgresOperator(
    dag=dag,
    task_id="create_schema",
    postgres_conn_id="redshift",
    sql=SqlQueries.create_schema,
)

task_drop_table = PostgresOperator(
    dag=dag,
    task_id="drop_table",
    postgres_conn_id="redshift",
    sql=SqlQueries.drop_tables,
)

task_create_table = PostgresOperator(
    dag=dag,
    task_id="create_table",
    postgres_conn_id="redshift",
    sql=SqlQueries.create_tables,
)

# Sub-DAG that loads the tables; it shares its task id with the wrapper task
load_dimension_subdag_task = SubDagOperator(
    dag=dag,
    task_id="load_dimensions",
    subdag=load_dimension_subdag(
        parent_dag_name="udacity_capstone",
        task_id="load_dimensions",
        redshift_conn_id="redshift",
        start_date=datetime(2018, 1, 1),
    ),
)

# Run the row-count quality check against every table
run_quality_checks = DataQualityOperator(
    dag=dag,
    task_id='Run_data_quality_checks',
    redshift_conn_id="redshift",
    sql_stmt=SqlQueries.count_check,
    tables=SqlQueries.tables,
)
118 |
119 |
def cleaning(**kwargs):
    """Remove every entry inside the folder named by the ``spark_path`` variable.

    Cleanup is best effort: an entry that cannot be removed is logged and
    skipped so that a leftover temporary file never fails the pipeline. The
    folder itself is kept.

    :param kwargs: Airflow task context (unused).
    """
    folder = Variable.get("spark_path")
    if not os.path.isdir(folder):
        # Nothing to clean; avoid failing the run on a missing directory.
        logging.info("Temp folder %s does not exist, nothing to clean", folder)
        return

    for entry in os.listdir(folder):
        entry_path = os.path.join(folder, entry)
        try:
            # Symlinks are unlinked as well: shutil.rmtree refuses to
            # operate on a symbolic link to a directory.
            if os.path.isfile(entry_path) or os.path.islink(entry_path):
                os.unlink(entry_path)
            elif os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
        except Exception as e:
            logging.warning("Could not remove %s: %s", entry_path, e)
131 |
132 |
# Remove the locally generated temporary files once everything is loaded
clean_temp_files = PythonOperator(
    dag=dag,
    task_id='clean_temp_files',
    provide_context=True,
    python_callable=cleaning,
)

# grant_access = """
# create group webappusers;
# create user webappuser1 password 'webAppuser1pass' in group webappusers;
# grant usage on schema project to group webappusers;
# """
# grant_access_to_users = PostgresOperator(
#     task_id="grant_access",
#     postgres_conn_id="redshift",
#     sql=grant_access,
#     dag=dag
# )

# Exit node of the DAG
end_operator = DummyOperator(dag=dag, task_id='Stop_execution')

# Task ordering.
# Two branches (CSV lookups and parquet data) start in parallel and join at
# the schema creation step.
start_operator >> convert_sas_to_csv
convert_sas_to_csv >> transfer_to_s3_csv
transfer_to_s3_csv >> task_create_schema

start_operator >> sas7bdat_to_parquet
sas7bdat_to_parquet >> transfer_to_s3_parquet
transfer_to_s3_parquet >> task_create_schema

# From the join onwards the pipeline is strictly sequential.
task_create_schema >> task_drop_table
task_drop_table >> task_create_table
task_create_table >> load_dimension_subdag_task
load_dimension_subdag_task >> run_quality_checks
run_quality_checks >> clean_temp_files
clean_temp_files >> end_operator
159 |
--------------------------------------------------------------------------------
/airflow/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, absolute_import, print_function
2 | from airflow.plugins_manager import AirflowPlugin
3 | import operators
4 | import helpers
5 |
6 |
7 | # Defining the plugin class
class UdacityPlugin(AirflowPlugin):
    """Airflow plugin that registers the project's operators and helpers."""

    name = "udacity_plugin"

    # Custom operator classes taken from the local ``operators`` package.
    operators = [
        operators.SASToCSVOperator,
        operators.TransferToS3Operator,
        operators.SAS7ToParquet,
        operators.StageToRedshiftOperator,
        operators.DataQualityOperator,
    ]

    # Helper classes taken from the local ``helpers`` package.
    helpers = [helpers.SqlQueries]
--------------------------------------------------------------------------------
/airflow/plugins/helpers/__init__.py:
--------------------------------------------------------------------------------
1 | from helpers.sql_queries import SqlQueries
2 |
3 | __all__ = [
4 | 'SqlQueries'
5 | ]
6 |
--------------------------------------------------------------------------------
/airflow/plugins/helpers/sql_queries.py:
--------------------------------------------------------------------------------
class SqlQueries:
    """
    SQL statements used by the pipeline.

    Holds the DDL for every table of the ``project`` schema, the matching
    drop statements, the COPY command templates and the data quality query.
    The ``{}`` placeholders in the templates are filled with ``str.format``
    by the code that uses them.
    """

    immigration = """
        CREATE TABLE IF NOT EXISTS project.immigration (
            cicid FLOAT,
            i94yr FLOAT,
            i94mon FLOAT,
            i94cit FLOAT,
            i94res FLOAT,
            i94port VARCHAR,
            arrdate FLOAT,
            i94mode FLOAT,
            i94addr varchar,
            depdate FLOAT,
            i94bir FLOAT,
            i94visa FLOAT,
            count FLOAT,
            dtadfile VARCHAR,
            visapost VARCHAR,
            occup VARCHAR,
            entdepa VARCHAR,
            entdepd VARCHAR,
            entdepu VARCHAR,
            matflag VARCHAR,
            biryear FLOAT,
            dtaddto VARCHAR,
            gender VARCHAR,
            insnum VARCHAR,
            airline VARCHAR,
            admnum FLOAT,
            fltno VARCHAR,
            visatype VARCHAR
        );
    """

    airports = """
        CREATE TABLE IF NOT EXISTS project.airport_codes (
            id INT,
            ident VARCHAR,
            type VARCHAR,
            name VARCHAR,
            latitude_deg FLOAT,
            longitude_deg FLOAT,
            elevation_ft FLOAT,
            continent VARCHAR,
            iso_country VARCHAR,
            iso_region VARCHAR,
            municipality VARCHAR,
            scheduled_service VARCHAR,
            gps_code VARCHAR,
            iata_code VARCHAR,
            local_code VARCHAR,
            home_link VARCHAR,
            wikipedia_link VARCHAR,
            keywords VARCHAR
        );
    """

    i94ports = """
        CREATE TABLE IF NOT EXISTS project.i94ports (
            port_code VARCHAR,
            port_of_entry VARCHAR,
            port_city VARCHAR,
            port_state_or_country VARCHAR
        );
    """

    i94visa = """
        CREATE TABLE IF NOT EXISTS project.i94visa (
            visa_code INT,
            visa_reason VARCHAR
        );
    """

    i94mode = """
        CREATE TABLE IF NOT EXISTS project.i94mode (
            trans_code INT,
            trans_name VARCHAR
        );
    """

    i94addr = """
        CREATE TABLE IF NOT EXISTS project.i94addr (
            state_code VARCHAR,
            state_name VARCHAR
        );
    """

    i94res = """
        CREATE TABLE IF NOT EXISTS project.i94res (
            country_code INT,
            country_name VARCHAR
        );
    """

    us_cities_demographics = """
        CREATE TABLE IF NOT EXISTS project.us_cities_demographics (
            city VARCHAR,
            state VARCHAR,
            median_age FLOAT,
            male_population FLOAT,
            female_population FLOAT,
            total_population FLOAT,
            number_of_veterans FLOAT,
            foreign_born FLOAT,
            average_household_size FLOAT,
            state_code VARCHAR,
            race VARCHAR,
            count INT
        );
    """

    # The table names here must match the CREATE statements above. The ports
    # table is named "i94ports"; dropping "i94port" would leave the real table
    # in place, and every reload would then append to the old rows.
    drop_tables = """
        DROP TABLE IF EXISTS project.immigration;
        DROP TABLE IF EXISTS project.airport_codes;
        DROP TABLE IF EXISTS project.i94ports;
        DROP TABLE IF EXISTS project.i94visa;
        DROP TABLE IF EXISTS project.i94mode;
        DROP TABLE IF EXISTS project.i94addr;
        DROP TABLE IF EXISTS project.i94res;
        DROP TABLE IF EXISTS project.us_cities_demographics;
    """

    create_tables = immigration + airports + i94ports + i94visa + i94mode + i94addr + i94res + us_cities_demographics
    tables = ["immigration", "airport_codes", "i94ports", "i94visa",
              "i94mode", "i94addr", "i94res", "us_cities_demographics"]

    # Placeholders: table, S3 path, access key id, secret access key, delimiter.
    copy_csv_cmd = """
        COPY project.{} FROM '{}'
        CREDENTIALS 'aws_access_key_id={};aws_secret_access_key={}'
        IGNOREHEADER 1
        DELIMITER '{}'
        COMPUPDATE OFF
        TRUNCATECOLUMNS
        CSV;
    """
    create_schema = """
        CREATE schema IF NOT EXISTS project;
    """
    # Yields 1 when the table holds at least one row, otherwise 0.
    count_check = """SELECT CASE WHEN COUNT(*) > 0 THEN 1 ELSE 0 END AS non_empty FROM project.{}"""

    # Placeholders: table, S3 path, IAM role.
    copy_parquet_cmd = """
        COPY project.{} FROM '{}'
        IAM_ROLE '{}'
        FORMAT AS PARQUET;
    """
--------------------------------------------------------------------------------
/airflow/plugins/operators/__init__.py:
--------------------------------------------------------------------------------
1 | from operators.sas_to_csv import SASToCSVOperator
2 | from operators.transfer_to_s3 import TransferToS3Operator
3 | from operators.sas7bdat_to_parquet import SAS7ToParquet
4 | from operators.stage_redshift import StageToRedshiftOperator
5 | from operators.data_quality import DataQualityOperator
6 |
7 | __all__ = [
8 | 'SASToCSVOperator',
9 | 'TransferToS3Operator',
10 | 'SAS7ToParquet',
11 | 'StageToRedshiftOperator',
12 | 'DataQualityOperator'
13 | ]
--------------------------------------------------------------------------------
/airflow/plugins/operators/data_quality.py:
--------------------------------------------------------------------------------
1 | from airflow.hooks.postgres_hook import PostgresHook
2 | from airflow.models import BaseOperator
3 | from airflow.utils.decorators import apply_defaults
4 |
5 |
class DataQualityOperator(BaseOperator):
    """Run a per-table quality query and fail the task when a check fails.

    ``sql_stmt`` is formatted with each table name and executed. A check
    passes only when the first column of the first returned row equals 1.

    :param redshift_conn_id: Airflow connection id passed to ``PostgresHook``.
    :param sql_stmt: query template with one ``{}`` placeholder for the table.
    :param tables: iterable of table names to check.
    """

    ui_color = '#89DA59'

    @apply_defaults
    def __init__(self,
                 redshift_conn_id,
                 sql_stmt,
                 tables,
                 *args, **kwargs):

        super(DataQualityOperator, self).__init__(*args, **kwargs)
        self.redshift_conn_id = redshift_conn_id
        self.sql_stmt = sql_stmt
        self.tables = tables

    def execute(self, context):
        """Check every configured table.

        :raises ValueError: when the query cannot be executed, returns no
            rows, or returns a value other than 1.
        """
        self.log.info(""" Checking ETL result quality """)
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        for cur_table in self.tables:
            query = self.sql_stmt.format(cur_table)
            try:
                # get_records fetches the result set. The previously used
                # run() does not return the query result, so comparing its
                # return value could never detect a failed check.
                records = redshift.get_records(query)
            except Exception as err:
                raise ValueError(
                    f""" Quality check for {cur_table} could not be executed """
                ) from err

            if not records or not records[0] or records[0][0] != 1:
                raise ValueError(
                    f""" Quality check failed for {cur_table} """)
            self.log.info(f""" Quality test passed for {cur_table} """)
--------------------------------------------------------------------------------
/airflow/plugins/operators/sas7bdat_to_parquet.py:
--------------------------------------------------------------------------------
1 | from pyspark.sql import SparkSession
2 | from os import listdir
3 | from os.path import isfile, join
4 | from pyspark.sql.types import *
5 | from airflow.models import BaseOperator
6 | from airflow.utils.decorators import apply_defaults
7 | import logging
8 | import shutil
9 | import os
10 |
11 |
class SAS7ToParquet(BaseOperator):
    """Merge all ``.sas7bdat`` files of a directory into one parquet dataset.

    Every file is read with the ``spark-sas7bdat`` package, reduced to the
    columns of the target schema and unioned into one data frame. Rows
    without ``i94addr`` or ``i94res`` are dropped before writing.

    :param input_path: directory containing the ``.sas7bdat`` files.
    :param output_path: directory the parquet dataset is written to; an
        existing directory at this path is removed first.
    """

    ui_color = '#87CEFA'

    @apply_defaults
    def __init__(self,
                 input_path,
                 output_path,
                 *args, **kwargs):

        super(SAS7ToParquet, self).__init__(*args, **kwargs)
        self.input_path = input_path
        self.output_path = output_path

    @staticmethod
    def _immigration_schema():
        """Return the target schema; it is the single source of column names."""
        layout = [
            ('cicid', DoubleType),
            ('i94yr', DoubleType),
            ('i94mon', DoubleType),
            ('i94cit', DoubleType),
            ('i94res', DoubleType),
            ('i94port', StringType),
            ('arrdate', DoubleType),
            ('i94mode', DoubleType),
            ('i94addr', StringType),
            ('depdate', DoubleType),
            ('i94bir', DoubleType),
            ('i94visa', DoubleType),
            ('count', DoubleType),
            ('dtadfile', StringType),
            ('visapost', StringType),
            ('occup', StringType),
            ('entdepa', StringType),
            ('entdepd', StringType),
            ('entdepu', StringType),
            ('matflag', StringType),
            ('biryear', DoubleType),
            ('dtaddto', StringType),
            ('gender', StringType),
            ('insnum', StringType),
            ('airline', StringType),
            ('admnum', DoubleType),
            ('fltno', StringType),
            ('visatype', StringType),
        ]
        return StructType(
            [StructField(name, dtype(), True) for name, dtype in layout])

    def _convert(self, spark):
        """Read the input files, union them and write the parquet output."""
        logging.info('Defining column names and resulting schema ... ')
        schema = self._immigration_schema()
        # Derived from the schema so the two can never drift apart.
        columns = schema.names

        # Empty frame with the target schema, used as the union seed.
        df_all = spark.createDataFrame(spark.sparkContext.emptyRDD(), schema)

        logging.info('Reading sas7bdat files from disc ... ')
        onlyfiles = [join(self.input_path, f) for f in
                     listdir(self.input_path) if
                     isfile(join(self.input_path, f))]

        for f in onlyfiles:
            file_name, file_extension = os.path.splitext(f)
            if file_extension == '.sas7bdat':
                df_temp = spark.read.format(
                    'com.github.saurfang.sas.spark').load(f) \
                    .select(columns)
                df_all = df_all.union(df_temp)

        logging.info('Writing parquet to disc ... ')
        if os.path.exists(self.output_path):
            shutil.rmtree(self.output_path)

        df_temp = df_all.filter(df_all.i94addr.isNotNull()) \
            .filter(df_all.i94res.isNotNull())
        df_temp.write.parquet(self.output_path)

    def execute(self, context):
        """Create a Spark session, run the conversion and stop the session."""
        logging.info("Creating spark session ...")
        spark = SparkSession.builder \
            .config("spark.jars.packages",
                    "saurfang:spark-sas7bdat:2.0.0-s_2.11") \
            .enableHiveSupport() \
            .getOrCreate()

        try:
            self._convert(spark)
        finally:
            # Release the Spark resources even when the conversion fails.
            spark.stop()
--------------------------------------------------------------------------------
/airflow/plugins/operators/sas_to_csv.py:
--------------------------------------------------------------------------------
1 | from airflow.models import BaseOperator
2 | from airflow.utils.decorators import apply_defaults
3 | import logging
4 | import pandas as pd
5 | import re
6 | import os
7 |
8 |
class SASToCSVOperator(BaseOperator):
    """Extract the lookup tables of a SAS label description file into CSVs.

    The label file is parsed into sections keyed by attribute name. For the
    sections ``i94cit&i94res``, ``i94port``, ``i94mode``, ``i94addr`` and
    ``i94visa`` a data frame is built and written to
    ``<output_path>/<section>.csv``.

    :param input_path: path of the SAS label description file.
    :param output_path: directory that receives the CSV files.
    """

    ui_color = '#FFC0CB'

    @apply_defaults
    def __init__(self,
                 input_path,
                 output_path,
                 *args, **kwargs):

        super(SASToCSVOperator, self).__init__(*args, **kwargs)
        self.input_path = input_path
        self.output_path = output_path

    @staticmethod
    def _parse_labels(text):
        """Split the label file text into ``{attribute: {'desc', 'data'}}``.

        A line containing ``/*`` and ``-`` starts a new attribute, lines
        containing ``=`` are collected as ``[code, label]`` rows, and the
        first other line after a run of rows closes that run.
        """
        sections = {}
        rows = []
        attr_name = ''

        for line in text.split("\n"):
            line = re.sub(r"\s+|\t+|\r+", " ", line)

            if "/*" in line and "-" in line:
                attr_name, attr_desc = [
                    item.strip(" ")
                    for item in line.split("*")[1].split("-", 1)]
                attr_name = attr_name.replace(' & ', '&').lower()
                if attr_name != '':
                    sections[attr_name] = {'desc': attr_desc}
            elif '=' in line:
                rows.append(
                    [item.strip(';').strip(" ").replace('\'', '')
                         .lstrip().rstrip().title()
                     for item in line.split('=')])
            elif rows:
                if attr_name != '':
                    sections[attr_name]['data'] = rows
                # Rows seen before any attribute header belong to no table.
                rows = []

        # Keep the rows of the last section when the file ends without a
        # terminating line after them.
        if rows and attr_name != '':
            sections[attr_name]['data'] = rows

        return sections

    def execute(self, context):
        """Parse the label file and write one CSV per lookup table.

        :raises KeyError: when one of the expected sections is missing.
        """
        with open(self.input_path, "r", encoding='utf-8') as main_file:
            file = main_file.read()

        logging.info("reading file ...")
        sas_label_ext = self._parse_labels(file)

        # country
        logging.info("preparing country codes ...")
        sas_label_ext['i94cit&i94res']['df'] = pd.DataFrame(
            sas_label_ext['i94cit&i94res']['data'],
            columns=['country_code', 'country_name'])

        # port
        logging.info("preparing port codes ...")
        tempdf = pd.DataFrame(sas_label_ext['i94port']['data'],
                              columns=['port_code', 'port_name'])
        tempdf['port_code'] = tempdf['port_code'].str.upper()
        # n must be passed by keyword: newer pandas releases accept only
        # the pattern positionally.
        tempdf[['port_city', 'port_state']] = tempdf[
            'port_name'].str.rsplit(',', n=1, expand=True)
        # Splitting "City, St" leaves a leading blank on the state part;
        # strip it so the value can be joined against state codes.
        tempdf['port_city'] = tempdf['port_city'].str.strip()
        tempdf['port_state'] = tempdf['port_state'].str.strip().str.upper()
        sas_label_ext['i94port']['df'] = tempdf

        # mode
        logging.info("preparing transport modes ...")
        sas_label_ext['i94mode']['df'] = pd.DataFrame(
            sas_label_ext['i94mode']['data'],
            columns=['trans_code', 'trans_name'])

        # address
        logging.info("preparing state codes ...")
        tempdf = pd.DataFrame(sas_label_ext['i94addr']['data'],
                              columns=['state_code', 'state_name'])
        tempdf['state_code'] = tempdf['state_code'].str.upper()
        sas_label_ext['i94addr']['df'] = tempdf

        # visa
        logging.info("preparing visa codes ...")
        sas_label_ext['i94visa']['df'] = pd.DataFrame(
            sas_label_ext['i94visa']['data'],
            columns=['reason_code', 'reason_travel'])

        # write to csv
        logging.info("writing to csv files ...")
        os.makedirs(self.output_path, exist_ok=True)
        for table, section in sas_label_ext.items():
            if 'df' in section:
                with open(os.path.join(self.output_path, table + ".csv"),
                          "w") as output_file:
                    section['df'].to_csv(output_file, index=False)
98 |
--------------------------------------------------------------------------------
/airflow/plugins/operators/stage_redshift.py:
--------------------------------------------------------------------------------
1 | from airflow.hooks.postgres_hook import PostgresHook
2 | from airflow.models import BaseOperator
3 | from airflow.utils.decorators import apply_defaults
4 | from airflow.contrib.hooks.aws_hook import AwsHook
5 |
6 |
class StageToRedshiftOperator(BaseOperator):
    """Load one S3 file into a Redshift table with a COPY statement.

    The S3 key is formatted with the task context, joined with the bucket
    and file name into an ``s3://`` path, and inserted into ``sql_stmt``
    together with the table name, the AWS key pair and the delimiter.
    """

    @apply_defaults
    def __init__(self,
                 redshift_conn_id,
                 aws_credentials_id,
                 file,
                 delimiter,
                 table,
                 s3_bucket,
                 s3_key,
                 sql_stmt,
                 *args, **kwargs):

        super(StageToRedshiftOperator, self).__init__(*args, **kwargs)
        # connections
        self.redshift_conn_id = redshift_conn_id
        self.aws_credentials_id = aws_credentials_id
        # source
        self.s3_bucket = s3_bucket
        self.s3_key = s3_key
        self.file = file
        self.delimiter = delimiter
        # target
        self.table = table
        self.sql_stmt = sql_stmt

    def execute(self, context):
        """Build the COPY statement and run it on the Redshift connection."""
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        self.log.info("Copying data from S3 to Redshift")

        # The key may contain placeholders resolved from the task context.
        location = (self.s3_bucket, self.s3_key.format(**context), self.file)
        s3_path = "s3://{}/{}/{}".format(*location)

        copy_args = (self.table,
                     s3_path,
                     credentials.access_key,
                     credentials.secret_key,
                     self.delimiter)
        redshift.run(self.sql_stmt.format(*copy_args))
46 |
--------------------------------------------------------------------------------
/airflow/plugins/operators/transfer_to_s3.py:
--------------------------------------------------------------------------------
1 | from airflow.models import BaseOperator
2 | from airflow.utils.decorators import apply_defaults
3 | import logging
4 | import os
5 | from boto3.s3.transfer import S3Transfer
6 | import boto3
7 | from airflow.contrib.hooks.aws_hook import AwsHook
8 |
9 |
class TransferToS3Operator(BaseOperator):
    """Upload all local files with a given extension to an S3 bucket.

    ``input_path`` is walked recursively. Every matching file is uploaded
    under the key ``<file_ext>/<file name>``.

    :param aws_credentials_id: Airflow connection id passed to ``AwsHook``.
    :param input_path: local directory to search for files.
    :param bucket_name: name of the target S3 bucket.
    :param file_ext: extension (without the dot) of the files to upload.
    """

    ui_color = '#89DA59'

    @apply_defaults
    def __init__(self,
                 aws_credentials_id,
                 input_path,
                 bucket_name,
                 file_ext,
                 *args, **kwargs):

        super(TransferToS3Operator, self).__init__(*args, **kwargs)
        self.aws_credentials_id = aws_credentials_id
        self.input_path = input_path
        self.bucket_name = bucket_name
        self.file_ext = file_ext

    def execute(self, context):
        """Upload the matching files one by one."""
        logging.info('Reading AWS Credentials ... ')
        aws_hook = AwsHook(self.aws_credentials_id)
        credentials = aws_hook.get_credentials()
        client = boto3.client(
            's3',
            aws_access_key_id=credentials.access_key,
            aws_secret_access_key=credentials.secret_key,
            # Temporary (STS / assumed-role) credentials are only valid
            # together with their session token; None for static keys.
            aws_session_token=getattr(credentials, 'token', None))
        transfer = S3Transfer(client)

        wanted_extension = '.' + self.file_ext
        logging.info('Copying Files ... ')
        for subdir, dirs, files in os.walk(self.input_path):
            for file in files:
                file_name, file_extension = os.path.splitext(file)
                if file_extension != wanted_extension:
                    continue
                full_path = os.path.join(subdir, file)
                logging.info(
                    "transferring file {}".format(file_name))
                # The key uses the bare file name, so files with the same
                # name in different sub-directories share one key.
                transfer.upload_file(full_path, self.bucket_name,
                                     self.file_ext + '/' + file)
        logging.info('Successfully finished copying all the files ... ')
49 |
--------------------------------------------------------------------------------
/airflow_start.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Launch the Airflow scheduler and web server as background daemons
airflow scheduler --daemon
airflow webserver --daemon -p 3000

# Block until a ready airflow-webserver process shows up in the process list
echo "Waiting for Airflow web server..."
until ps aux | grep airflow-webserver | grep -q ready; do
    sleep 1
done
echo "Airflow web server is ready"
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import configparser
2 |
# Load the settings from dwh.cfg (resolved against the working directory).
config = configparser.ConfigParser()
# The context manager closes the file handle once the settings are parsed.
with open('dwh.cfg') as config_file:
    config.read_file(config_file)

# amazon aws
KEY = config.get('AWS', 'key')
SECRET = config.get('AWS', 'secret')

# Redshift
DWH_CLUSTER_TYPE = config.get('DWH', 'DWH_CLUSTER_TYPE')
DWH_NUM_NODES = config.get('DWH', 'DWH_NUM_NODES')
DWH_NODE_TYPE = config.get('DWH', 'DWH_NODE_TYPE')

DWH_IAM_ROLE_NAME = config.get('DWH', 'DWH_IAM_ROLE_NAME')
DWH_CLUSTER_IDENTIFIER = config.get('DWH', 'DWH_CLUSTER_IDENTIFIER')
DWH_DB = config.get('DWH', 'DWH_DB')
DWH_DB_USER = config.get('DWH', 'DWH_DB_USER')
DWH_DB_PASSWORD = config.get('DWH', 'DWH_DB_PASSWORD')
DWH_PORT = config.get('DWH', 'DWH_PORT')
DWH_SCHEMA = config.get('DWH', 'DWH_SCHEMA')
DWH_REGION = config.get('DWH', 'DWH_REGION')

# S3
LOG_JSON_FORMAT = config.get('S3', 'LOG_JSON_FORMAT')
S3_BUCKET_LOG_JSON_PATH = config.get('S3', 'S3_BUCKET_LOG_JSON_PATH')
S3_BUCKET_SONG_JSON_PATH = config.get('S3', 'S3_BUCKET_SONG_JSON_PATH')
--------------------------------------------------------------------------------
/cryptosetup.py:
--------------------------------------------------------------------------------
1 | # secure airflow connections
2 | from cryptography.fernet import Fernet
3 |
# Generate a fresh Fernet key and print it as text so it can be copied
# into the Airflow configuration.
print(Fernet.generate_key().decode())
7 |
--------------------------------------------------------------------------------
/dwh.cfg:
--------------------------------------------------------------------------------
1 | [AWS]
2 | KEY=
3 | SECRET=
4 |
5 | [DWH]
6 | DWH_CLUSTER_TYPE=multi-node
7 | DWH_NUM_NODES=4
8 | DWH_NODE_TYPE=dc2.large
9 |
10 | DWH_IAM_ROLE_NAME=
11 | DWH_CLUSTER_IDENTIFIER=
12 | DWH_DB=udacity
13 | DWH_DB_USER=
14 | DWH_DB_PASSWORD=
15 | DWH_PORT=5439
16 | DWH_SCHEMA=public
17 | DWH_REGION=us-west-2
18 |
19 | [S3]
20 | LOG_JSON_FORMAT=
21 | S3_BUCKET_LOG_JSON_PATH=
22 | S3_BUCKET_SONG_JSON_PATH=
23 |
24 | [Spark]
25 | SPARK_SUBMIT=/opt/spark-2.4.3-bin-hadoop2.7/bin/spark-submit
--------------------------------------------------------------------------------
/env.yml:
--------------------------------------------------------------------------------
1 | name: CapstoneProject
2 | channels:
3 | - anaconda
4 | - conda-forge
5 | - defaults
6 | dependencies:
7 | - jupyter==1.0.0=py36_0
8 | - psycopg2==2.7.4=py36_0
9 | - geopandas
10 | - pip:
11 | - apache-airflow
12 | - matplotlib
13 | - pandas
14 | - descartes
15 | - shutil
16 | - ipython-sql
17 | - pandas-redshift
18 | - pyspark
19 | - boto3
20 | - zappa
21 | - smart_open
22 |
--------------------------------------------------------------------------------
/img/city_intake.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/city_intake.png
--------------------------------------------------------------------------------
/img/diff_airports.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/diff_airports.png
--------------------------------------------------------------------------------
/img/graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/graph.png
--------------------------------------------------------------------------------
/img/marker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/marker.png
--------------------------------------------------------------------------------
/img/no_of_immigrants.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/no_of_immigrants.png
--------------------------------------------------------------------------------
/img/pipeline-tree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/pipeline-tree.png
--------------------------------------------------------------------------------
/img/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/pipeline.png
--------------------------------------------------------------------------------
/img/schema.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/schema.PNG
--------------------------------------------------------------------------------
/img/state_airports.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/img/state_airports.png
--------------------------------------------------------------------------------
/load/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/supratim94336/DataEngineeringCapstoneProject/57328be863bc8ea7aad6575767d8d474446b2915/load/__init__.py
--------------------------------------------------------------------------------
/load/aws_load.py:
--------------------------------------------------------------------------------
1 | import os
2 | from boto3.s3.transfer import S3Transfer
3 | import boto3
4 | import logging
5 |
6 |
class AWSLoad:
    """Uploads local files to S3 with an explicit AWS key pair."""

    def __init__(self, access_key, secret_key):
        self.access_key = access_key
        self.secret_key = secret_key

    def transfer_files_to_s3(self, input_path, bucket_name, file_ext):
        """
        Recursively walk ``input_path`` and upload every file whose
        extension equals ``'.' + file_ext`` to the given bucket. Each object
        key is ``file_ext + '/' + <file name>``; the sub-directory a file
        was found in is not part of the key.
        :param input_path: local directory that is walked recursively
        :param bucket_name: name of the destination S3 bucket
        :param file_ext: extension to match, without the leading dot
        :return: None
        """
        s3_client = boto3.client(
            's3',
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
        )
        uploader = S3Transfer(s3_client)
        for folder, _subfolders, names in os.walk(input_path):
            for name in names:
                stem, suffix = os.path.splitext(name)
                local_path = os.path.join(folder, name)
                # Skip anything that does not carry the requested extension.
                if suffix != '.' + file_ext:
                    continue
                logging.info("transferring file {}".format(stem))
                uploader.upload_file(local_path, bucket_name,
                                     file_ext + '/' + name)
33 |
--------------------------------------------------------------------------------
/load/aws_utils.py:
--------------------------------------------------------------------------------
1 | import boto3
2 | from config import *
3 | import json
4 | from botocore.exceptions import ClientError
5 | import sys
6 | import time
7 | import logging
8 |
9 |
class AWSUtils:
    """
    Helpers that create and delete the Redshift cluster and its IAM role,
    and list objects in an S3 bucket, using an explicit AWS key pair.

    Cluster settings (``DWH_*``) come from the module-level names provided
    by ``config``.
    """

    # Region used for every boto3 client/resource created by this class.
    _REGION = 'us-west-2'
    # Managed policy attached to (and detached from) the IAM role.
    _S3_READ_POLICY_ARN = "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"

    def __init__(self, access_key, secret_key):
        self.access_key = access_key
        self.secret_key = secret_key

    @staticmethod
    def animate():
        """
        Write one cycle of a text spinner to stdout (five frames, 0.1 s
        apart).
        :return: None
        """
        chars = r"|/—\|"
        for char in chars:
            sys.stdout.write('\r' + 'Please Wait ...' + char)
            # Flush before sleeping so the frame is visible during the pause.
            sys.stdout.flush()
            time.sleep(.1)

    def _client(self, service_name):
        """Return a boto3 client for ``service_name`` with our credentials."""
        return boto3.client(service_name,
                            region_name=self._REGION,
                            aws_access_key_id=self.access_key,
                            aws_secret_access_key=self.secret_key)

    @staticmethod
    def _describe_cluster(redshift):
        """Return the description dict of the configured cluster."""
        return redshift.describe_clusters(
            ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters'][0]

    def create_iam_role(self, iam_role):
        """
        This function creates an IAM role that Redshift may assume, attaches
        the AmazonS3ReadOnlyAccess policy to it and returns the role ARN.
        A ``ClientError`` raised while creating the role or attaching the
        policy is logged and not re-raised; errors from the final
        ``get_role`` lookup propagate.
        :param iam_role: name of the role
        :return: ARN of the role
        """
        iam = self._client('iam')
        logging.info("1.1 creating role")
        trust_policy = {
            'Statement': [{'Action': 'sts:AssumeRole',
                           'Effect': 'Allow',
                           'Principal':
                               {'Service': 'redshift.amazonaws.com'}
                           }],
            'Version': '2012-10-17',
        }
        try:
            iam.create_role(
                Path='/',
                RoleName=iam_role,
                Description="Allows Redshift to call AWS Services.",
                AssumeRolePolicyDocument=json.dumps(trust_policy))
        except ClientError as e:
            # Logged at ERROR level so it is visible with default logging.
            logging.error(f'ERROR: {e}')

        logging.info("1.2 Attaching Policy")
        try:
            iam.attach_role_policy(
                RoleName=iam_role,
                PolicyArn=self._S3_READ_POLICY_ARN)
        except ClientError as e:
            logging.error(f'ERROR: {e}')

        logging.info("1.3 Get the IAM role ARN")
        role_arn = iam.get_role(RoleName=iam_role)['Role']['Arn']
        return role_arn

    def create_redshift_cluster(self, role_arn):
        """
        This function creates a cluster on your behalf, waits until its
        status is 'available' and returns the endpoint address.
        A ``ClientError`` from ``create_cluster`` is logged and not
        re-raised; errors from the status polling propagate.
        :param role_arn: ARN of the IAM role attached to the cluster
        :return: endpoint address (host name) of the cluster
        """
        logging.info("1.1 Client is created ...")
        redshift = self._client('redshift')
        try:
            logging.info("1.2 Cluster config is being created ...")
            redshift.create_cluster(
                # HW
                ClusterType=DWH_CLUSTER_TYPE,
                NodeType=DWH_NODE_TYPE,
                NumberOfNodes=int(DWH_NUM_NODES),

                # Identifiers & Credentials
                DBName=DWH_DB,
                ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
                MasterUsername=DWH_DB_USER,
                MasterUserPassword=DWH_DB_PASSWORD,

                # Roles (for s3 access)
                IamRoles=[role_arn])
        except ClientError as e:
            logging.error(f'ERROR: {e}')

        logging.info("1.3 Cluster is being created ...")
        while self._describe_cluster(redshift)['ClusterStatus'] \
                != 'available':
            AWSUtils.animate()

        logging.info("\r1.4 Cluster is created successfully ...")
        # Return the address itself; the subscripts must be part of the
        # return expression, not a separate statement on the next line.
        return self._describe_cluster(redshift)['Endpoint']['Address']

    def delete_redshift_cluster(self):
        """
        This function deletes the configured cluster (without a final
        snapshot) and waits while its status is 'deleting'.
        :return: None
        """
        logging.info("1.1 Client is created ...")
        redshift = self._client('redshift')
        logging.info("1.2 Cluster is identified ...")
        try:
            redshift.delete_cluster(
                ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
                SkipFinalClusterSnapshot=True)
        except ClientError as e:
            logging.error(f'ERROR: {e}')

        try:
            logging.info("1.3 Cluster is being deleted ...")
            while self._describe_cluster(redshift)['ClusterStatus'] \
                    == 'deleting':
                AWSUtils.animate()
        except ClientError as e:
            # Once the cluster is gone describe_clusters fails with
            # ClusterNotFound; only that error means the deletion finished.
            error_code = e.response.get('Error', {}).get('Code')
            if error_code == 'ClusterNotFound':
                logging.info("\r1.4 Cluster is deleted successfully ...")
            else:
                logging.error(f'ERROR: {e}')
        return None

    def list_bucket(self, bucket_name, prefix):
        """
        This function lists files in a bucket
        :param bucket_name: name of the S3 bucket
        :param prefix: only objects whose key starts with this are returned
        :return: list of the matching bucket objects
        """
        s3 = boto3.resource('s3',
                            region_name=self._REGION,
                            aws_access_key_id=self.access_key,
                            aws_secret_access_key=self.secret_key)
        bucket = s3.Bucket(bucket_name)
        return list(bucket.objects.filter(Prefix=prefix))

    def detach_iam_role(self, iam_role):
        """
        This function detaches the AmazonS3ReadOnlyAccess policy from the
        role and then deletes the role. Errors from either call propagate.
        :param iam_role: name of the role
        :return: None
        """
        iam = self._client('iam')
        iam.detach_role_policy(RoleName=iam_role,
                               PolicyArn=self._S3_READ_POLICY_ARN)
        iam.delete_role(RoleName=iam_role)
161 |
--------------------------------------------------------------------------------
/load/example_usage.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from load.aws_utils import AWSUtils
3 | import configparser
4 |
5 |
def main():
    """
    Command-line entry point: ``--action start`` creates the IAM role and
    the Redshift cluster and prints the role ARN and endpoint;
    ``--action stop`` deletes the cluster and removes the role.
    Credentials and the role name are read from ``dwh.cfg`` in the current
    working directory.
    """
    parser = argparse.ArgumentParser(description='start/stop')
    # Reject a missing or misspelled action up front instead of silently
    # doing nothing.
    parser.add_argument('--action', type=str, required=True,
                        choices=['start', 'stop'], help='type an action')
    args = parser.parse_args()
    action = args.action
    config = configparser.ConfigParser()
    # Close the config file as soon as it has been parsed.
    with open('dwh.cfg') as cfg_file:
        config.read_file(cfg_file)
    access_key = config.get('AWS', 'key')
    secret_key = config.get('AWS', 'secret')
    iam_role_name = config.get('DWH', 'DWH_IAM_ROLE_NAME')
    aws_utils_instance = AWSUtils(access_key, secret_key)

    if action == "start":
        role_arn = aws_utils_instance.create_iam_role(iam_role_name)
        dwh_endpoint = aws_utils_instance.create_redshift_cluster(role_arn)
        print('Please copy these values for future reference')
        print('DWH_ROLE_ARN={}'.format(role_arn))
        print('dwh_endpoint={}'.format(dwh_endpoint))

    elif action == "stop":
        print('destroying the cluster')
        aws_utils_instance.delete_redshift_cluster()
        print('detaching role')
        aws_utils_instance.detach_iam_role(iam_role_name)
30 |
31 |
32 | if __name__ == "__main__":
33 | main()
34 |
--------------------------------------------------------------------------------