├── .gitignore
├── LICENSE
├── README.md
├── _config.yml
├── code
└── apps
│ ├── api
│ ├── Dockerfile
│ ├── app
│ │ ├── config
│ │ │ └── config.cfg
│ │ ├── db_engine.py
│ │ ├── main.py
│ │ └── sql_queries.py
│ └── requirements.txt
│ ├── docker-compose.yaml
│ └── ingestion
│ ├── Dockerfile
│ ├── app
│ ├── config
│ │ └── config.cfg
│ ├── data
│ │ └── sample.csv
│ ├── db_engine.py
│ ├── main.py
│ └── sql_queries.py
│ ├── requirements.txt
│ └── wait-for-it.sh
└── googled57bdb220576a44a.html
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data Engineer Challenge
2 |
3 | The goal of this repository is to solve a technical challenge for a data engineering position.
4 |
5 | Check the article here: Design, Development and Deployment of a simple Data Pipeline
6 |
7 |
8 | 
9 |
10 |
11 | - Install Docker Desktop on Windows. It also installs Docker Compose, which lets you run multi-container applications.
12 | - Install Git Bash for Windows. Once it is installed, open Git Bash and clone this repository; this downloads the docker-compose.yaml file and the other files needed.
13 |
14 |
15 | ```linux
16 | ramse@DESKTOP-K6K6E5A MINGW64 /c
17 | $ git clone https://github.com/Wittline/data-engineer-challenge.git
18 | ```
19 |
20 | - Once all the needed files have been downloaded from the repository, let's run everything. Using Git Bash again, go to the data-engineer-challenge folder and run the Docker Compose command:
21 |
22 | ```linux
23 | ramse@DESKTOP-K6K6E5A MINGW64 /c
24 | $ cd data-engineer-challenge
25 | ```
26 |
27 | ```linux
28 | ramse@DESKTOP-K6K6E5A MINGW64 /c/data-engineer-challenge
29 | $ cd code
30 | ```
31 | ```linux
32 | ramse@DESKTOP-K6K6E5A MINGW64 /c/data-engineer-challenge/code
33 | $ cd apps
34 | ```
35 |
36 | ```linux
37 | @DESKTOP-K6K6E5A MINGW64 /c/data-engineer-challenge/code/apps
38 | $ docker-compose up
39 | ```
40 |
41 | - After waiting a couple of minutes, the final result of executing the above command should look like this:
42 |
43 | 
44 |
45 |
46 | - With docker-compose running, let's check the REST API: open your browser and go to http://localhost:8080/docs#/default
47 |
48 | 
49 |
50 |
51 | - Let's try a request to the REST API:
52 |
53 | 
54 |
55 |
56 | - Let's check the response from the REST API:
57 |
58 | 
59 |
60 | # Contributing and Feedback
61 | Any ideas or feedback about this repository? Help me improve it.
62 |
63 | # Authors
64 | - Created by Ramses Alexander Coraspe Valdez
65 | - Created in 2022
66 |
67 | # License
68 | This project is licensed under the terms of the Apache License.
69 |
70 |
71 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
--------------------------------------------------------------------------------
/code/apps/api/Dockerfile:
--------------------------------------------------------------------------------
# Image for the API service (built by docker-compose from ./api/).
FROM python:3.8

# The app opens its config via os.getcwd() + '/app/config/config.cfg',
# so the working directory must be the parent of ./app.
WORKDIR /api

# Dependency list is copied and installed before the application code.
COPY requirements.txt .

RUN pip install -r requirements.txt

COPY ./app ./app

# Runs main.py as a script; its __main__ guard starts uvicorn on port 8080.
CMD ["python", "./app/main.py"]
--------------------------------------------------------------------------------
/code/apps/api/app/config/config.cfg:
--------------------------------------------------------------------------------
1 | [POSTGRESQL]
2 | host = pg_container
3 | database = db
4 | user = postgres
5 | password = pg12345
--------------------------------------------------------------------------------
/code/apps/api/app/db_engine.py:
--------------------------------------------------------------------------------
1 | import psycopg2
2 | import pandas as pd
3 |
4 |
async def create_connection(params):
    """Open a PostgreSQL connection and return a ``(cursor, connection)`` pair.

    The function is declared ``async`` so callers must ``await`` it, but
    nothing inside is awaited: every psycopg2 call below is synchronous.

    Args:
        params: mapping of keyword arguments forwarded unchanged to
            ``psycopg2.connect`` (the caller passes host, database, user
            and password).

    Returns:
        tuple: ``(cur, conn)`` - an open cursor and the connection it
        belongs to. The connection is switched to autocommit.

    Raises:
        Exception: any error raised while connecting or while running the
            version probe is re-raised after the connection (if one was
            opened) has been closed. Previously a 4-tuple was returned on
            failure, which broke callers that unpack two values and hid
            the real error behind an unpacking ``ValueError``.
    """
    conn = None
    try:
        print('Connecting to the PostgreSQL database')
        conn = psycopg2.connect(**params)
        conn.set_session(autocommit=True)

        cur = conn.cursor()

        # Cheap round-trip that proves the connection is actually usable.
        print('PostgreSQL database version:')
        cur.execute('SELECT version()')

        db_version = cur.fetchone()
        print(db_version)

        return cur, conn
    except Exception:
        # Do not leak a half-initialised connection before propagating.
        if conn is not None:
            try:
                conn.close()
            except Exception as close_error:
                print(close_error)
        raise
24 |
25 |
def close_connection(cur, conn):
    """Close a cursor and its connection, tolerating missing or broken handles.

    Each handle is closed independently, so a ``None`` cursor or a cursor
    whose ``close()`` raises no longer prevents the connection from being
    closed.

    Args:
        cur: cursor object exposing ``close()``, or ``None``.
        conn: connection object exposing ``close()``, or ``None``.

    Returns:
        None. Errors raised by either ``close()`` call are printed, not
        propagated.
    """
    if cur is not None:
        try:
            cur.close()
        except Exception as error:
            print(error)

    if conn is not None:
        try:
            conn.close()
            print('Database connection closed')
        except Exception as error:
            print(error)
35 |
36 |
def pg_to_pd(cur, query, columns):
    """Execute ``query`` on ``cur`` and return all rows as a DataFrame.

    Args:
        cur: cursor object providing ``execute`` and ``fetchall``.
        query: SQL text passed straight to ``cur.execute``.
        columns: column labels handed to the ``pandas.DataFrame`` constructor.

    Returns:
        pandas.DataFrame built from the fetched rows, or the integer ``1``
        when executing the query raised (the error is printed first).
    """
    try:
        cur.execute(query)
    except (Exception, psycopg2.DatabaseError) as exc:
        print("Error: %s" % exc)
        return 1
    else:
        # Fetching happens outside the guarded call, as in the execute-only
        # error handling above: a failing fetchall() propagates to the caller.
        rows = cur.fetchall()
        return pd.DataFrame(rows, columns=columns)
--------------------------------------------------------------------------------
/code/apps/api/app/main.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from fastapi import FastAPI
3 | from fastapi.responses import JSONResponse
4 | import uvicorn
5 | import pandas as pd
6 | import psycopg2
7 | import db_engine as dbe
8 | import os
9 | from pydantic import BaseModel
10 | import configparser
11 | from sql_queries import get_users_by_department_company
12 |
13 | app = FastAPI()
14 |
15 |
@app.get("/")
def read_root():
    """Landing endpoint: respond with a fixed greeting payload."""
    greeting = {"Hello": "This is my database api"}
    return greeting
19 |
# Request body of POST /read. The handler converts the model to a dict and
# substitutes its three values into the {department}, {company} and {limit}
# placeholders of the users query.
class SearchUser(BaseModel):
    department: int  # compared with users.department_id in the query
    company: int  # compared with users.company_id in the query
    limit: int  # value of the query's LIMIT clause
24 |
25 |
@app.post("/read")
async def read(item: SearchUser):
    """Return the users of one company/department, capped at ``limit`` rows.

    Loads the ``[POSTGRESQL]`` section of ``app/config/config.cfg`` (resolved
    against the current working directory), opens a connection, runs the
    ``get_users_by_department_company`` query and returns the fetched rows.

    Args:
        item: request body carrying ``department``, ``company`` and ``limit``.

    Returns:
        The rows from ``cursor.fetchall()`` on success; otherwise a dict
        ``{'error': True, 'message': ...}`` describing the failure.
    """
    # Bound before the try block so cleanup never touches an unbound name
    # when reading the config or opening the connection fails.
    cur = conn = None
    try:
        item = item.dict()

        params = {"host": "", "database": "", "user": "", "password": ""}

        config = configparser.ConfigParser()
        # Context manager so the config file handle is always closed.
        with open(os.getcwd() + '/app/config/config.cfg') as config_file:
            config.read_file(config_file)

        # Values from the config file override the empty defaults above.
        params.update(dict(config.items('POSTGRESQL')))

        cur, conn = await dbe.create_connection(params)

        # The substituted values are the model's int-typed fields.
        # NOTE(review): bound query parameters would be more robust than
        # str.format if the template or the model ever gains text fields.
        query = get_users_by_department_company.format(**item)

        cur.execute(query)
        return cur.fetchall()

    except Exception as error:
        msg = "Error while fetching data from PostgreSQL: {}".format(error)
        return {'error': True,
                'message': msg}
    finally:
        # Single cleanup point for success and failure alike.
        if cur is not None or conn is not None:
            dbe.close_connection(cur, conn)
56 |
# Script entry point: serve the FastAPI app with uvicorn on 0.0.0.0:8080.
if __name__ == '__main__':
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8080,
    )
--------------------------------------------------------------------------------
/code/apps/api/app/sql_queries.py:
--------------------------------------------------------------------------------
1 |
# SQL template for listing users together with their department and company
# names. The {company}, {department} and {limit} placeholders are filled in
# with str.format by the API's /read handler.
# NOTE(review): the values are spliced in as text rather than bound as query
# parameters, so only trusted or validated values should ever be substituted.
get_users_by_department_company = """
SELECT u.id, u.firstname, u.lastname, u.email, u.Phone1, u.Phone2,
u.zip_code, u.Address, u.City, u.state, d.name as department, c.name as company
FROM users u
inner join companies c
ON c.id = u.company_id
inner join departments d
on d.id = u.department_id
where u.company_id = {company} and u.department_id = {department}
limit {limit};
"""
--------------------------------------------------------------------------------
/code/apps/api/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wittline/data-engineer-challenge/232d1efd07197f5af09a8ccf2561b58e3c617426/code/apps/api/requirements.txt
--------------------------------------------------------------------------------
/code/apps/docker-compose.yaml:
--------------------------------------------------------------------------------
# Three-service stack: PostgreSQL (db), a loader (ingestion) and the HTTP API
# (api), all attached to the same bridge network named "postgres".
version: '3.1'

services:
  db:
    # This container name is the "host" value in both apps' config.cfg.
    container_name: pg_container
    image: postgres
    restart: always
    environment:
      # User, password and database match the [POSTGRESQL] section of the
      # apps' config.cfg files.
      POSTGRES_USER: "postgres"
      POSTGRES_PASSWORD: "pg12345"
      POSTGRES_DB: "db"
      # NOTE(review): "trust" presumably disables password checks in the
      # postgres image, making the password above ineffective - confirm
      # against the image documentation before using outside a local demo.
      POSTGRES_HOST_AUTH_METHOD: trust
    ports:
      - "5432:5432"
    networks:
      - postgres

  ingestion:
    build: ./ingestion/
    container_name: ingestion
    networks:
      - postgres
    depends_on:
      - db
    # The ingestion image defines no CMD; this command waits for db:5432 via
    # wait-for-it.sh and then runs the ingestion script unbuffered.
    # NOTE(review): --timeout=0 presumably means "wait indefinitely" - confirm.
    command: bash -c "/usr/wait-for-it.sh --timeout=0 db:5432 && python -u ./app/main.py"
    volumes:
      # Host data folder mounted over the image's /ingestion/app/data/.
      - ./ingestion/app/data:/ingestion/app/data/:z

  api:
    build: ./api/
    container_name: api
    ports:
      - "8080:8080"
    networks:
      - postgres
    # NOTE(review): depends_on presumably only orders container start-up and
    # does not wait for the ingestion run to finish - confirm.
    depends_on:
      - ingestion

networks:
  postgres:
    driver: bridge
42 |
--------------------------------------------------------------------------------
/code/apps/ingestion/Dockerfile:
--------------------------------------------------------------------------------
# Image for the ingestion job (built by docker-compose from ./ingestion/).
# No CMD is defined here: docker-compose.yaml supplies the command.
FROM python:3.8

WORKDIR /ingestion

COPY requirements.txt .

# Helper script that the compose command invokes as /usr/wait-for-it.sh.
COPY wait-for-it.sh /usr/wait-for-it.sh

RUN pip install -r requirements.txt

COPY ./app ./app

# The script is invoked directly by path, so it needs the executable bit.
RUN chmod +x /usr/wait-for-it.sh
--------------------------------------------------------------------------------
/code/apps/ingestion/app/config/config.cfg:
--------------------------------------------------------------------------------
1 | [POSTGRESQL]
2 | host = pg_container
3 | database = db
4 | user = postgres
5 | password = pg12345
6 |
7 | [STAGINGFILE]
8 | location = app/data/sample.csv
--------------------------------------------------------------------------------
/code/apps/ingestion/app/data/sample.csv:
--------------------------------------------------------------------------------
1 | first_name,last_name,company_name,address,city,state,zip,phone1,phone2,email,department
2 | James,Butt,"Benton, John B Jr",6649 N Blue Gum St,New Orleans,LA,70116,504-621-8927,504-845-1427,jbutt@gmail.com,Sales
3 | James,Butt,"Benton, John B Jr",6649 N Blue Gum St,New Orleans,LA,70116,504-621-8927,504-845-1427,jbutt@gmail.com,Marketing
4 | Josephine,Darakjy,"Chanay, Jeffrey A Esq",4 B Blue Ridge Blvd,Brighton,MI,48116,810-292-9388,810-374-9840,josephine_darakjy@darakjy.org,Human Resources
5 | Art,Venere,"Chemel, James L Cpa",8 W Cerritos Ave #54,Bridgeport,NJ,8014,856-636-8749,856-264-4130,art@venere.org,Purchasing
6 | Lenna,Paprocki,Feltz Printing Service,639 Main St,Anchorage,AK,99501,907-385-4412,907-921-2010,lpaprocki@hotmail.com,Marketing
7 | Donette,Foller,Printing Dimensions,34 Center St,Hamilton,OH,45011,513-570-1893,513-549-4561,donette.foller@cox.net,Production
8 | Simona,Morasca,"Chapman, Ross E Esq",3 Mcauley Dr,Ashland,OH,44805,419-503-2484,419-800-6759,simona@morasca.com,Research and Development
9 | Mitsue,Tollner,Morlong Associates,7 Eads St,Chicago,IL,60632,773-573-6914,773-924-8565,mitsue_tollner@yahoo.com,Finances
10 | Leota,Dilliard,Commercial Press,7 W Jackson Blvd,San Jose,CA,95111,408-752-3500,408-813-1105,leota@hotmail.com,Sales
11 | Sage,Wieser,Truhlar And Truhlar Attys,5 Boston Ave #88,Sioux Falls,SD,57105,605-414-2147,605-794-4895,sage_wieser@cox.net,Human Resources
12 | Kris,Marrier,"King, Christopher A Esq",228 Runamuck Pl #2808,Baltimore,MD,21224,410-655-8723,410-804-4694,kris@gmail.com,Purchasing
13 | Minna,Amigon,"Dorl, James J Esq",2371 Jerrold Ave,Kulpsville,PA,19443,215-874-1229,215-422-8694,minna_amigon@yahoo.com,Marketing
14 | Abel,Maclead,Rangoni Of Florence,37275 St Rt 17m M,Middle Island,NY,11953,631-335-3414,631-677-3675,amaclead@gmail.com,Production
15 | Kiley,Caldarera,Feiner Bros,25 E 75th St #69,Los Angeles,CA,90034,310-498-5651,310-254-3084,kiley.caldarera@aol.com,Research and Development
16 | Graciela,Ruta,Buckley Miller & Wright,98 Connecticut Ave Nw,Chagrin Falls,OH,44023,440-780-8425,440-579-7763,gruta@cox.net,Finances
17 | Cammy,Albares,"Rousseaux, Michael Esq",56 E Morehead St,Laredo,TX,78045,956-537-6195,956-841-7216,calbares@gmail.com,Sales
18 | Mattie,Poquette,Century Communications,73 State Road 434 E,Phoenix,AZ,85013,602-277-4385,602-953-6360,mattie@aol.com,Human Resources
19 | Meaghan,Garufi,"Bolton, Wilbur Esq",69734 E Carrillo St,Mc Minnville,TN,37110,931-313-9635,931-235-7959,meaghan@hotmail.com,Purchasing
20 | Gladys,Rim,T M Byxbee Company Pc,322 New Horizon Blvd,Milwaukee,WI,53207,414-661-9598,414-377-2880,gladys.rim@rim.org,Marketing
21 | Yuki,Whobrey,Farmers Insurance Group,1 State Route 27,Taylor,MI,48180,313-288-7937,313-341-4470,yuki_whobrey@aol.com,Production
22 | Fletcher,Flosi,Post Box Services Plus,394 Manchester Blvd,Rockford,IL,61109,815-828-2147,815-426-5657,fletcher.flosi@yahoo.com,Research and Development
23 | Bette,Nicka,Sport En Art,6 S 33rd St,Aston,PA,19014,610-545-3615,610-492-4643,bette_nicka@cox.net,Finances
24 | Veronika,Inouye,C 4 Network Inc,6 Greenleaf Ave,San Jose,CA,95111,408-540-1785,408-813-4592,vinouye@aol.com,Sales
25 | Veronika,Inouye,C 4 Network Inc,6 Greenleaf Ave,San Jose,CA,95111,408-540-1785,408-813-4592,vinouye@aol.com,Finances
26 | Willard,Kolmetz,"Ingalls, Donald R Esq",618 W Yakima Ave,Irving,TX,75062,972-303-9197,972-896-4882,willard@hotmail.com,Human Resources
27 | Maryann,Royster,"Franklin, Peter L Esq",74 S Westgate St,Albany,NY,12204,518-966-7987,518-448-8982,mroyster@royster.com,Purchasing
28 | Alisha,Slusarski,Wtlz Power 107 Fm,3273 State St,Middlesex,NJ,8846,732-658-3154,732-635-3453,alisha@slusarski.com,Marketing
29 | Allene,Iturbide,"Ledecky, David Esq",1 Central Ave,Stevens Point,WI,54481,715-662-6764,715-530-9863,allene_iturbide@cox.net,Production
30 | Chanel,Caudy,Professional Image Inc,86 Nw 66th St #8673,Shawnee,KS,66218,913-388-2079,913-899-1103,chanel.caudy@caudy.org,Research and Development
31 | Ezekiel,Chui,"Sider, Donald C Esq",2 Cedar Ave #84,Easton,MD,21601,410-669-1642,410-235-8738,ezekiel@chui.com,Finances
32 | Willow,Kusko,U Pull It,90991 Thorburn Ave,New York,NY,10011,212-582-4976,212-934-5167,wkusko@yahoo.com,Human Resources
33 | Bernardo,Figeroa,"Clark, Richard Cpa",386 9th Ave N,Conroe,TX,77301,936-336-3951,936-597-3614,bfigeroa@aol.com,Finances
34 | Bernardo,Figeroa,"Clark, Richard Cpa",386 9th Ave N,Conroe,TX,77301,936-336-3951,936-597-3614,bfigeroa@aol.com,Purchasing
35 | Ammie,Corrio,"Moskowitz, Barry S",74874 Atlantic Ave,Columbus,OH,43215,614-801-9788,614-648-3265,ammie@corrio.com,Marketing
36 | Francine,Vocelka,Cascade Realty Advisors Inc,366 South Dr,Las Cruces,NM,88011,505-977-3911,505-335-5293,francine_vocelka@vocelka.com,Human Resources
37 | Ernie,Stenseth,Knwz Newsradio,45 E Liberty St,Ridgefield Park,NJ,7660,201-709-6245,201-387-9093,ernie_stenseth@aol.com,Purchasing
38 | Albina,Glick,"Giampetro, Anthony D",4 Ralph Ct,Dunellen,NJ,8812,732-924-7882,732-782-6701,albina@glick.com,Marketing
39 | Alishia,Sergi,Milford Enterprises Inc,2742 Distribution Way,New York,NY,10025,212-860-1579,212-753-2740,asergi@gmail.com,Human Resources
40 | Solange,Shinko,"Mosocco, Ronald A",426 Wolf St,Metairie,LA,70002,504-979-9175,504-265-8174,solange@shinko.com,Purchasing
41 | Jose,Stockham,Tri State Refueler Co,128 Bransten Rd,New York,NY,10011,212-675-8570,212-569-4233,jose@yahoo.com,Marketing
42 | Rozella,Ostrosky,Parkway Company,17 Morena Blvd,Camarillo,CA,93012,805-832-6163,805-609-1531,rozella.ostrosky@ostrosky.com,Sales
43 | Valentine,Gillian,Fbs Business Finance,775 W 17th St,San Antonio,TX,78204,210-812-9597,210-300-6244,valentine_gillian@gmail.com,Purchasing
44 | Valentine,Gillian,Fbs Business Finance,775 W 17th St,San Antonio,TX,78204,210-812-9597,210-300-6244,valentine_gillian@gmail.com,Sales
45 | Kati,Rulapaugh,Eder Assocs Consltng Engrs Pc,6980 Dorsett Rd,Abilene,KS,67410,785-463-7829,785-219-7724,kati.rulapaugh@hotmail.com,Sales
46 | Youlanda,Schemmer,Tri M Tool Inc,2881 Lewis Rd,Prineville,OR,97754,541-548-8197,541-993-2611,youlanda@aol.com,Sales
47 | Dyan,Oldroyd,International Eyelets Inc,7219 Woodfield Rd,Overland Park,KS,66204,913-413-4604,913-645-8918,doldroyd@aol.com,Sales
48 | Roxane,Campain,Rapid Trading Intl,1048 Main St,Fairbanks,AK,99708,907-231-4722,907-335-6568,roxane@hotmail.com,Sales
49 | Lavera,Perin,Abc Enterprises Inc,678 3rd Ave,Miami,FL,33196,305-606-7291,305-995-2078,lperin@perin.org,Sales
50 | Erick,Ferencz,Cindy Turner Associates,20 S Babcock St,Fairbanks,AK,99712,907-741-1044,907-227-6777,erick.ferencz@aol.com,Sales
51 | Fatima,Saylors,"Stanton, James D Esq",2 Lighthouse Ave,Hopkins,MN,55343,952-768-2416,952-479-2375,fsaylors@saylors.org,Sales
52 | Jina,Briddick,Grace Pastries Inc,38938 Park Blvd,Boston,MA,2128,617-399-5124,617-997-5771,jina_briddick@briddick.com,Sales
53 | Kanisha,Waycott,"Schroer, Gene E Esq",5 Tomahawk Dr,Los Angeles,CA,90006,323-453-2780,323-315-7314,kanisha_waycott@yahoo.com,Purchasing
54 | Emerson,Bowley,Knights Inn,762 S Main St,Madison,WI,53711,608-336-7444,608-658-7940,emerson.bowley@bowley.org,Sales
55 | Emerson,Bowley,Knights Inn,762 S Main St,Madison,WI,53711,608-336-7444,608-658-7940,emerson.bowley@bowley.org,Purchasing
56 | Blair,Malet,Bollinger Mach Shp & Shipyard,209 Decker Dr,Philadelphia,PA,19132,215-907-9111,215-794-4519,bmalet@yahoo.com,Purchasing
57 | Brock,Bolognia,Orinda News,4486 W O St #1,New York,NY,10003,212-402-9216,212-617-5063,bbolognia@yahoo.com,Purchasing
58 | Lorrie,Nestle,Ballard Spahr Andrews,39 S 7th St,Tullahoma,TN,37388,931-875-6644,931-303-6041,lnestle@hotmail.com,Purchasing
59 | Sabra,Uyetake,Lowy Limousine Service,98839 Hawthorne Blvd #6101,Columbia,SC,29201,803-925-5213,803-681-3678,sabra@uyetake.org,Purchasing
60 | Marjory,Mastella,Vicon Corporation,71 San Mateo Ave,Wayne,PA,19087,610-814-5533,610-379-7125,mmastella@mastella.com,Purchasing
61 | Karl,Klonowski,"Rossi, Michael M",76 Brooks St #9,Flemington,NJ,8822,908-877-6135,908-470-4661,karl_klonowski@yahoo.com,Production
62 | Tonette,Wenner,Northwest Publishing,4545 Courthouse Rd,Westbury,NY,11590,516-968-6051,516-333-4861,twenner@aol.com,Production
63 | Amber,Monarrez,Branford Wire & Mfg Co,14288 Foster Ave #4121,Jenkintown,PA,19046,215-934-8655,215-329-6386,amber_monarrez@monarrez.org,Production
64 | Shenika,Seewald,East Coast Marketing,4 Otis St,Van Nuys,CA,91405,818-423-4007,818-749-8650,shenika@gmail.com,Production
65 | Delmy,Ahle,Wye Technologies Inc,65895 S 16th St,Providence,RI,2909,401-458-2547,401-559-8961,delmy.ahle@hotmail.com,Production
66 | Deeanna,Juhas,"Healy, George W Iv",14302 Pennsylvania Ave,Huntingdon Valley,PA,19006,215-211-9589,215-417-9563,deeanna_juhas@gmail.com,Sales
67 | Blondell,Pugh,Alpenlite Inc,201 Hawk Ct,Providence,RI,2904,401-960-8259,401-300-8122,bpugh@aol.com,Purchasing
68 | Blondell,Pugh,Alpenlite Inc,201 Hawk Ct,Providence,RI,2904,401-960-8259,401-300-8122,bpugh@aol.com,Human Resources
69 | Jamal,Vanausdal,"Hubbard, Bruce Esq",53075 Sw 152nd Ter #615,Monroe Township,NJ,8831,732-234-1546,732-904-2931,jamal@vanausdal.org,Purchasing
70 | Cecily,Hollack,Arthur A Oliver & Son Inc,59 N Groesbeck Hwy,Austin,TX,78731,512-486-3817,512-861-3814,cecily@hollack.org,Marketing
71 | Carmelina,Lindall,George Jessop Carter Jewelers,2664 Lewis Rd,Littleton,CO,80126,303-724-7371,303-874-5160,carmelina_lindall@lindall.com,Production
72 | Maurine,Yglesias,"Schultz, Thomas C Md",59 Shady Ln #53,Milwaukee,WI,53214,414-748-1374,414-573-7719,maurine_yglesias@yglesias.com,Research and Development
73 | Tawna,Buvens,H H H Enterprises Inc,3305 Nabell Ave #679,New York,NY,10009,212-674-9610,212-462-9157,tawna@gmail.com,Finances
74 | Penney,Weight,Hawaiian King Hotel,18 Fountain St,Anchorage,AK,99515,907-797-9628,907-873-2882,penney_weight@aol.com,Sales
75 | Elly,Morocco,Killion Industries,7 W 32nd St,Erie,PA,16502,814-393-5571,814-420-3553,elly_morocco@gmail.com,Human Resources
76 | Ilene,Eroman,"Robinson, William J Esq",2853 S Central Expy,Glen Burnie,MD,21061,410-914-9018,410-937-4543,ilene.eroman@hotmail.com,Purchasing
77 | Vallie,Mondella,Private Properties,74 W College St,Boise,ID,83707,208-862-5339,208-737-8439,vmondella@mondella.com,Marketing
78 | Kallie,Blackwood,Rowley Schlimgen Inc,701 S Harrison Rd,San Francisco,CA,94104,415-315-2761,415-604-7609,kallie.blackwood@gmail.com,Production
79 | Johnetta,Abdallah,Forging Specialties,1088 Pinehurst St,Chapel Hill,NC,27514,919-225-9345,919-715-3791,johnetta_abdallah@aol.com,Research and Development
80 | Bobbye,Rhym,"Smits, Patricia Garity",30 W 80th St #1995,San Carlos,CA,94070,650-528-5783,650-811-9032,brhym@rhym.com,Finances
81 | Micaela,Rhymes,H Lee Leonard Attorney At Law,20932 Hedley St,Concord,CA,94520,925-647-3298,925-522-7798,micaela_rhymes@gmail.com,Finances
82 | Tamar,Hoogland,A K Construction Co,2737 Pistorio Rd #9230,London,OH,43140,740-343-8575,740-526-5410,tamar@hotmail.com,Finances
83 | Moon,Parlato,"Ambelang, Jessica M Md",74989 Brandon St,Wellsville,NY,14895,585-866-8313,585-498-4278,moon@yahoo.com,Finances
84 | Laurel,Reitler,Q A Service,6 Kains Ave,Baltimore,MD,21215,410-520-4832,410-957-6903,laurel_reitler@reitler.com,Finances
85 | Delisa,Crupi,Wood & Whitacre Contractors,47565 W Grand Ave,Newark,NJ,7105,973-354-2040,973-847-9611,delisa.crupi@crupi.com,Finances
86 | Viva,Toelkes,Mark Iv Press Ltd,4284 Dorigo Ln,Chicago,IL,60647,773-446-5569,773-352-3437,viva.toelkes@gmail.com,Finances
87 | Elza,Lipke,Museum Of Science & Industry,6794 Lake Dr E,Newark,NJ,7104,973-927-3447,973-796-3667,elza@yahoo.com,Human Resources
88 | Devorah,Chickering,Garrison Ind,31 Douglas Blvd #950,Clovis,NM,88101,505-975-8559,505-950-1763,devorah@hotmail.com,Human Resources
89 | Timothy,Mulqueen,Saronix Nymph Products,44 W 4th St,Staten Island,NY,10309,718-332-6527,718-654-7063,timothy_mulqueen@mulqueen.org,Human Resources
90 | Arlette,Honeywell,Smc Inc,11279 Loytan St,Jacksonville,FL,32254,904-775-4480,904-514-9918,ahoneywell@honeywell.com,Human Resources
91 | Dominque,Dickerson,E A I Electronic Assocs Inc,69 Marquette Ave,Hayward,CA,94545,510-993-3758,510-901-7640,dominque.dickerson@dickerson.org,Human Resources
92 | Lettie,Isenhower,"Conte, Christopher A Esq",70 W Main St,Beachwood,OH,44122,216-657-7668,216-733-8494,lettie_isenhower@yahoo.com,Human Resources
93 | Myra,Munns,Anker Law Office,461 Prospect Pl #316,Euless,TX,76040,817-914-7518,817-451-3518,mmunns@cox.net,Human Resources
94 | Stephaine,Barfield,Beutelschies & Company,47154 Whipple Ave Nw,Gardena,CA,90247,310-774-7643,310-968-1219,stephaine@barfield.com,Human Resources
95 | Lai,Gato,"Fligg, Kenneth I Jr",37 Alabama Ave,Evanston,IL,60201,847-728-7286,847-957-4614,lai.gato@gato.org,Marketing
96 | Stephen,Emigh,"Sharp, J Daniel Esq",3777 E Richmond St #900,Akron,OH,44302,330-537-5358,330-700-2312,stephen_emigh@hotmail.com,Marketing
97 | Tyra,Shields,"Assink, Anne H Esq",3 Fort Worth Ave,Philadelphia,PA,19106,215-255-1641,215-228-8264,tshields@gmail.com,Marketing
98 | Tammara,Wardrip,Jewel My Shop Inc,4800 Black Horse Pike,Burlingame,CA,94010,650-803-1936,650-216-5075,twardrip@cox.net,Marketing
99 | Cory,Gibes,Chinese Translation Resources,83649 W Belmont Ave,San Gabriel,CA,91776,626-572-1096,626-696-2777,cory.gibes@gmail.com,Marketing
100 | Danica,Bruschke,"Stevens, Charles T",840 15th Ave,Waco,TX,76708,254-782-8569,254-205-1422,danica_bruschke@gmail.com,Marketing
101 | Wilda,Giguere,"Mclaughlin, Luther W Cpa",1747 Calle Amanecer #2,Anchorage,AK,99501,907-870-5536,907-914-9482,wilda@cox.net,Marketing
102 | Elvera,Benimadho,Tree Musketeers,99385 Charity St #840,San Jose,CA,95110,408-703-8505,408-440-8447,elvera.benimadho@cox.net,Marketing
103 | Carma,Vanheusen,Springfield Div Oh Edison Co,68556 Central Hwy,San Leandro,CA,94577,510-503-7169,510-452-4835,carma@cox.net,Marketing
104 | Malinda,Hochard,Logan Memorial Hospital,55 Riverside Ave,Indianapolis,IN,46202,317-722-5066,317-472-2412,malinda.hochard@yahoo.com,Marketing
105 | Natalie,Fern,"Kelly, Charles G Esq",7140 University Ave,Rock Springs,WY,82901,307-704-8713,307-279-3793,natalie.fern@hotmail.com,Marketing
106 | Lisha,Centini,Industrial Paper Shredders Inc,64 5th Ave #1153,Mc Lean,VA,22102,703-235-3937,703-475-7568,lisha@centini.org,Marketing
107 | Arlene,Klusman,Beck Horizon Builders,3 Secor Rd,New Orleans,LA,70112,504-710-5840,504-946-1807,arlene_klusman@gmail.com,Marketing
--------------------------------------------------------------------------------
/code/apps/ingestion/app/db_engine.py:
--------------------------------------------------------------------------------
1 | import psycopg2
2 | import pandas as pd
3 | from sql_queries import create_table_queries, drop_table_queries, fill_table_queries, create_constraints
4 |
5 |
def create_connection(params):
    """
    Open a connection to the PostgreSQL database in autocommit mode,
    print the server version and return the cursor and connection.

    :param params: dict of keyword arguments forwarded to psycopg2.connect
    :return: tuple (cur, conn)
    :raises Exception: any error raised while connecting or querying the
        version is printed and re-raised, so the caller never receives a
        bare None where it expects a (cur, conn) pair
    """
    conn = None

    try:
        print('Connecting to the PostgreSQL database')
        conn = psycopg2.connect(**params)
        conn.set_session(autocommit=True)

        cur = conn.cursor()

        print('PostgreSQL database version:')
        cur.execute('SELECT version()')

        db_version = cur.fetchone()
        print(db_version)
        return cur, conn
    except Exception as error:
        print(error)
        # Do not leak a connection that was opened before the failure.
        if conn is not None:
            conn.close()
        raise
30 |
31 |
def close_connection(cur, conn):
    """
    Close the cursor and the connection with the PostgreSQL database.

    The two resources are closed independently, so a missing or failing
    cursor does not leave the connection open. Errors are printed and
    not raised.

    :param cur: cursor (may be None)
    :param conn: connection object (may be None)
    """
    try:
        if cur is not None:
            cur.close()
    except Exception as error:
        print(error)

    try:
        if conn is not None:
            conn.close()
            print('Database connection closed')
    except Exception as error:
        print(error)
45 |
def drop_table(cur, conn, table):
    """
    Drop one table if it exists, echoing the statement before running it.

    :param cur: cursor used to execute the statement
    :param conn: connection object used to commit
    :param table: name of the table, interpolated verbatim into the SQL
    """
    statement = f"DROP TABLE IF EXISTS {table}"
    print("Executing: {0}".format(statement))
    cur.execute(statement)
    conn.commit()
57 |
58 |
def drop_tables(cur, conn):
    """
    Execute every statement listed in drop_table_queries, in order.

    :param cur: cursor used to execute the statements
    :param conn: connection object used to commit after each statement
    """
    def _run(statement):
        cur.execute(statement)
        conn.commit()

    print("Dropping tables")
    for statement in drop_table_queries:
        _run(statement)
    print("Tables dropped")
70 |
71 |
def create_tables(cur, conn):
    """
    Create all the tables in the example by running every statement in
    create_table_queries, in order.

    :param cur: cursor used to execute the statements
    :param conn: connection object used to commit after each statement
    """
    # The original start message read "Creating created".
    print("Creating tables")
    for query in create_table_queries:
        cur.execute(query)
        conn.commit()
    print("Tables created")
83 |
def pg_to_pd(cur, query, columns):
    """
    Run a SELECT and hand the rows back as a pandas DataFrame.

    :param cur: cursor
    :param query: SELECT query string
    :param columns: column names to label the DataFrame with
    :return: DataFrame built from all fetched rows; the integer 1 when
        executing the query fails (the error is printed, not raised)
    """
    try:
        cur.execute(query)
    except Exception as failure:
        print("Error: %s" % failure)
        return 1

    return pd.DataFrame(cur.fetchall(), columns=columns)
102 |
103 |
def fill_from_staging_all(cur, conn):
    """
    Populate the tables by running each statement in fill_table_queries
    in list order.

    :param cur: cursor used to execute the statements
    :param conn: connection object used to commit after each statement
    """
    total = len(fill_table_queries)
    for position in range(total):
        cur.execute(fill_table_queries[position])
        conn.commit()
    print("Records were populated from staging")
114 |
def check_data(cur, conn, tables):
    """
    Check count of records in tables.

    :param cur: cursor (not used; a dedicated cursor is opened per table)
    :param conn: connection object
    :param tables: tables to check; each name is interpolated verbatim
        into a SELECT COUNT(*) statement
    :return: dict mapping each table name to its row count
    :raises Exception: any error raised while counting is printed and
        re-raised
    """

    count_values = {}

    for table in tables:
        query_count = "SELECT COUNT(*) FROM {0}".format(table)

        count_cur = None
        try:
            count_cur = conn.cursor()
            count_cur.execute(query_count)
            count_values[table] = count_cur.fetchone()[0]
        except Exception as error:
            print("Error: %s" % error)
            raise
        finally:
            # Release the per-table cursor; the original never closed it.
            if count_cur is not None:
                count_cur.close()

    return count_values
137 |
def set_staging(cur, conn, staging_file, columns):
    """
    Bulk-load a CSV file into the staging table with COPY.

    The first line of the file is treated as a header and skipped; an
    empty file is loaded as zero rows. psycopg2 errors are printed and
    not re-raised.

    :param cur: cursor; its copy_expert method receives the open file
    :param conn: connection object used to commit
    :param staging_file: path of the CSV file to load
    :param columns: staging column names, in CSV column order
    """

    print("Copying data from .csv to staging zone")

    try:
        # A client-to-server load reads from STDIN, not STDOUT.
        copy_cmd = f"copy staging({','.join(columns)}) from stdin (format csv)"
        with open(staging_file, 'r') as f:
            # Skip the header row without failing on an empty file.
            next(f, None)
            cur.copy_expert(copy_cmd, f)
        conn.commit()
        print("Staging ready")
    except psycopg2.Error as e:
        print(e)
151 |
def set_constraints(cur, conn):
    """
    Apply every statement listed in create_constraints, in order.

    :param cur: cursor used to execute the statements
    :param conn: connection object used to commit after each statement
    """
    print("Setting constraints")
    remaining = list(create_constraints)
    while remaining:
        cur.execute(remaining.pop(0))
        conn.commit()
    print("Constraints ready")
158 |
159 |
160 |
161 |
--------------------------------------------------------------------------------
/code/apps/ingestion/app/main.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import db_engine as dbe
3 | import configparser
4 | import os
5 |
6 |
class Pipeline:
    """
    Load the staging CSV into PostgreSQL and split it into the users,
    companies and departments tables.
    """

    def __init__(self, params, staging_file):
        """
        :param params: connection parameters handed to dbe.create_connection
        :param staging_file: path of the CSV file to ingest
        """
        self.params = params
        self.staging_file = staging_file

    def run(self):
        """
        Run the ingestion steps in order: drop and recreate the tables,
        load the staging table, fill the final tables from it, drop the
        staging table, add the constraints and print the row counts.

        The connection is closed even when one of the steps raises.
        """
        tables = ['users', 'companies', 'departments']
        columns_staging = ['first_name','last_name','company_name','address','city','state','zip','phone1','phone2','email','department']
        cur, conn = dbe.create_connection(self.params)
        try:
            dbe.drop_tables(cur, conn)
            dbe.create_tables(cur, conn)
            dbe.set_staging(cur, conn, self.staging_file, columns_staging)
            dbe.fill_from_staging_all(cur, conn)
            dbe.drop_table(cur, conn, 'staging')
            dbe.set_constraints(cur, conn)
            count_tables = dbe.check_data(cur, conn, tables)
            for k, v in count_tables.items():
                print("Table {0} has {1} records".format(k, v))
        finally:
            # Always release the cursor and connection, also on failure.
            dbe.close_connection(cur, conn)
27 |
28 |
if __name__ == '__main__':
    # Entry point: read the connection settings and the staging file
    # location from app/config/config.cfg (resolved against the current
    # working directory) and run the pipeline.

    params = {"host": "", "database": "", "user": "", "password": ""}

    config = configparser.ConfigParser()
    # Use a context manager so the config file handle is closed.
    with open(os.getcwd() + '/app/config/config.cfg') as config_file:
        config.read_file(config_file)
    pg_config = dict(config.items('POSTGRESQL'))
    staging_file = config.get('STAGINGFILE','location')

    # Values from the [POSTGRESQL] section override the empty defaults.
    params.update(pg_config)

    # Show the effective settings without leaking the password to the logs.
    print({k: ('***' if k == 'password' else v) for k, v in params.items()})

    pipeline = Pipeline(params, staging_file)
    pipeline.run()
--------------------------------------------------------------------------------
/code/apps/ingestion/app/sql_queries.py:
--------------------------------------------------------------------------------
# DROP TABLES
# One DROP statement per table; IF EXISTS keeps them safe to run when the
# table is absent.
users_table_drop = "DROP TABLE IF EXISTS users"
departments_table_drop = "DROP TABLE IF EXISTS departments"
companies_table_drop = "DROP TABLE IF EXISTS companies"
staging_table_drop = "DROP TABLE IF EXISTS staging"

# CREATE TABLES

# Landing table: every source column is kept as varchar, plus a serial id.
staging_table_create = ("""
CREATE TABLE IF NOT EXISTS staging(
id serial PRIMARY KEY NOT NULL,
first_name varchar,
last_name varchar,
company_name varchar,
address varchar,
city varchar,
state varchar,
zip varchar,
phone1 varchar,
phone2 varchar,
email varchar,
department varchar
);
""")

# users holds the personal columns and points at departments and companies
# through department_id / company_id. The foreign keys themselves are not
# declared here; they are added by the `constraints` statement below.
users_table_create = ("""
CREATE TABLE IF NOT EXISTS users(
id serial NOT NULL,
firstname varchar,
lastname varchar,
email varchar,
Phone1 varchar,
Phone2 varchar,
zip_code varchar,
Address varchar,
City varchar,
state varchar,
department_id integer NOT NULL,
company_id integer NOT NULL,
CONSTRAINT users_pkey PRIMARY KEY(id)
);
""")


# Lookup table of department names.
departments_table_create = ("""
CREATE TABLE IF NOT EXISTS departments(
id serial NOT NULL,
name varchar,
CONSTRAINT department_pkey PRIMARY KEY(id)
);
""")


# Lookup table of company names.
companies_table_create = ("""
CREATE TABLE IF NOT EXISTS companies(
id serial NOT NULL,
name varchar,
CONSTRAINT company_pkey PRIMARY KEY(id)
);
""")

# Foreign keys from users to departments and companies, kept as a separate
# statement so they can be applied after the tables are created.
constraints = ("""

ALTER TABLE users
ADD CONSTRAINT users_department_id_fkey
FOREIGN KEY (department_id) REFERENCES departments (id);

ALTER TABLE users
ADD CONSTRAINT users_company_id_fkey
FOREIGN KEY (company_id) REFERENCES companies (id);

""")

## FILL TABLES FROM STAGING

# Copies each staging row into users, resolving company_name and department
# to ids by name. Both joins are INNER, so a staging row without a matching
# company or department name produces no users row.
users_fill_from_staging = ("""
insert into users (firstname, lastname, email, Phone1, Phone2, zip_code, Address, City, state, department_id, company_id)
select
s.first_name as firstname,
s.last_name as lastname,
s.email as email,
s.phone1 as Phone1,
s.phone2 as Phone2,
s.zip as zip_code,
s.address as Address,
s.city as City,
s.state as state,
d.id as department_id,
c.id as company_id
from staging s
INNER JOIN companies c
ON s.company_name = c.name
INNER JOIN departments d
ON s.department = d.name;
""")

# One companies row per distinct company_name found in staging.
companies_fill_from_staging = ("""
insert into companies (name)
select distinct company_name as Name
from staging;
""")

# One departments row per distinct department found in staging.
departments_fill_from_staging = ("""
insert into departments (NAME)
select distinct department as Name
from staging;
""")


# Statement lists consumed by db_engine.
# Order matters for fill_table_queries: the users insert joins against
# companies and departments, so those two are filled first.
fill_table_queries = [companies_fill_from_staging,departments_fill_from_staging,users_fill_from_staging]
create_table_queries = [staging_table_create, users_table_create, departments_table_create, companies_table_create]
# users is listed first; it is the table that carries the foreign keys to
# departments and companies.
drop_table_queries = [users_table_drop, departments_table_drop, companies_table_drop, staging_table_drop]
create_constraints = [constraints]
--------------------------------------------------------------------------------
/code/apps/ingestion/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Wittline/data-engineer-challenge/232d1efd07197f5af09a8ccf2561b58e3c617426/code/apps/ingestion/requirements.txt
--------------------------------------------------------------------------------
/code/apps/ingestion/wait-for-it.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Use this script to test if a given TCP host/port are available

# Script name without its directory part; used as a prefix in messages.
WAITFORIT_cmdname="${0##*/}"

# Print all arguments to stderr, unless WAITFORIT_QUIET is 1.
echoerr() {
  if [[ $WAITFORIT_QUIET -eq 1 ]]; then
    return 0
  fi
  echo "$@" 1>&2
}
7 |
# Print the command-line help to stderr and exit with status 1.
# Globals: WAITFORIT_cmdname (read)
# The synopsis now lists -q, which the argument parser accepts and the
# option table already describes.
usage() {
  cat << USAGE >&2
Usage:
    $WAITFORIT_cmdname host:port [-s] [-q] [-t timeout] [-- command args]
    -h HOST | --host=HOST       Host or IP under test
    -p PORT | --port=PORT       TCP port under test
                                Alternatively, you specify the host and port as host:port
    -s | --strict               Only execute subcommand if the test succeeds
    -q | --quiet                Don't output any status messages
    -t TIMEOUT | --timeout=TIMEOUT
                                Timeout in seconds, zero for no timeout
    -- COMMAND ARGS             Execute command with args after the test finishes
USAGE
  exit 1
}
24 |
# Poll the target once per second until a TCP connection succeeds.
# Globals:  WAITFORIT_HOST, WAITFORIT_PORT, WAITFORIT_TIMEOUT,
#           WAITFORIT_ISBUSY, WAITFORIT_cmdname (all read)
# Returns:  status of the last probe (0 once the port is reachable)
# The loop has no time limit of its own; WAITFORIT_TIMEOUT only selects
# the start message here.
wait_for() {
  local start_ts end_ts result
  if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
    echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
  else
    echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout"
  fi
  start_ts=$(date +%s)
  while :; do
    if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then
      # Quoted so host and port always reach nc as exactly one word each.
      nc -z "$WAITFORIT_HOST" "$WAITFORIT_PORT"
      result=$?
    else
      # Bash opens a TCP connection when redirecting to /dev/tcp/HOST/PORT;
      # the subshell keeps a failed redirect from affecting this shell.
      (echo -n > "/dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT") >/dev/null 2>&1
      result=$?
    fi
    if [[ $result -eq 0 ]]; then
      end_ts=$(date +%s)
      echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((end_ts - start_ts)) seconds"
      break
    fi
    sleep 1
  done
  return "$result"
}
51 |
# Re-run this script in --child mode under `timeout`, so the wait is bounded.
# Globals:  WAITFORIT_QUIET, WAITFORIT_BUSYTIMEFLAG, WAITFORIT_TIMEOUT,
#           WAITFORIT_HOST, WAITFORIT_PORT, WAITFORIT_cmdname (read);
#           WAITFORIT_PID, WAITFORIT_RESULT (written)
# Returns:  exit status of the `timeout` invocation
wait_for_wrapper() {
  # Build the child command line once; an array keeps every value a
  # single word even if it contains spaces.
  local -a child_args=()
  if [[ $WAITFORIT_QUIET -eq 1 ]]; then
    child_args+=(--quiet)
  fi
  child_args+=(
    --child
    "--host=$WAITFORIT_HOST"
    "--port=$WAITFORIT_PORT"
    "--timeout=$WAITFORIT_TIMEOUT"
  )

  # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
  # ${VAR:+...} drops the busybox -t flag entirely when it is empty.
  timeout ${WAITFORIT_BUSYTIMEFLAG:+"$WAITFORIT_BUSYTIMEFLAG"} \
    "$WAITFORIT_TIMEOUT" "$0" "${child_args[@]}" &
  WAITFORIT_PID=$!
  # Expanded now on purpose: the PID is already known at this point.
  trap "kill -INT -$WAITFORIT_PID" INT
  wait "$WAITFORIT_PID"
  WAITFORIT_RESULT=$?
  if [[ $WAITFORIT_RESULT -ne 0 ]]; then
    echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
  fi
  return "$WAITFORIT_RESULT"
}
69 |
# process arguments
# Options may appear in any order; everything after "--" is kept as the
# command to run once the wait is over.
while [[ $# -gt 0 ]]; do
  case "$1" in
    *:*)
      # Split host:port with read instead of an unquoted expansion, so the
      # value is not glob-expanded and ":port" leaves the host empty
      # rather than storing the port as the host.
      IFS=':' read -r -a WAITFORIT_hostport <<< "$1"
      WAITFORIT_HOST="${WAITFORIT_hostport[0]}"
      WAITFORIT_PORT="${WAITFORIT_hostport[1]}"
      shift 1
      ;;
    --child)
      WAITFORIT_CHILD=1
      shift 1
      ;;
    -q | --quiet)
      WAITFORIT_QUIET=1
      shift 1
      ;;
    -s | --strict)
      WAITFORIT_STRICT=1
      shift 1
      ;;
    -h)
      WAITFORIT_HOST="$2"
      # A missing value stops parsing; the host/port check that follows
      # the loop then reports the error.
      if [[ $WAITFORIT_HOST == "" ]]; then break; fi
      shift 2
      ;;
    --host=*)
      WAITFORIT_HOST="${1#*=}"
      shift 1
      ;;
    -p)
      WAITFORIT_PORT="$2"
      if [[ $WAITFORIT_PORT == "" ]]; then break; fi
      shift 2
      ;;
    --port=*)
      WAITFORIT_PORT="${1#*=}"
      shift 1
      ;;
    -t)
      WAITFORIT_TIMEOUT="$2"
      if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi
      shift 2
      ;;
    --timeout=*)
      WAITFORIT_TIMEOUT="${1#*=}"
      shift 1
      ;;
    --)
      shift
      WAITFORIT_CLI=("$@")
      break
      ;;
    --help)
      usage
      ;;
    *)
      echoerr "Unknown argument: $1"
      usage
      ;;
  esac
done
133 |
# Host and port are mandatory, whichever option style supplied them.
if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then
  echoerr "Error: you need to provide a host and port to test."
  usage
fi

# Defaults for everything the command line did not set.
WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15}
WAITFORIT_STRICT=${WAITFORIT_STRICT:-0}
WAITFORIT_CHILD=${WAITFORIT_CHILD:-0}
WAITFORIT_QUIET=${WAITFORIT_QUIET:-0}

# Check to see if timeout is from busybox?
WAITFORIT_TIMEOUT_PATH=$(type -p timeout)
WAITFORIT_TIMEOUT_PATH=$(realpath "$WAITFORIT_TIMEOUT_PATH" 2>/dev/null || readlink -f "$WAITFORIT_TIMEOUT_PATH")

WAITFORIT_BUSYTIMEFLAG=""
if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then
  WAITFORIT_ISBUSY=1
  # Check if busybox timeout uses -t flag
  # (recent Alpine versions don't support -t anymore)
  if timeout &>/dev/stdout | grep -q -e '-t '; then
    WAITFORIT_BUSYTIMEFLAG="-t"
  fi
else
  WAITFORIT_ISBUSY=0
fi

if [[ $WAITFORIT_CHILD -gt 0 ]]; then
  # Child mode (started by wait_for_wrapper): probe, then report the result.
  wait_for
  WAITFORIT_RESULT=$?
  exit "$WAITFORIT_RESULT"
else
  if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
    wait_for_wrapper
    WAITFORIT_RESULT=$?
  else
    wait_for
    WAITFORIT_RESULT=$?
  fi
fi

# Count the array elements instead of testing only the first one, which
# is all a bare $WAITFORIT_CLI expands to.
if [[ ${#WAITFORIT_CLI[@]} -gt 0 ]]; then
  if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then
    echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess"
    exit "$WAITFORIT_RESULT"
  fi
  exec "${WAITFORIT_CLI[@]}"
else
  exit "$WAITFORIT_RESULT"
fi
183 |
--------------------------------------------------------------------------------
/googled57bdb220576a44a.html:
--------------------------------------------------------------------------------
1 | google-site-verification: googled57bdb220576a44a.html
--------------------------------------------------------------------------------