├── .gitignore ├── LICENSE ├── README.md ├── _config.yml ├── code └── apps │ ├── api │ ├── Dockerfile │ ├── app │ │ ├── config │ │ │ └── config.cfg │ │ ├── db_engine.py │ │ ├── main.py │ │ └── sql_queries.py │ └── requirements.txt │ ├── docker-compose.yaml │ └── ingestion │ ├── Dockerfile │ ├── app │ ├── config │ │ └── config.cfg │ ├── data │ │ └── sample.csv │ ├── db_engine.py │ ├── main.py │ └── sql_queries.py │ ├── requirements.txt │ └── wait-for-it.sh └── googled57bdb220576a44a.html /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data Engineer Challenge 2 | 3 | This goal of this repository is based on solving a technical challenge for the data engineering position. 4 | 5 | Check the article here: Design, Development and Deployment of a simple Data Pipeline 6 | 7 | 8 | ![image](https://user-images.githubusercontent.com/8701464/172062180-c90e7f11-ae64-4fd2-9772-8cfd2fb6abf6.png) 9 | 10 | 11 | - Install Docker Desktop on Windows, it will install Docker Compose as well, Docker Compose will allow you to run multiple container applications. 12 | - Install git-bash for windows, once installed, open git bash and download this repository, this will download the docker-compose.yaml file, and other files needed. 13 | 14 | 15 | ```linux 16 | ramse@DESKTOP-K6K6E5A MINGW64 /c 17 | $ git clone https://github.com/Wittline/data-engineer-challenge.git 18 | ``` 19 | 20 | - Once all the files needed were downloaded from the repository, let's run everything. We will use the git bash tool again, go to the folder data-engineer-challenge and we will run the Docker Compose command: 21 | 22 | ```linux 23 | ramse@DESKTOP-K6K6E5A MINGW64 /c 24 | $ cd data-engineer-challenge 25 | ``` 26 | 27 | ```linux 28 | ramse@DESKTOP-K6K6E5A MINGW64 /c/data-engineer-challenge 29 | $ cd code 30 | ``` 31 | ```linux 32 | ramse@DESKTOP-K6K6E5A MINGW64 /c/data-engineer-challenge/code 33 | $ cd apps 34 | ``` 35 | 36 | ```linux 37 | @DESKTOP-K6K6E5A MINGW64 /c/data-engineer-challenge/code/apps 38 | $ docker-compose up 39 | ``` 40 | 41 | - After wait for a couple of minutes the final result of executing the above command should look like this: 42 | 43 | ![image](https://user-images.githubusercontent.com/8701464/172062212-05193fba-d980-4917-9fe1-f1134d72afb8.png) 44 | 45 | 46 | - docker-compose executionlet's check the API REST, go to your browser and search for: http://localhost:8080/docs#/default 47 | 48 | ![image](https://user-images.githubusercontent.com/8701464/172062217-9fbd6026-6a49-42fc-bbbb-9efb58743cc9.png) 49 | 50 | 51 | - let's try the request to the API REST 52 | 53 | ![image](https://user-images.githubusercontent.com/8701464/172062224-b9ec90bd-8454-4e5a-a4f5-2adc932a41a2.png) 54 | 55 | 56 | - let's check the response from the API REST 57 | 58 | ![image](https://user-images.githubusercontent.com/8701464/172062241-8fc66570-9ca8-4ff9-b56e-239fb6eee118.png) 59 | 60 | # Contributing and Feedback 61 | Any ideas or feedback about this repository?. Help me to improve it. 62 | 63 | # Authors 64 | - Created by Ramses Alexander Coraspe Valdez 65 | - Created on 2022 66 | 67 | # License 68 | This project is licensed under the terms of the Apache License. 69 | 70 | 71 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /code/apps/api/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /api 4 | 5 | COPY requirements.txt . 6 | 7 | RUN pip install -r requirements.txt 8 | 9 | COPY ./app ./app 10 | 11 | CMD ["python", "./app/main.py"] -------------------------------------------------------------------------------- /code/apps/api/app/config/config.cfg: -------------------------------------------------------------------------------- 1 | [POSTGRESQL] 2 | host = pg_container 3 | database = db 4 | user = postgres 5 | password = pg12345 -------------------------------------------------------------------------------- /code/apps/api/app/db_engine.py: -------------------------------------------------------------------------------- 1 | import psycopg2 2 | import pandas as pd 3 | 4 | 5 | async def create_connection(params): 6 | 7 | conn = None 8 | try: 9 | print('Connecting to the PostgreSQL database') 10 | conn = psycopg2.connect(**params) 11 | conn.set_session(autocommit=True) 12 | 13 | cur = conn.cursor() 14 | 15 | print('PostgreSQL database version:') 16 | cur.execute('SELECT version()') 17 | 18 | db_version = cur.fetchone() 19 | print(db_version) 20 | 21 | return cur, conn 22 | except (Exception, psycopg2.DatabaseError) as error: 23 | return None, None, True, str(error) 24 | 25 | 26 | def close_connection(cur, conn): 27 | 28 | try: 29 | cur.close() 30 | if conn is not None: 31 | conn.close() 32 | print('Database connection closed') 33 | except (Exception, psycopg2.DatabaseError) as error: 34 | print(error) 35 | 36 | 37 | def pg_to_pd(cur, query, columns): 38 | 39 | try: 40 | cur.execute(query) 41 | except (Exception, psycopg2.DatabaseError) as error: 42 | print("Error: %s" % error) 43 | return 1 44 | 45 | tupples = cur.fetchall() 46 | 47 | 48 | df = pd.DataFrame(tupples, columns=columns) 49 | return df -------------------------------------------------------------------------------- /code/apps/api/app/main.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from fastapi import FastAPI 3 | from fastapi.responses import JSONResponse 4 | import uvicorn 5 | import pandas as pd 6 | import psycopg2 7 | import db_engine as dbe 8 | import os 9 | from pydantic import BaseModel 10 | import configparser 11 | from sql_queries import get_users_by_department_company 12 | 13 | app = FastAPI() 14 | 15 | 16 | @app.get("/") 17 | def read_root(): 18 | return {"Hello": "This is my database api"} 19 | 20 | class SearchUser(BaseModel): 21 | department: int 22 | company: int 23 | limit: int 24 | 25 | 26 | @app.post("/read") 27 | 28 | async def read(item: SearchUser): 29 | try: 30 | 31 | item = item.dict() 32 | 33 | params = {"host": "", "database": "", "user": "", "password": ""} 34 | 35 | config = configparser.ConfigParser() 36 | config.read_file(open(os.getcwd() + '/app/config/config.cfg')) 37 | pg_config = dict(config.items('POSTGRESQL')) 38 | 39 | for k, v in pg_config.items(): 40 | params[k] = v 41 | 42 | cur, conn = await dbe.create_connection(params) 43 | 44 | query = get_users_by_department_company.format(**item) 45 | 46 | cur.execute(query) 47 | results = cur.fetchall() 48 | dbe.close_connection(cur, conn) 49 | return results 50 | 51 | except (Exception, psycopg2.Error) as error: 52 | msg = "Error while fetching data from PostgreSQL: {}".format(error) 53 | dbe.close_connection(cur, conn) 54 | return {'error':True, 55 | 'message': msg} 56 | 57 | if __name__== '__main__': 58 | uvicorn.run(app, port = 8080, host= "0.0.0.0") -------------------------------------------------------------------------------- /code/apps/api/app/sql_queries.py: -------------------------------------------------------------------------------- 1 | 2 | get_users_by_department_company = """ 3 | SELECT u.id, u.firstname, u.lastname, u.email, u.Phone1, u.Phone2, 4 | u.zip_code, u.Address, u.City, u.state, d.name as department, c.name as company 5 | FROM users u 6 | inner join companies c 7 | ON c.id = u.company_id 8 | inner join departments d 9 | on d.id = u.department_id 10 | where u.company_id = {company} and u.department_id = {department} 11 | limit {limit}; 12 | """ -------------------------------------------------------------------------------- /code/apps/api/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wittline/data-engineer-challenge/232d1efd07197f5af09a8ccf2561b58e3c617426/code/apps/api/requirements.txt -------------------------------------------------------------------------------- /code/apps/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | 3 | services: 4 | db: 5 | container_name: pg_container 6 | image: postgres 7 | restart: always 8 | environment: 9 | POSTGRES_USER: "postgres" 10 | POSTGRES_PASSWORD: "pg12345" 11 | POSTGRES_DB: "db" 12 | POSTGRES_HOST_AUTH_METHOD: trust 13 | ports: 14 | - "5432:5432" 15 | networks: 16 | - postgres 17 | 18 | ingestion: 19 | build: ./ingestion/ 20 | container_name: ingestion 21 | networks: 22 | - postgres 23 | depends_on: 24 | - db 25 | command: bash -c "/usr/wait-for-it.sh --timeout=0 db:5432 && python -u ./app/main.py" 26 | volumes: 27 | - ./ingestion/app/data:/ingestion/app/data/:z 28 | 29 | api: 30 | build: ./api/ 31 | container_name: api 32 | ports: 33 | - "8080:8080" 34 | networks: 35 | - postgres 36 | depends_on: 37 | - ingestion 38 | 39 | networks: 40 | postgres: 41 | driver: bridge 42 | -------------------------------------------------------------------------------- /code/apps/ingestion/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | WORKDIR /ingestion 4 | 5 | COPY requirements.txt . 6 | 7 | COPY wait-for-it.sh /usr/wait-for-it.sh 8 | 9 | RUN pip install -r requirements.txt 10 | 11 | COPY ./app ./app 12 | 13 | RUN chmod +x /usr/wait-for-it.sh -------------------------------------------------------------------------------- /code/apps/ingestion/app/config/config.cfg: -------------------------------------------------------------------------------- 1 | [POSTGRESQL] 2 | host = pg_container 3 | database = db 4 | user = postgres 5 | password = pg12345 6 | 7 | [STAGINGFILE] 8 | location = app/data/sample.csv -------------------------------------------------------------------------------- /code/apps/ingestion/app/data/sample.csv: -------------------------------------------------------------------------------- 1 | first_name,last_name,company_name,address,city,state,zip,phone1,phone2,email,department 2 | James,Butt,"Benton, John B Jr",6649 N Blue Gum St,New Orleans,LA,70116,504-621-8927,504-845-1427,jbutt@gmail.com,Sales 3 | James,Butt,"Benton, John B Jr",6649 N Blue Gum St,New Orleans,LA,70116,504-621-8927,504-845-1427,jbutt@gmail.com,Marketing 4 | Josephine,Darakjy,"Chanay, Jeffrey A Esq",4 B Blue Ridge Blvd,Brighton,MI,48116,810-292-9388,810-374-9840,josephine_darakjy@darakjy.org,Human Resources 5 | Art,Venere,"Chemel, James L Cpa",8 W Cerritos Ave #54,Bridgeport,NJ,8014,856-636-8749,856-264-4130,art@venere.org,Purchasing 6 | Lenna,Paprocki,Feltz Printing Service,639 Main St,Anchorage,AK,99501,907-385-4412,907-921-2010,lpaprocki@hotmail.com,Marketing 7 | Donette,Foller,Printing Dimensions,34 Center St,Hamilton,OH,45011,513-570-1893,513-549-4561,donette.foller@cox.net,Production 8 | Simona,Morasca,"Chapman, Ross E Esq",3 Mcauley Dr,Ashland,OH,44805,419-503-2484,419-800-6759,simona@morasca.com,Research and Development 9 | Mitsue,Tollner,Morlong Associates,7 Eads St,Chicago,IL,60632,773-573-6914,773-924-8565,mitsue_tollner@yahoo.com,Finances 10 | Leota,Dilliard,Commercial Press,7 W Jackson Blvd,San Jose,CA,95111,408-752-3500,408-813-1105,leota@hotmail.com,Sales 11 | Sage,Wieser,Truhlar And Truhlar Attys,5 Boston Ave #88,Sioux Falls,SD,57105,605-414-2147,605-794-4895,sage_wieser@cox.net,Human Resources 12 | Kris,Marrier,"King, Christopher A Esq",228 Runamuck Pl #2808,Baltimore,MD,21224,410-655-8723,410-804-4694,kris@gmail.com,Purchasing 13 | Minna,Amigon,"Dorl, James J Esq",2371 Jerrold Ave,Kulpsville,PA,19443,215-874-1229,215-422-8694,minna_amigon@yahoo.com,Marketing 14 | Abel,Maclead,Rangoni Of Florence,37275 St Rt 17m M,Middle Island,NY,11953,631-335-3414,631-677-3675,amaclead@gmail.com,Production 15 | Kiley,Caldarera,Feiner Bros,25 E 75th St #69,Los Angeles,CA,90034,310-498-5651,310-254-3084,kiley.caldarera@aol.com,Research and Development 16 | Graciela,Ruta,Buckley Miller & Wright,98 Connecticut Ave Nw,Chagrin Falls,OH,44023,440-780-8425,440-579-7763,gruta@cox.net,Finances 17 | Cammy,Albares,"Rousseaux, Michael Esq",56 E Morehead St,Laredo,TX,78045,956-537-6195,956-841-7216,calbares@gmail.com,Sales 18 | Mattie,Poquette,Century Communications,73 State Road 434 E,Phoenix,AZ,85013,602-277-4385,602-953-6360,mattie@aol.com,Human Resources 19 | Meaghan,Garufi,"Bolton, Wilbur Esq",69734 E Carrillo St,Mc Minnville,TN,37110,931-313-9635,931-235-7959,meaghan@hotmail.com,Purchasing 20 | Gladys,Rim,T M Byxbee Company Pc,322 New Horizon Blvd,Milwaukee,WI,53207,414-661-9598,414-377-2880,gladys.rim@rim.org,Marketing 21 | Yuki,Whobrey,Farmers Insurance Group,1 State Route 27,Taylor,MI,48180,313-288-7937,313-341-4470,yuki_whobrey@aol.com,Production 22 | Fletcher,Flosi,Post Box Services Plus,394 Manchester Blvd,Rockford,IL,61109,815-828-2147,815-426-5657,fletcher.flosi@yahoo.com,Research and Development 23 | Bette,Nicka,Sport En Art,6 S 33rd St,Aston,PA,19014,610-545-3615,610-492-4643,bette_nicka@cox.net,Finances 24 | Veronika,Inouye,C 4 Network Inc,6 Greenleaf Ave,San Jose,CA,95111,408-540-1785,408-813-4592,vinouye@aol.com,Sales 25 | Veronika,Inouye,C 4 Network Inc,6 Greenleaf Ave,San Jose,CA,95111,408-540-1785,408-813-4592,vinouye@aol.com,Finances 26 | Willard,Kolmetz,"Ingalls, Donald R Esq",618 W Yakima Ave,Irving,TX,75062,972-303-9197,972-896-4882,willard@hotmail.com,Human Resources 27 | Maryann,Royster,"Franklin, Peter L Esq",74 S Westgate St,Albany,NY,12204,518-966-7987,518-448-8982,mroyster@royster.com,Purchasing 28 | Alisha,Slusarski,Wtlz Power 107 Fm,3273 State St,Middlesex,NJ,8846,732-658-3154,732-635-3453,alisha@slusarski.com,Marketing 29 | Allene,Iturbide,"Ledecky, David Esq",1 Central Ave,Stevens Point,WI,54481,715-662-6764,715-530-9863,allene_iturbide@cox.net,Production 30 | Chanel,Caudy,Professional Image Inc,86 Nw 66th St #8673,Shawnee,KS,66218,913-388-2079,913-899-1103,chanel.caudy@caudy.org,Research and Development 31 | Ezekiel,Chui,"Sider, Donald C Esq",2 Cedar Ave #84,Easton,MD,21601,410-669-1642,410-235-8738,ezekiel@chui.com,Finances 32 | Willow,Kusko,U Pull It,90991 Thorburn Ave,New York,NY,10011,212-582-4976,212-934-5167,wkusko@yahoo.com,Human Resources 33 | Bernardo,Figeroa,"Clark, Richard Cpa",386 9th Ave N,Conroe,TX,77301,936-336-3951,936-597-3614,bfigeroa@aol.com,Finances 34 | Bernardo,Figeroa,"Clark, Richard Cpa",386 9th Ave N,Conroe,TX,77301,936-336-3951,936-597-3614,bfigeroa@aol.com,Purchasing 35 | Ammie,Corrio,"Moskowitz, Barry S",74874 Atlantic Ave,Columbus,OH,43215,614-801-9788,614-648-3265,ammie@corrio.com,Marketing 36 | Francine,Vocelka,Cascade Realty Advisors Inc,366 South Dr,Las Cruces,NM,88011,505-977-3911,505-335-5293,francine_vocelka@vocelka.com,Human Resources 37 | Ernie,Stenseth,Knwz Newsradio,45 E Liberty St,Ridgefield Park,NJ,7660,201-709-6245,201-387-9093,ernie_stenseth@aol.com,Purchasing 38 | Albina,Glick,"Giampetro, Anthony D",4 Ralph Ct,Dunellen,NJ,8812,732-924-7882,732-782-6701,albina@glick.com,Marketing 39 | Alishia,Sergi,Milford Enterprises Inc,2742 Distribution Way,New York,NY,10025,212-860-1579,212-753-2740,asergi@gmail.com,Human Resources 40 | Solange,Shinko,"Mosocco, Ronald A",426 Wolf St,Metairie,LA,70002,504-979-9175,504-265-8174,solange@shinko.com,Purchasing 41 | Jose,Stockham,Tri State Refueler Co,128 Bransten Rd,New York,NY,10011,212-675-8570,212-569-4233,jose@yahoo.com,Marketing 42 | Rozella,Ostrosky,Parkway Company,17 Morena Blvd,Camarillo,CA,93012,805-832-6163,805-609-1531,rozella.ostrosky@ostrosky.com,Sales 43 | Valentine,Gillian,Fbs Business Finance,775 W 17th St,San Antonio,TX,78204,210-812-9597,210-300-6244,valentine_gillian@gmail.com,Purchasing 44 | Valentine,Gillian,Fbs Business Finance,775 W 17th St,San Antonio,TX,78204,210-812-9597,210-300-6244,valentine_gillian@gmail.com,Sales 45 | Kati,Rulapaugh,Eder Assocs Consltng Engrs Pc,6980 Dorsett Rd,Abilene,KS,67410,785-463-7829,785-219-7724,kati.rulapaugh@hotmail.com,Sales 46 | Youlanda,Schemmer,Tri M Tool Inc,2881 Lewis Rd,Prineville,OR,97754,541-548-8197,541-993-2611,youlanda@aol.com,Sales 47 | Dyan,Oldroyd,International Eyelets Inc,7219 Woodfield Rd,Overland Park,KS,66204,913-413-4604,913-645-8918,doldroyd@aol.com,Sales 48 | Roxane,Campain,Rapid Trading Intl,1048 Main St,Fairbanks,AK,99708,907-231-4722,907-335-6568,roxane@hotmail.com,Sales 49 | Lavera,Perin,Abc Enterprises Inc,678 3rd Ave,Miami,FL,33196,305-606-7291,305-995-2078,lperin@perin.org,Sales 50 | Erick,Ferencz,Cindy Turner Associates,20 S Babcock St,Fairbanks,AK,99712,907-741-1044,907-227-6777,erick.ferencz@aol.com,Sales 51 | Fatima,Saylors,"Stanton, James D Esq",2 Lighthouse Ave,Hopkins,MN,55343,952-768-2416,952-479-2375,fsaylors@saylors.org,Sales 52 | Jina,Briddick,Grace Pastries Inc,38938 Park Blvd,Boston,MA,2128,617-399-5124,617-997-5771,jina_briddick@briddick.com,Sales 53 | Kanisha,Waycott,"Schroer, Gene E Esq",5 Tomahawk Dr,Los Angeles,CA,90006,323-453-2780,323-315-7314,kanisha_waycott@yahoo.com,Purchasing 54 | Emerson,Bowley,Knights Inn,762 S Main St,Madison,WI,53711,608-336-7444,608-658-7940,emerson.bowley@bowley.org,Sales 55 | Emerson,Bowley,Knights Inn,762 S Main St,Madison,WI,53711,608-336-7444,608-658-7940,emerson.bowley@bowley.org,Purchasing 56 | Blair,Malet,Bollinger Mach Shp & Shipyard,209 Decker Dr,Philadelphia,PA,19132,215-907-9111,215-794-4519,bmalet@yahoo.com,Purchasing 57 | Brock,Bolognia,Orinda News,4486 W O St #1,New York,NY,10003,212-402-9216,212-617-5063,bbolognia@yahoo.com,Purchasing 58 | Lorrie,Nestle,Ballard Spahr Andrews,39 S 7th St,Tullahoma,TN,37388,931-875-6644,931-303-6041,lnestle@hotmail.com,Purchasing 59 | Sabra,Uyetake,Lowy Limousine Service,98839 Hawthorne Blvd #6101,Columbia,SC,29201,803-925-5213,803-681-3678,sabra@uyetake.org,Purchasing 60 | Marjory,Mastella,Vicon Corporation,71 San Mateo Ave,Wayne,PA,19087,610-814-5533,610-379-7125,mmastella@mastella.com,Purchasing 61 | Karl,Klonowski,"Rossi, Michael M",76 Brooks St #9,Flemington,NJ,8822,908-877-6135,908-470-4661,karl_klonowski@yahoo.com,Production 62 | Tonette,Wenner,Northwest Publishing,4545 Courthouse Rd,Westbury,NY,11590,516-968-6051,516-333-4861,twenner@aol.com,Production 63 | Amber,Monarrez,Branford Wire & Mfg Co,14288 Foster Ave #4121,Jenkintown,PA,19046,215-934-8655,215-329-6386,amber_monarrez@monarrez.org,Production 64 | Shenika,Seewald,East Coast Marketing,4 Otis St,Van Nuys,CA,91405,818-423-4007,818-749-8650,shenika@gmail.com,Production 65 | Delmy,Ahle,Wye Technologies Inc,65895 S 16th St,Providence,RI,2909,401-458-2547,401-559-8961,delmy.ahle@hotmail.com,Production 66 | Deeanna,Juhas,"Healy, George W Iv",14302 Pennsylvania Ave,Huntingdon Valley,PA,19006,215-211-9589,215-417-9563,deeanna_juhas@gmail.com,Sales 67 | Blondell,Pugh,Alpenlite Inc,201 Hawk Ct,Providence,RI,2904,401-960-8259,401-300-8122,bpugh@aol.com,Purchasing 68 | Blondell,Pugh,Alpenlite Inc,201 Hawk Ct,Providence,RI,2904,401-960-8259,401-300-8122,bpugh@aol.com,Human Resources 69 | Jamal,Vanausdal,"Hubbard, Bruce Esq",53075 Sw 152nd Ter #615,Monroe Township,NJ,8831,732-234-1546,732-904-2931,jamal@vanausdal.org,Purchasing 70 | Cecily,Hollack,Arthur A Oliver & Son Inc,59 N Groesbeck Hwy,Austin,TX,78731,512-486-3817,512-861-3814,cecily@hollack.org,Marketing 71 | Carmelina,Lindall,George Jessop Carter Jewelers,2664 Lewis Rd,Littleton,CO,80126,303-724-7371,303-874-5160,carmelina_lindall@lindall.com,Production 72 | Maurine,Yglesias,"Schultz, Thomas C Md",59 Shady Ln #53,Milwaukee,WI,53214,414-748-1374,414-573-7719,maurine_yglesias@yglesias.com,Research and Development 73 | Tawna,Buvens,H H H Enterprises Inc,3305 Nabell Ave #679,New York,NY,10009,212-674-9610,212-462-9157,tawna@gmail.com,Finances 74 | Penney,Weight,Hawaiian King Hotel,18 Fountain St,Anchorage,AK,99515,907-797-9628,907-873-2882,penney_weight@aol.com,Sales 75 | Elly,Morocco,Killion Industries,7 W 32nd St,Erie,PA,16502,814-393-5571,814-420-3553,elly_morocco@gmail.com,Human Resources 76 | Ilene,Eroman,"Robinson, William J Esq",2853 S Central Expy,Glen Burnie,MD,21061,410-914-9018,410-937-4543,ilene.eroman@hotmail.com,Purchasing 77 | Vallie,Mondella,Private Properties,74 W College St,Boise,ID,83707,208-862-5339,208-737-8439,vmondella@mondella.com,Marketing 78 | Kallie,Blackwood,Rowley Schlimgen Inc,701 S Harrison Rd,San Francisco,CA,94104,415-315-2761,415-604-7609,kallie.blackwood@gmail.com,Production 79 | Johnetta,Abdallah,Forging Specialties,1088 Pinehurst St,Chapel Hill,NC,27514,919-225-9345,919-715-3791,johnetta_abdallah@aol.com,Research and Development 80 | Bobbye,Rhym,"Smits, Patricia Garity",30 W 80th St #1995,San Carlos,CA,94070,650-528-5783,650-811-9032,brhym@rhym.com,Finances 81 | Micaela,Rhymes,H Lee Leonard Attorney At Law,20932 Hedley St,Concord,CA,94520,925-647-3298,925-522-7798,micaela_rhymes@gmail.com,Finances 82 | Tamar,Hoogland,A K Construction Co,2737 Pistorio Rd #9230,London,OH,43140,740-343-8575,740-526-5410,tamar@hotmail.com,Finances 83 | Moon,Parlato,"Ambelang, Jessica M Md",74989 Brandon St,Wellsville,NY,14895,585-866-8313,585-498-4278,moon@yahoo.com,Finances 84 | Laurel,Reitler,Q A Service,6 Kains Ave,Baltimore,MD,21215,410-520-4832,410-957-6903,laurel_reitler@reitler.com,Finances 85 | Delisa,Crupi,Wood & Whitacre Contractors,47565 W Grand Ave,Newark,NJ,7105,973-354-2040,973-847-9611,delisa.crupi@crupi.com,Finances 86 | Viva,Toelkes,Mark Iv Press Ltd,4284 Dorigo Ln,Chicago,IL,60647,773-446-5569,773-352-3437,viva.toelkes@gmail.com,Finances 87 | Elza,Lipke,Museum Of Science & Industry,6794 Lake Dr E,Newark,NJ,7104,973-927-3447,973-796-3667,elza@yahoo.com,Human Resources 88 | Devorah,Chickering,Garrison Ind,31 Douglas Blvd #950,Clovis,NM,88101,505-975-8559,505-950-1763,devorah@hotmail.com,Human Resources 89 | Timothy,Mulqueen,Saronix Nymph Products,44 W 4th St,Staten Island,NY,10309,718-332-6527,718-654-7063,timothy_mulqueen@mulqueen.org,Human Resources 90 | Arlette,Honeywell,Smc Inc,11279 Loytan St,Jacksonville,FL,32254,904-775-4480,904-514-9918,ahoneywell@honeywell.com,Human Resources 91 | Dominque,Dickerson,E A I Electronic Assocs Inc,69 Marquette Ave,Hayward,CA,94545,510-993-3758,510-901-7640,dominque.dickerson@dickerson.org,Human Resources 92 | Lettie,Isenhower,"Conte, Christopher A Esq",70 W Main St,Beachwood,OH,44122,216-657-7668,216-733-8494,lettie_isenhower@yahoo.com,Human Resources 93 | Myra,Munns,Anker Law Office,461 Prospect Pl #316,Euless,TX,76040,817-914-7518,817-451-3518,mmunns@cox.net,Human Resources 94 | Stephaine,Barfield,Beutelschies & Company,47154 Whipple Ave Nw,Gardena,CA,90247,310-774-7643,310-968-1219,stephaine@barfield.com,Human Resources 95 | Lai,Gato,"Fligg, Kenneth I Jr",37 Alabama Ave,Evanston,IL,60201,847-728-7286,847-957-4614,lai.gato@gato.org,Marketing 96 | Stephen,Emigh,"Sharp, J Daniel Esq",3777 E Richmond St #900,Akron,OH,44302,330-537-5358,330-700-2312,stephen_emigh@hotmail.com,Marketing 97 | Tyra,Shields,"Assink, Anne H Esq",3 Fort Worth Ave,Philadelphia,PA,19106,215-255-1641,215-228-8264,tshields@gmail.com,Marketing 98 | Tammara,Wardrip,Jewel My Shop Inc,4800 Black Horse Pike,Burlingame,CA,94010,650-803-1936,650-216-5075,twardrip@cox.net,Marketing 99 | Cory,Gibes,Chinese Translation Resources,83649 W Belmont Ave,San Gabriel,CA,91776,626-572-1096,626-696-2777,cory.gibes@gmail.com,Marketing 100 | Danica,Bruschke,"Stevens, Charles T",840 15th Ave,Waco,TX,76708,254-782-8569,254-205-1422,danica_bruschke@gmail.com,Marketing 101 | Wilda,Giguere,"Mclaughlin, Luther W Cpa",1747 Calle Amanecer #2,Anchorage,AK,99501,907-870-5536,907-914-9482,wilda@cox.net,Marketing 102 | Elvera,Benimadho,Tree Musketeers,99385 Charity St #840,San Jose,CA,95110,408-703-8505,408-440-8447,elvera.benimadho@cox.net,Marketing 103 | Carma,Vanheusen,Springfield Div Oh Edison Co,68556 Central Hwy,San Leandro,CA,94577,510-503-7169,510-452-4835,carma@cox.net,Marketing 104 | Malinda,Hochard,Logan Memorial Hospital,55 Riverside Ave,Indianapolis,IN,46202,317-722-5066,317-472-2412,malinda.hochard@yahoo.com,Marketing 105 | Natalie,Fern,"Kelly, Charles G Esq",7140 University Ave,Rock Springs,WY,82901,307-704-8713,307-279-3793,natalie.fern@hotmail.com,Marketing 106 | Lisha,Centini,Industrial Paper Shredders Inc,64 5th Ave #1153,Mc Lean,VA,22102,703-235-3937,703-475-7568,lisha@centini.org,Marketing 107 | Arlene,Klusman,Beck Horizon Builders,3 Secor Rd,New Orleans,LA,70112,504-710-5840,504-946-1807,arlene_klusman@gmail.com,Marketing -------------------------------------------------------------------------------- /code/apps/ingestion/app/db_engine.py: -------------------------------------------------------------------------------- 1 | import psycopg2 2 | import pandas as pd 3 | from sql_queries import create_table_queries, drop_table_queries, fill_table_queries, create_constraints 4 | 5 | 6 | def create_connection(params): 7 | """ 8 | create a new connection with the postgreSQL 9 | database and return the cur and conn object 10 | :param params: connection string 11 | """ 12 | conn = None 13 | 14 | try: 15 | print('Connecting to the PostgreSQL database') 16 | conn = psycopg2.connect(**params) 17 | conn.set_session(autocommit=True) 18 | 19 | cur = conn.cursor() 20 | 21 | print('PostgreSQL database version:') 22 | cur.execute('SELECT version()') 23 | 24 | db_version = cur.fetchone() 25 | print(db_version) 26 | return cur, conn 27 | except (Exception, psycopg2.DatabaseError) as error: 28 | 29 | print(error) 30 | 31 | 32 | def close_connection(cur, conn): 33 | """ 34 | close the connection with the postgreSQL database 35 | :param cur: cursor 36 | :param conn: connection object 37 | """ 38 | try: 39 | cur.close() 40 | if conn is not None: 41 | conn.close() 42 | print('Database connection closed') 43 | except (Exception, psycopg2.DatabaseError) as error: 44 | print(error) 45 | 46 | def drop_table(cur, conn, table): 47 | """ 48 | drop an specific table 49 | :param cur: cursor 50 | :param conn: connection object 51 | """ 52 | 53 | query = "DROP TABLE IF EXISTS {0}".format(table) 54 | print(f"Executing: {query}") 55 | cur.execute(query) 56 | conn.commit() 57 | 58 | 59 | def drop_tables(cur, conn): 60 | """ 61 | drop all the tables in the example 62 | :param cur: cursor 63 | :param conn: connection object 64 | """ 65 | print("Dropping tables") 66 | for query in drop_table_queries: 67 | cur.execute(query) 68 | conn.commit() 69 | print("Tables dropped") 70 | 71 | 72 | def create_tables(cur, conn): 73 | """ 74 | create all the tables in the example 75 | :param cur: cursor 76 | :param conn: connection object 77 | """ 78 | print("Creating created") 79 | for query in create_table_queries: 80 | cur.execute(query) 81 | conn.commit() 82 | print("Tables created") 83 | 84 | def pg_to_pd(cur, query, columns): 85 | """ 86 | return the select result as panda dataframe 87 | :param cur: cursor 88 | :param query: SELECT query string 89 | :param columns: columns name in the select 90 | """ 91 | try: 92 | cur.execute(query) 93 | except (Exception, psycopg2.DatabaseError) as error: 94 | print("Error: %s" % error) 95 | return 1 96 | 97 | tupples = cur.fetchall() 98 | 99 | 100 | df = pd.DataFrame(tupples, columns=columns) 101 | return df 102 | 103 | 104 | def fill_from_staging_all(cur, conn): 105 | """ 106 | Fill all the records in the tables 107 | :param cur: cursor 108 | :param conn: connection object 109 | """ 110 | for query in fill_table_queries: 111 | cur.execute(query) 112 | conn.commit() 113 | print("Records were populated from staging") 114 | 115 | def check_data(cur, conn, tables): 116 | """ 117 | Check count of records in tables 118 | :param cur: cursor 119 | :param conn: connection object 120 | :param tables: tables to check 121 | """ 122 | 123 | count_values = {} 124 | 125 | for table in tables: 126 | query_count = "SELECT COUNT(*) FROM {0}".format(table) 127 | 128 | try: 129 | cur = conn.cursor() 130 | cur.execute(query_count) 131 | count_values[table] = cur.fetchone()[0] 132 | except (Exception, psycopg2.DatabaseError) as error: 133 | print("Error: %s" % error) 134 | raise 135 | 136 | return count_values 137 | 138 | def set_staging(cur, conn, staging_file, columns): 139 | 140 | print("Copying data from .csv to staging zone") 141 | 142 | try: 143 | copy_cmd = f"copy staging({','.join(columns)}) from stdout (format csv)" 144 | with open(staging_file, 'r') as f: 145 | next(f) 146 | cur.copy_expert(copy_cmd, f) 147 | conn.commit() 148 | print("Staging ready") 149 | except (psycopg2.Error) as e: 150 | print(e) 151 | 152 | def set_constraints(cur, conn): 153 | print("Setting constraints") 154 | for query in create_constraints: 155 | cur.execute(query) 156 | conn.commit() 157 | print("Constraints ready") 158 | 159 | 160 | 161 | -------------------------------------------------------------------------------- /code/apps/ingestion/app/main.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import db_engine as dbe 3 | import configparser 4 | import os 5 | 6 | 7 | class Pipeline: 8 | 9 | def __init__(self, params, staging_file): 10 | self.params = params 11 | self.staging_file = staging_file 12 | 13 | def run(self): 14 | tables = ['users', 'companies', 'departments'] 15 | columns_staging = ['first_name','last_name','company_name','address','city','state','zip','phone1','phone2','email','department'] 16 | cur, conn = dbe.create_connection(self.params) 17 | dbe.drop_tables(cur, conn) 18 | dbe.create_tables(cur, conn) 19 | dbe.set_staging(cur, conn, self.staging_file, columns_staging) 20 | dbe.fill_from_staging_all(cur, conn) 21 | dbe.drop_table(cur, conn, 'staging') 22 | dbe.set_constraints(cur, conn) 23 | count_tables = dbe.check_data(cur, conn, tables) 24 | for k, v in count_tables.items(): 25 | print("Table {0} has {1} records".format(k, v)) 26 | dbe.close_connection(cur, conn) 27 | 28 | 29 | if __name__ == '__main__': 30 | 31 | params = {"host": "", "database": "", "user": "", "password": ""} 32 | 33 | config = configparser.ConfigParser() 34 | config.read_file(open(os.getcwd() + '/app/config/config.cfg')) 35 | pg_config = dict(config.items('POSTGRESQL')) 36 | staging_file = config.get('STAGINGFILE','location') 37 | 38 | for k, v in pg_config.items(): 39 | params[k] = v 40 | 41 | print(params) 42 | 43 | pipeline = Pipeline(params, staging_file) 44 | pipeline.run() -------------------------------------------------------------------------------- /code/apps/ingestion/app/sql_queries.py: -------------------------------------------------------------------------------- 1 | # DROP TABLES 2 | users_table_drop = "DROP TABLE IF EXISTS users" 3 | departments_table_drop = "DROP TABLE IF EXISTS departments" 4 | companies_table_drop = "DROP TABLE IF EXISTS companies" 5 | staging_table_drop = "DROP TABLE IF EXISTS staging" 6 | 7 | # CREATE TABLES 8 | 9 | staging_table_create = (""" 10 | CREATE TABLE IF NOT EXISTS staging( 11 | id serial PRIMARY KEY NOT NULL, 12 | first_name varchar, 13 | last_name varchar, 14 | company_name varchar, 15 | address varchar, 16 | city varchar, 17 | state varchar, 18 | zip varchar, 19 | phone1 varchar, 20 | phone2 varchar, 21 | email varchar, 22 | department varchar 23 | ); 24 | """) 25 | 26 | users_table_create = (""" 27 | CREATE TABLE IF NOT EXISTS users( 28 | id serial NOT NULL, 29 | firstname varchar, 30 | lastname varchar, 31 | email varchar, 32 | Phone1 varchar, 33 | Phone2 varchar, 34 | zip_code varchar, 35 | Address varchar, 36 | City varchar, 37 | state varchar, 38 | department_id integer NOT NULL, 39 | company_id integer NOT NULL, 40 | CONSTRAINT users_pkey PRIMARY KEY(id) 41 | ); 42 | """) 43 | 44 | 45 | departments_table_create = (""" 46 | CREATE TABLE IF NOT EXISTS departments( 47 | id serial NOT NULL, 48 | name varchar, 49 | CONSTRAINT department_pkey PRIMARY KEY(id) 50 | ); 51 | """) 52 | 53 | 54 | companies_table_create = (""" 55 | CREATE TABLE IF NOT EXISTS companies( 56 | id serial NOT NULL, 57 | name varchar, 58 | CONSTRAINT company_pkey PRIMARY KEY(id) 59 | ); 60 | """) 61 | 62 | constraints = (""" 63 | 64 | ALTER TABLE users 65 | ADD CONSTRAINT users_department_id_fkey 66 | FOREIGN KEY (department_id) REFERENCES departments (id); 67 | 68 | ALTER TABLE users 69 | ADD CONSTRAINT users_company_id_fkey 70 | FOREIGN KEY (company_id) REFERENCES companies (id); 71 | 72 | """) 73 | 74 | ## FILL TABLES FROM STAGING 75 | 76 | users_fill_from_staging = (""" 77 | insert into users (firstname, lastname, email, Phone1, Phone2, zip_code, Address, City, state, department_id, company_id) 78 | select 79 | s.first_name as firstname, 80 | s.last_name as lastname, 81 | s.email as email, 82 | s.phone1 as Phone1, 83 | s.phone2 as Phone2, 84 | s.zip as zip_code, 85 | s.address as Address, 86 | s.city as City, 87 | s.state as state, 88 | d.id as department_id, 89 | c.id as company_id 90 | from staging s 91 | INNER JOIN companies c 92 | ON s.company_name = c.name 93 | INNER JOIN departments d 94 | ON s.department = d.name; 95 | """) 96 | 97 | companies_fill_from_staging = (""" 98 | insert into companies (name) 99 | select distinct company_name as Name 100 | from staging; 101 | """) 102 | 103 | departments_fill_from_staging = (""" 104 | insert into departments (NAME) 105 | select distinct department as Name 106 | from staging; 107 | """) 108 | 109 | 110 | fill_table_queries = [companies_fill_from_staging,departments_fill_from_staging,users_fill_from_staging] 111 | create_table_queries = [staging_table_create, users_table_create, departments_table_create, companies_table_create] 112 | drop_table_queries = [users_table_drop, departments_table_drop, companies_table_drop, staging_table_drop] 113 | create_constraints = [constraints] -------------------------------------------------------------------------------- /code/apps/ingestion/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Wittline/data-engineer-challenge/232d1efd07197f5af09a8ccf2561b58e3c617426/code/apps/ingestion/requirements.txt -------------------------------------------------------------------------------- /code/apps/ingestion/wait-for-it.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Use this script to test if a given TCP host/port are available 3 | 4 | WAITFORIT_cmdname=${0##*/} 5 | 6 | echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi } 7 | 8 | usage() 9 | { 10 | cat << USAGE >&2 11 | Usage: 12 | $WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args] 13 | -h HOST | --host=HOST Host or IP under test 14 | -p PORT | --port=PORT TCP port under test 15 | Alternatively, you specify the host and port as host:port 16 | -s | --strict Only execute subcommand if the test succeeds 17 | -q | --quiet Don't output any status messages 18 | -t TIMEOUT | --timeout=TIMEOUT 19 | Timeout in seconds, zero for no timeout 20 | -- COMMAND ARGS Execute command with args after the test finishes 21 | USAGE 22 | exit 1 23 | } 24 | 25 | wait_for() 26 | { 27 | if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then 28 | echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" 29 | else 30 | echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout" 31 | fi 32 | WAITFORIT_start_ts=$(date +%s) 33 | while : 34 | do 35 | if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then 36 | nc -z $WAITFORIT_HOST $WAITFORIT_PORT 37 | WAITFORIT_result=$? 38 | else 39 | (echo -n > /dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT) >/dev/null 2>&1 40 | WAITFORIT_result=$? 41 | fi 42 | if [[ $WAITFORIT_result -eq 0 ]]; then 43 | WAITFORIT_end_ts=$(date +%s) 44 | echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds" 45 | break 46 | fi 47 | sleep 1 48 | done 49 | return $WAITFORIT_result 50 | } 51 | 52 | wait_for_wrapper() 53 | { 54 | # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692 55 | if [[ $WAITFORIT_QUIET -eq 1 ]]; then 56 | timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --quiet --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & 57 | else 58 | timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & 59 | fi 60 | WAITFORIT_PID=$! 61 | trap "kill -INT -$WAITFORIT_PID" INT 62 | wait $WAITFORIT_PID 63 | WAITFORIT_RESULT=$? 64 | if [[ $WAITFORIT_RESULT -ne 0 ]]; then 65 | echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" 66 | fi 67 | return $WAITFORIT_RESULT 68 | } 69 | 70 | # process arguments 71 | while [[ $# -gt 0 ]] 72 | do 73 | case "$1" in 74 | *:* ) 75 | WAITFORIT_hostport=(${1//:/ }) 76 | WAITFORIT_HOST=${WAITFORIT_hostport[0]} 77 | WAITFORIT_PORT=${WAITFORIT_hostport[1]} 78 | shift 1 79 | ;; 80 | --child) 81 | WAITFORIT_CHILD=1 82 | shift 1 83 | ;; 84 | -q | --quiet) 85 | WAITFORIT_QUIET=1 86 | shift 1 87 | ;; 88 | -s | --strict) 89 | WAITFORIT_STRICT=1 90 | shift 1 91 | ;; 92 | -h) 93 | WAITFORIT_HOST="$2" 94 | if [[ $WAITFORIT_HOST == "" ]]; then break; fi 95 | shift 2 96 | ;; 97 | --host=*) 98 | WAITFORIT_HOST="${1#*=}" 99 | shift 1 100 | ;; 101 | -p) 102 | WAITFORIT_PORT="$2" 103 | if [[ $WAITFORIT_PORT == "" ]]; then break; fi 104 | shift 2 105 | ;; 106 | --port=*) 107 | WAITFORIT_PORT="${1#*=}" 108 | shift 1 109 | ;; 110 | -t) 111 | WAITFORIT_TIMEOUT="$2" 112 | if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi 113 | shift 2 114 | ;; 115 | --timeout=*) 116 | WAITFORIT_TIMEOUT="${1#*=}" 117 | shift 1 118 | ;; 119 | --) 120 | shift 121 | WAITFORIT_CLI=("$@") 122 | break 123 | ;; 124 | --help) 125 | usage 126 | ;; 127 | *) 128 | echoerr "Unknown argument: $1" 129 | usage 130 | ;; 131 | esac 132 | done 133 | 134 | if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then 135 | echoerr "Error: you need to provide a host and port to test." 136 | usage 137 | fi 138 | 139 | WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15} 140 | WAITFORIT_STRICT=${WAITFORIT_STRICT:-0} 141 | WAITFORIT_CHILD=${WAITFORIT_CHILD:-0} 142 | WAITFORIT_QUIET=${WAITFORIT_QUIET:-0} 143 | 144 | # Check to see if timeout is from busybox? 145 | WAITFORIT_TIMEOUT_PATH=$(type -p timeout) 146 | WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH) 147 | 148 | WAITFORIT_BUSYTIMEFLAG="" 149 | if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then 150 | WAITFORIT_ISBUSY=1 151 | # Check if busybox timeout uses -t flag 152 | # (recent Alpine versions don't support -t anymore) 153 | if timeout &>/dev/stdout | grep -q -e '-t '; then 154 | WAITFORIT_BUSYTIMEFLAG="-t" 155 | fi 156 | else 157 | WAITFORIT_ISBUSY=0 158 | fi 159 | 160 | if [[ $WAITFORIT_CHILD -gt 0 ]]; then 161 | wait_for 162 | WAITFORIT_RESULT=$? 163 | exit $WAITFORIT_RESULT 164 | else 165 | if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then 166 | wait_for_wrapper 167 | WAITFORIT_RESULT=$? 168 | else 169 | wait_for 170 | WAITFORIT_RESULT=$? 171 | fi 172 | fi 173 | 174 | if [[ $WAITFORIT_CLI != "" ]]; then 175 | if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then 176 | echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess" 177 | exit $WAITFORIT_RESULT 178 | fi 179 | exec "${WAITFORIT_CLI[@]}" 180 | else 181 | exit $WAITFORIT_RESULT 182 | fi 183 | -------------------------------------------------------------------------------- /googled57bdb220576a44a.html: -------------------------------------------------------------------------------- 1 | google-site-verification: googled57bdb220576a44a.html --------------------------------------------------------------------------------