├── .gitignore ├── Dockerfile ├── Makefile ├── README.how-to-run.md ├── README.md ├── data ├── inappropriate_words.txt └── reviews.jsonl ├── design_document.md ├── diagrams ├── architecture_diagram.png └── data_flow_diagram.png ├── docker-compose.yml ├── ingestion ├── pyproject.toml ├── setup.py ├── src │ └── ingestion │ │ ├── __init__.py │ │ ├── cli.py │ │ ├── config.py │ │ ├── processor.py │ │ └── spark_utils.py └── tests │ ├── test_config_parser.py │ ├── test_data │ ├── input │ │ ├── inappropriate_words.txt │ │ └── reviews.jsonl │ ├── schemas │ │ ├── aggregation.json │ │ └── review.json │ └── test_config.yaml │ └── test_processor.py ├── ingestion_config └── default.yaml ├── notebook └── .gitkeep ├── requirements.txt ├── scaling_discussion.md ├── schemas ├── aggregation.json └── review.json └── test_output.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/Dockerfile -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/Makefile -------------------------------------------------------------------------------- /README.how-to-run.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/README.how-to-run.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/README.md -------------------------------------------------------------------------------- /data/inappropriate_words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/data/inappropriate_words.txt -------------------------------------------------------------------------------- /data/reviews.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/data/reviews.jsonl -------------------------------------------------------------------------------- /design_document.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/design_document.md -------------------------------------------------------------------------------- /diagrams/architecture_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/diagrams/architecture_diagram.png -------------------------------------------------------------------------------- /diagrams/data_flow_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/diagrams/data_flow_diagram.png -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/docker-compose.yml -------------------------------------------------------------------------------- /ingestion/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/pyproject.toml -------------------------------------------------------------------------------- /ingestion/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/setup.py -------------------------------------------------------------------------------- /ingestion/src/ingestion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ingestion/src/ingestion/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/src/ingestion/cli.py -------------------------------------------------------------------------------- /ingestion/src/ingestion/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/src/ingestion/config.py -------------------------------------------------------------------------------- /ingestion/src/ingestion/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/src/ingestion/processor.py -------------------------------------------------------------------------------- /ingestion/src/ingestion/spark_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/src/ingestion/spark_utils.py -------------------------------------------------------------------------------- /ingestion/tests/test_config_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/tests/test_config_parser.py -------------------------------------------------------------------------------- /ingestion/tests/test_data/input/inappropriate_words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/tests/test_data/input/inappropriate_words.txt -------------------------------------------------------------------------------- /ingestion/tests/test_data/input/reviews.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/tests/test_data/input/reviews.jsonl -------------------------------------------------------------------------------- /ingestion/tests/test_data/schemas/aggregation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/tests/test_data/schemas/aggregation.json -------------------------------------------------------------------------------- /ingestion/tests/test_data/schemas/review.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/tests/test_data/schemas/review.json -------------------------------------------------------------------------------- /ingestion/tests/test_data/test_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/tests/test_data/test_config.yaml -------------------------------------------------------------------------------- /ingestion/tests/test_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion/tests/test_processor.py -------------------------------------------------------------------------------- /ingestion_config/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/ingestion_config/default.yaml -------------------------------------------------------------------------------- /notebook/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/requirements.txt -------------------------------------------------------------------------------- /scaling_discussion.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/scaling_discussion.md -------------------------------------------------------------------------------- /schemas/aggregation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/schemas/aggregation.json -------------------------------------------------------------------------------- /schemas/review.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/schemas/review.json -------------------------------------------------------------------------------- /test_output.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imamar94/trivago-recruitment/HEAD/test_output.py --------------------------------------------------------------------------------