├── .coveragerc ├── .gitignore ├── LICENSE ├── README.md ├── dataflow ├── __init__.py ├── build_datastore_template.py └── config_template.py ├── dataproc ├── __init__.py ├── bin │ ├── create_cluster.sh │ ├── launch_jupyter.sh │ ├── run_df_naive.sh │ ├── run_dimsum.sh │ ├── run_naive.sh │ └── utils.sh ├── jobs │ ├── __init__.py │ ├── base.py │ ├── df_naive.py │ ├── dimsum.py │ ├── factory.py │ ├── naive.py │ └── run_jobs.py └── notebooks │ ├── df_naive.ipynb │ └── naive.ipynb ├── gae ├── __init__.py ├── appengine_config.py ├── base_utils.py ├── config_template.py ├── connector │ ├── __init__.py │ ├── bigquery.py │ ├── dataflow.py │ ├── dataproc.py │ ├── datastore.py │ ├── gcp.py │ └── storage.py ├── cron.yaml ├── cythonized │ ├── __init__.py │ ├── c_funcs.c │ ├── c_funcs.pyx │ ├── c_funcs.so │ └── setup.py ├── dataproc ├── factory.py ├── main.py ├── main.yaml ├── queries │ └── customers_interactions.sql ├── queue.yaml ├── recommender.py ├── recommender.yaml ├── requirements.txt ├── scheduler.py ├── standard_requirements.txt ├── utils.py ├── worker.py └── worker.yaml ├── nox.py └── tests ├── stress ├── config_template.py └── marreco.py ├── system ├── data │ ├── dataflow │ │ ├── file1.json.gz │ │ └── file2.json.gz │ ├── dataproc │ │ └── jobs │ │ │ └── train │ │ │ ├── 1 │ │ │ └── result.gz │ │ │ ├── 2 │ │ │ └── result.gz │ │ │ └── dimsum │ │ │ ├── 1 │ │ │ └── result.gz │ │ │ └── 2 │ │ │ └── result.gz │ └── gae │ │ └── test_query_customers.sql ├── dataflow │ └── test_build_datastore_template.py ├── dataproc │ └── jobs │ │ ├── base_fixture.py │ │ ├── conftest.py │ │ ├── test_base.py │ │ └── test_dimsum.py └── gae │ └── test_queries.py └── unit ├── data └── gae │ ├── test_config.json │ ├── test_query.sql │ └── test_requirements.txt └── gae ├── base.py ├── connector ├── test_bigquery.py ├── test_dataflow.py ├── test_dataproc.py ├── test_datastore.py ├── test_gcp.py └── test_storage.py ├── test_base_utils.py ├── test_factory.py ├── test_main.py ├── test_recommender.py ├── test_scheduler.py ├── test_utils.py └── test_worker.py /.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/.coveragerc -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/README.md -------------------------------------------------------------------------------- /dataflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataflow/__init__.py -------------------------------------------------------------------------------- /dataflow/build_datastore_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataflow/build_datastore_template.py -------------------------------------------------------------------------------- /dataflow/config_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataflow/config_template.py -------------------------------------------------------------------------------- /dataproc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/__init__.py -------------------------------------------------------------------------------- /dataproc/bin/create_cluster.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/bin/create_cluster.sh -------------------------------------------------------------------------------- /dataproc/bin/launch_jupyter.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/bin/launch_jupyter.sh -------------------------------------------------------------------------------- /dataproc/bin/run_df_naive.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/bin/run_df_naive.sh -------------------------------------------------------------------------------- /dataproc/bin/run_dimsum.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/bin/run_dimsum.sh -------------------------------------------------------------------------------- /dataproc/bin/run_naive.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/bin/run_naive.sh -------------------------------------------------------------------------------- /dataproc/bin/utils.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/bin/utils.sh -------------------------------------------------------------------------------- /dataproc/jobs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/jobs/__init__.py -------------------------------------------------------------------------------- /dataproc/jobs/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/jobs/base.py -------------------------------------------------------------------------------- /dataproc/jobs/df_naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/jobs/df_naive.py -------------------------------------------------------------------------------- /dataproc/jobs/dimsum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/jobs/dimsum.py -------------------------------------------------------------------------------- /dataproc/jobs/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/jobs/factory.py -------------------------------------------------------------------------------- /dataproc/jobs/naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/jobs/naive.py -------------------------------------------------------------------------------- /dataproc/jobs/run_jobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/jobs/run_jobs.py -------------------------------------------------------------------------------- /dataproc/notebooks/df_naive.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/notebooks/df_naive.ipynb -------------------------------------------------------------------------------- /dataproc/notebooks/naive.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/dataproc/notebooks/naive.ipynb -------------------------------------------------------------------------------- /gae/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/__init__.py -------------------------------------------------------------------------------- /gae/appengine_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/appengine_config.py -------------------------------------------------------------------------------- /gae/base_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/base_utils.py -------------------------------------------------------------------------------- /gae/config_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/config_template.py -------------------------------------------------------------------------------- /gae/connector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/connector/__init__.py -------------------------------------------------------------------------------- /gae/connector/bigquery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/connector/bigquery.py -------------------------------------------------------------------------------- /gae/connector/dataflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/connector/dataflow.py -------------------------------------------------------------------------------- /gae/connector/dataproc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/connector/dataproc.py -------------------------------------------------------------------------------- /gae/connector/datastore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/connector/datastore.py -------------------------------------------------------------------------------- /gae/connector/gcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/connector/gcp.py -------------------------------------------------------------------------------- /gae/connector/storage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/connector/storage.py -------------------------------------------------------------------------------- /gae/cron.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/cron.yaml -------------------------------------------------------------------------------- /gae/cythonized/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/cythonized/__init__.py -------------------------------------------------------------------------------- /gae/cythonized/c_funcs.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/cythonized/c_funcs.c -------------------------------------------------------------------------------- /gae/cythonized/c_funcs.pyx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/cythonized/c_funcs.pyx -------------------------------------------------------------------------------- /gae/cythonized/c_funcs.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/cythonized/c_funcs.so -------------------------------------------------------------------------------- /gae/cythonized/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/cythonized/setup.py -------------------------------------------------------------------------------- /gae/dataproc: -------------------------------------------------------------------------------- 1 | ../dataproc/jobs/ -------------------------------------------------------------------------------- /gae/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/factory.py -------------------------------------------------------------------------------- /gae/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/main.py -------------------------------------------------------------------------------- /gae/main.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/main.yaml -------------------------------------------------------------------------------- /gae/queries/customers_interactions.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/queries/customers_interactions.sql -------------------------------------------------------------------------------- /gae/queue.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/queue.yaml -------------------------------------------------------------------------------- /gae/recommender.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/recommender.py -------------------------------------------------------------------------------- /gae/recommender.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/recommender.yaml -------------------------------------------------------------------------------- /gae/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/requirements.txt -------------------------------------------------------------------------------- /gae/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/scheduler.py -------------------------------------------------------------------------------- /gae/standard_requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/standard_requirements.txt -------------------------------------------------------------------------------- /gae/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/utils.py -------------------------------------------------------------------------------- /gae/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/worker.py -------------------------------------------------------------------------------- /gae/worker.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/gae/worker.yaml -------------------------------------------------------------------------------- /nox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/nox.py -------------------------------------------------------------------------------- /tests/stress/config_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/stress/config_template.py -------------------------------------------------------------------------------- /tests/stress/marreco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/stress/marreco.py -------------------------------------------------------------------------------- /tests/system/data/dataflow/file1.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/data/dataflow/file1.json.gz -------------------------------------------------------------------------------- /tests/system/data/dataflow/file2.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/data/dataflow/file2.json.gz -------------------------------------------------------------------------------- /tests/system/data/dataproc/jobs/train/1/result.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/data/dataproc/jobs/train/1/result.gz -------------------------------------------------------------------------------- /tests/system/data/dataproc/jobs/train/2/result.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/data/dataproc/jobs/train/2/result.gz -------------------------------------------------------------------------------- /tests/system/data/dataproc/jobs/train/dimsum/1/result.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/data/dataproc/jobs/train/dimsum/1/result.gz -------------------------------------------------------------------------------- /tests/system/data/dataproc/jobs/train/dimsum/2/result.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/data/dataproc/jobs/train/dimsum/2/result.gz -------------------------------------------------------------------------------- /tests/system/data/gae/test_query_customers.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/data/gae/test_query_customers.sql -------------------------------------------------------------------------------- /tests/system/dataflow/test_build_datastore_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/dataflow/test_build_datastore_template.py -------------------------------------------------------------------------------- /tests/system/dataproc/jobs/base_fixture.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/dataproc/jobs/base_fixture.py -------------------------------------------------------------------------------- /tests/system/dataproc/jobs/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/dataproc/jobs/conftest.py -------------------------------------------------------------------------------- /tests/system/dataproc/jobs/test_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/dataproc/jobs/test_base.py -------------------------------------------------------------------------------- /tests/system/dataproc/jobs/test_dimsum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/dataproc/jobs/test_dimsum.py -------------------------------------------------------------------------------- /tests/system/gae/test_queries.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/system/gae/test_queries.py -------------------------------------------------------------------------------- /tests/unit/data/gae/test_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/data/gae/test_config.json -------------------------------------------------------------------------------- /tests/unit/data/gae/test_query.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/data/gae/test_query.sql -------------------------------------------------------------------------------- /tests/unit/data/gae/test_requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/data/gae/test_requirements.txt -------------------------------------------------------------------------------- /tests/unit/gae/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/base.py -------------------------------------------------------------------------------- /tests/unit/gae/connector/test_bigquery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/connector/test_bigquery.py -------------------------------------------------------------------------------- /tests/unit/gae/connector/test_dataflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/connector/test_dataflow.py -------------------------------------------------------------------------------- /tests/unit/gae/connector/test_dataproc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/connector/test_dataproc.py -------------------------------------------------------------------------------- /tests/unit/gae/connector/test_datastore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/connector/test_datastore.py -------------------------------------------------------------------------------- /tests/unit/gae/connector/test_gcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/connector/test_gcp.py -------------------------------------------------------------------------------- /tests/unit/gae/connector/test_storage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/connector/test_storage.py -------------------------------------------------------------------------------- /tests/unit/gae/test_base_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/test_base_utils.py -------------------------------------------------------------------------------- /tests/unit/gae/test_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/test_factory.py -------------------------------------------------------------------------------- /tests/unit/gae/test_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/test_main.py -------------------------------------------------------------------------------- /tests/unit/gae/test_recommender.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/test_recommender.py -------------------------------------------------------------------------------- /tests/unit/gae/test_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/test_scheduler.py -------------------------------------------------------------------------------- /tests/unit/gae/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/test_utils.py -------------------------------------------------------------------------------- /tests/unit/gae/test_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/example_dataproc_twitter/HEAD/tests/unit/gae/test_worker.py --------------------------------------------------------------------------------