├── .gitattributes ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE.md ├── _config.yml ├── airflow └── dags │ ├── basic_hourly.py │ └── fb_docker.py ├── aws_s3.py ├── codefresh.yml ├── data ├── Examining data using Spark.ipynb ├── Streaming Data using Kafka + Spark Streaming.ipynb ├── Working with TensorFlow.ipynb ├── files │ ├── fb_scheam.avsc │ ├── id.txt │ ├── lda_test.txt │ └── text.txt └── rivers.txt ├── fb_comments_page.py ├── fb_posts.py ├── fb_posts_realtime.py ├── fb_scrapper.py ├── get_posts.py ├── pg_db.py ├── readme.md ├── requirements.txt ├── run_es.py ├── save_pg.py ├── tests ├── data │ └── files │ │ └── fb_scheam.avsc ├── fb_posts_realtime_test.py ├── fb_posts_test.py ├── fb_scheam.avsc ├── integrations_tests.py ├── kafka_producer_test.py ├── kafka_test.py ├── save_times.dat ├── test.dat └── text.txt └── threaded_proc.py /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/.gitignore -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/.travis.yml -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/LICENSE.md -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/_config.yml -------------------------------------------------------------------------------- /airflow/dags/basic_hourly.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/airflow/dags/basic_hourly.py -------------------------------------------------------------------------------- /airflow/dags/fb_docker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/airflow/dags/fb_docker.py -------------------------------------------------------------------------------- /aws_s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/aws_s3.py -------------------------------------------------------------------------------- /codefresh.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/codefresh.yml -------------------------------------------------------------------------------- /data/Examining data using Spark.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/data/Examining data using Spark.ipynb -------------------------------------------------------------------------------- /data/Streaming Data using Kafka + Spark Streaming.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/data/Streaming Data using Kafka + Spark Streaming.ipynb -------------------------------------------------------------------------------- /data/Working with TensorFlow.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/data/Working with TensorFlow.ipynb -------------------------------------------------------------------------------- /data/files/fb_scheam.avsc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/data/files/fb_scheam.avsc -------------------------------------------------------------------------------- /data/files/id.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/data/files/id.txt -------------------------------------------------------------------------------- /data/files/lda_test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/data/files/lda_test.txt -------------------------------------------------------------------------------- /data/files/text.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/data/files/text.txt -------------------------------------------------------------------------------- /data/rivers.txt: -------------------------------------------------------------------------------- 1 | tellico 2 | ocoee -------------------------------------------------------------------------------- /fb_comments_page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/fb_comments_page.py -------------------------------------------------------------------------------- /fb_posts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/fb_posts.py -------------------------------------------------------------------------------- /fb_posts_realtime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/fb_posts_realtime.py -------------------------------------------------------------------------------- /fb_scrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/fb_scrapper.py -------------------------------------------------------------------------------- /get_posts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/get_posts.py -------------------------------------------------------------------------------- /pg_db.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/pg_db.py -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/readme.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/requirements.txt -------------------------------------------------------------------------------- /run_es.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/run_es.py -------------------------------------------------------------------------------- /save_pg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/save_pg.py -------------------------------------------------------------------------------- /tests/data/files/fb_scheam.avsc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/tests/data/files/fb_scheam.avsc -------------------------------------------------------------------------------- /tests/fb_posts_realtime_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/tests/fb_posts_realtime_test.py -------------------------------------------------------------------------------- /tests/fb_posts_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/tests/fb_posts_test.py -------------------------------------------------------------------------------- /tests/fb_scheam.avsc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/tests/fb_scheam.avsc -------------------------------------------------------------------------------- /tests/integrations_tests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/tests/integrations_tests.py -------------------------------------------------------------------------------- /tests/kafka_producer_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/tests/kafka_producer_test.py -------------------------------------------------------------------------------- /tests/kafka_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/tests/kafka_test.py -------------------------------------------------------------------------------- /tests/save_times.dat: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/tests/test.dat -------------------------------------------------------------------------------- /tests/text.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/tests/text.txt -------------------------------------------------------------------------------- /threaded_proc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/isaacmg/fb_scraper/HEAD/threaded_proc.py --------------------------------------------------------------------------------