├── .github └── workflows │ ├── publish_docker_image.yml │ └── pythonapp.yml ├── .gitignore ├── .gitmodules ├── Dockerfile ├── LICENSE ├── README.md ├── crawler ├── __init__.py ├── crawler │ ├── __init__.py │ ├── items │ │ ├── __init__.py │ │ └── news.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── mongodb.py │ │ └── news_details_extractor.py │ ├── runner │ │ ├── __init__.py │ │ └── periodic_runner_all_spider.py │ ├── settings.py │ ├── spiders │ │ ├── __init__.py │ │ ├── bdnews24.py │ │ ├── prothomalo.py │ │ └── thedailystar.py │ ├── test │ │ ├── __init__.py │ │ └── test_url_processor.py │ └── utils │ │ ├── __init__.py │ │ └── url_processor.py ├── requirements.txt └── scrapy.cfg ├── docker-compose.yml └── mongodb_data └── .gitkeep /.github/workflows/publish_docker_image.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/.github/workflows/publish_docker_image.yml -------------------------------------------------------------------------------- /.github/workflows/pythonapp.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/.github/workflows/pythonapp.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/.gitmodules -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/README.md -------------------------------------------------------------------------------- /crawler/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/crawler/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/crawler/items/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/crawler/items/news.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/crawler/items/news.py -------------------------------------------------------------------------------- /crawler/crawler/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/crawler/pipelines/mongodb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/crawler/pipelines/mongodb.py -------------------------------------------------------------------------------- /crawler/crawler/pipelines/news_details_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/crawler/pipelines/news_details_extractor.py -------------------------------------------------------------------------------- /crawler/crawler/runner/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/crawler/runner/periodic_runner_all_spider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/crawler/runner/periodic_runner_all_spider.py -------------------------------------------------------------------------------- /crawler/crawler/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/crawler/settings.py -------------------------------------------------------------------------------- /crawler/crawler/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/crawler/spiders/__init__.py -------------------------------------------------------------------------------- /crawler/crawler/spiders/bdnews24.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/crawler/spiders/bdnews24.py -------------------------------------------------------------------------------- /crawler/crawler/spiders/prothomalo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/crawler/spiders/prothomalo.py -------------------------------------------------------------------------------- /crawler/crawler/spiders/thedailystar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/crawler/spiders/thedailystar.py -------------------------------------------------------------------------------- /crawler/crawler/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/crawler/test/test_url_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/crawler/test/test_url_processor.py -------------------------------------------------------------------------------- /crawler/crawler/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/crawler/utils/url_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/crawler/utils/url_processor.py -------------------------------------------------------------------------------- /crawler/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/requirements.txt -------------------------------------------------------------------------------- /crawler/scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/crawler/scrapy.cfg -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafatbiin/newspaper-crawler/HEAD/docker-compose.yml -------------------------------------------------------------------------------- /mongodb_data/.gitkeep: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------