├── .gitignore ├── README.md ├── clean_temp.sh ├── crawl_login_page.py ├── crawl_multiprocessing.py ├── crawl_single_page.py ├── install.sh ├── lib ├── .keyword.py.swn ├── __init__.py ├── adblock.xpi ├── browser_crawl.py ├── config.py └── utils.py ├── profiles └── test_profile │ └── .gitignore ├── requirements.txt ├── resources ├── firefox_driver │ ├── raspberry3 │ │ ├── crawl.py │ │ ├── geckodriver │ │ └── note.md │ └── ubuntu │ │ ├── crawl.py │ │ └── geckodriver └── firefox_extension │ └── adblock.xpi └── setup_browser.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/README.md -------------------------------------------------------------------------------- /clean_temp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/clean_temp.sh -------------------------------------------------------------------------------- /crawl_login_page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/crawl_login_page.py -------------------------------------------------------------------------------- /crawl_multiprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/crawl_multiprocessing.py -------------------------------------------------------------------------------- /crawl_single_page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/crawl_single_page.py -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/install.sh -------------------------------------------------------------------------------- /lib/.keyword.py.swn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/lib/.keyword.py.swn -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/lib/__init__.py -------------------------------------------------------------------------------- /lib/adblock.xpi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/lib/adblock.xpi -------------------------------------------------------------------------------- /lib/browser_crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/lib/browser_crawl.py -------------------------------------------------------------------------------- /lib/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/lib/config.py -------------------------------------------------------------------------------- /lib/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/lib/utils.py -------------------------------------------------------------------------------- /profiles/test_profile/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/requirements.txt -------------------------------------------------------------------------------- /resources/firefox_driver/raspberry3/crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/resources/firefox_driver/raspberry3/crawl.py -------------------------------------------------------------------------------- /resources/firefox_driver/raspberry3/geckodriver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/resources/firefox_driver/raspberry3/geckodriver -------------------------------------------------------------------------------- /resources/firefox_driver/raspberry3/note.md: -------------------------------------------------------------------------------- 1 | Geckodriver 0.17 2 | Iceweasel (Firefox 52) 3 | Selenium 1.14 4 | 5 | -------------------------------------------------------------------------------- /resources/firefox_driver/ubuntu/crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/resources/firefox_driver/ubuntu/crawl.py -------------------------------------------------------------------------------- /resources/firefox_driver/ubuntu/geckodriver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/resources/firefox_driver/ubuntu/geckodriver -------------------------------------------------------------------------------- /resources/firefox_extension/adblock.xpi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/resources/firefox_extension/adblock.xpi -------------------------------------------------------------------------------- /setup_browser.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hailoc12/docbao_crawler/HEAD/setup_browser.sh --------------------------------------------------------------------------------