├── .coveragerc ├── .github └── ISSUE_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── config_example.json ├── data └── .gitignore ├── docker-compose.yaml ├── docs ├── About-Projects.md ├── About-Tasks.md ├── Architecture.md ├── Command-Line.md ├── Deployment-demo.pyspider.org.md ├── Deployment.md ├── Frequently-Asked-Questions.md ├── Quickstart.md ├── Running-pyspider-with-Docker.md ├── Script-Environment.md ├── Working-with-Results.md ├── apis │ ├── @catch_status_code_error.md │ ├── @every.md │ ├── Response.md │ ├── index.md │ ├── self.crawl.md │ └── self.send_message.md ├── conf.py ├── imgs │ ├── creating_a_project.png │ ├── css_selector_helper.png │ ├── demo.png │ ├── developer-tools-network-filter.png │ ├── developer-tools-network.png │ ├── index_page.png │ ├── inspect_element.png │ ├── pyspider-arch.png │ ├── request-headers.png │ ├── run_one_step.png │ ├── search-for-request.png │ ├── tutorial_imdb_front.png │ └── twitch.png ├── index.md └── tutorial │ ├── AJAX-and-more-HTTP.md │ ├── HTML-and-CSS-Selector.md │ ├── Render-with-PhantomJS.md │ └── index.md ├── mkdocs.yml ├── pyspider ├── __init__.py ├── database │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── projectdb.py │ │ ├── resultdb.py │ │ └── taskdb.py │ ├── basedb.py │ ├── couchdb │ │ ├── __init__.py │ │ ├── couchdbbase.py │ │ ├── projectdb.py │ │ ├── resultdb.py │ │ └── taskdb.py │ ├── elasticsearch │ │ ├── __init__.py │ │ ├── projectdb.py │ │ ├── resultdb.py │ │ └── taskdb.py │ ├── local │ │ ├── __init__.py │ │ └── projectdb.py │ ├── mongodb │ │ ├── __init__.py │ │ ├── mongodbbase.py │ │ ├── projectdb.py │ │ ├── resultdb.py │ │ └── taskdb.py │ ├── mysql │ │ ├── __init__.py │ │ ├── mysqlbase.py │ │ ├── projectdb.py │ │ ├── resultdb.py │ │ └── taskdb.py │ ├── redis │ │ ├── __init__.py │ │ └── taskdb.py │ ├── sqlalchemy │ │ ├── __init__.py │ │ ├── projectdb.py │ │ ├── resultdb.py │ │ ├── sqlalchemybase.py │ │ └── taskdb.py │ └── sqlite │ │ ├── __init__.py │ │ ├── projectdb.py │ │ ├── resultdb.py │ │ ├── sqlitebase.py │ │ └── taskdb.py ├── fetcher │ ├── __init__.py │ ├── cookie_utils.py │ ├── phantomjs_fetcher.js │ ├── puppeteer_fetcher.js │ ├── splash_fetcher.lua │ └── tornado_fetcher.py ├── libs │ ├── ListIO.py │ ├── __init__.py │ ├── base_handler.py │ ├── bench.py │ ├── counter.py │ ├── dataurl.py │ ├── log.py │ ├── multiprocessing_queue.py │ ├── pprint.py │ ├── response.py │ ├── result_dump.py │ ├── sample_handler.py │ ├── url.py │ ├── utils.py │ └── wsgi_xmlrpc.py ├── logging.conf ├── message_queue │ ├── __init__.py │ ├── kombu_queue.py │ ├── rabbitmq.py │ └── redis_queue.py ├── processor │ ├── __init__.py │ ├── processor.py │ └── project_module.py ├── result │ ├── __init__.py │ └── result_worker.py ├── run.py ├── scheduler │ ├── __init__.py │ ├── scheduler.py │ ├── task_queue.py │ └── token_bucket.py └── webui │ ├── __init__.py │ ├── app.py │ ├── bench_test.py │ ├── debug.py │ ├── index.py │ ├── login.py │ ├── result.py │ ├── static │ ├── .babelrc │ ├── css_selector_helper.min.js │ ├── debug.min.css │ ├── debug.min.js │ ├── index.min.css │ ├── index.min.js │ ├── package.json │ ├── result.min.css │ ├── result.min.js │ ├── src │ │ ├── css_selector_helper.js │ │ ├── debug.js │ │ ├── debug.less │ │ ├── index.js │ │ ├── index.less │ │ ├── result.less │ │ ├── splitter.js │ │ ├── task.less │ │ ├── tasks.less │ │ └── variable.less │ ├── task.min.css │ ├── task.min.js │ ├── tasks.min.css │ ├── tasks.min.js │ └── webpack.config.js │ ├── task.py │ ├── templates │ ├── debug.html │ ├── index.html │ ├── result.html │ ├── task.html │ └── tasks.html │ └── webdav.py ├── requirements.txt ├── run.py ├── setup.py ├── tests ├── __init__.py ├── data_fetcher_processor_handler.py ├── data_handler.py ├── data_sample_handler.py ├── data_test_webpage.py ├── test_base_handler.py ├── test_bench.py ├── test_counter.py ├── test_database.py ├── test_fetcher.py ├── test_fetcher_processor.py ├── test_message_queue.py ├── test_processor.py ├── test_response.py ├── test_result_dump.py ├── test_result_worker.py ├── test_run.py ├── test_scheduler.py ├── test_task_queue.py ├── test_utils.py ├── test_webdav.py ├── test_webui.py └── test_xmlrpc.py ├── tools └── migrate.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/.coveragerc -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/.github/ISSUE_TEMPLATE.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/.gitignore -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/.travis.yml -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/README.md -------------------------------------------------------------------------------- /config_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/config_example.json -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | *.db 2 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docker-compose.yaml -------------------------------------------------------------------------------- /docs/About-Projects.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/About-Projects.md -------------------------------------------------------------------------------- /docs/About-Tasks.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/About-Tasks.md -------------------------------------------------------------------------------- /docs/Architecture.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/Architecture.md -------------------------------------------------------------------------------- /docs/Command-Line.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/Command-Line.md -------------------------------------------------------------------------------- /docs/Deployment-demo.pyspider.org.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/Deployment-demo.pyspider.org.md -------------------------------------------------------------------------------- /docs/Deployment.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/Deployment.md -------------------------------------------------------------------------------- /docs/Frequently-Asked-Questions.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/Frequently-Asked-Questions.md -------------------------------------------------------------------------------- /docs/Quickstart.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/Quickstart.md -------------------------------------------------------------------------------- /docs/Running-pyspider-with-Docker.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/Running-pyspider-with-Docker.md -------------------------------------------------------------------------------- /docs/Script-Environment.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/Script-Environment.md -------------------------------------------------------------------------------- /docs/Working-with-Results.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/Working-with-Results.md -------------------------------------------------------------------------------- /docs/apis/@catch_status_code_error.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/apis/@catch_status_code_error.md -------------------------------------------------------------------------------- /docs/apis/@every.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/apis/@every.md -------------------------------------------------------------------------------- /docs/apis/Response.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/apis/Response.md -------------------------------------------------------------------------------- /docs/apis/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/apis/index.md -------------------------------------------------------------------------------- /docs/apis/self.crawl.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/apis/self.crawl.md -------------------------------------------------------------------------------- /docs/apis/self.send_message.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/apis/self.send_message.md -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/imgs/creating_a_project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/creating_a_project.png -------------------------------------------------------------------------------- /docs/imgs/css_selector_helper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/css_selector_helper.png -------------------------------------------------------------------------------- /docs/imgs/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/demo.png -------------------------------------------------------------------------------- /docs/imgs/developer-tools-network-filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/developer-tools-network-filter.png -------------------------------------------------------------------------------- /docs/imgs/developer-tools-network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/developer-tools-network.png -------------------------------------------------------------------------------- /docs/imgs/index_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/index_page.png -------------------------------------------------------------------------------- /docs/imgs/inspect_element.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/inspect_element.png -------------------------------------------------------------------------------- /docs/imgs/pyspider-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/pyspider-arch.png -------------------------------------------------------------------------------- /docs/imgs/request-headers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/request-headers.png -------------------------------------------------------------------------------- /docs/imgs/run_one_step.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/run_one_step.png -------------------------------------------------------------------------------- /docs/imgs/search-for-request.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/search-for-request.png -------------------------------------------------------------------------------- /docs/imgs/tutorial_imdb_front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/tutorial_imdb_front.png -------------------------------------------------------------------------------- /docs/imgs/twitch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/imgs/twitch.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/index.md -------------------------------------------------------------------------------- /docs/tutorial/AJAX-and-more-HTTP.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/tutorial/AJAX-and-more-HTTP.md -------------------------------------------------------------------------------- /docs/tutorial/HTML-and-CSS-Selector.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/tutorial/HTML-and-CSS-Selector.md -------------------------------------------------------------------------------- /docs/tutorial/Render-with-PhantomJS.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/tutorial/Render-with-PhantomJS.md -------------------------------------------------------------------------------- /docs/tutorial/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/docs/tutorial/index.md -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/mkdocs.yml -------------------------------------------------------------------------------- /pyspider/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/__init__.py -------------------------------------------------------------------------------- /pyspider/database/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/__init__.py -------------------------------------------------------------------------------- /pyspider/database/base/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyspider/database/base/projectdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/base/projectdb.py -------------------------------------------------------------------------------- /pyspider/database/base/resultdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/base/resultdb.py -------------------------------------------------------------------------------- /pyspider/database/base/taskdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/base/taskdb.py -------------------------------------------------------------------------------- /pyspider/database/basedb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/basedb.py -------------------------------------------------------------------------------- /pyspider/database/couchdb/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyspider/database/couchdb/couchdbbase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/couchdb/couchdbbase.py -------------------------------------------------------------------------------- /pyspider/database/couchdb/projectdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/couchdb/projectdb.py -------------------------------------------------------------------------------- /pyspider/database/couchdb/resultdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/couchdb/resultdb.py -------------------------------------------------------------------------------- /pyspider/database/couchdb/taskdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/couchdb/taskdb.py -------------------------------------------------------------------------------- /pyspider/database/elasticsearch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/elasticsearch/__init__.py -------------------------------------------------------------------------------- /pyspider/database/elasticsearch/projectdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/elasticsearch/projectdb.py -------------------------------------------------------------------------------- /pyspider/database/elasticsearch/resultdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/elasticsearch/resultdb.py -------------------------------------------------------------------------------- /pyspider/database/elasticsearch/taskdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/elasticsearch/taskdb.py -------------------------------------------------------------------------------- /pyspider/database/local/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/local/__init__.py -------------------------------------------------------------------------------- /pyspider/database/local/projectdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/local/projectdb.py -------------------------------------------------------------------------------- /pyspider/database/mongodb/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyspider/database/mongodb/mongodbbase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/mongodb/mongodbbase.py -------------------------------------------------------------------------------- /pyspider/database/mongodb/projectdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/mongodb/projectdb.py -------------------------------------------------------------------------------- /pyspider/database/mongodb/resultdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/mongodb/resultdb.py -------------------------------------------------------------------------------- /pyspider/database/mongodb/taskdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/mongodb/taskdb.py -------------------------------------------------------------------------------- /pyspider/database/mysql/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/mysql/__init__.py -------------------------------------------------------------------------------- /pyspider/database/mysql/mysqlbase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/mysql/mysqlbase.py -------------------------------------------------------------------------------- /pyspider/database/mysql/projectdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/mysql/projectdb.py -------------------------------------------------------------------------------- /pyspider/database/mysql/resultdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/mysql/resultdb.py -------------------------------------------------------------------------------- /pyspider/database/mysql/taskdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/mysql/taskdb.py -------------------------------------------------------------------------------- /pyspider/database/redis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/redis/__init__.py -------------------------------------------------------------------------------- /pyspider/database/redis/taskdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/redis/taskdb.py -------------------------------------------------------------------------------- /pyspider/database/sqlalchemy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/sqlalchemy/__init__.py -------------------------------------------------------------------------------- /pyspider/database/sqlalchemy/projectdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/sqlalchemy/projectdb.py -------------------------------------------------------------------------------- /pyspider/database/sqlalchemy/resultdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/sqlalchemy/resultdb.py -------------------------------------------------------------------------------- /pyspider/database/sqlalchemy/sqlalchemybase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/sqlalchemy/sqlalchemybase.py -------------------------------------------------------------------------------- /pyspider/database/sqlalchemy/taskdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/sqlalchemy/taskdb.py -------------------------------------------------------------------------------- /pyspider/database/sqlite/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyspider/database/sqlite/projectdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/sqlite/projectdb.py -------------------------------------------------------------------------------- /pyspider/database/sqlite/resultdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/sqlite/resultdb.py -------------------------------------------------------------------------------- /pyspider/database/sqlite/sqlitebase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/sqlite/sqlitebase.py -------------------------------------------------------------------------------- /pyspider/database/sqlite/taskdb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/database/sqlite/taskdb.py -------------------------------------------------------------------------------- /pyspider/fetcher/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/fetcher/__init__.py -------------------------------------------------------------------------------- /pyspider/fetcher/cookie_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/fetcher/cookie_utils.py -------------------------------------------------------------------------------- /pyspider/fetcher/phantomjs_fetcher.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/fetcher/phantomjs_fetcher.js -------------------------------------------------------------------------------- /pyspider/fetcher/puppeteer_fetcher.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/fetcher/puppeteer_fetcher.js -------------------------------------------------------------------------------- /pyspider/fetcher/splash_fetcher.lua: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/fetcher/splash_fetcher.lua -------------------------------------------------------------------------------- /pyspider/fetcher/tornado_fetcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/fetcher/tornado_fetcher.py -------------------------------------------------------------------------------- /pyspider/libs/ListIO.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/ListIO.py -------------------------------------------------------------------------------- /pyspider/libs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyspider/libs/base_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/base_handler.py -------------------------------------------------------------------------------- /pyspider/libs/bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/bench.py -------------------------------------------------------------------------------- /pyspider/libs/counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/counter.py -------------------------------------------------------------------------------- /pyspider/libs/dataurl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/dataurl.py -------------------------------------------------------------------------------- /pyspider/libs/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/log.py -------------------------------------------------------------------------------- /pyspider/libs/multiprocessing_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/multiprocessing_queue.py -------------------------------------------------------------------------------- /pyspider/libs/pprint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/pprint.py -------------------------------------------------------------------------------- /pyspider/libs/response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/response.py -------------------------------------------------------------------------------- /pyspider/libs/result_dump.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/result_dump.py -------------------------------------------------------------------------------- /pyspider/libs/sample_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/sample_handler.py -------------------------------------------------------------------------------- /pyspider/libs/url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/url.py -------------------------------------------------------------------------------- /pyspider/libs/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/utils.py -------------------------------------------------------------------------------- /pyspider/libs/wsgi_xmlrpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/libs/wsgi_xmlrpc.py -------------------------------------------------------------------------------- /pyspider/logging.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/logging.conf -------------------------------------------------------------------------------- /pyspider/message_queue/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/message_queue/__init__.py -------------------------------------------------------------------------------- /pyspider/message_queue/kombu_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/message_queue/kombu_queue.py -------------------------------------------------------------------------------- /pyspider/message_queue/rabbitmq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/message_queue/rabbitmq.py -------------------------------------------------------------------------------- /pyspider/message_queue/redis_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/message_queue/redis_queue.py -------------------------------------------------------------------------------- /pyspider/processor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/processor/__init__.py -------------------------------------------------------------------------------- /pyspider/processor/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/processor/processor.py -------------------------------------------------------------------------------- /pyspider/processor/project_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/processor/project_module.py -------------------------------------------------------------------------------- /pyspider/result/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/result/__init__.py -------------------------------------------------------------------------------- /pyspider/result/result_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/result/result_worker.py -------------------------------------------------------------------------------- /pyspider/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/run.py -------------------------------------------------------------------------------- /pyspider/scheduler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/scheduler/__init__.py -------------------------------------------------------------------------------- /pyspider/scheduler/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/scheduler/scheduler.py -------------------------------------------------------------------------------- /pyspider/scheduler/task_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/scheduler/task_queue.py -------------------------------------------------------------------------------- /pyspider/scheduler/token_bucket.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/scheduler/token_bucket.py -------------------------------------------------------------------------------- /pyspider/webui/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/__init__.py -------------------------------------------------------------------------------- /pyspider/webui/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/app.py -------------------------------------------------------------------------------- /pyspider/webui/bench_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/bench_test.py -------------------------------------------------------------------------------- /pyspider/webui/debug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/debug.py -------------------------------------------------------------------------------- /pyspider/webui/index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/index.py -------------------------------------------------------------------------------- /pyspider/webui/login.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/login.py -------------------------------------------------------------------------------- /pyspider/webui/result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/result.py -------------------------------------------------------------------------------- /pyspider/webui/static/.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["es2015"] 3 | } 4 | -------------------------------------------------------------------------------- /pyspider/webui/static/css_selector_helper.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/css_selector_helper.min.js -------------------------------------------------------------------------------- /pyspider/webui/static/debug.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/debug.min.css -------------------------------------------------------------------------------- /pyspider/webui/static/debug.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/debug.min.js -------------------------------------------------------------------------------- /pyspider/webui/static/index.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/index.min.css -------------------------------------------------------------------------------- /pyspider/webui/static/index.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/index.min.js -------------------------------------------------------------------------------- /pyspider/webui/static/package.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/package.json -------------------------------------------------------------------------------- /pyspider/webui/static/result.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/result.min.css -------------------------------------------------------------------------------- /pyspider/webui/static/result.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/result.min.js -------------------------------------------------------------------------------- /pyspider/webui/static/src/css_selector_helper.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/src/css_selector_helper.js -------------------------------------------------------------------------------- /pyspider/webui/static/src/debug.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/src/debug.js -------------------------------------------------------------------------------- /pyspider/webui/static/src/debug.less: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/src/debug.less -------------------------------------------------------------------------------- /pyspider/webui/static/src/index.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/src/index.js -------------------------------------------------------------------------------- /pyspider/webui/static/src/index.less: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/src/index.less -------------------------------------------------------------------------------- /pyspider/webui/static/src/result.less: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/src/result.less -------------------------------------------------------------------------------- /pyspider/webui/static/src/splitter.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/src/splitter.js -------------------------------------------------------------------------------- /pyspider/webui/static/src/task.less: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/src/task.less -------------------------------------------------------------------------------- /pyspider/webui/static/src/tasks.less: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/src/tasks.less -------------------------------------------------------------------------------- /pyspider/webui/static/src/variable.less: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/src/variable.less -------------------------------------------------------------------------------- /pyspider/webui/static/task.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/task.min.css -------------------------------------------------------------------------------- /pyspider/webui/static/task.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/task.min.js -------------------------------------------------------------------------------- /pyspider/webui/static/tasks.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/tasks.min.css -------------------------------------------------------------------------------- /pyspider/webui/static/tasks.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/tasks.min.js -------------------------------------------------------------------------------- /pyspider/webui/static/webpack.config.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/static/webpack.config.js -------------------------------------------------------------------------------- /pyspider/webui/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/task.py -------------------------------------------------------------------------------- /pyspider/webui/templates/debug.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/templates/debug.html -------------------------------------------------------------------------------- /pyspider/webui/templates/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/templates/index.html -------------------------------------------------------------------------------- /pyspider/webui/templates/result.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/templates/result.html -------------------------------------------------------------------------------- /pyspider/webui/templates/task.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/templates/task.html -------------------------------------------------------------------------------- /pyspider/webui/templates/tasks.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/templates/tasks.html -------------------------------------------------------------------------------- /pyspider/webui/webdav.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/pyspider/webui/webdav.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/requirements.txt -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/run.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/__init__.py -------------------------------------------------------------------------------- /tests/data_fetcher_processor_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/data_fetcher_processor_handler.py -------------------------------------------------------------------------------- /tests/data_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/data_handler.py -------------------------------------------------------------------------------- /tests/data_sample_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/data_sample_handler.py -------------------------------------------------------------------------------- /tests/data_test_webpage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/data_test_webpage.py -------------------------------------------------------------------------------- /tests/test_base_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_base_handler.py -------------------------------------------------------------------------------- /tests/test_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_bench.py -------------------------------------------------------------------------------- /tests/test_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_counter.py -------------------------------------------------------------------------------- /tests/test_database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_database.py -------------------------------------------------------------------------------- /tests/test_fetcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_fetcher.py -------------------------------------------------------------------------------- /tests/test_fetcher_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_fetcher_processor.py -------------------------------------------------------------------------------- /tests/test_message_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_message_queue.py -------------------------------------------------------------------------------- /tests/test_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_processor.py -------------------------------------------------------------------------------- /tests/test_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_response.py -------------------------------------------------------------------------------- /tests/test_result_dump.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_result_dump.py -------------------------------------------------------------------------------- /tests/test_result_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_result_worker.py -------------------------------------------------------------------------------- /tests/test_run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_run.py -------------------------------------------------------------------------------- /tests/test_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_scheduler.py -------------------------------------------------------------------------------- /tests/test_task_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_task_queue.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_utils.py -------------------------------------------------------------------------------- /tests/test_webdav.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_webdav.py -------------------------------------------------------------------------------- /tests/test_webui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_webui.py -------------------------------------------------------------------------------- /tests/test_xmlrpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tests/test_xmlrpc.py -------------------------------------------------------------------------------- /tools/migrate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tools/migrate.py -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binux/pyspider/HEAD/tox.ini --------------------------------------------------------------------------------