├── .gitattributes ├── .gitignore ├── .idea ├── .name ├── code.iml ├── encodings.xml ├── misc.xml ├── modules.xml ├── vcs.xml └── workspace.xml ├── Chapter01 ├── 01_events_with_requests.py ├── 02_events_with_urlib3.py ├── 03_events_with_scrapy.py ├── 04_events_with_selenium.py ├── 05_events_with_phantomjs.py ├── 06_gets_with_twisted.py └── geckodriver ├── Chapter02 ├── parse_html_w_lxml.py ├── parse_html_w_lxml_xpath_css.py ├── parse_xml_w_bs4.py ├── parse_xml_w_lxml.py ├── parsing_lxml_html.py ├── sample.html ├── sample.xml └── test ├── Chapter03 ├── __init__.py ├── const.py ├── convert_to_json.py ├── create_csv.py ├── create_df_planets.py ├── create_messages.py ├── get_planet_data.py ├── process_messages.py ├── read_csv_from_web.py ├── read_csv_http_pandas.py ├── read_csv_requests.py ├── read_csv_via_pandas.py ├── read_from_elasticsearch.py ├── read_from_mysql.py ├── read_from_postgresql.py ├── read_http_json_requests.py ├── read_json_http_pandas.py ├── s3.py ├── save_as_json.py ├── save_csv_pandas.py ├── save_in_postgresql.py ├── save_json_pandas.py ├── store_in_mysql.py └── write_to_elasticsearch.py ├── Chapter04 ├── 04_01_download_image.py ├── 04_02_parse_url.py ├── 04_03_determine_content_type_from_response.py ├── 04_04_determine_file_extension_from_contenttype.py ├── 04_05_save_image_in_file.py ├── 04_06_save_images_in_s3.py ├── 04_07_create_image_thumbnail.py ├── 04_08_create_website_screenshot.py ├── 04_09_screenshotapi.py ├── 04_10_perform_ocr.py ├── 04_11_create_video_thumbnail.py ├── 04_12_rip_mp3_from_mp4.py ├── BigBuckBunny.mp4 ├── __pycache__ │ ├── const.cpython-35.pyc │ └── const.cpython-36.pyc ├── const.py ├── eclipse_thumbnail.png ├── movie_audio.mp3 ├── textinimage.png └── thumbnail.jpg ├── Chapter05 ├── 05_01_Robots.py ├── 05_01_Robots_scrapy.py ├── 05_02_sitemap.py ├── 05_02_sitemap_scrapy.py ├── 05_03_scrape_with_delay.py ├── 05_04_user_agent_requests.py ├── robots.txt └── sitemap.py ├── Chapter06 ├── 01_scrapy_retry.py ├── 02_scrapy_redirects.py ├── 03_scrapy_pagination.py ├── 04_press_and_wait.py ├── 05_allowed_domains.py ├── 06_scrapy_continuous.py ├── 07_scrape_continuous_twitter.py ├── 08_limit_depth.py ├── 09_limit_length.py ├── 10_forms_auth.py ├── 11_file_cache.py └── 12_parse_differently_based_on_rules.py ├── Chapter07 ├── 01_sentence_splitting1.py ├── 02_tokenize.py ├── 03_stemming.py ├── 04_lemmatization.py ├── 05_stopwords.py ├── 06_freq_dist.py ├── 07_rare_words.py ├── 07_rare_words2.py ├── 08_short_words.py ├── 09_remove_punctuation.py ├── 10-ngrams1.py ├── 10-reconstruct-2grams.py ├── 100_freqdist.py ├── 11_ngrams.py ├── 12_scrape_job_stackoverflow.py ├── 13_clean_jd.py ├── buildgrams.py ├── job-snippet.txt ├── punctuation.py ├── sentence1.txt ├── spacex-job-listing.html ├── tech2grams.py └── wotw.txt ├── Chapter08 ├── 01_geocode_address.py ├── 02_geocode_wikipedia_edits.py ├── 03_visualize_wikipedia_edits.py ├── 04_so_word_cloud.py ├── 05_wikipedia_scrapy.py ├── 06_visualize_wikipedia_links.py ├── 07_degrees_of_separation.py ├── buildgrams.py ├── geo_ips.json ├── punctuation.py ├── spacex-job-listing.html └── tech2grams.py ├── Chapter09 ├── 01 │ └── api.py ├── 02 │ ├── __pycache__ │ │ └── hello_microservice.cpython-36.pyc │ └── api.py ├── 03 │ ├── __pycache__ │ │ └── scraper_microservice.cpython-36.pyc │ └── api.py ├── 04 │ └── api.py └── 05 │ └── api.py ├── Chapter10 ├── 10 │ └── docker-compose.yml ├── 06 │ ├── hello_microservice.py │ └── say_hi.py ├── 07 │ ├── call_scraper_microservice.py │ └── scraper_microservice.py ├── 08 │ └── Dockerfile └── 09 │ ├── Dockerfile │ └── api.py ├── Chapter11 ├── 11 │ ├── Dockerfile-api │ ├── Dockerfile-microservice │ ├── build-api-container.sh │ ├── build-microservice-container.sh │ ├── scraper_api.py │ └── scraper_microservice.py ├── 12 │ ├── ScraperClusterKP.pem │ ├── create_cluster_complete.sh │ ├── ecsPolicy.json │ ├── nameko-config.yaml │ ├── rolePolicy.json │ └── userdata.txt ├── 13 │ ├── scratch.json │ ├── skel.json │ ├── t3.json │ ├── taskdefinition.json │ ├── taskdefinition2.json │ └── td.json ├── 03 │ └── elasticcloud_starwars.py ├── 04 │ └── search_starwars.py ├── 05 │ └── search_jobs_by_skills.py └── 06 │ ├── scraper_api.py │ └── scraper_microservice.py ├── LICENSE ├── README.md ├── modules ├── core │ ├── __init.py__ │ ├── file_blob_writer.py │ ├── i_blob_writer.py │ ├── image_thumbnail_generator.py │ ├── s3_blob_writer.py │ ├── s3_bucket_manager.py │ ├── scraper_engine.py │ ├── sitemap.py │ ├── thumbnail.jpg │ ├── website_screenshot_generator.py │ ├── website_screenshot_with_screenshotapi.py │ ├── youtube_playlist_crawler.py │ ├── youtube_playlist_videourl_extractor.py │ └── youtube_video_processor.py ├── imdb │ ├── __init__.py │ ├── models.py │ ├── relationship_crawler.py │ └── strategies.py ├── sojobs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── buildgrams.cpython-36.pyc │ │ ├── punctuation.cpython-36.pyc │ │ ├── scraping.cpython-36.pyc │ │ └── tech2grams.cpython-36.pyc │ ├── buildgrams.py │ ├── punctuation.py │ ├── scraping.py │ └── tech2grams.py ├── tmdb │ ├── __init__.py │ ├── models.py │ ├── relationship_crawler.py │ └── strategies.py ├── util │ ├── const.py │ ├── logger_factory.py │ └── urls.py └── wikipedia │ ├── __init__.py │ └── spiders.py ├── scripts └── create_mysql_planets.sh └── www ├── CrawlDepth0-1.html ├── CrawlDepth0-2.html ├── CrawlDepth0-3.html ├── img ├── earth-1024x1024.jpg ├── earth-150x150.png ├── jupiter-150x150.png ├── mars-150x150.png ├── mercury-150x150.png ├── mercury-640x640.jpg ├── neptune-150x150.png ├── pluto-150x150.png ├── saturn-150x150.png ├── uranus-150x150.png ├── venus-150x150.png └── venus-800x800.jpg ├── planets.csv ├── planets.html ├── planets.json ├── planets.min.html ├── planets_pandas.csv ├── planets_pandas.json ├── unicode.html └── urlcodes └── unicode.html /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/.gitignore -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | code -------------------------------------------------------------------------------- /.idea/code.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/.idea/code.iml -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/.idea/encodings.xml -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/.idea/misc.xml -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/.idea/modules.xml -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/.idea/vcs.xml -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/.idea/workspace.xml -------------------------------------------------------------------------------- /Chapter01/01_events_with_requests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter01/01_events_with_requests.py -------------------------------------------------------------------------------- /Chapter01/02_events_with_urlib3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter01/02_events_with_urlib3.py -------------------------------------------------------------------------------- /Chapter01/03_events_with_scrapy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter01/03_events_with_scrapy.py -------------------------------------------------------------------------------- /Chapter01/04_events_with_selenium.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter01/04_events_with_selenium.py -------------------------------------------------------------------------------- /Chapter01/05_events_with_phantomjs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter01/05_events_with_phantomjs.py -------------------------------------------------------------------------------- /Chapter01/06_gets_with_twisted.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter01/06_gets_with_twisted.py -------------------------------------------------------------------------------- /Chapter01/geckodriver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter01/geckodriver -------------------------------------------------------------------------------- /Chapter02/parse_html_w_lxml.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter02/parse_html_w_lxml.py -------------------------------------------------------------------------------- /Chapter02/parse_html_w_lxml_xpath_css.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter02/parse_html_w_lxml_xpath_css.py -------------------------------------------------------------------------------- /Chapter02/parse_xml_w_bs4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter02/parse_xml_w_bs4.py -------------------------------------------------------------------------------- /Chapter02/parse_xml_w_lxml.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter02/parse_xml_w_lxml.py -------------------------------------------------------------------------------- /Chapter02/parsing_lxml_html.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter02/parsing_lxml_html.py -------------------------------------------------------------------------------- /Chapter02/sample.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter02/sample.html -------------------------------------------------------------------------------- /Chapter02/sample.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter02/sample.xml -------------------------------------------------------------------------------- /Chapter02/test: -------------------------------------------------------------------------------- 1 | abc 2 | -------------------------------------------------------------------------------- /Chapter03/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Chapter03/const.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/const.py -------------------------------------------------------------------------------- /Chapter03/convert_to_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/convert_to_json.py -------------------------------------------------------------------------------- /Chapter03/create_csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/create_csv.py -------------------------------------------------------------------------------- /Chapter03/create_df_planets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/create_df_planets.py -------------------------------------------------------------------------------- /Chapter03/create_messages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/create_messages.py -------------------------------------------------------------------------------- /Chapter03/get_planet_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/get_planet_data.py -------------------------------------------------------------------------------- /Chapter03/process_messages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/process_messages.py -------------------------------------------------------------------------------- /Chapter03/read_csv_from_web.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/read_csv_from_web.py -------------------------------------------------------------------------------- /Chapter03/read_csv_http_pandas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/read_csv_http_pandas.py -------------------------------------------------------------------------------- /Chapter03/read_csv_requests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/read_csv_requests.py -------------------------------------------------------------------------------- /Chapter03/read_csv_via_pandas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/read_csv_via_pandas.py -------------------------------------------------------------------------------- /Chapter03/read_from_elasticsearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/read_from_elasticsearch.py -------------------------------------------------------------------------------- /Chapter03/read_from_mysql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/read_from_mysql.py -------------------------------------------------------------------------------- /Chapter03/read_from_postgresql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/read_from_postgresql.py -------------------------------------------------------------------------------- /Chapter03/read_http_json_requests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/read_http_json_requests.py -------------------------------------------------------------------------------- /Chapter03/read_json_http_pandas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/read_json_http_pandas.py -------------------------------------------------------------------------------- /Chapter03/s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/s3.py -------------------------------------------------------------------------------- /Chapter03/save_as_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/save_as_json.py -------------------------------------------------------------------------------- /Chapter03/save_csv_pandas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/save_csv_pandas.py -------------------------------------------------------------------------------- /Chapter03/save_in_postgresql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/save_in_postgresql.py -------------------------------------------------------------------------------- /Chapter03/save_json_pandas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/save_json_pandas.py -------------------------------------------------------------------------------- /Chapter03/store_in_mysql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/store_in_mysql.py -------------------------------------------------------------------------------- /Chapter03/write_to_elasticsearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter03/write_to_elasticsearch.py -------------------------------------------------------------------------------- /Chapter04/04_01_download_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_01_download_image.py -------------------------------------------------------------------------------- /Chapter04/04_02_parse_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_02_parse_url.py -------------------------------------------------------------------------------- /Chapter04/04_03_determine_content_type_from_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_03_determine_content_type_from_response.py -------------------------------------------------------------------------------- /Chapter04/04_04_determine_file_extension_from_contenttype.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_04_determine_file_extension_from_contenttype.py -------------------------------------------------------------------------------- /Chapter04/04_05_save_image_in_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_05_save_image_in_file.py -------------------------------------------------------------------------------- /Chapter04/04_06_save_images_in_s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_06_save_images_in_s3.py -------------------------------------------------------------------------------- /Chapter04/04_07_create_image_thumbnail.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_07_create_image_thumbnail.py -------------------------------------------------------------------------------- /Chapter04/04_08_create_website_screenshot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_08_create_website_screenshot.py -------------------------------------------------------------------------------- /Chapter04/04_09_screenshotapi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_09_screenshotapi.py -------------------------------------------------------------------------------- /Chapter04/04_10_perform_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_10_perform_ocr.py -------------------------------------------------------------------------------- /Chapter04/04_11_create_video_thumbnail.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_11_create_video_thumbnail.py -------------------------------------------------------------------------------- /Chapter04/04_12_rip_mp3_from_mp4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/04_12_rip_mp3_from_mp4.py -------------------------------------------------------------------------------- /Chapter04/BigBuckBunny.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/BigBuckBunny.mp4 -------------------------------------------------------------------------------- /Chapter04/__pycache__/const.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/__pycache__/const.cpython-35.pyc -------------------------------------------------------------------------------- /Chapter04/__pycache__/const.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/__pycache__/const.cpython-36.pyc -------------------------------------------------------------------------------- /Chapter04/const.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/const.py -------------------------------------------------------------------------------- /Chapter04/eclipse_thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/eclipse_thumbnail.png -------------------------------------------------------------------------------- /Chapter04/movie_audio.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/movie_audio.mp3 -------------------------------------------------------------------------------- /Chapter04/textinimage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/textinimage.png -------------------------------------------------------------------------------- /Chapter04/thumbnail.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter04/thumbnail.jpg -------------------------------------------------------------------------------- /Chapter05/05_01_Robots.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter05/05_01_Robots.py -------------------------------------------------------------------------------- /Chapter05/05_01_Robots_scrapy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter05/05_01_Robots_scrapy.py -------------------------------------------------------------------------------- /Chapter05/05_02_sitemap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter05/05_02_sitemap.py -------------------------------------------------------------------------------- /Chapter05/05_02_sitemap_scrapy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter05/05_02_sitemap_scrapy.py -------------------------------------------------------------------------------- /Chapter05/05_03_scrape_with_delay.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter05/05_03_scrape_with_delay.py -------------------------------------------------------------------------------- /Chapter05/05_04_user_agent_requests.py: -------------------------------------------------------------------------------- 1 | import requests 2 | -------------------------------------------------------------------------------- /Chapter05/robots.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter05/robots.txt -------------------------------------------------------------------------------- /Chapter05/sitemap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter05/sitemap.py -------------------------------------------------------------------------------- /Chapter06/01_scrapy_retry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/01_scrapy_retry.py -------------------------------------------------------------------------------- /Chapter06/02_scrapy_redirects.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/02_scrapy_redirects.py -------------------------------------------------------------------------------- /Chapter06/03_scrapy_pagination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/03_scrapy_pagination.py -------------------------------------------------------------------------------- /Chapter06/04_press_and_wait.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/04_press_and_wait.py -------------------------------------------------------------------------------- /Chapter06/05_allowed_domains.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/05_allowed_domains.py -------------------------------------------------------------------------------- /Chapter06/06_scrapy_continuous.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/06_scrapy_continuous.py -------------------------------------------------------------------------------- /Chapter06/07_scrape_continuous_twitter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/07_scrape_continuous_twitter.py -------------------------------------------------------------------------------- /Chapter06/08_limit_depth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/08_limit_depth.py -------------------------------------------------------------------------------- /Chapter06/09_limit_length.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/09_limit_length.py -------------------------------------------------------------------------------- /Chapter06/10_forms_auth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/10_forms_auth.py -------------------------------------------------------------------------------- /Chapter06/11_file_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/11_file_cache.py -------------------------------------------------------------------------------- /Chapter06/12_parse_differently_based_on_rules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter06/12_parse_differently_based_on_rules.py -------------------------------------------------------------------------------- /Chapter07/01_sentence_splitting1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/01_sentence_splitting1.py -------------------------------------------------------------------------------- /Chapter07/02_tokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/02_tokenize.py -------------------------------------------------------------------------------- /Chapter07/03_stemming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/03_stemming.py -------------------------------------------------------------------------------- /Chapter07/04_lemmatization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/04_lemmatization.py -------------------------------------------------------------------------------- /Chapter07/05_stopwords.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/05_stopwords.py -------------------------------------------------------------------------------- /Chapter07/06_freq_dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/06_freq_dist.py -------------------------------------------------------------------------------- /Chapter07/07_rare_words.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/07_rare_words.py -------------------------------------------------------------------------------- /Chapter07/07_rare_words2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/07_rare_words2.py -------------------------------------------------------------------------------- /Chapter07/08_short_words.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/08_short_words.py -------------------------------------------------------------------------------- /Chapter07/09_remove_punctuation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/09_remove_punctuation.py -------------------------------------------------------------------------------- /Chapter07/10-ngrams1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/10-ngrams1.py -------------------------------------------------------------------------------- /Chapter07/10-reconstruct-2grams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/10-reconstruct-2grams.py -------------------------------------------------------------------------------- /Chapter07/100_freqdist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/100_freqdist.py -------------------------------------------------------------------------------- /Chapter07/11_ngrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/11_ngrams.py -------------------------------------------------------------------------------- /Chapter07/12_scrape_job_stackoverflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/12_scrape_job_stackoverflow.py -------------------------------------------------------------------------------- /Chapter07/13_clean_jd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/13_clean_jd.py -------------------------------------------------------------------------------- /Chapter07/buildgrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/buildgrams.py -------------------------------------------------------------------------------- /Chapter07/job-snippet.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/job-snippet.txt -------------------------------------------------------------------------------- /Chapter07/punctuation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/punctuation.py -------------------------------------------------------------------------------- /Chapter07/sentence1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/sentence1.txt -------------------------------------------------------------------------------- /Chapter07/spacex-job-listing.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/spacex-job-listing.html -------------------------------------------------------------------------------- /Chapter07/tech2grams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/tech2grams.py -------------------------------------------------------------------------------- /Chapter07/wotw.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter07/wotw.txt -------------------------------------------------------------------------------- /Chapter08/01_geocode_address.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/01_geocode_address.py -------------------------------------------------------------------------------- /Chapter08/02_geocode_wikipedia_edits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/02_geocode_wikipedia_edits.py -------------------------------------------------------------------------------- /Chapter08/03_visualize_wikipedia_edits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/03_visualize_wikipedia_edits.py -------------------------------------------------------------------------------- /Chapter08/04_so_word_cloud.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/04_so_word_cloud.py -------------------------------------------------------------------------------- /Chapter08/05_wikipedia_scrapy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/05_wikipedia_scrapy.py -------------------------------------------------------------------------------- /Chapter08/06_visualize_wikipedia_links.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/06_visualize_wikipedia_links.py -------------------------------------------------------------------------------- /Chapter08/07_degrees_of_separation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/07_degrees_of_separation.py -------------------------------------------------------------------------------- /Chapter08/buildgrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/buildgrams.py -------------------------------------------------------------------------------- /Chapter08/geo_ips.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/geo_ips.json -------------------------------------------------------------------------------- /Chapter08/punctuation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/punctuation.py -------------------------------------------------------------------------------- /Chapter08/spacex-job-listing.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/spacex-job-listing.html -------------------------------------------------------------------------------- /Chapter08/tech2grams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter08/tech2grams.py -------------------------------------------------------------------------------- /Chapter09/01/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter09/01/api.py -------------------------------------------------------------------------------- /Chapter09/02/__pycache__/hello_microservice.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter09/02/__pycache__/hello_microservice.cpython-36.pyc -------------------------------------------------------------------------------- /Chapter09/02/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter09/02/api.py -------------------------------------------------------------------------------- /Chapter09/03/__pycache__/scraper_microservice.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter09/03/__pycache__/scraper_microservice.cpython-36.pyc -------------------------------------------------------------------------------- /Chapter09/03/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter09/03/api.py -------------------------------------------------------------------------------- /Chapter09/04/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter09/04/api.py -------------------------------------------------------------------------------- /Chapter09/05/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter09/05/api.py -------------------------------------------------------------------------------- /Chapter10/06/hello_microservice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter10/06/hello_microservice.py -------------------------------------------------------------------------------- /Chapter10/06/say_hi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter10/06/say_hi.py -------------------------------------------------------------------------------- /Chapter10/07/call_scraper_microservice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter10/07/call_scraper_microservice.py -------------------------------------------------------------------------------- /Chapter10/07/scraper_microservice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter10/07/scraper_microservice.py -------------------------------------------------------------------------------- /Chapter10/08/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter10/08/Dockerfile -------------------------------------------------------------------------------- /Chapter10/09/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter10/09/Dockerfile -------------------------------------------------------------------------------- /Chapter10/09/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter10/09/api.py -------------------------------------------------------------------------------- /Chapter10/10/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter10/10/docker-compose.yml -------------------------------------------------------------------------------- /Chapter11/03/elasticcloud_starwars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/03/elasticcloud_starwars.py -------------------------------------------------------------------------------- /Chapter11/04/search_starwars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/04/search_starwars.py -------------------------------------------------------------------------------- /Chapter11/05/search_jobs_by_skills.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/05/search_jobs_by_skills.py -------------------------------------------------------------------------------- /Chapter11/06/scraper_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/06/scraper_api.py -------------------------------------------------------------------------------- /Chapter11/06/scraper_microservice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/06/scraper_microservice.py -------------------------------------------------------------------------------- /Chapter11/11/Dockerfile-api: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/11/Dockerfile-api -------------------------------------------------------------------------------- /Chapter11/11/Dockerfile-microservice: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/11/Dockerfile-microservice -------------------------------------------------------------------------------- /Chapter11/11/build-api-container.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/11/build-api-container.sh -------------------------------------------------------------------------------- /Chapter11/11/build-microservice-container.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/11/build-microservice-container.sh -------------------------------------------------------------------------------- /Chapter11/11/scraper_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/11/scraper_api.py -------------------------------------------------------------------------------- /Chapter11/11/scraper_microservice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/11/scraper_microservice.py -------------------------------------------------------------------------------- /Chapter11/12/ScraperClusterKP.pem: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/12/ScraperClusterKP.pem -------------------------------------------------------------------------------- /Chapter11/12/create_cluster_complete.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/12/create_cluster_complete.sh -------------------------------------------------------------------------------- /Chapter11/12/ecsPolicy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/12/ecsPolicy.json -------------------------------------------------------------------------------- /Chapter11/12/nameko-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/12/nameko-config.yaml -------------------------------------------------------------------------------- /Chapter11/12/rolePolicy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/12/rolePolicy.json -------------------------------------------------------------------------------- /Chapter11/12/userdata.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/12/userdata.txt -------------------------------------------------------------------------------- /Chapter11/13/scratch.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/13/scratch.json -------------------------------------------------------------------------------- /Chapter11/13/skel.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/13/skel.json -------------------------------------------------------------------------------- /Chapter11/13/t3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/13/t3.json -------------------------------------------------------------------------------- /Chapter11/13/taskdefinition.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/13/taskdefinition.json -------------------------------------------------------------------------------- /Chapter11/13/taskdefinition2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/13/taskdefinition2.json -------------------------------------------------------------------------------- /Chapter11/13/td.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/Chapter11/13/td.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/README.md -------------------------------------------------------------------------------- /modules/core/__init.py__: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modules/core/file_blob_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/file_blob_writer.py -------------------------------------------------------------------------------- /modules/core/i_blob_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/i_blob_writer.py -------------------------------------------------------------------------------- /modules/core/image_thumbnail_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/image_thumbnail_generator.py -------------------------------------------------------------------------------- /modules/core/s3_blob_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/s3_blob_writer.py -------------------------------------------------------------------------------- /modules/core/s3_bucket_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/s3_bucket_manager.py -------------------------------------------------------------------------------- /modules/core/scraper_engine.py: -------------------------------------------------------------------------------- 1 | import interface 2 | -------------------------------------------------------------------------------- /modules/core/sitemap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/sitemap.py -------------------------------------------------------------------------------- /modules/core/thumbnail.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/thumbnail.jpg -------------------------------------------------------------------------------- /modules/core/website_screenshot_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/website_screenshot_generator.py -------------------------------------------------------------------------------- /modules/core/website_screenshot_with_screenshotapi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/website_screenshot_with_screenshotapi.py -------------------------------------------------------------------------------- /modules/core/youtube_playlist_crawler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/youtube_playlist_crawler.py -------------------------------------------------------------------------------- /modules/core/youtube_playlist_videourl_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/youtube_playlist_videourl_extractor.py -------------------------------------------------------------------------------- /modules/core/youtube_video_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/core/youtube_video_processor.py -------------------------------------------------------------------------------- /modules/imdb/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modules/imdb/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/imdb/models.py -------------------------------------------------------------------------------- /modules/imdb/relationship_crawler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/imdb/relationship_crawler.py -------------------------------------------------------------------------------- /modules/imdb/strategies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/imdb/strategies.py -------------------------------------------------------------------------------- /modules/sojobs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modules/sojobs/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/sojobs/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /modules/sojobs/__pycache__/buildgrams.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/sojobs/__pycache__/buildgrams.cpython-36.pyc -------------------------------------------------------------------------------- /modules/sojobs/__pycache__/punctuation.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/sojobs/__pycache__/punctuation.cpython-36.pyc -------------------------------------------------------------------------------- /modules/sojobs/__pycache__/scraping.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/sojobs/__pycache__/scraping.cpython-36.pyc -------------------------------------------------------------------------------- /modules/sojobs/__pycache__/tech2grams.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/sojobs/__pycache__/tech2grams.cpython-36.pyc -------------------------------------------------------------------------------- /modules/sojobs/buildgrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/sojobs/buildgrams.py -------------------------------------------------------------------------------- /modules/sojobs/punctuation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/sojobs/punctuation.py -------------------------------------------------------------------------------- /modules/sojobs/scraping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/sojobs/scraping.py -------------------------------------------------------------------------------- /modules/sojobs/tech2grams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/sojobs/tech2grams.py -------------------------------------------------------------------------------- /modules/tmdb/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modules/tmdb/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/tmdb/models.py -------------------------------------------------------------------------------- /modules/tmdb/relationship_crawler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/tmdb/relationship_crawler.py -------------------------------------------------------------------------------- /modules/tmdb/strategies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/tmdb/strategies.py -------------------------------------------------------------------------------- /modules/util/const.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/util/const.py -------------------------------------------------------------------------------- /modules/util/logger_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/util/logger_factory.py -------------------------------------------------------------------------------- /modules/util/urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/util/urls.py -------------------------------------------------------------------------------- /modules/wikipedia/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modules/wikipedia/spiders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/modules/wikipedia/spiders.py -------------------------------------------------------------------------------- /scripts/create_mysql_planets.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /www/CrawlDepth0-1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/CrawlDepth0-1.html -------------------------------------------------------------------------------- /www/CrawlDepth0-2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/CrawlDepth0-2.html -------------------------------------------------------------------------------- /www/CrawlDepth0-3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/CrawlDepth0-3.html -------------------------------------------------------------------------------- /www/img/earth-1024x1024.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/earth-1024x1024.jpg -------------------------------------------------------------------------------- /www/img/earth-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/earth-150x150.png -------------------------------------------------------------------------------- /www/img/jupiter-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/jupiter-150x150.png -------------------------------------------------------------------------------- /www/img/mars-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/mars-150x150.png -------------------------------------------------------------------------------- /www/img/mercury-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/mercury-150x150.png -------------------------------------------------------------------------------- /www/img/mercury-640x640.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/mercury-640x640.jpg -------------------------------------------------------------------------------- /www/img/neptune-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/neptune-150x150.png -------------------------------------------------------------------------------- /www/img/pluto-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/pluto-150x150.png -------------------------------------------------------------------------------- /www/img/saturn-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/saturn-150x150.png -------------------------------------------------------------------------------- /www/img/uranus-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/uranus-150x150.png -------------------------------------------------------------------------------- /www/img/venus-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/venus-150x150.png -------------------------------------------------------------------------------- /www/img/venus-800x800.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/img/venus-800x800.jpg -------------------------------------------------------------------------------- /www/planets.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/planets.csv -------------------------------------------------------------------------------- /www/planets.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/planets.html -------------------------------------------------------------------------------- /www/planets.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/planets.json -------------------------------------------------------------------------------- /www/planets.min.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/planets.min.html -------------------------------------------------------------------------------- /www/planets_pandas.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/planets_pandas.csv -------------------------------------------------------------------------------- /www/planets_pandas.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/planets_pandas.json -------------------------------------------------------------------------------- /www/unicode.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/unicode.html -------------------------------------------------------------------------------- /www/urlcodes/unicode.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Python-Web-Scraping-Cookbook/HEAD/www/urlcodes/unicode.html --------------------------------------------------------------------------------